Command that produces this log: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> type_embedding.weight: torch.Size([123, 100]) >>> trans_rep.weight: torch.Size([1024, 1124]) >>> trans_rep.bias: torch.Size([1024]) >>> coref_type_ffn.weight: torch.Size([3, 4096]) >>> coref_type_ffn.bias: torch.Size([3]) n_trainable_params: 561067023, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 19:27:14.946358: step: 4/529, loss: 0.07990811765193939 2023-01-22 19:27:15.986177: step: 8/529, loss: 0.014827210456132889 2023-01-22 19:27:17.024265: step: 12/529, loss: 0.020841358229517937 2023-01-22 19:27:18.069999: step: 16/529, loss: 0.027017703279852867 2023-01-22 19:27:19.128662: step: 20/529, loss: 0.05911005660891533 2023-01-22 19:27:20.187195: step: 24/529, loss: 0.016413463279604912 2023-01-22 19:27:21.248639: step: 28/529, loss: 0.06190330535173416 2023-01-22 19:27:22.300891: step: 32/529, loss: 0.10229873657226562 2023-01-22 19:27:23.334068: step: 36/529, loss: 0.02176942303776741 2023-01-22 19:27:24.385571: step: 40/529, loss: 0.01597859524190426 2023-01-22 19:27:25.428884: step: 44/529, loss: 0.019977517426013947 2023-01-22 19:27:26.477216: step: 48/529, loss: 0.012980473227798939 2023-01-22 19:27:27.526950: step: 52/529, loss: 0.03213465213775635 2023-01-22 19:27:28.577073: step: 56/529, loss: 0.0116121219471097 2023-01-22 19:27:29.630212: step: 60/529, loss: 0.03135773912072182 2023-01-22 19:27:30.693373: step: 64/529, loss: 0.09360960125923157 2023-01-22 19:27:31.757702: step: 68/529, loss: 0.0260729119181633 2023-01-22 19:27:32.832431: step: 72/529, loss: 0.04594697430729866 2023-01-22 19:27:33.881591: step: 76/529, loss: 0.02163085900247097 2023-01-22 19:27:34.937840: step: 80/529, loss: 0.028550773859024048 2023-01-22 19:27:35.987925: step: 84/529, loss: 0.07296416163444519 2023-01-22 19:27:37.047985: step: 88/529, loss: 0.05942222476005554 2023-01-22 19:27:38.098707: step: 92/529, loss: 0.015289267525076866 2023-01-22 19:27:39.156453: step: 96/529, loss: 0.019732104614377022 2023-01-22 19:27:40.228869: step: 100/529, loss: 0.045478615909814835 2023-01-22 19:27:41.289024: step: 104/529, loss: 0.0859794020652771 2023-01-22 19:27:42.330312: step: 108/529, loss: 0.02028040401637554 2023-01-22 19:27:43.385743: step: 112/529, loss: 0.09041469544172287 2023-01-22 19:27:44.430878: step: 116/529, loss: 0.1333049088716507 2023-01-22 19:27:45.496378: step: 120/529, loss: 0.010391704738140106 2023-01-22 19:27:46.545084: step: 124/529, loss: 0.008062057197093964 2023-01-22 19:27:47.619087: step: 128/529, loss: 0.06313350796699524 2023-01-22 19:27:48.671969: step: 132/529, loss: 0.062153410166502 2023-01-22 19:27:49.731164: step: 136/529, loss: 0.04618854820728302 2023-01-22 19:27:50.799458: step: 140/529, loss: 0.02758634462952614 2023-01-22 19:27:51.862581: step: 144/529, loss: 0.032050687819719315 2023-01-22 19:27:52.920863: step: 148/529, loss: 0.01767510175704956 2023-01-22 19:27:53.986776: step: 152/529, loss: 0.020812753587961197 2023-01-22 19:27:55.041748: step: 156/529, loss: 0.11757136881351471 2023-01-22 19:27:56.088739: step: 160/529, loss: 0.057380955666303635 2023-01-22 19:27:57.153728: step: 164/529, loss: 0.09593145549297333 2023-01-22 19:27:58.257799: step: 168/529, loss: 0.055508315563201904 2023-01-22 19:27:59.328615: step: 172/529, loss: 0.023263294249773026 2023-01-22 19:28:00.394542: step: 176/529, loss: 0.017478350549936295 2023-01-22 19:28:01.451541: step: 180/529, loss: 0.023210899904370308 2023-01-22 19:28:02.517931: step: 184/529, loss: 0.013754370622336864 2023-01-22 19:28:03.587581: step: 188/529, loss: 0.015873238444328308 2023-01-22 19:28:04.642324: step: 192/529, loss: 0.01860697753727436 2023-01-22 19:28:05.716391: step: 196/529, loss: 0.04310062900185585 2023-01-22 19:28:06.762486: step: 200/529, loss: 0.014168181456625462 2023-01-22 19:28:07.817077: step: 204/529, loss: 0.00864088162779808 2023-01-22 19:28:08.878048: step: 208/529, loss: 0.044519148766994476 2023-01-22 19:28:09.935795: step: 212/529, loss: 0.017318738624453545 2023-01-22 19:28:10.989171: step: 216/529, loss: 0.03859895467758179 2023-01-22 19:28:12.054180: step: 220/529, loss: 0.018590757623314857 2023-01-22 19:28:13.122965: step: 224/529, loss: 0.016344435513019562 2023-01-22 19:28:14.171194: step: 228/529, loss: 0.02652830071747303 2023-01-22 19:28:15.222859: step: 232/529, loss: 0.06508877873420715 2023-01-22 19:28:16.283008: step: 236/529, loss: 0.024108638986945152 2023-01-22 19:28:17.344001: step: 240/529, loss: 0.02974044904112816 2023-01-22 19:28:18.397232: step: 244/529, loss: 0.009025337174534798 2023-01-22 19:28:19.458729: step: 248/529, loss: 0.03340541943907738 2023-01-22 19:28:20.514481: step: 252/529, loss: 0.04070824384689331 2023-01-22 19:28:21.598279: step: 256/529, loss: 0.010483766905963421 2023-01-22 19:28:22.654535: step: 260/529, loss: 0.03871284797787666 2023-01-22 19:28:23.717657: step: 264/529, loss: 0.036469489336013794 2023-01-22 19:28:24.776263: step: 268/529, loss: 0.07533961534500122 2023-01-22 19:28:25.824218: step: 272/529, loss: 0.017238104715943336 2023-01-22 19:28:26.874595: step: 276/529, loss: 0.048346228897571564 2023-01-22 19:28:27.943067: step: 280/529, loss: 0.009969507344067097 2023-01-22 19:28:29.011944: step: 284/529, loss: 0.0049746073782444 2023-01-22 19:28:30.076279: step: 288/529, loss: 0.011463984847068787 2023-01-22 19:28:31.127512: step: 292/529, loss: 0.012212565168738365 2023-01-22 19:28:32.198907: step: 296/529, loss: 0.07199540734291077 2023-01-22 19:28:33.263447: step: 300/529, loss: 0.0088880630210042 2023-01-22 19:28:34.320176: step: 304/529, loss: 0.012977623380720615 2023-01-22 19:28:35.368694: step: 308/529, loss: 0.0069293733686208725 2023-01-22 19:28:36.439443: step: 312/529, loss: 0.014588208869099617 2023-01-22 19:28:37.501263: step: 316/529, loss: 0.048842888325452805 2023-01-22 19:28:38.558503: step: 320/529, loss: 0.06106474623084068 2023-01-22 19:28:39.614994: step: 324/529, loss: 0.022975649684667587 2023-01-22 19:28:40.684705: step: 328/529, loss: 0.07811424881219864 2023-01-22 19:28:41.748895: step: 332/529, loss: 0.037955913692712784 2023-01-22 19:28:42.817227: step: 336/529, loss: 0.015377218835055828 2023-01-22 19:28:43.871928: step: 340/529, loss: 0.05786743387579918 2023-01-22 19:28:44.924995: step: 344/529, loss: 0.01873079501092434 2023-01-22 19:28:45.987516: step: 348/529, loss: 0.022346196696162224 2023-01-22 19:28:47.049463: step: 352/529, loss: 0.014364014379680157 2023-01-22 19:28:48.102951: step: 356/529, loss: 0.012166554108262062 2023-01-22 19:28:49.173457: step: 360/529, loss: 0.020425790920853615 2023-01-22 19:28:50.235290: step: 364/529, loss: 0.01742572896182537 2023-01-22 19:28:51.282538: step: 368/529, loss: 0.057203106582164764 2023-01-22 19:28:52.364476: step: 372/529, loss: 0.005404599942266941 2023-01-22 19:28:53.436921: step: 376/529, loss: 0.024079270660877228 2023-01-22 19:28:54.504402: step: 380/529, loss: 0.017385238781571388 2023-01-22 19:28:55.570831: step: 384/529, loss: 0.04751858115196228 2023-01-22 19:28:56.613811: step: 388/529, loss: 0.009507374837994576 2023-01-22 19:28:57.679928: step: 392/529, loss: 0.022454271093010902 2023-01-22 19:28:58.745708: step: 396/529, loss: 0.056297417730093 2023-01-22 19:28:59.805384: step: 400/529, loss: 0.012025311589241028 2023-01-22 19:29:00.860330: step: 404/529, loss: 0.016522690653800964 2023-01-22 19:29:01.944859: step: 408/529, loss: 0.020530816167593002 2023-01-22 19:29:02.999582: step: 412/529, loss: 0.016060838475823402 2023-01-22 19:29:04.063293: step: 416/529, loss: 0.02488415688276291 2023-01-22 19:29:05.123218: step: 420/529, loss: 0.020370163023471832 2023-01-22 19:29:06.179004: step: 424/529, loss: 0.019878340885043144 2023-01-22 19:29:07.236570: step: 428/529, loss: 0.004518384579569101 2023-01-22 19:29:08.302973: step: 432/529, loss: 0.03485770523548126 2023-01-22 19:29:09.358185: step: 436/529, loss: 0.01472142618149519 2023-01-22 19:29:10.425049: step: 440/529, loss: 0.035705842077732086 2023-01-22 19:29:11.490630: step: 444/529, loss: 0.011892641894519329 2023-01-22 19:29:12.556623: step: 448/529, loss: 0.05103680491447449 2023-01-22 19:29:13.616541: step: 452/529, loss: 0.012885753996670246 2023-01-22 19:29:14.675801: step: 456/529, loss: 0.008832535706460476 2023-01-22 19:29:15.738451: step: 460/529, loss: 0.010149504989385605 2023-01-22 19:29:16.793649: step: 464/529, loss: 0.014036014676094055 2023-01-22 19:29:17.840584: step: 468/529, loss: 0.043695222586393356 2023-01-22 19:29:18.920625: step: 472/529, loss: 0.013185055926442146 2023-01-22 19:29:19.973820: step: 476/529, loss: 0.04924473166465759 2023-01-22 19:29:21.025307: step: 480/529, loss: 0.012842758558690548 2023-01-22 19:29:22.076138: step: 484/529, loss: 0.01636068895459175 2023-01-22 19:29:23.144921: step: 488/529, loss: 0.011149060912430286 2023-01-22 19:29:24.199579: step: 492/529, loss: 0.040947914123535156 2023-01-22 19:29:25.259049: step: 496/529, loss: 0.015999894589185715 2023-01-22 19:29:26.317448: step: 500/529, loss: 0.008272661827504635 2023-01-22 19:29:27.370542: step: 504/529, loss: 0.016934100538492203 2023-01-22 19:29:28.433702: step: 508/529, loss: 0.010307268239557743 2023-01-22 19:29:29.499702: step: 512/529, loss: 0.0783538892865181 2023-01-22 19:29:30.544625: step: 516/529, loss: 0.008236422203481197 2023-01-22 19:29:31.606851: step: 520/529, loss: 0.012457845732569695 2023-01-22 19:29:32.682721: step: 524/529, loss: 0.009356766939163208 2023-01-22 19:29:33.731871: step: 528/529, loss: 0.002937601413577795 2023-01-22 19:29:34.817304: step: 532/529, loss: 0.008586671203374863 2023-01-22 19:29:35.863517: step: 536/529, loss: 0.04243906959891319 2023-01-22 19:29:36.918965: step: 540/529, loss: 0.008048910647630692 2023-01-22 19:29:37.977025: step: 544/529, loss: 0.062262147665023804 2023-01-22 19:29:39.042602: step: 548/529, loss: 0.028016511350870132 2023-01-22 19:29:40.095188: step: 552/529, loss: 0.012315897271037102 2023-01-22 19:29:41.155116: step: 556/529, loss: 0.01563815400004387 2023-01-22 19:29:42.217492: step: 560/529, loss: 0.030016034841537476 2023-01-22 19:29:43.271086: step: 564/529, loss: 0.04939638078212738 2023-01-22 19:29:44.319555: step: 568/529, loss: 0.005209965165704489 2023-01-22 19:29:45.383735: step: 572/529, loss: 0.00775504857301712 2023-01-22 19:29:46.435309: step: 576/529, loss: 0.010861270129680634 2023-01-22 19:29:47.499742: step: 580/529, loss: 0.020030926913022995 2023-01-22 19:29:48.574268: step: 584/529, loss: 0.020260784775018692 2023-01-22 19:29:49.627926: step: 588/529, loss: 0.008802361786365509 2023-01-22 19:29:50.679294: step: 592/529, loss: 0.008022286929190159 2023-01-22 19:29:51.745152: step: 596/529, loss: 0.004244903568178415 2023-01-22 19:29:52.804400: step: 600/529, loss: 0.007763395085930824 2023-01-22 19:29:53.855111: step: 604/529, loss: 0.08844374120235443 2023-01-22 19:29:54.910793: step: 608/529, loss: 0.01280874665826559 2023-01-22 19:29:55.957278: step: 612/529, loss: 0.03086327202618122 2023-01-22 19:29:57.005954: step: 616/529, loss: 0.012198288924992085 2023-01-22 19:29:58.065253: step: 620/529, loss: 0.01792272739112377 2023-01-22 19:29:59.129955: step: 624/529, loss: 0.006117998156696558 2023-01-22 19:30:00.195306: step: 628/529, loss: 0.01971031166613102 2023-01-22 19:30:01.261829: step: 632/529, loss: 0.031505003571510315 2023-01-22 19:30:02.321128: step: 636/529, loss: 0.02580244466662407 2023-01-22 19:30:03.384624: step: 640/529, loss: 0.022704098373651505 2023-01-22 19:30:04.462966: step: 644/529, loss: 0.039758216589689255 2023-01-22 19:30:05.513794: step: 648/529, loss: 0.015006549656391144 2023-01-22 19:30:06.559971: step: 652/529, loss: 0.007417464628815651 2023-01-22 19:30:07.608897: step: 656/529, loss: 0.025882484391331673 2023-01-22 19:30:08.650516: step: 660/529, loss: 0.012646481394767761 2023-01-22 19:30:09.709142: step: 664/529, loss: 0.02698586694896221 2023-01-22 19:30:10.768220: step: 668/529, loss: 0.008092718198895454 2023-01-22 19:30:11.833109: step: 672/529, loss: 0.006220159586519003 2023-01-22 19:30:12.888813: step: 676/529, loss: 0.04047471284866333 2023-01-22 19:30:13.935932: step: 680/529, loss: 0.03407733514904976 2023-01-22 19:30:14.991711: step: 684/529, loss: 0.010705075226724148 2023-01-22 19:30:16.038373: step: 688/529, loss: 0.013215168379247189 2023-01-22 19:30:17.082292: step: 692/529, loss: 0.011659905314445496 2023-01-22 19:30:18.155727: step: 696/529, loss: 0.010189338587224483 2023-01-22 19:30:19.242147: step: 700/529, loss: 0.008281991817057133 2023-01-22 19:30:20.315264: step: 704/529, loss: 0.04534679278731346 2023-01-22 19:30:21.372634: step: 708/529, loss: 0.002377257449552417 2023-01-22 19:30:22.426497: step: 712/529, loss: 0.012265834026038647 2023-01-22 19:30:23.473534: step: 716/529, loss: 0.014682501554489136 2023-01-22 19:30:24.534410: step: 720/529, loss: 0.00967047642916441 2023-01-22 19:30:25.582255: step: 724/529, loss: 0.04534053057432175 2023-01-22 19:30:26.645075: step: 728/529, loss: 0.009595212526619434 2023-01-22 19:30:27.708441: step: 732/529, loss: 0.016976941376924515 2023-01-22 19:30:28.776060: step: 736/529, loss: 0.03377556800842285 2023-01-22 19:30:29.840121: step: 740/529, loss: 0.021184319630265236 2023-01-22 19:30:30.895993: step: 744/529, loss: 0.012997478246688843 2023-01-22 19:30:31.956293: step: 748/529, loss: 0.024401886388659477 2023-01-22 19:30:33.002239: step: 752/529, loss: 0.05132303759455681 2023-01-22 19:30:34.059893: step: 756/529, loss: 0.011608830653131008 2023-01-22 19:30:35.112219: step: 760/529, loss: 0.012680543586611748 2023-01-22 19:30:36.175785: step: 764/529, loss: 0.03228800371289253 2023-01-22 19:30:37.210781: step: 768/529, loss: 0.026904243975877762 2023-01-22 19:30:38.269164: step: 772/529, loss: 0.027837267145514488 2023-01-22 19:30:39.323755: step: 776/529, loss: 0.04205892235040665 2023-01-22 19:30:40.397578: step: 780/529, loss: 0.008602812886238098 2023-01-22 19:30:41.444658: step: 784/529, loss: 0.06736654043197632 2023-01-22 19:30:42.494462: step: 788/529, loss: 0.03358781337738037 2023-01-22 19:30:43.552810: step: 792/529, loss: 0.025038380175828934 2023-01-22 19:30:44.623899: step: 796/529, loss: 0.049624573439359665 2023-01-22 19:30:45.701659: step: 800/529, loss: 0.06829757243394852 2023-01-22 19:30:46.745875: step: 804/529, loss: 0.04474746435880661 2023-01-22 19:30:47.791514: step: 808/529, loss: 0.052247192710638046 2023-01-22 19:30:48.838665: step: 812/529, loss: 0.03364172205328941 2023-01-22 19:30:49.902607: step: 816/529, loss: 0.010718918405473232 2023-01-22 19:30:50.951161: step: 820/529, loss: 0.005122782196849585 2023-01-22 19:30:52.014628: step: 824/529, loss: 0.03489316627383232 2023-01-22 19:30:53.070115: step: 828/529, loss: 0.009914408437907696 2023-01-22 19:30:54.138241: step: 832/529, loss: 0.027348777279257774 2023-01-22 19:30:55.185499: step: 836/529, loss: 0.009882641024887562 2023-01-22 19:30:56.245014: step: 840/529, loss: 0.013581992127001286 2023-01-22 19:30:57.341141: step: 844/529, loss: 0.03712452948093414 2023-01-22 19:30:58.400092: step: 848/529, loss: 0.014871868304908276 2023-01-22 19:30:59.474900: step: 852/529, loss: 0.018262458965182304 2023-01-22 19:31:00.536438: step: 856/529, loss: 0.034095119684934616 2023-01-22 19:31:01.596026: step: 860/529, loss: 0.010032016783952713 2023-01-22 19:31:02.665803: step: 864/529, loss: 0.034494798630476 2023-01-22 19:31:03.723570: step: 868/529, loss: 0.04357277601957321 2023-01-22 19:31:04.782427: step: 872/529, loss: 0.027265435084700584 2023-01-22 19:31:05.853432: step: 876/529, loss: 0.013961665332317352 2023-01-22 19:31:06.925460: step: 880/529, loss: 0.007439268287271261 2023-01-22 19:31:07.980043: step: 884/529, loss: 0.017650846391916275 2023-01-22 19:31:09.024014: step: 888/529, loss: 0.043088555335998535 2023-01-22 19:31:10.098887: step: 892/529, loss: 0.023871731013059616 2023-01-22 19:31:11.176823: step: 896/529, loss: 0.0229964442551136 2023-01-22 19:31:12.236882: step: 900/529, loss: 0.0036926409229636192 2023-01-22 19:31:13.304804: step: 904/529, loss: 0.013771359808743 2023-01-22 19:31:14.381959: step: 908/529, loss: 0.010296299122273922 2023-01-22 19:31:15.442093: step: 912/529, loss: 0.017732994630932808 2023-01-22 19:31:16.506338: step: 916/529, loss: 0.025946009904146194 2023-01-22 19:31:17.550437: step: 920/529, loss: 0.049282774329185486 2023-01-22 19:31:18.600000: step: 924/529, loss: 0.015098088420927525 2023-01-22 19:31:19.655211: step: 928/529, loss: 0.005763272289186716 2023-01-22 19:31:20.706296: step: 932/529, loss: 0.00590843241661787 2023-01-22 19:31:21.763419: step: 936/529, loss: 0.0069006336852908134 2023-01-22 19:31:22.836540: step: 940/529, loss: 0.011012403294444084 2023-01-22 19:31:23.890326: step: 944/529, loss: 0.034919530153274536 2023-01-22 19:31:24.944078: step: 948/529, loss: 0.006291705183684826 2023-01-22 19:31:26.006280: step: 952/529, loss: 0.00841736514121294 2023-01-22 19:31:27.053989: step: 956/529, loss: 0.011827602051198483 2023-01-22 19:31:28.106603: step: 960/529, loss: 0.019710896536707878 2023-01-22 19:31:29.156798: step: 964/529, loss: 0.0038844586815685034 2023-01-22 19:31:30.201607: step: 968/529, loss: 0.0070901853032410145 2023-01-22 19:31:31.247138: step: 972/529, loss: 0.014970028772950172 2023-01-22 19:31:32.294770: step: 976/529, loss: 0.014748920686542988 2023-01-22 19:31:33.357442: step: 980/529, loss: 0.04005814716219902 2023-01-22 19:31:34.410044: step: 984/529, loss: 0.019384905695915222 2023-01-22 19:31:35.476372: step: 988/529, loss: 0.013861997984349728 2023-01-22 19:31:36.545326: step: 992/529, loss: 0.01489313691854477 2023-01-22 19:31:37.592977: step: 996/529, loss: 0.012733696959912777 2023-01-22 19:31:38.626481: step: 1000/529, loss: 0.015110542997717857 2023-01-22 19:31:39.661438: step: 1004/529, loss: 0.016543498262763023 2023-01-22 19:31:40.699911: step: 1008/529, loss: 0.053024791181087494 2023-01-22 19:31:41.759245: step: 1012/529, loss: 0.009391451254487038 2023-01-22 19:31:42.828529: step: 1016/529, loss: 0.014434714801609516 2023-01-22 19:31:43.879635: step: 1020/529, loss: 0.031691450625658035 2023-01-22 19:31:44.943540: step: 1024/529, loss: 0.009974936954677105 2023-01-22 19:31:46.036270: step: 1028/529, loss: 0.06868696957826614 2023-01-22 19:31:47.109928: step: 1032/529, loss: 0.05398740991950035 2023-01-22 19:31:48.166967: step: 1036/529, loss: 0.0282050222158432 2023-01-22 19:31:49.241382: step: 1040/529, loss: 0.0057012466713786125 2023-01-22 19:31:50.295994: step: 1044/529, loss: 0.04486163705587387 2023-01-22 19:31:51.353337: step: 1048/529, loss: 0.0355050191283226 2023-01-22 19:31:52.410914: step: 1052/529, loss: 0.01387431938201189 2023-01-22 19:31:53.464595: step: 1056/529, loss: 0.008000961504876614 2023-01-22 19:31:54.508355: step: 1060/529, loss: 0.0053679319098591805 2023-01-22 19:31:55.579421: step: 1064/529, loss: 0.01253668311983347 2023-01-22 19:31:56.630781: step: 1068/529, loss: 0.009002232924103737 2023-01-22 19:31:57.680226: step: 1072/529, loss: 0.00570894218981266 2023-01-22 19:31:58.736468: step: 1076/529, loss: 0.054735440760850906 2023-01-22 19:31:59.782876: step: 1080/529, loss: 0.006681904662400484 2023-01-22 19:32:00.845413: step: 1084/529, loss: 0.017748413607478142 2023-01-22 19:32:01.908284: step: 1088/529, loss: 0.007863124832510948 2023-01-22 19:32:02.955814: step: 1092/529, loss: 0.006316049490123987 2023-01-22 19:32:04.022679: step: 1096/529, loss: 0.011292953044176102 2023-01-22 19:32:05.097636: step: 1100/529, loss: 0.012597151100635529 2023-01-22 19:32:06.155735: step: 1104/529, loss: 0.04695584252476692 2023-01-22 19:32:07.204293: step: 1108/529, loss: 0.007417854852974415 2023-01-22 19:32:08.256816: step: 1112/529, loss: 0.032040588557720184 2023-01-22 19:32:09.311863: step: 1116/529, loss: 0.019973304122686386 2023-01-22 19:32:10.354990: step: 1120/529, loss: 0.012334685772657394 2023-01-22 19:32:11.417061: step: 1124/529, loss: 0.00912613794207573 2023-01-22 19:32:12.464419: step: 1128/529, loss: 0.011598779819905758 2023-01-22 19:32:13.502570: step: 1132/529, loss: 0.058191776275634766 2023-01-22 19:32:14.565026: step: 1136/529, loss: 0.04152904823422432 2023-01-22 19:32:15.603919: step: 1140/529, loss: 0.013890745118260384 2023-01-22 19:32:16.639491: step: 1144/529, loss: 0.014456016942858696 2023-01-22 19:32:17.716547: step: 1148/529, loss: 0.01755940169095993 2023-01-22 19:32:18.766223: step: 1152/529, loss: 0.039868682622909546 2023-01-22 19:32:19.804823: step: 1156/529, loss: 0.008498022332787514 2023-01-22 19:32:20.852780: step: 1160/529, loss: 0.006830099504441023 2023-01-22 19:32:21.928315: step: 1164/529, loss: 0.012597493827342987 2023-01-22 19:32:22.987548: step: 1168/529, loss: 0.006805849727243185 2023-01-22 19:32:24.048669: step: 1172/529, loss: 0.037344783544540405 2023-01-22 19:32:25.096955: step: 1176/529, loss: 0.033238671720027924 2023-01-22 19:32:26.153018: step: 1180/529, loss: 0.00649050110951066 2023-01-22 19:32:27.207859: step: 1184/529, loss: 0.017561092972755432 2023-01-22 19:32:28.271346: step: 1188/529, loss: 0.013161523267626762 2023-01-22 19:32:29.335016: step: 1192/529, loss: 0.028273042291402817 2023-01-22 19:32:30.390164: step: 1196/529, loss: 0.05599603429436684 2023-01-22 19:32:31.434446: step: 1200/529, loss: 0.04154041409492493 2023-01-22 19:32:32.493310: step: 1204/529, loss: 0.00657001743093133 2023-01-22 19:32:33.558673: step: 1208/529, loss: 0.011616759933531284 2023-01-22 19:32:34.627788: step: 1212/529, loss: 0.010004591196775436 2023-01-22 19:32:35.680719: step: 1216/529, loss: 0.006797294598072767 2023-01-22 19:32:36.740924: step: 1220/529, loss: 0.015774471685290337 2023-01-22 19:32:37.788741: step: 1224/529, loss: 0.06791481375694275 2023-01-22 19:32:38.841752: step: 1228/529, loss: 0.06490154564380646 2023-01-22 19:32:39.888526: step: 1232/529, loss: 0.007676573935896158 2023-01-22 19:32:40.952325: step: 1236/529, loss: 0.015669338405132294 2023-01-22 19:32:42.004793: step: 1240/529, loss: 0.010901951231062412 2023-01-22 19:32:43.052221: step: 1244/529, loss: 0.031759507954120636 2023-01-22 19:32:44.099492: step: 1248/529, loss: 0.05133504047989845 2023-01-22 19:32:45.154888: step: 1252/529, loss: 0.01476442813873291 2023-01-22 19:32:46.217964: step: 1256/529, loss: 0.011178816668689251 2023-01-22 19:32:47.285018: step: 1260/529, loss: 0.00566266430541873 2023-01-22 19:32:48.360274: step: 1264/529, loss: 0.0105745829641819 2023-01-22 19:32:49.440329: step: 1268/529, loss: 0.010931075550615788 2023-01-22 19:32:50.491842: step: 1272/529, loss: 0.019857797771692276 2023-01-22 19:32:51.553592: step: 1276/529, loss: 0.026732290163636208 2023-01-22 19:32:52.601364: step: 1280/529, loss: 0.03947070240974426 2023-01-22 19:32:53.656260: step: 1284/529, loss: 0.016308046877384186 2023-01-22 19:32:54.722769: step: 1288/529, loss: 0.009445912204682827 2023-01-22 19:32:55.769982: step: 1292/529, loss: 0.008985220454633236 2023-01-22 19:32:56.813894: step: 1296/529, loss: 0.009500646032392979 2023-01-22 19:32:57.866665: step: 1300/529, loss: 0.020225435495376587 2023-01-22 19:32:58.947232: step: 1304/529, loss: 0.005524156149476767 2023-01-22 19:33:00.005389: step: 1308/529, loss: 0.008743815124034882 2023-01-22 19:33:01.051748: step: 1312/529, loss: 0.029500380158424377 2023-01-22 19:33:02.116445: step: 1316/529, loss: 0.007472315803170204 2023-01-22 19:33:03.169519: step: 1320/529, loss: 0.007899194024503231 2023-01-22 19:33:04.222443: step: 1324/529, loss: 0.006183188408613205 2023-01-22 19:33:05.275517: step: 1328/529, loss: 0.010686767287552357 2023-01-22 19:33:06.313028: step: 1332/529, loss: 0.007414683699607849 2023-01-22 19:33:07.371820: step: 1336/529, loss: 0.01777159422636032 2023-01-22 19:33:08.433081: step: 1340/529, loss: 0.0460592545568943 2023-01-22 19:33:09.494080: step: 1344/529, loss: 0.015033723786473274 2023-01-22 19:33:10.535824: step: 1348/529, loss: 0.0044701192528009415 2023-01-22 19:33:11.582148: step: 1352/529, loss: 0.007065320387482643 2023-01-22 19:33:12.619350: step: 1356/529, loss: 0.013166139833629131 2023-01-22 19:33:13.667569: step: 1360/529, loss: 0.018071308732032776 2023-01-22 19:33:14.715870: step: 1364/529, loss: 0.05738624185323715 2023-01-22 19:33:15.762785: step: 1368/529, loss: 0.016066044569015503 2023-01-22 19:33:16.842780: step: 1372/529, loss: 0.020052537322044373 2023-01-22 19:33:17.908420: step: 1376/529, loss: 0.02242439240217209 2023-01-22 19:33:18.957870: step: 1380/529, loss: 0.0059359255246818066 2023-01-22 19:33:20.025044: step: 1384/529, loss: 0.009813246317207813 2023-01-22 19:33:21.064375: step: 1388/529, loss: 0.02666051872074604 2023-01-22 19:33:22.120496: step: 1392/529, loss: 0.015051918104290962 2023-01-22 19:33:23.182739: step: 1396/529, loss: 0.02678958885371685 2023-01-22 19:33:24.242890: step: 1400/529, loss: 0.01420626137405634 2023-01-22 19:33:25.311063: step: 1404/529, loss: 0.006496739108115435 2023-01-22 19:33:26.355853: step: 1408/529, loss: 0.00599666265770793 2023-01-22 19:33:27.401191: step: 1412/529, loss: 0.0068876054137945175 2023-01-22 19:33:28.460893: step: 1416/529, loss: 0.009554808959364891 2023-01-22 19:33:29.514237: step: 1420/529, loss: 0.022742340341210365 2023-01-22 19:33:30.565496: step: 1424/529, loss: 0.008414790034294128 2023-01-22 19:33:31.614975: step: 1428/529, loss: 0.012524743564426899 2023-01-22 19:33:32.672119: step: 1432/529, loss: 0.017238492146134377 2023-01-22 19:33:33.714782: step: 1436/529, loss: 0.006536091212183237 2023-01-22 19:33:34.767041: step: 1440/529, loss: 0.014208833687007427 2023-01-22 19:33:35.826260: step: 1444/529, loss: 0.009598673321306705 2023-01-22 19:33:36.880821: step: 1448/529, loss: 0.03988807275891304 2023-01-22 19:33:37.944432: step: 1452/529, loss: 0.011487000621855259 2023-01-22 19:33:38.997959: step: 1456/529, loss: 0.036507438868284225 2023-01-22 19:33:40.062635: step: 1460/529, loss: 0.021896976977586746 2023-01-22 19:33:41.118587: step: 1464/529, loss: 0.016844185069203377 2023-01-22 19:33:42.169371: step: 1468/529, loss: 0.011329648084938526 2023-01-22 19:33:43.212134: step: 1472/529, loss: 0.013393408618867397 2023-01-22 19:33:44.261138: step: 1476/529, loss: 0.007768715266138315 2023-01-22 19:33:45.311009: step: 1480/529, loss: 0.006772950757294893 2023-01-22 19:33:46.375341: step: 1484/529, loss: 0.013589548878371716 2023-01-22 19:33:47.424622: step: 1488/529, loss: 0.02934175357222557 2023-01-22 19:33:48.484574: step: 1492/529, loss: 0.033652111887931824 2023-01-22 19:33:49.534500: step: 1496/529, loss: 0.017602689564228058 2023-01-22 19:33:50.585092: step: 1500/529, loss: 0.059618715196847916 2023-01-22 19:33:51.641389: step: 1504/529, loss: 0.007108935620635748 2023-01-22 19:33:52.695865: step: 1508/529, loss: 0.011854098178446293 2023-01-22 19:33:53.750210: step: 1512/529, loss: 0.01667722873389721 2023-01-22 19:33:54.801731: step: 1516/529, loss: 0.008631790056824684 2023-01-22 19:33:55.841348: step: 1520/529, loss: 0.01149787474423647 2023-01-22 19:33:56.895580: step: 1524/529, loss: 0.04335061460733414 2023-01-22 19:33:57.943662: step: 1528/529, loss: 0.01972944103181362 2023-01-22 19:33:58.988114: step: 1532/529, loss: 0.03526837006211281 2023-01-22 19:34:00.039465: step: 1536/529, loss: 0.026519054546952248 2023-01-22 19:34:01.088711: step: 1540/529, loss: 0.028840841725468636 2023-01-22 19:34:02.154791: step: 1544/529, loss: 0.014388822950422764 2023-01-22 19:34:03.202198: step: 1548/529, loss: 0.011385641992092133 2023-01-22 19:34:04.271813: step: 1552/529, loss: 0.014687527902424335 2023-01-22 19:34:05.330188: step: 1556/529, loss: 0.011664765886962414 2023-01-22 19:34:06.374140: step: 1560/529, loss: 0.017614420503377914 2023-01-22 19:34:07.436281: step: 1564/529, loss: 0.07989468425512314 2023-01-22 19:34:08.496490: step: 1568/529, loss: 0.05172615125775337 2023-01-22 19:34:09.548095: step: 1572/529, loss: 0.013751637190580368 2023-01-22 19:34:10.625675: step: 1576/529, loss: 0.00734727643430233 2023-01-22 19:34:11.682103: step: 1580/529, loss: 0.026030514389276505 2023-01-22 19:34:12.745659: step: 1584/529, loss: 0.05756397545337677 2023-01-22 19:34:13.794416: step: 1588/529, loss: 0.015477882698178291 2023-01-22 19:34:14.843750: step: 1592/529, loss: 0.007318898104131222 2023-01-22 19:34:15.894040: step: 1596/529, loss: 0.011881709098815918 2023-01-22 19:34:16.942088: step: 1600/529, loss: 0.01190242636948824 2023-01-22 19:34:18.003682: step: 1604/529, loss: 0.005208552815020084 2023-01-22 19:34:19.061944: step: 1608/529, loss: 0.006001685280352831 2023-01-22 19:34:20.117861: step: 1612/529, loss: 0.014555458910763264 2023-01-22 19:34:21.160559: step: 1616/529, loss: 0.04213704168796539 2023-01-22 19:34:22.226727: step: 1620/529, loss: 0.004783819429576397 2023-01-22 19:34:23.285114: step: 1624/529, loss: 0.008820750750601292 2023-01-22 19:34:24.332411: step: 1628/529, loss: 0.00777822220697999 2023-01-22 19:34:25.377301: step: 1632/529, loss: 0.02491750754415989 2023-01-22 19:34:26.417108: step: 1636/529, loss: 0.0062635233625769615 2023-01-22 19:34:27.464070: step: 1640/529, loss: 0.00697286007925868 2023-01-22 19:34:28.513517: step: 1644/529, loss: 0.002969509456306696 2023-01-22 19:34:29.559380: step: 1648/529, loss: 0.012704771012067795 2023-01-22 19:34:30.592765: step: 1652/529, loss: 0.006044019944965839 2023-01-22 19:34:31.653202: step: 1656/529, loss: 0.059056662023067474 2023-01-22 19:34:32.695394: step: 1660/529, loss: 0.06059417128562927 2023-01-22 19:34:33.746257: step: 1664/529, loss: 0.027641812339425087 2023-01-22 19:34:34.813590: step: 1668/529, loss: 0.01789151318371296 2023-01-22 19:34:35.854073: step: 1672/529, loss: 0.005856847390532494 2023-01-22 19:34:36.901541: step: 1676/529, loss: 0.0241501796990633 2023-01-22 19:34:37.948466: step: 1680/529, loss: 0.05967726558446884 2023-01-22 19:34:39.000885: step: 1684/529, loss: 0.030968137085437775 2023-01-22 19:34:40.055577: step: 1688/529, loss: 0.042150989174842834 2023-01-22 19:34:41.139356: step: 1692/529, loss: 0.006490673869848251 2023-01-22 19:34:42.187251: step: 1696/529, loss: 0.010684220120310783 2023-01-22 19:34:43.241599: step: 1700/529, loss: 0.008071883581578732 2023-01-22 19:34:44.322013: step: 1704/529, loss: 0.061374738812446594 2023-01-22 19:34:45.364949: step: 1708/529, loss: 0.009467871859669685 2023-01-22 19:34:46.410720: step: 1712/529, loss: 0.00471325870603323 2023-01-22 19:34:47.452431: step: 1716/529, loss: 0.010763736441731453 2023-01-22 19:34:48.509245: step: 1720/529, loss: 0.02753886952996254 2023-01-22 19:34:49.557469: step: 1724/529, loss: 0.0 2023-01-22 19:34:50.614909: step: 1728/529, loss: 0.07493897527456284 2023-01-22 19:34:51.674642: step: 1732/529, loss: 0.06929270178079605 2023-01-22 19:34:52.730970: step: 1736/529, loss: 0.015859266743063927 2023-01-22 19:34:53.798876: step: 1740/529, loss: 0.00466306135058403 2023-01-22 19:34:54.867272: step: 1744/529, loss: 0.01314464956521988 2023-01-22 19:34:55.920903: step: 1748/529, loss: 0.013157541863620281 2023-01-22 19:34:56.994473: step: 1752/529, loss: 0.011773504316806793 2023-01-22 19:34:58.049103: step: 1756/529, loss: 0.0172476414591074 2023-01-22 19:34:59.103475: step: 1760/529, loss: 0.003939643502235413 2023-01-22 19:35:00.157072: step: 1764/529, loss: 0.02368035353720188 2023-01-22 19:35:01.207965: step: 1768/529, loss: 0.04851626232266426 2023-01-22 19:35:02.293183: step: 1772/529, loss: 0.011612799018621445 2023-01-22 19:35:03.362211: step: 1776/529, loss: 0.037775591015815735 2023-01-22 19:35:04.399033: step: 1780/529, loss: 0.017987987026572227 2023-01-22 19:35:05.453689: step: 1784/529, loss: 0.01180795207619667 2023-01-22 19:35:06.518546: step: 1788/529, loss: 0.008999248966574669 2023-01-22 19:35:07.580915: step: 1792/529, loss: 0.0 2023-01-22 19:35:08.645231: step: 1796/529, loss: 0.04875265434384346 2023-01-22 19:35:09.701679: step: 1800/529, loss: 0.06563683599233627 2023-01-22 19:35:10.752862: step: 1804/529, loss: 0.01794552430510521 2023-01-22 19:35:11.801124: step: 1808/529, loss: 0.004249773919582367 2023-01-22 19:35:12.846145: step: 1812/529, loss: 0.07462327927350998 2023-01-22 19:35:13.893391: step: 1816/529, loss: 0.034947510808706284 2023-01-22 19:35:14.920224: step: 1820/529, loss: 0.012136394158005714 2023-01-22 19:35:15.969280: step: 1824/529, loss: 0.0226756501942873 2023-01-22 19:35:17.014385: step: 1828/529, loss: 0.03708178922533989 2023-01-22 19:35:18.076186: step: 1832/529, loss: 0.04001658037304878 2023-01-22 19:35:19.141896: step: 1836/529, loss: 0.002695823786780238 2023-01-22 19:35:20.206544: step: 1840/529, loss: 0.03813839703798294 2023-01-22 19:35:21.252740: step: 1844/529, loss: 0.009640282951295376 2023-01-22 19:35:22.307378: step: 1848/529, loss: 0.017239144071936607 2023-01-22 19:35:23.353529: step: 1852/529, loss: 0.028444131836295128 2023-01-22 19:35:24.391181: step: 1856/529, loss: 0.09313053637742996 2023-01-22 19:35:25.443418: step: 1860/529, loss: 0.011019841767847538 2023-01-22 19:35:26.496966: step: 1864/529, loss: 0.013844712637364864 2023-01-22 19:35:27.555624: step: 1868/529, loss: 0.002909870119765401 2023-01-22 19:35:28.609428: step: 1872/529, loss: 0.012615899555385113 2023-01-22 19:35:29.657080: step: 1876/529, loss: 0.00492201978340745 2023-01-22 19:35:30.727361: step: 1880/529, loss: 0.0061734337359666824 2023-01-22 19:35:31.799004: step: 1884/529, loss: 0.01600251905620098 2023-01-22 19:35:32.847674: step: 1888/529, loss: 0.01045612245798111 2023-01-22 19:35:33.901353: step: 1892/529, loss: 0.014157215133309364 2023-01-22 19:35:34.952526: step: 1896/529, loss: 0.01483991090208292 2023-01-22 19:35:36.002902: step: 1900/529, loss: 0.09080835431814194 2023-01-22 19:35:37.069883: step: 1904/529, loss: 0.014748928137123585 2023-01-22 19:35:38.116135: step: 1908/529, loss: 0.0028220252133905888 2023-01-22 19:35:39.165902: step: 1912/529, loss: 0.03067260980606079 2023-01-22 19:35:40.216014: step: 1916/529, loss: 0.009249846450984478 2023-01-22 19:35:41.277641: step: 1920/529, loss: 0.013515912927687168 2023-01-22 19:35:42.317386: step: 1924/529, loss: 0.003011218970641494 2023-01-22 19:35:43.384188: step: 1928/529, loss: 0.010608375072479248 2023-01-22 19:35:44.443469: step: 1932/529, loss: 0.011607460677623749 2023-01-22 19:35:45.502069: step: 1936/529, loss: 0.008696336299180984 2023-01-22 19:35:46.566005: step: 1940/529, loss: 0.011690421029925346 2023-01-22 19:35:47.621086: step: 1944/529, loss: 0.01527523435652256 2023-01-22 19:35:48.675203: step: 1948/529, loss: 0.04605474695563316 2023-01-22 19:35:49.723391: step: 1952/529, loss: 0.029919173568487167 2023-01-22 19:35:50.787031: step: 1956/529, loss: 0.022050464525818825 2023-01-22 19:35:51.839138: step: 1960/529, loss: 0.03613423556089401 2023-01-22 19:35:52.903731: step: 1964/529, loss: 0.01881842315196991 2023-01-22 19:35:53.971017: step: 1968/529, loss: 0.009463679045438766 2023-01-22 19:35:55.041518: step: 1972/529, loss: 0.05390448868274689 2023-01-22 19:35:56.081488: step: 1976/529, loss: 0.00708890613168478 2023-01-22 19:35:57.152334: step: 1980/529, loss: 0.026748839765787125 2023-01-22 19:35:58.201511: step: 1984/529, loss: 0.00588876660913229 2023-01-22 19:35:59.259098: step: 1988/529, loss: 0.05423406884074211 2023-01-22 19:36:00.317436: step: 1992/529, loss: 0.07117541134357452 2023-01-22 19:36:01.377582: step: 1996/529, loss: 0.061825137585401535 2023-01-22 19:36:02.433783: step: 2000/529, loss: 0.028582893311977386 2023-01-22 19:36:03.505377: step: 2004/529, loss: 0.01340029202401638 2023-01-22 19:36:04.575169: step: 2008/529, loss: 0.008067806251347065 2023-01-22 19:36:05.635204: step: 2012/529, loss: 0.012643524445593357 2023-01-22 19:36:06.707200: step: 2016/529, loss: 0.008147449232637882 2023-01-22 19:36:07.767047: step: 2020/529, loss: 0.006248083431273699 2023-01-22 19:36:08.812420: step: 2024/529, loss: 0.013585902750492096 2023-01-22 19:36:09.861248: step: 2028/529, loss: 0.0667610764503479 2023-01-22 19:36:10.915181: step: 2032/529, loss: 0.017219895496964455 2023-01-22 19:36:11.971922: step: 2036/529, loss: 0.006015201099216938 2023-01-22 19:36:13.012571: step: 2040/529, loss: 0.01091675739735365 2023-01-22 19:36:14.074426: step: 2044/529, loss: 0.0078117321245372295 2023-01-22 19:36:15.132706: step: 2048/529, loss: 0.009715505875647068 2023-01-22 19:36:16.188046: step: 2052/529, loss: 0.016771098598837852 2023-01-22 19:36:17.234855: step: 2056/529, loss: 0.019362032413482666 2023-01-22 19:36:18.275902: step: 2060/529, loss: 0.006257068831473589 2023-01-22 19:36:19.310504: step: 2064/529, loss: 0.014119431376457214 2023-01-22 19:36:20.361854: step: 2068/529, loss: 0.04814297705888748 2023-01-22 19:36:21.414482: step: 2072/529, loss: 0.004365266766399145 2023-01-22 19:36:22.469140: step: 2076/529, loss: 0.05356168746948242 2023-01-22 19:36:23.525303: step: 2080/529, loss: 0.03512370586395264 2023-01-22 19:36:24.596338: step: 2084/529, loss: 0.012180483900010586 2023-01-22 19:36:25.630926: step: 2088/529, loss: 0.007121897768229246 2023-01-22 19:36:26.683472: step: 2092/529, loss: 0.009761323221027851 2023-01-22 19:36:27.730871: step: 2096/529, loss: 0.006173328496515751 2023-01-22 19:36:28.789348: step: 2100/529, loss: 0.017521370202302933 2023-01-22 19:36:29.872157: step: 2104/529, loss: 0.04245489835739136 2023-01-22 19:36:30.930074: step: 2108/529, loss: 0.031457506120204926 2023-01-22 19:36:31.966060: step: 2112/529, loss: 0.021836599335074425 2023-01-22 19:36:33.013875: step: 2116/529, loss: 0.008259828202426434 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3274807636469222, 'r': 0.3057315668202765, 'f1': 0.3162326510584607}, 'combined': 0.23301353235886577, 'stategy': 1, 'epoch': 0} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3752228012470289, 'r': 0.32447277140995257, 'f1': 0.34800729624314986}, 'combined': 0.2448292536383969, 'stategy': 1, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30749548909989055, 'r': 0.33433570256971024, 'f1': 0.32035439137134053}, 'combined': 0.23605060416835616, 'stategy': 1, 'epoch': 0} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3738667452983897, 'r': 0.3337746706064089, 'f1': 0.35268498128562975}, 'combined': 0.2504063367127971, 'stategy': 1, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3230408996629505, 'r': 0.33775243968555163, 'f1': 0.3302329048502518}, 'combined': 0.24332950883702764, 'stategy': 1, 'epoch': 0} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3637251022025324, 'r': 0.2970739054638485, 'f1': 0.327038132239741}, 'combined': 0.2321970738902161, 'stategy': 1, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3274807636469222, 'r': 0.3057315668202765, 'f1': 0.3162326510584607}, 'combined': 0.23301353235886577, 'stategy': 1, 'epoch': 0} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3752228012470289, 'r': 0.32447277140995257, 'f1': 0.34800729624314986}, 'combined': 0.2448292536383969, 'stategy': 1, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30749548909989055, 'r': 0.33433570256971024, 'f1': 0.32035439137134053}, 'combined': 0.23605060416835616, 'stategy': 1, 'epoch': 0} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3738667452983897, 'r': 0.3337746706064089, 'f1': 0.35268498128562975}, 'combined': 0.2504063367127971, 'stategy': 1, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3230408996629505, 'r': 0.33775243968555163, 'f1': 0.3302329048502518}, 'combined': 0.24332950883702764, 'stategy': 1, 'epoch': 0} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3637251022025324, 'r': 0.2970739054638485, 'f1': 0.327038132239741}, 'combined': 0.2321970738902161, 'stategy': 1, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 19:39:40.569272: step: 4/529, loss: 0.009702632203698158 2023-01-22 19:39:41.627309: step: 8/529, loss: 0.004212322179228067 2023-01-22 19:39:42.670478: step: 12/529, loss: 0.03304532915353775 2023-01-22 19:39:43.721043: step: 16/529, loss: 0.015185734257102013 2023-01-22 19:39:44.752796: step: 20/529, loss: 0.028924936428666115 2023-01-22 19:39:45.794915: step: 24/529, loss: 0.006101727485656738 2023-01-22 19:39:46.825058: step: 28/529, loss: 0.008059863932430744 2023-01-22 19:39:47.874176: step: 32/529, loss: 0.015525087714195251 2023-01-22 19:39:48.924550: step: 36/529, loss: 0.015883387997746468 2023-01-22 19:39:49.956197: step: 40/529, loss: 0.007611711975187063 2023-01-22 19:39:50.999272: step: 44/529, loss: 0.024166617542505264 2023-01-22 19:39:52.055579: step: 48/529, loss: 0.012313933111727238 2023-01-22 19:39:53.107598: step: 52/529, loss: 0.010366847738623619 2023-01-22 19:39:54.143577: step: 56/529, loss: 0.012623738497495651 2023-01-22 19:39:55.188485: step: 60/529, loss: 0.00899945292621851 2023-01-22 19:39:56.243572: step: 64/529, loss: 0.03763147071003914 2023-01-22 19:39:57.300944: step: 68/529, loss: 0.012430544942617416 2023-01-22 19:39:58.362323: step: 72/529, loss: 0.00794380996376276 2023-01-22 19:39:59.426069: step: 76/529, loss: 0.011743719689548016 2023-01-22 19:40:00.483951: step: 80/529, loss: 0.008317777886986732 2023-01-22 19:40:01.532634: step: 84/529, loss: 0.020757101476192474 2023-01-22 19:40:02.576168: step: 88/529, loss: 0.010150941088795662 2023-01-22 19:40:03.626492: step: 92/529, loss: 0.006616658065468073 2023-01-22 19:40:04.660547: step: 96/529, loss: 0.006203392520546913 2023-01-22 19:40:05.700976: step: 100/529, loss: 0.016931546851992607 2023-01-22 19:40:06.745199: step: 104/529, loss: 0.012396498583257198 2023-01-22 19:40:07.791720: step: 108/529, loss: 0.006779055576771498 2023-01-22 19:40:08.839407: step: 112/529, loss: 0.009066647849977016 2023-01-22 19:40:09.914972: step: 116/529, loss: 0.011995772831141949 2023-01-22 19:40:10.996173: step: 120/529, loss: 0.01516010519117117 2023-01-22 19:40:12.039179: step: 124/529, loss: 0.014176087453961372 2023-01-22 19:40:13.083375: step: 128/529, loss: 0.0 2023-01-22 19:40:14.140201: step: 132/529, loss: 0.048646312206983566 2023-01-22 19:40:15.203676: step: 136/529, loss: 0.009801060892641544 2023-01-22 19:40:16.262489: step: 140/529, loss: 0.02850925363600254 2023-01-22 19:40:17.328158: step: 144/529, loss: 0.014321591705083847 2023-01-22 19:40:18.375103: step: 148/529, loss: 0.0026925771962851286 2023-01-22 19:40:19.447798: step: 152/529, loss: 0.017567764967679977 2023-01-22 19:40:20.501491: step: 156/529, loss: 0.004184122197329998 2023-01-22 19:40:21.551258: step: 160/529, loss: 0.008230687119066715 2023-01-22 19:40:22.619543: step: 164/529, loss: 0.02939199097454548 2023-01-22 19:40:23.675998: step: 168/529, loss: 0.004161420743912458 2023-01-22 19:40:24.728382: step: 172/529, loss: 0.011520899832248688 2023-01-22 19:40:25.781554: step: 176/529, loss: 0.0038108446169644594 2023-01-22 19:40:26.844466: step: 180/529, loss: 0.026291929185390472 2023-01-22 19:40:27.922578: step: 184/529, loss: 0.007606282364577055 2023-01-22 19:40:28.985228: step: 188/529, loss: 0.022314025089144707 2023-01-22 19:40:30.059646: step: 192/529, loss: 0.03174912929534912 2023-01-22 19:40:31.115243: step: 196/529, loss: 0.004038125276565552 2023-01-22 19:40:32.159154: step: 200/529, loss: 0.005661796312779188 2023-01-22 19:40:33.237788: step: 204/529, loss: 0.0023464723490178585 2023-01-22 19:40:34.300785: step: 208/529, loss: 0.00834522396326065 2023-01-22 19:40:35.362946: step: 212/529, loss: 0.01633678749203682 2023-01-22 19:40:36.416581: step: 216/529, loss: 0.004283620044589043 2023-01-22 19:40:37.477289: step: 220/529, loss: 0.028229283168911934 2023-01-22 19:40:38.526236: step: 224/529, loss: 0.00961701013147831 2023-01-22 19:40:39.595055: step: 228/529, loss: 0.011595970019698143 2023-01-22 19:40:40.655202: step: 232/529, loss: 0.06310028582811356 2023-01-22 19:40:41.711611: step: 236/529, loss: 0.006959089078009129 2023-01-22 19:40:42.785632: step: 240/529, loss: 0.027418065816164017 2023-01-22 19:40:43.840739: step: 244/529, loss: 0.01206853985786438 2023-01-22 19:40:44.897470: step: 248/529, loss: 0.03438446670770645 2023-01-22 19:40:45.949356: step: 252/529, loss: 0.009065095335245132 2023-01-22 19:40:47.005927: step: 256/529, loss: 0.04657723009586334 2023-01-22 19:40:48.062594: step: 260/529, loss: 0.007123757153749466 2023-01-22 19:40:49.144997: step: 264/529, loss: 0.0057588298805058 2023-01-22 19:40:50.209906: step: 268/529, loss: 0.029189610853791237 2023-01-22 19:40:51.271038: step: 272/529, loss: 0.013753708451986313 2023-01-22 19:40:52.333870: step: 276/529, loss: 0.005057580303400755 2023-01-22 19:40:53.412797: step: 280/529, loss: 0.05241711810231209 2023-01-22 19:40:54.470669: step: 284/529, loss: 0.036593712866306305 2023-01-22 19:40:55.556701: step: 288/529, loss: 0.028647996485233307 2023-01-22 19:40:56.621142: step: 292/529, loss: 0.04901021718978882 2023-01-22 19:40:57.699312: step: 296/529, loss: 0.05082092061638832 2023-01-22 19:40:58.763515: step: 300/529, loss: 0.008069179952144623 2023-01-22 19:40:59.823767: step: 304/529, loss: 0.012969561852514744 2023-01-22 19:41:00.894075: step: 308/529, loss: 0.00813002698123455 2023-01-22 19:41:01.953849: step: 312/529, loss: 0.014343906193971634 2023-01-22 19:41:03.013870: step: 316/529, loss: 0.016288744285702705 2023-01-22 19:41:04.073555: step: 320/529, loss: 0.008508963510394096 2023-01-22 19:41:05.140584: step: 324/529, loss: 0.011361664161086082 2023-01-22 19:41:06.206406: step: 328/529, loss: 0.015250255353748798 2023-01-22 19:41:07.278726: step: 332/529, loss: 0.0062587447464466095 2023-01-22 19:41:08.353284: step: 336/529, loss: 0.0019832190591841936 2023-01-22 19:41:09.428743: step: 340/529, loss: 0.006038452964276075 2023-01-22 19:41:10.488032: step: 344/529, loss: 0.043527502566576004 2023-01-22 19:41:11.553140: step: 348/529, loss: 0.010479786433279514 2023-01-22 19:41:12.610496: step: 352/529, loss: 0.00537203811109066 2023-01-22 19:41:13.670853: step: 356/529, loss: 0.010623506270349026 2023-01-22 19:41:14.745464: step: 360/529, loss: 0.06597544997930527 2023-01-22 19:41:15.794372: step: 364/529, loss: 0.007426958065479994 2023-01-22 19:41:16.883333: step: 368/529, loss: 0.031153954565525055 2023-01-22 19:41:17.947412: step: 372/529, loss: 0.006872945930808783 2023-01-22 19:41:19.015585: step: 376/529, loss: 0.0061378865502774715 2023-01-22 19:41:20.079805: step: 380/529, loss: 0.00790957361459732 2023-01-22 19:41:21.153552: step: 384/529, loss: 0.005878101103007793 2023-01-22 19:41:22.217424: step: 388/529, loss: 0.009965133853256702 2023-01-22 19:41:23.278422: step: 392/529, loss: 0.003342794021591544 2023-01-22 19:41:24.337850: step: 396/529, loss: 0.021368833258748055 2023-01-22 19:41:25.404492: step: 400/529, loss: 0.01007467694580555 2023-01-22 19:41:26.450911: step: 404/529, loss: 0.007592642214149237 2023-01-22 19:41:27.499268: step: 408/529, loss: 0.009408257901668549 2023-01-22 19:41:28.571412: step: 412/529, loss: 0.008199675939977169 2023-01-22 19:41:29.617874: step: 416/529, loss: 0.01346003171056509 2023-01-22 19:41:30.681945: step: 420/529, loss: 0.009628926403820515 2023-01-22 19:41:31.742054: step: 424/529, loss: 0.024976592510938644 2023-01-22 19:41:32.799937: step: 428/529, loss: 0.010551844723522663 2023-01-22 19:41:33.868685: step: 432/529, loss: 0.009431814774870872 2023-01-22 19:41:34.923830: step: 436/529, loss: 0.014981884509325027 2023-01-22 19:41:35.979226: step: 440/529, loss: 0.013812443241477013 2023-01-22 19:41:37.033910: step: 444/529, loss: 0.025818997994065285 2023-01-22 19:41:38.106624: step: 448/529, loss: 0.009159760549664497 2023-01-22 19:41:39.181066: step: 452/529, loss: 0.003597363131120801 2023-01-22 19:41:40.238704: step: 456/529, loss: 0.018870214000344276 2023-01-22 19:41:41.296750: step: 460/529, loss: 0.023106761276721954 2023-01-22 19:41:42.357530: step: 464/529, loss: 0.005742175038903952 2023-01-22 19:41:43.423941: step: 468/529, loss: 0.01089457981288433 2023-01-22 19:41:44.480219: step: 472/529, loss: 0.02378915436565876 2023-01-22 19:41:45.565573: step: 476/529, loss: 0.015174404717981815 2023-01-22 19:41:46.621046: step: 480/529, loss: 0.005291799549013376 2023-01-22 19:41:47.669223: step: 484/529, loss: 0.011648435145616531 2023-01-22 19:41:48.724455: step: 488/529, loss: 0.010164718143641949 2023-01-22 19:41:49.777892: step: 492/529, loss: 0.006625893525779247 2023-01-22 19:41:50.829807: step: 496/529, loss: 0.01943482831120491 2023-01-22 19:41:51.887527: step: 500/529, loss: 0.011251815594732761 2023-01-22 19:41:52.942064: step: 504/529, loss: 0.006503934506326914 2023-01-22 19:41:54.016134: step: 508/529, loss: 0.03821869194507599 2023-01-22 19:41:55.077001: step: 512/529, loss: 0.011579535901546478 2023-01-22 19:41:56.129717: step: 516/529, loss: 0.006645224057137966 2023-01-22 19:41:57.196786: step: 520/529, loss: 0.0018232990987598896 2023-01-22 19:41:58.268675: step: 524/529, loss: 0.0014213580871000886 2023-01-22 19:41:59.328823: step: 528/529, loss: 0.014252830296754837 2023-01-22 19:42:00.388959: step: 532/529, loss: 0.005493494216352701 2023-01-22 19:42:01.441154: step: 536/529, loss: 0.012886922806501389 2023-01-22 19:42:02.510057: step: 540/529, loss: 0.014141098596155643 2023-01-22 19:42:03.571222: step: 544/529, loss: 0.028720082715153694 2023-01-22 19:42:04.632271: step: 548/529, loss: 0.023180142045021057 2023-01-22 19:42:05.679163: step: 552/529, loss: 0.009422210045158863 2023-01-22 19:42:06.725356: step: 556/529, loss: 0.01278822310268879 2023-01-22 19:42:07.796670: step: 560/529, loss: 0.006602788809686899 2023-01-22 19:42:08.860692: step: 564/529, loss: 0.005738876294344664 2023-01-22 19:42:09.927302: step: 568/529, loss: 0.0072227781638503075 2023-01-22 19:42:10.981035: step: 572/529, loss: 0.004410790745168924 2023-01-22 19:42:12.039018: step: 576/529, loss: 0.005892443936318159 2023-01-22 19:42:13.097764: step: 580/529, loss: 0.010658673010766506 2023-01-22 19:42:14.154660: step: 584/529, loss: 0.020166177302598953 2023-01-22 19:42:15.207881: step: 588/529, loss: 0.012440183199942112 2023-01-22 19:42:16.265347: step: 592/529, loss: 0.008904715068638325 2023-01-22 19:42:17.316464: step: 596/529, loss: 0.02467149682343006 2023-01-22 19:42:18.376504: step: 600/529, loss: 0.02473810315132141 2023-01-22 19:42:19.456348: step: 604/529, loss: 0.010048349387943745 2023-01-22 19:42:20.499405: step: 608/529, loss: 0.012251696549355984 2023-01-22 19:42:21.571505: step: 612/529, loss: 0.0010070670396089554 2023-01-22 19:42:22.634351: step: 616/529, loss: 0.01081684697419405 2023-01-22 19:42:23.692044: step: 620/529, loss: 0.006674682721495628 2023-01-22 19:42:24.754668: step: 624/529, loss: 0.004897418897598982 2023-01-22 19:42:25.813716: step: 628/529, loss: 0.009355539456009865 2023-01-22 19:42:26.861697: step: 632/529, loss: 0.033505599945783615 2023-01-22 19:42:27.912253: step: 636/529, loss: 0.008581315167248249 2023-01-22 19:42:28.963920: step: 640/529, loss: 0.00946006178855896 2023-01-22 19:42:30.024469: step: 644/529, loss: 0.005713715683668852 2023-01-22 19:42:31.073042: step: 648/529, loss: 0.005415044724941254 2023-01-22 19:42:32.142301: step: 652/529, loss: 0.013631592504680157 2023-01-22 19:42:33.195859: step: 656/529, loss: 0.01755356974899769 2023-01-22 19:42:34.251773: step: 660/529, loss: 0.006203013006597757 2023-01-22 19:42:35.322727: step: 664/529, loss: 0.0276893749833107 2023-01-22 19:42:36.393212: step: 668/529, loss: 0.026160378009080887 2023-01-22 19:42:37.444897: step: 672/529, loss: 0.013735873624682426 2023-01-22 19:42:38.525349: step: 676/529, loss: 0.014522930607199669 2023-01-22 19:42:39.573685: step: 680/529, loss: 0.007470645941793919 2023-01-22 19:42:40.647027: step: 684/529, loss: 0.012211167253553867 2023-01-22 19:42:41.717667: step: 688/529, loss: 0.018837451934814453 2023-01-22 19:42:42.769534: step: 692/529, loss: 0.023331498727202415 2023-01-22 19:42:43.835526: step: 696/529, loss: 0.006521758157759905 2023-01-22 19:42:44.907050: step: 700/529, loss: 0.011741899885237217 2023-01-22 19:42:45.958865: step: 704/529, loss: 0.01141168363392353 2023-01-22 19:42:47.018931: step: 708/529, loss: 0.006443439517170191 2023-01-22 19:42:48.100410: step: 712/529, loss: 0.026486095041036606 2023-01-22 19:42:49.155749: step: 716/529, loss: 0.008651353418827057 2023-01-22 19:42:50.213671: step: 720/529, loss: 0.042408592998981476 2023-01-22 19:42:51.284222: step: 724/529, loss: 0.01639365591108799 2023-01-22 19:42:52.332184: step: 728/529, loss: 0.009071496315300465 2023-01-22 19:42:53.385643: step: 732/529, loss: 0.0024754484184086323 2023-01-22 19:42:54.439628: step: 736/529, loss: 0.01669994927942753 2023-01-22 19:42:55.507592: step: 740/529, loss: 0.01184410322457552 2023-01-22 19:42:56.568097: step: 744/529, loss: 0.009721135720610619 2023-01-22 19:42:57.619297: step: 748/529, loss: 0.022059466689825058 2023-01-22 19:42:58.668027: step: 752/529, loss: 0.011720165610313416 2023-01-22 19:42:59.727279: step: 756/529, loss: 0.008919107727706432 2023-01-22 19:43:00.784669: step: 760/529, loss: 0.001974788261577487 2023-01-22 19:43:01.850516: step: 764/529, loss: 0.03654288873076439 2023-01-22 19:43:02.904861: step: 768/529, loss: 0.005718754138797522 2023-01-22 19:43:03.956287: step: 772/529, loss: 0.015371087938547134 2023-01-22 19:43:05.028347: step: 776/529, loss: 0.013817953877151012 2023-01-22 19:43:06.082198: step: 780/529, loss: 0.028627563267946243 2023-01-22 19:43:07.149410: step: 784/529, loss: 0.037508122622966766 2023-01-22 19:43:08.192443: step: 788/529, loss: 0.010139815509319305 2023-01-22 19:43:09.245749: step: 792/529, loss: 0.005906376522034407 2023-01-22 19:43:10.315595: step: 796/529, loss: 0.022158462554216385 2023-01-22 19:43:11.367958: step: 800/529, loss: 0.0038460742216557264 2023-01-22 19:43:12.417599: step: 804/529, loss: 0.0008138243574649096 2023-01-22 19:43:13.462076: step: 808/529, loss: 0.004285029601305723 2023-01-22 19:43:14.511172: step: 812/529, loss: 0.006061118096113205 2023-01-22 19:43:15.564166: step: 816/529, loss: 0.014257565140724182 2023-01-22 19:43:16.613666: step: 820/529, loss: 0.045225296169519424 2023-01-22 19:43:17.679207: step: 824/529, loss: 0.005086400546133518 2023-01-22 19:43:18.737295: step: 828/529, loss: 0.008096187375485897 2023-01-22 19:43:19.787678: step: 832/529, loss: 0.0034793426748365164 2023-01-22 19:43:20.852464: step: 836/529, loss: 0.012109835632145405 2023-01-22 19:43:21.911103: step: 840/529, loss: 0.0046193962916731834 2023-01-22 19:43:22.954754: step: 844/529, loss: 0.018356528133153915 2023-01-22 19:43:24.007764: step: 848/529, loss: 0.006966340821236372 2023-01-22 19:43:25.072370: step: 852/529, loss: 0.03517195209860802 2023-01-22 19:43:26.131434: step: 856/529, loss: 0.06802359223365784 2023-01-22 19:43:27.189761: step: 860/529, loss: 0.009121929295361042 2023-01-22 19:43:28.243292: step: 864/529, loss: 0.017725694924592972 2023-01-22 19:43:29.292841: step: 868/529, loss: 0.035595402121543884 2023-01-22 19:43:30.346582: step: 872/529, loss: 0.0030177319422364235 2023-01-22 19:43:31.386467: step: 876/529, loss: 0.01256206538528204 2023-01-22 19:43:32.439277: step: 880/529, loss: 0.006419451907277107 2023-01-22 19:43:33.499105: step: 884/529, loss: 0.007674932945519686 2023-01-22 19:43:34.546166: step: 888/529, loss: 0.006237064488232136 2023-01-22 19:43:35.613978: step: 892/529, loss: 0.010784771293401718 2023-01-22 19:43:36.670989: step: 896/529, loss: 0.028481515124440193 2023-01-22 19:43:37.743440: step: 900/529, loss: 0.00234957505017519 2023-01-22 19:43:38.807915: step: 904/529, loss: 0.013047631829977036 2023-01-22 19:43:39.859837: step: 908/529, loss: 0.007418323308229446 2023-01-22 19:43:40.919268: step: 912/529, loss: 0.021263813599944115 2023-01-22 19:43:41.961956: step: 916/529, loss: 0.030619753524661064 2023-01-22 19:43:43.023750: step: 920/529, loss: 0.012389450334012508 2023-01-22 19:43:44.080247: step: 924/529, loss: 0.005037079099565744 2023-01-22 19:43:45.138213: step: 928/529, loss: 0.00879708118736744 2023-01-22 19:43:46.183411: step: 932/529, loss: 0.0076294527389109135 2023-01-22 19:43:47.229833: step: 936/529, loss: 0.021199515089392662 2023-01-22 19:43:48.273163: step: 940/529, loss: 0.009083127602934837 2023-01-22 19:43:49.326680: step: 944/529, loss: 0.006466279271990061 2023-01-22 19:43:50.394199: step: 948/529, loss: 0.016769271343946457 2023-01-22 19:43:51.449808: step: 952/529, loss: 0.009145503863692284 2023-01-22 19:43:52.498422: step: 956/529, loss: 0.015483645722270012 2023-01-22 19:43:53.545648: step: 960/529, loss: 0.006539743859320879 2023-01-22 19:43:54.608773: step: 964/529, loss: 0.020707696676254272 2023-01-22 19:43:55.657331: step: 968/529, loss: 0.009864192456007004 2023-01-22 19:43:56.694846: step: 972/529, loss: 0.003156255232170224 2023-01-22 19:43:57.760733: step: 976/529, loss: 0.010344979353249073 2023-01-22 19:43:58.820879: step: 980/529, loss: 0.007188082206994295 2023-01-22 19:43:59.894251: step: 984/529, loss: 0.008987879380583763 2023-01-22 19:44:00.951360: step: 988/529, loss: 0.0008328420226462185 2023-01-22 19:44:02.016969: step: 992/529, loss: 0.07001011073589325 2023-01-22 19:44:03.077039: step: 996/529, loss: 0.012010048143565655 2023-01-22 19:44:04.135493: step: 1000/529, loss: 0.002473432570695877 2023-01-22 19:44:05.184975: step: 1004/529, loss: 0.017036965116858482 2023-01-22 19:44:06.261914: step: 1008/529, loss: 0.006179991643875837 2023-01-22 19:44:07.312010: step: 1012/529, loss: 0.028912536799907684 2023-01-22 19:44:08.358734: step: 1016/529, loss: 0.008060370571911335 2023-01-22 19:44:09.406225: step: 1020/529, loss: 0.01860123872756958 2023-01-22 19:44:10.469591: step: 1024/529, loss: 0.010317834094166756 2023-01-22 19:44:11.521244: step: 1028/529, loss: 0.014082814566791058 2023-01-22 19:44:12.570884: step: 1032/529, loss: 0.018244944512844086 2023-01-22 19:44:13.621539: step: 1036/529, loss: 0.025161465629935265 2023-01-22 19:44:14.676790: step: 1040/529, loss: 0.008377800695598125 2023-01-22 19:44:15.727558: step: 1044/529, loss: 0.0006229839636944234 2023-01-22 19:44:16.778157: step: 1048/529, loss: 0.010107414796948433 2023-01-22 19:44:17.827041: step: 1052/529, loss: 0.012382734566926956 2023-01-22 19:44:18.886941: step: 1056/529, loss: 0.055391110479831696 2023-01-22 19:44:19.936086: step: 1060/529, loss: 0.005276706535369158 2023-01-22 19:44:20.984343: step: 1064/529, loss: 0.007801515515893698 2023-01-22 19:44:22.024824: step: 1068/529, loss: 0.002068720292299986 2023-01-22 19:44:23.088643: step: 1072/529, loss: 0.01365474984049797 2023-01-22 19:44:24.142851: step: 1076/529, loss: 0.034711677581071854 2023-01-22 19:44:25.214072: step: 1080/529, loss: 0.02822842448949814 2023-01-22 19:44:26.271245: step: 1084/529, loss: 0.0082323607057333 2023-01-22 19:44:27.320390: step: 1088/529, loss: 0.010493023321032524 2023-01-22 19:44:28.366804: step: 1092/529, loss: 0.005425763316452503 2023-01-22 19:44:29.421338: step: 1096/529, loss: 0.0065599605441093445 2023-01-22 19:44:30.472317: step: 1100/529, loss: 0.033498674631118774 2023-01-22 19:44:31.521579: step: 1104/529, loss: 0.00341247022151947 2023-01-22 19:44:32.571187: step: 1108/529, loss: 0.008480322547256947 2023-01-22 19:44:33.639582: step: 1112/529, loss: 0.011904171667993069 2023-01-22 19:44:34.717022: step: 1116/529, loss: 0.007447232026606798 2023-01-22 19:44:35.782988: step: 1120/529, loss: 0.048919033259153366 2023-01-22 19:44:36.827483: step: 1124/529, loss: 0.0039873565547168255 2023-01-22 19:44:37.874695: step: 1128/529, loss: 0.005117345135658979 2023-01-22 19:44:38.912387: step: 1132/529, loss: 0.008980763144791126 2023-01-22 19:44:39.963474: step: 1136/529, loss: 0.009688722901046276 2023-01-22 19:44:41.016613: step: 1140/529, loss: 0.019318226724863052 2023-01-22 19:44:42.074419: step: 1144/529, loss: 0.01431686244904995 2023-01-22 19:44:43.163176: step: 1148/529, loss: 0.004099272191524506 2023-01-22 19:44:44.209840: step: 1152/529, loss: 0.04144096374511719 2023-01-22 19:44:45.275768: step: 1156/529, loss: 0.010227304883301258 2023-01-22 19:44:46.344051: step: 1160/529, loss: 0.06978435814380646 2023-01-22 19:44:47.387769: step: 1164/529, loss: 0.0045188236981630325 2023-01-22 19:44:48.444907: step: 1168/529, loss: 0.03477945178747177 2023-01-22 19:44:49.493518: step: 1172/529, loss: 0.005270407535135746 2023-01-22 19:44:50.570483: step: 1176/529, loss: 0.03386178985238075 2023-01-22 19:44:51.613769: step: 1180/529, loss: 0.006988477427512407 2023-01-22 19:44:52.653850: step: 1184/529, loss: 0.02751697599887848 2023-01-22 19:44:53.711007: step: 1188/529, loss: 0.00937473401427269 2023-01-22 19:44:54.773009: step: 1192/529, loss: 0.007683223113417625 2023-01-22 19:44:55.816337: step: 1196/529, loss: 0.00957627221941948 2023-01-22 19:44:56.867937: step: 1200/529, loss: 0.009469663724303246 2023-01-22 19:44:57.917548: step: 1204/529, loss: 0.004552146885544062 2023-01-22 19:44:58.961542: step: 1208/529, loss: 0.019856223836541176 2023-01-22 19:45:00.022604: step: 1212/529, loss: 0.016805380582809448 2023-01-22 19:45:01.067092: step: 1216/529, loss: 0.009647201746702194 2023-01-22 19:45:02.107064: step: 1220/529, loss: 0.00820984411984682 2023-01-22 19:45:03.160720: step: 1224/529, loss: 0.015046448446810246 2023-01-22 19:45:04.213570: step: 1228/529, loss: 0.012992354109883308 2023-01-22 19:45:05.263336: step: 1232/529, loss: 0.005617902148514986 2023-01-22 19:45:06.303253: step: 1236/529, loss: 0.010727177374064922 2023-01-22 19:45:07.366512: step: 1240/529, loss: 0.004621490836143494 2023-01-22 19:45:08.409824: step: 1244/529, loss: 0.011618840508162975 2023-01-22 19:45:09.481745: step: 1248/529, loss: 0.003948628436774015 2023-01-22 19:45:10.536687: step: 1252/529, loss: 0.012753070332109928 2023-01-22 19:45:11.589297: step: 1256/529, loss: 0.006294343154877424 2023-01-22 19:45:12.627841: step: 1260/529, loss: 0.004398762248456478 2023-01-22 19:45:13.670836: step: 1264/529, loss: 0.0062956372275948524 2023-01-22 19:45:14.732208: step: 1268/529, loss: 0.015930350869894028 2023-01-22 19:45:15.786150: step: 1272/529, loss: 0.006502767093479633 2023-01-22 19:45:16.838153: step: 1276/529, loss: 0.008406359702348709 2023-01-22 19:45:17.883999: step: 1280/529, loss: 0.008282057009637356 2023-01-22 19:45:18.934383: step: 1284/529, loss: 0.023298965767025948 2023-01-22 19:45:19.989620: step: 1288/529, loss: 0.005118640139698982 2023-01-22 19:45:21.037310: step: 1292/529, loss: 0.02254495956003666 2023-01-22 19:45:22.090192: step: 1296/529, loss: 0.004884684924036264 2023-01-22 19:45:23.154658: step: 1300/529, loss: 0.014652098529040813 2023-01-22 19:45:24.209337: step: 1304/529, loss: 0.0018594631692394614 2023-01-22 19:45:25.271049: step: 1308/529, loss: 0.006399250589311123 2023-01-22 19:45:26.319493: step: 1312/529, loss: 0.04432792216539383 2023-01-22 19:45:27.363462: step: 1316/529, loss: 0.014780706726014614 2023-01-22 19:45:28.410962: step: 1320/529, loss: 0.018753711134195328 2023-01-22 19:45:29.453957: step: 1324/529, loss: 0.012934209778904915 2023-01-22 19:45:30.508303: step: 1328/529, loss: 0.0080748051404953 2023-01-22 19:45:31.569643: step: 1332/529, loss: 0.013834808953106403 2023-01-22 19:45:32.619129: step: 1336/529, loss: 0.009937616065144539 2023-01-22 19:45:33.686667: step: 1340/529, loss: 0.005996472202241421 2023-01-22 19:45:34.730003: step: 1344/529, loss: 0.003624585224315524 2023-01-22 19:45:35.778268: step: 1348/529, loss: 0.0077989050187170506 2023-01-22 19:45:36.831956: step: 1352/529, loss: 0.017789974808692932 2023-01-22 19:45:37.887207: step: 1356/529, loss: 0.08401846140623093 2023-01-22 19:45:38.963834: step: 1360/529, loss: 0.004022348206490278 2023-01-22 19:45:40.013460: step: 1364/529, loss: 0.009396846406161785 2023-01-22 19:45:41.086281: step: 1368/529, loss: 0.03380804508924484 2023-01-22 19:45:42.137132: step: 1372/529, loss: 0.0024178980384021997 2023-01-22 19:45:43.189936: step: 1376/529, loss: 0.001569257816299796 2023-01-22 19:45:44.241463: step: 1380/529, loss: 0.004394140560179949 2023-01-22 19:45:45.306700: step: 1384/529, loss: 0.0056083169765770435 2023-01-22 19:45:46.364682: step: 1388/529, loss: 0.005862198770046234 2023-01-22 19:45:47.425448: step: 1392/529, loss: 0.0633922815322876 2023-01-22 19:45:48.471386: step: 1396/529, loss: 0.0053275092504918575 2023-01-22 19:45:49.550644: step: 1400/529, loss: 0.007326117716729641 2023-01-22 19:45:50.593969: step: 1404/529, loss: 0.004777191206812859 2023-01-22 19:45:51.651873: step: 1408/529, loss: 0.045870207250118256 2023-01-22 19:45:52.707873: step: 1412/529, loss: 0.01463842112571001 2023-01-22 19:45:53.745995: step: 1416/529, loss: 0.04828391969203949 2023-01-22 19:45:54.826193: step: 1420/529, loss: 0.008017132990062237 2023-01-22 19:45:55.886428: step: 1424/529, loss: 0.008438260294497013 2023-01-22 19:45:56.942281: step: 1428/529, loss: 0.012026888318359852 2023-01-22 19:45:57.990589: step: 1432/529, loss: 0.017127279192209244 2023-01-22 19:45:59.032601: step: 1436/529, loss: 0.004902486223727465 2023-01-22 19:46:00.086419: step: 1440/529, loss: 0.010724497959017754 2023-01-22 19:46:01.139464: step: 1444/529, loss: 0.010780728422105312 2023-01-22 19:46:02.188739: step: 1448/529, loss: 0.0036838180385529995 2023-01-22 19:46:03.249502: step: 1452/529, loss: 0.012205828912556171 2023-01-22 19:46:04.304147: step: 1456/529, loss: 0.02813957817852497 2023-01-22 19:46:05.378506: step: 1460/529, loss: 0.009332367219030857 2023-01-22 19:46:06.432316: step: 1464/529, loss: 0.006843236740678549 2023-01-22 19:46:07.500781: step: 1468/529, loss: 0.02566590905189514 2023-01-22 19:46:08.582319: step: 1472/529, loss: 0.00420921528711915 2023-01-22 19:46:09.628150: step: 1476/529, loss: 0.016612662002444267 2023-01-22 19:46:10.668566: step: 1480/529, loss: 0.008609619922935963 2023-01-22 19:46:11.722022: step: 1484/529, loss: 0.009465090930461884 2023-01-22 19:46:12.771545: step: 1488/529, loss: 0.03811675310134888 2023-01-22 19:46:13.841966: step: 1492/529, loss: 0.02892463468015194 2023-01-22 19:46:14.892610: step: 1496/529, loss: 0.0032942022662609816 2023-01-22 19:46:15.952968: step: 1500/529, loss: 0.007878275588154793 2023-01-22 19:46:17.006349: step: 1504/529, loss: 0.01102651096880436 2023-01-22 19:46:18.076167: step: 1508/529, loss: 0.027395185083150864 2023-01-22 19:46:19.126457: step: 1512/529, loss: 0.004893516656011343 2023-01-22 19:46:20.190723: step: 1516/529, loss: 0.0019189275335520506 2023-01-22 19:46:21.244989: step: 1520/529, loss: 0.06153124198317528 2023-01-22 19:46:22.303484: step: 1524/529, loss: 0.007808441761881113 2023-01-22 19:46:23.354896: step: 1528/529, loss: 0.020678263157606125 2023-01-22 19:46:24.407491: step: 1532/529, loss: 0.0011994382366538048 2023-01-22 19:46:25.464979: step: 1536/529, loss: 0.014690292999148369 2023-01-22 19:46:26.513392: step: 1540/529, loss: 0.020666059106588364 2023-01-22 19:46:27.572882: step: 1544/529, loss: 0.01238057017326355 2023-01-22 19:46:28.633625: step: 1548/529, loss: 0.02977786399424076 2023-01-22 19:46:29.675612: step: 1552/529, loss: 0.019417615607380867 2023-01-22 19:46:30.722246: step: 1556/529, loss: 0.007700175046920776 2023-01-22 19:46:31.779591: step: 1560/529, loss: 0.02705598808825016 2023-01-22 19:46:32.832322: step: 1564/529, loss: 0.03783218935132027 2023-01-22 19:46:33.896168: step: 1568/529, loss: 0.012546454556286335 2023-01-22 19:46:34.950182: step: 1572/529, loss: 0.014727315865457058 2023-01-22 19:46:36.003521: step: 1576/529, loss: 0.007398312911391258 2023-01-22 19:46:37.038478: step: 1580/529, loss: 0.004260462708771229 2023-01-22 19:46:38.082529: step: 1584/529, loss: 0.006616853177547455 2023-01-22 19:46:39.129521: step: 1588/529, loss: 0.022465188056230545 2023-01-22 19:46:40.191558: step: 1592/529, loss: 0.002625127322971821 2023-01-22 19:46:41.269319: step: 1596/529, loss: 0.008390899747610092 2023-01-22 19:46:42.321280: step: 1600/529, loss: 0.0766826644539833 2023-01-22 19:46:43.373782: step: 1604/529, loss: 0.04171181842684746 2023-01-22 19:46:44.415171: step: 1608/529, loss: 0.007223764434456825 2023-01-22 19:46:45.465497: step: 1612/529, loss: 0.004279943183064461 2023-01-22 19:46:46.509062: step: 1616/529, loss: 0.03503376618027687 2023-01-22 19:46:47.571444: step: 1620/529, loss: 0.008486324921250343 2023-01-22 19:46:48.634969: step: 1624/529, loss: 0.007074407767504454 2023-01-22 19:46:49.707505: step: 1628/529, loss: 0.012124099768698215 2023-01-22 19:46:50.747368: step: 1632/529, loss: 0.005812091287225485 2023-01-22 19:46:51.818661: step: 1636/529, loss: 0.005729333031922579 2023-01-22 19:46:52.875873: step: 1640/529, loss: 0.009815212339162827 2023-01-22 19:46:53.919053: step: 1644/529, loss: 0.010043101385235786 2023-01-22 19:46:54.973641: step: 1648/529, loss: 0.013427400961518288 2023-01-22 19:46:56.029109: step: 1652/529, loss: 0.008553661406040192 2023-01-22 19:46:57.084910: step: 1656/529, loss: 0.007796815596520901 2023-01-22 19:46:58.119858: step: 1660/529, loss: 0.0036519530694931746 2023-01-22 19:46:59.177703: step: 1664/529, loss: 0.0008156916592270136 2023-01-22 19:47:00.237148: step: 1668/529, loss: 0.008023594506084919 2023-01-22 19:47:01.275368: step: 1672/529, loss: 0.007903888821601868 2023-01-22 19:47:02.328070: step: 1676/529, loss: 0.03816872090101242 2023-01-22 19:47:03.403452: step: 1680/529, loss: 0.006926502101123333 2023-01-22 19:47:04.458505: step: 1684/529, loss: 0.011442071758210659 2023-01-22 19:47:05.494915: step: 1688/529, loss: 0.006777629721909761 2023-01-22 19:47:06.536483: step: 1692/529, loss: 0.010109285824000835 2023-01-22 19:47:07.575414: step: 1696/529, loss: 0.005739683285355568 2023-01-22 19:47:08.634923: step: 1700/529, loss: 0.0106072798371315 2023-01-22 19:47:09.691557: step: 1704/529, loss: 0.005274866707623005 2023-01-22 19:47:10.753360: step: 1708/529, loss: 0.007199095096439123 2023-01-22 19:47:11.805810: step: 1712/529, loss: 0.03927978500723839 2023-01-22 19:47:12.845303: step: 1716/529, loss: 0.00974242389202118 2023-01-22 19:47:13.897201: step: 1720/529, loss: 0.01649884320795536 2023-01-22 19:47:14.936960: step: 1724/529, loss: 0.024332767352461815 2023-01-22 19:47:15.987859: step: 1728/529, loss: 0.009163670241832733 2023-01-22 19:47:17.035528: step: 1732/529, loss: 0.01078770775347948 2023-01-22 19:47:18.112486: step: 1736/529, loss: 0.00305701675824821 2023-01-22 19:47:19.161650: step: 1740/529, loss: 0.007236138917505741 2023-01-22 19:47:20.224949: step: 1744/529, loss: 0.01068450789898634 2023-01-22 19:47:21.252408: step: 1748/529, loss: 0.002962834667414427 2023-01-22 19:47:22.300034: step: 1752/529, loss: 0.004199921619147062 2023-01-22 19:47:23.345311: step: 1756/529, loss: 0.010702506639063358 2023-01-22 19:47:24.393332: step: 1760/529, loss: 0.001984353642910719 2023-01-22 19:47:25.434198: step: 1764/529, loss: 0.03887616842985153 2023-01-22 19:47:26.498757: step: 1768/529, loss: 0.031201494857668877 2023-01-22 19:47:27.545574: step: 1772/529, loss: 0.010206346400082111 2023-01-22 19:47:28.613115: step: 1776/529, loss: 0.048408858478069305 2023-01-22 19:47:29.674508: step: 1780/529, loss: 0.00793641060590744 2023-01-22 19:47:30.728283: step: 1784/529, loss: 0.004356002435088158 2023-01-22 19:47:31.783862: step: 1788/529, loss: 0.00403329124674201 2023-01-22 19:47:32.840411: step: 1792/529, loss: 0.00289630563929677 2023-01-22 19:47:33.882127: step: 1796/529, loss: 0.033533867448568344 2023-01-22 19:47:34.936190: step: 1800/529, loss: 0.009799486957490444 2023-01-22 19:47:35.999741: step: 1804/529, loss: 0.0028348518535494804 2023-01-22 19:47:37.056957: step: 1808/529, loss: 0.024223024025559425 2023-01-22 19:47:38.097987: step: 1812/529, loss: 0.005609686020761728 2023-01-22 19:47:39.136282: step: 1816/529, loss: 0.004338175058364868 2023-01-22 19:47:40.179618: step: 1820/529, loss: 0.03996272757649422 2023-01-22 19:47:41.230480: step: 1824/529, loss: 0.0038215667009353638 2023-01-22 19:47:42.280208: step: 1828/529, loss: 0.0062724510207772255 2023-01-22 19:47:43.333607: step: 1832/529, loss: 0.019856298342347145 2023-01-22 19:47:44.406471: step: 1836/529, loss: 0.04596890136599541 2023-01-22 19:47:45.488819: step: 1840/529, loss: 0.003978193271905184 2023-01-22 19:47:46.533153: step: 1844/529, loss: 0.014827871695160866 2023-01-22 19:47:47.589016: step: 1848/529, loss: 0.012244523502886295 2023-01-22 19:47:48.664753: step: 1852/529, loss: 0.002460882533341646 2023-01-22 19:47:49.711294: step: 1856/529, loss: 0.007105730473995209 2023-01-22 19:47:50.755207: step: 1860/529, loss: 0.009142741560935974 2023-01-22 19:47:51.803743: step: 1864/529, loss: 0.027887966483831406 2023-01-22 19:47:52.850144: step: 1868/529, loss: 0.005558456294238567 2023-01-22 19:47:53.904629: step: 1872/529, loss: 0.0025893880520015955 2023-01-22 19:47:54.978161: step: 1876/529, loss: 0.005603237543255091 2023-01-22 19:47:56.046104: step: 1880/529, loss: 0.0294538214802742 2023-01-22 19:47:57.099176: step: 1884/529, loss: 0.0014858595095574856 2023-01-22 19:47:58.144955: step: 1888/529, loss: 0.07919655740261078 2023-01-22 19:47:59.233478: step: 1892/529, loss: 0.009422773495316505 2023-01-22 19:48:00.299601: step: 1896/529, loss: 0.007626679260283709 2023-01-22 19:48:01.334277: step: 1900/529, loss: 0.02495105005800724 2023-01-22 19:48:02.389018: step: 1904/529, loss: 0.03703620657324791 2023-01-22 19:48:03.443092: step: 1908/529, loss: 0.003523309715092182 2023-01-22 19:48:04.493151: step: 1912/529, loss: 0.009008413180708885 2023-01-22 19:48:05.553254: step: 1916/529, loss: 0.007903202436864376 2023-01-22 19:48:06.603978: step: 1920/529, loss: 0.04858347028493881 2023-01-22 19:48:07.679465: step: 1924/529, loss: 0.010702469386160374 2023-01-22 19:48:08.741398: step: 1928/529, loss: 0.02246721088886261 2023-01-22 19:48:09.800551: step: 1932/529, loss: 0.043326664716005325 2023-01-22 19:48:10.851364: step: 1936/529, loss: 0.006225182209163904 2023-01-22 19:48:11.910896: step: 1940/529, loss: 0.007526633329689503 2023-01-22 19:48:12.965264: step: 1944/529, loss: 0.026046033948659897 2023-01-22 19:48:14.005359: step: 1948/529, loss: 0.0008503666613250971 2023-01-22 19:48:15.056385: step: 1952/529, loss: 0.007444911636412144 2023-01-22 19:48:16.106918: step: 1956/529, loss: 0.018954968079924583 2023-01-22 19:48:17.188209: step: 1960/529, loss: 0.008903324604034424 2023-01-22 19:48:18.247285: step: 1964/529, loss: 0.023453492671251297 2023-01-22 19:48:19.294545: step: 1968/529, loss: 0.006797189824283123 2023-01-22 19:48:20.352859: step: 1972/529, loss: 0.012226155959069729 2023-01-22 19:48:21.400376: step: 1976/529, loss: 0.013284561224281788 2023-01-22 19:48:22.458188: step: 1980/529, loss: 0.0025997436605393887 2023-01-22 19:48:23.506420: step: 1984/529, loss: 0.020554156973958015 2023-01-22 19:48:24.551377: step: 1988/529, loss: 0.017263587564229965 2023-01-22 19:48:25.600928: step: 1992/529, loss: 0.021719371899962425 2023-01-22 19:48:26.652963: step: 1996/529, loss: 0.01578526571393013 2023-01-22 19:48:27.693060: step: 2000/529, loss: 0.003141375258564949 2023-01-22 19:48:28.732936: step: 2004/529, loss: 0.006471711676567793 2023-01-22 19:48:29.774927: step: 2008/529, loss: 0.007639199960976839 2023-01-22 19:48:30.826294: step: 2012/529, loss: 0.005469005089253187 2023-01-22 19:48:31.884024: step: 2016/529, loss: 0.02692493051290512 2023-01-22 19:48:32.954020: step: 2020/529, loss: 0.0201416052877903 2023-01-22 19:48:34.000789: step: 2024/529, loss: 0.004232287406921387 2023-01-22 19:48:35.056138: step: 2028/529, loss: 0.01146592479199171 2023-01-22 19:48:36.107687: step: 2032/529, loss: 0.012300624512135983 2023-01-22 19:48:37.186498: step: 2036/529, loss: 0.04645624756813049 2023-01-22 19:48:38.236224: step: 2040/529, loss: 0.040244873613119125 2023-01-22 19:48:39.280114: step: 2044/529, loss: 0.01166488416492939 2023-01-22 19:48:40.333315: step: 2048/529, loss: 0.009399492293596268 2023-01-22 19:48:41.370524: step: 2052/529, loss: 0.03597395494580269 2023-01-22 19:48:42.415885: step: 2056/529, loss: 0.007753419689834118 2023-01-22 19:48:43.467224: step: 2060/529, loss: 0.007896293886005878 2023-01-22 19:48:44.500157: step: 2064/529, loss: 0.00878907646983862 2023-01-22 19:48:45.556582: step: 2068/529, loss: 0.007226321380585432 2023-01-22 19:48:46.612028: step: 2072/529, loss: 0.01596832647919655 2023-01-22 19:48:47.664716: step: 2076/529, loss: 0.025622377172112465 2023-01-22 19:48:48.718403: step: 2080/529, loss: 0.034276705235242844 2023-01-22 19:48:49.803949: step: 2084/529, loss: 0.0031056769657880068 2023-01-22 19:48:50.867845: step: 2088/529, loss: 0.0360809750854969 2023-01-22 19:48:51.932621: step: 2092/529, loss: 0.008559904992580414 2023-01-22 19:48:52.989537: step: 2096/529, loss: 0.020412065088748932 2023-01-22 19:48:54.035733: step: 2100/529, loss: 0.004759244155138731 2023-01-22 19:48:55.096503: step: 2104/529, loss: 0.007820584811270237 2023-01-22 19:48:56.152809: step: 2108/529, loss: 0.028815461322665215 2023-01-22 19:48:57.227776: step: 2112/529, loss: 0.026477031409740448 2023-01-22 19:48:58.286585: step: 2116/529, loss: 0.02446635812520981 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33578520512074483, 'r': 0.3128473163458932, 'f1': 0.3239106792029189}, 'combined': 0.2386710267810981, 'stategy': 1, 'epoch': 1} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3802860954728274, 'r': 0.3198916195774918, 'f1': 0.34748416685858347}, 'combined': 0.24446122291558636, 'stategy': 1, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3164259395725866, 'r': 0.3452465185089892, 'f1': 0.3302085576301947}, 'combined': 0.24331156878014343, 'stategy': 1, 'epoch': 1} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3807202674530041, 'r': 0.3315950716526164, 'f1': 0.3544636972838313}, 'combined': 0.25166922507152023, 'stategy': 1, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3272646103896104, 'r': 0.3415475060992139, 'f1': 0.3342535482159438}, 'combined': 0.24629208815911646, 'stategy': 1, 'epoch': 1} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3742603006123995, 'r': 0.2994735564237089, 'f1': 0.33271613733550204}, 'combined': 0.23622845750820642, 'stategy': 1, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33578520512074483, 'r': 0.3128473163458932, 'f1': 0.3239106792029189}, 'combined': 0.2386710267810981, 'stategy': 1, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3802860954728274, 'r': 0.3198916195774918, 'f1': 0.34748416685858347}, 'combined': 0.24446122291558636, 'stategy': 1, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3164259395725866, 'r': 0.3452465185089892, 'f1': 0.3302085576301947}, 'combined': 0.24331156878014343, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3807202674530041, 'r': 0.3315950716526164, 'f1': 0.3544636972838313}, 'combined': 0.25166922507152023, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3272646103896104, 'r': 0.3415475060992139, 'f1': 0.3342535482159438}, 'combined': 0.24629208815911646, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3742603006123995, 'r': 0.2994735564237089, 'f1': 0.33271613733550204}, 'combined': 0.23622845750820642, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 19:51:56.486339: step: 4/529, loss: 0.016769899055361748 2023-01-22 19:51:57.519100: step: 8/529, loss: 0.004776649177074432 2023-01-22 19:51:58.592719: step: 12/529, loss: 0.005138264037668705 2023-01-22 19:51:59.628763: step: 16/529, loss: 0.003971568308770657 2023-01-22 19:52:00.695387: step: 20/529, loss: 0.03057672828435898 2023-01-22 19:52:01.740462: step: 24/529, loss: 0.017046503722667694 2023-01-22 19:52:02.772787: step: 28/529, loss: 0.014190775342285633 2023-01-22 19:52:03.824508: step: 32/529, loss: 0.006420804653316736 2023-01-22 19:52:04.866690: step: 36/529, loss: 0.012994932942092419 2023-01-22 19:52:05.926127: step: 40/529, loss: 0.015033001080155373 2023-01-22 19:52:06.974009: step: 44/529, loss: 0.006633009761571884 2023-01-22 19:52:08.017871: step: 48/529, loss: 0.0019002615008503199 2023-01-22 19:52:09.082563: step: 52/529, loss: 0.006438171956688166 2023-01-22 19:52:10.153418: step: 56/529, loss: 0.02033413201570511 2023-01-22 19:52:11.202634: step: 60/529, loss: 0.009090491570532322 2023-01-22 19:52:12.272004: step: 64/529, loss: 0.03538947552442551 2023-01-22 19:52:13.356649: step: 68/529, loss: 0.041352227330207825 2023-01-22 19:52:14.403898: step: 72/529, loss: 0.003349186619743705 2023-01-22 19:52:15.449689: step: 76/529, loss: 0.0077846418134868145 2023-01-22 19:52:16.495215: step: 80/529, loss: 0.0008108459878712893 2023-01-22 19:52:17.549856: step: 84/529, loss: 0.005242703948169947 2023-01-22 19:52:18.613518: step: 88/529, loss: 0.027445776388049126 2023-01-22 19:52:19.688242: step: 92/529, loss: 0.012781407684087753 2023-01-22 19:52:20.728660: step: 96/529, loss: 0.005942841526120901 2023-01-22 19:52:21.795712: step: 100/529, loss: 0.0059811207465827465 2023-01-22 19:52:22.827310: step: 104/529, loss: 0.009064748883247375 2023-01-22 19:52:23.870040: step: 108/529, loss: 0.0010065316455438733 2023-01-22 19:52:24.922119: step: 112/529, loss: 0.04195992276072502 2023-01-22 19:52:25.989160: step: 116/529, loss: 0.004021812696009874 2023-01-22 19:52:27.032350: step: 120/529, loss: 0.019543664529919624 2023-01-22 19:52:28.093492: step: 124/529, loss: 0.0022094366140663624 2023-01-22 19:52:29.150565: step: 128/529, loss: 0.001434655045159161 2023-01-22 19:52:30.183535: step: 132/529, loss: 0.009645376354455948 2023-01-22 19:52:31.240643: step: 136/529, loss: 0.022103631868958473 2023-01-22 19:52:32.293496: step: 140/529, loss: 0.014725483022630215 2023-01-22 19:52:33.340608: step: 144/529, loss: 0.005404842551797628 2023-01-22 19:52:34.384403: step: 148/529, loss: 0.0 2023-01-22 19:52:35.430671: step: 152/529, loss: 0.0026340028271079063 2023-01-22 19:52:36.487752: step: 156/529, loss: 0.004697186406701803 2023-01-22 19:52:37.549964: step: 160/529, loss: 0.01897864043712616 2023-01-22 19:52:38.587173: step: 164/529, loss: 0.0031704457942396402 2023-01-22 19:52:39.638662: step: 168/529, loss: 0.014682571403682232 2023-01-22 19:52:40.686891: step: 172/529, loss: 0.008217915892601013 2023-01-22 19:52:41.733111: step: 176/529, loss: 0.0386650450527668 2023-01-22 19:52:42.793239: step: 180/529, loss: 0.011005488224327564 2023-01-22 19:52:43.854296: step: 184/529, loss: 0.006656793411821127 2023-01-22 19:52:44.900735: step: 188/529, loss: 0.007286156993359327 2023-01-22 19:52:45.953294: step: 192/529, loss: 0.00744550209492445 2023-01-22 19:52:47.020192: step: 196/529, loss: 0.01166602037847042 2023-01-22 19:52:48.062136: step: 200/529, loss: 0.000550617347471416 2023-01-22 19:52:49.118408: step: 204/529, loss: 0.011234098114073277 2023-01-22 19:52:50.208557: step: 208/529, loss: 0.006204743403941393 2023-01-22 19:52:51.260575: step: 212/529, loss: 0.007148205768316984 2023-01-22 19:52:52.326947: step: 216/529, loss: 0.012816202826797962 2023-01-22 19:52:53.388468: step: 220/529, loss: 0.002769813407212496 2023-01-22 19:52:54.437616: step: 224/529, loss: 0.013259481638669968 2023-01-22 19:52:55.506157: step: 228/529, loss: 0.008491086773574352 2023-01-22 19:52:56.561036: step: 232/529, loss: 0.009515050798654556 2023-01-22 19:52:57.604186: step: 236/529, loss: 0.005249901209026575 2023-01-22 19:52:58.650919: step: 240/529, loss: 0.008257444947957993 2023-01-22 19:52:59.694632: step: 244/529, loss: 0.030426081269979477 2023-01-22 19:53:00.751947: step: 248/529, loss: 0.020016733556985855 2023-01-22 19:53:01.810699: step: 252/529, loss: 0.0119384890422225 2023-01-22 19:53:02.856406: step: 256/529, loss: 0.016481148079037666 2023-01-22 19:53:03.919129: step: 260/529, loss: 0.001971587771549821 2023-01-22 19:53:04.980268: step: 264/529, loss: 0.006094732321798801 2023-01-22 19:53:06.023114: step: 268/529, loss: 0.00820962619036436 2023-01-22 19:53:07.105888: step: 272/529, loss: 0.005577035713940859 2023-01-22 19:53:08.157662: step: 276/529, loss: 0.005854811519384384 2023-01-22 19:53:09.205320: step: 280/529, loss: 0.006015812512487173 2023-01-22 19:53:10.253524: step: 284/529, loss: 0.0055336481891572475 2023-01-22 19:53:11.311895: step: 288/529, loss: 0.010216631926596165 2023-01-22 19:53:12.362780: step: 292/529, loss: 0.0022466795053333044 2023-01-22 19:53:13.410915: step: 296/529, loss: 0.03526534512639046 2023-01-22 19:53:14.483074: step: 300/529, loss: 0.011181001551449299 2023-01-22 19:53:15.529296: step: 304/529, loss: 0.010571149177849293 2023-01-22 19:53:16.596462: step: 308/529, loss: 0.00047349909436888993 2023-01-22 19:53:17.649655: step: 312/529, loss: 0.04009775444865227 2023-01-22 19:53:18.705225: step: 316/529, loss: 0.035851314663887024 2023-01-22 19:53:19.778917: step: 320/529, loss: 0.003651312319561839 2023-01-22 19:53:20.841693: step: 324/529, loss: 0.004341717343777418 2023-01-22 19:53:21.885264: step: 328/529, loss: 0.02432491071522236 2023-01-22 19:53:22.936662: step: 332/529, loss: 0.016215045005083084 2023-01-22 19:53:23.983333: step: 336/529, loss: 0.01603284664452076 2023-01-22 19:53:25.056801: step: 340/529, loss: 0.006143355276435614 2023-01-22 19:53:26.112935: step: 344/529, loss: 0.004680093843489885 2023-01-22 19:53:27.164201: step: 348/529, loss: 0.04227147996425629 2023-01-22 19:53:28.220721: step: 352/529, loss: 0.010936867445707321 2023-01-22 19:53:29.281745: step: 356/529, loss: 0.030647749081254005 2023-01-22 19:53:30.342381: step: 360/529, loss: 0.058322589844465256 2023-01-22 19:53:31.388106: step: 364/529, loss: 0.00575517863035202 2023-01-22 19:53:32.446135: step: 368/529, loss: 0.005529760383069515 2023-01-22 19:53:33.511428: step: 372/529, loss: 0.021522492170333862 2023-01-22 19:53:34.586842: step: 376/529, loss: 0.028332088142633438 2023-01-22 19:53:35.639355: step: 380/529, loss: 0.017103740945458412 2023-01-22 19:53:36.703789: step: 384/529, loss: 0.004928337410092354 2023-01-22 19:53:37.756237: step: 388/529, loss: 0.0012269220314919949 2023-01-22 19:53:38.821845: step: 392/529, loss: 0.00917463842779398 2023-01-22 19:53:39.878106: step: 396/529, loss: 0.030700793489813805 2023-01-22 19:53:40.937030: step: 400/529, loss: 0.003996746614575386 2023-01-22 19:53:41.983909: step: 404/529, loss: 0.005860441364347935 2023-01-22 19:53:43.046925: step: 408/529, loss: 0.0008575360989198089 2023-01-22 19:53:44.111592: step: 412/529, loss: 0.032596684992313385 2023-01-22 19:53:45.168530: step: 416/529, loss: 0.003979000262916088 2023-01-22 19:53:46.218871: step: 420/529, loss: 0.01501777395606041 2023-01-22 19:53:47.263088: step: 424/529, loss: 0.012400008738040924 2023-01-22 19:53:48.319334: step: 428/529, loss: 0.012709521688520908 2023-01-22 19:53:49.367187: step: 432/529, loss: 0.004359858110547066 2023-01-22 19:53:50.434962: step: 436/529, loss: 0.020600806921720505 2023-01-22 19:53:51.481632: step: 440/529, loss: 0.0013151929015293717 2023-01-22 19:53:52.534589: step: 444/529, loss: 0.03075871430337429 2023-01-22 19:53:53.603210: step: 448/529, loss: 0.011884140782058239 2023-01-22 19:53:54.664898: step: 452/529, loss: 0.014811577275395393 2023-01-22 19:53:55.719693: step: 456/529, loss: 0.0040180878713727 2023-01-22 19:53:56.768744: step: 460/529, loss: 0.003297898219898343 2023-01-22 19:53:57.835653: step: 464/529, loss: 0.0032817753963172436 2023-01-22 19:53:58.886572: step: 468/529, loss: 0.0627603828907013 2023-01-22 19:53:59.953855: step: 472/529, loss: 0.013188144192099571 2023-01-22 19:54:01.004049: step: 476/529, loss: 0.014035405591130257 2023-01-22 19:54:02.061637: step: 480/529, loss: 0.009819992817938328 2023-01-22 19:54:03.124500: step: 484/529, loss: 0.010854062624275684 2023-01-22 19:54:04.174250: step: 488/529, loss: 0.03837253153324127 2023-01-22 19:54:05.224846: step: 492/529, loss: 0.010389309376478195 2023-01-22 19:54:06.274993: step: 496/529, loss: 0.01501044537872076 2023-01-22 19:54:07.320596: step: 500/529, loss: 0.004261473193764687 2023-01-22 19:54:08.367122: step: 504/529, loss: 0.011986332014203072 2023-01-22 19:54:09.429553: step: 508/529, loss: 0.029767416417598724 2023-01-22 19:54:10.481021: step: 512/529, loss: 0.006752622313797474 2023-01-22 19:54:11.538973: step: 516/529, loss: 0.03037738986313343 2023-01-22 19:54:12.602153: step: 520/529, loss: 0.008480550721287727 2023-01-22 19:54:13.657050: step: 524/529, loss: 0.0014338225591927767 2023-01-22 19:54:14.702847: step: 528/529, loss: 0.009833659045398235 2023-01-22 19:54:15.757651: step: 532/529, loss: 0.017395272850990295 2023-01-22 19:54:16.807680: step: 536/529, loss: 0.0011940039694309235 2023-01-22 19:54:17.856793: step: 540/529, loss: 0.024256566539406776 2023-01-22 19:54:18.920181: step: 544/529, loss: 0.005568946711719036 2023-01-22 19:54:19.962303: step: 548/529, loss: 0.056146688759326935 2023-01-22 19:54:21.018689: step: 552/529, loss: 0.027910880744457245 2023-01-22 19:54:22.076504: step: 556/529, loss: 0.0055074989795684814 2023-01-22 19:54:23.138184: step: 560/529, loss: 0.050471581518650055 2023-01-22 19:54:24.180607: step: 564/529, loss: 0.022269802168011665 2023-01-22 19:54:25.240792: step: 568/529, loss: 0.010979728773236275 2023-01-22 19:54:26.297102: step: 572/529, loss: 0.012627990916371346 2023-01-22 19:54:27.350497: step: 576/529, loss: 0.005599132739007473 2023-01-22 19:54:28.416843: step: 580/529, loss: 0.037314314395189285 2023-01-22 19:54:29.488620: step: 584/529, loss: 0.00323654362000525 2023-01-22 19:54:30.550743: step: 588/529, loss: 0.028864700347185135 2023-01-22 19:54:31.598986: step: 592/529, loss: 0.0031013910192996264 2023-01-22 19:54:32.653173: step: 596/529, loss: 0.0014465071726590395 2023-01-22 19:54:33.703721: step: 600/529, loss: 0.005186670459806919 2023-01-22 19:54:34.753453: step: 604/529, loss: 0.008730238303542137 2023-01-22 19:54:35.815349: step: 608/529, loss: 0.013101747259497643 2023-01-22 19:54:36.881767: step: 612/529, loss: 0.007453371305018663 2023-01-22 19:54:37.929391: step: 616/529, loss: 0.02398344874382019 2023-01-22 19:54:38.999075: step: 620/529, loss: 0.04100363329052925 2023-01-22 19:54:40.049333: step: 624/529, loss: 0.003994930535554886 2023-01-22 19:54:41.120217: step: 628/529, loss: 0.009700490161776543 2023-01-22 19:54:42.181881: step: 632/529, loss: 0.004932609852403402 2023-01-22 19:54:43.213617: step: 636/529, loss: 0.006336611695587635 2023-01-22 19:54:44.275127: step: 640/529, loss: 0.004764177370816469 2023-01-22 19:54:45.320519: step: 644/529, loss: 0.04181733354926109 2023-01-22 19:54:46.395309: step: 648/529, loss: 0.004476585425436497 2023-01-22 19:54:47.447002: step: 652/529, loss: 0.007823966443538666 2023-01-22 19:54:48.505481: step: 656/529, loss: 0.022270355373620987 2023-01-22 19:54:49.545458: step: 660/529, loss: 0.004363867919892073 2023-01-22 19:54:50.594108: step: 664/529, loss: 0.003580256598070264 2023-01-22 19:54:51.655906: step: 668/529, loss: 0.004970157518982887 2023-01-22 19:54:52.725218: step: 672/529, loss: 0.01594248041510582 2023-01-22 19:54:53.792135: step: 676/529, loss: 0.03383409604430199 2023-01-22 19:54:54.858987: step: 680/529, loss: 0.011703518219292164 2023-01-22 19:54:55.916979: step: 684/529, loss: 0.01246214471757412 2023-01-22 19:54:56.970690: step: 688/529, loss: 0.035678885877132416 2023-01-22 19:54:58.025099: step: 692/529, loss: 0.008345757611095905 2023-01-22 19:54:59.070897: step: 696/529, loss: 0.010660410858690739 2023-01-22 19:55:00.130573: step: 700/529, loss: 0.02864742837846279 2023-01-22 19:55:01.186217: step: 704/529, loss: 0.0632297545671463 2023-01-22 19:55:02.247778: step: 708/529, loss: 0.0077431160025298595 2023-01-22 19:55:03.295480: step: 712/529, loss: 0.008357308804988861 2023-01-22 19:55:04.355175: step: 716/529, loss: 0.007670058868825436 2023-01-22 19:55:05.407916: step: 720/529, loss: 0.00536184199154377 2023-01-22 19:55:06.455103: step: 724/529, loss: 0.004081163089722395 2023-01-22 19:55:07.507030: step: 728/529, loss: 0.032472047954797745 2023-01-22 19:55:08.557111: step: 732/529, loss: 0.00711841881275177 2023-01-22 19:55:09.608173: step: 736/529, loss: 0.012580592185258865 2023-01-22 19:55:10.668597: step: 740/529, loss: 0.0074384198524057865 2023-01-22 19:55:11.730725: step: 744/529, loss: 0.0 2023-01-22 19:55:12.788812: step: 748/529, loss: 0.01796574890613556 2023-01-22 19:55:13.865481: step: 752/529, loss: 0.006997659802436829 2023-01-22 19:55:14.922819: step: 756/529, loss: 0.008173210546374321 2023-01-22 19:55:15.979655: step: 760/529, loss: 0.00728617375716567 2023-01-22 19:55:17.042636: step: 764/529, loss: 0.05500132590532303 2023-01-22 19:55:18.081825: step: 768/529, loss: 0.007074093446135521 2023-01-22 19:55:19.131298: step: 772/529, loss: 0.004343635402619839 2023-01-22 19:55:20.203085: step: 776/529, loss: 0.009615425951778889 2023-01-22 19:55:21.258824: step: 780/529, loss: 0.004212587606161833 2023-01-22 19:55:22.309101: step: 784/529, loss: 0.010027064010500908 2023-01-22 19:55:23.383895: step: 788/529, loss: 0.027680957689881325 2023-01-22 19:55:24.448336: step: 792/529, loss: 0.016969498246908188 2023-01-22 19:55:25.506709: step: 796/529, loss: 0.0042504798620939255 2023-01-22 19:55:26.558512: step: 800/529, loss: 0.00416368106380105 2023-01-22 19:55:27.621886: step: 804/529, loss: 0.010829695500433445 2023-01-22 19:55:28.680506: step: 808/529, loss: 0.008041387423872948 2023-01-22 19:55:29.725670: step: 812/529, loss: 0.015539851039648056 2023-01-22 19:55:30.767422: step: 816/529, loss: 0.005472727119922638 2023-01-22 19:55:31.828313: step: 820/529, loss: 0.0138620063662529 2023-01-22 19:55:32.872484: step: 824/529, loss: 0.013219275511801243 2023-01-22 19:55:33.929326: step: 828/529, loss: 0.005984208546578884 2023-01-22 19:55:34.974337: step: 832/529, loss: 0.019417408853769302 2023-01-22 19:55:36.044824: step: 836/529, loss: 0.00668446347117424 2023-01-22 19:55:37.103831: step: 840/529, loss: 0.0042038229294121265 2023-01-22 19:55:38.142919: step: 844/529, loss: 0.010490312241017818 2023-01-22 19:55:39.182588: step: 848/529, loss: 0.0029428787529468536 2023-01-22 19:55:40.235967: step: 852/529, loss: 0.008916784077882767 2023-01-22 19:55:41.285580: step: 856/529, loss: 0.015029284171760082 2023-01-22 19:55:42.336572: step: 860/529, loss: 0.010284416377544403 2023-01-22 19:55:43.394018: step: 864/529, loss: 0.010174809023737907 2023-01-22 19:55:44.462361: step: 868/529, loss: 0.0019343511667102575 2023-01-22 19:55:45.492067: step: 872/529, loss: 0.013984021730720997 2023-01-22 19:55:46.529657: step: 876/529, loss: 0.015247956849634647 2023-01-22 19:55:47.592784: step: 880/529, loss: 0.009768275544047356 2023-01-22 19:55:48.645070: step: 884/529, loss: 0.010797047056257725 2023-01-22 19:55:49.703686: step: 888/529, loss: 0.008494413457810879 2023-01-22 19:55:50.763344: step: 892/529, loss: 0.02661999873816967 2023-01-22 19:55:51.826180: step: 896/529, loss: 0.019694015383720398 2023-01-22 19:55:52.886218: step: 900/529, loss: 0.005477281752973795 2023-01-22 19:55:53.937615: step: 904/529, loss: 0.006969843525439501 2023-01-22 19:55:54.992477: step: 908/529, loss: 0.0034596214536577463 2023-01-22 19:55:56.044625: step: 912/529, loss: 0.11315653473138809 2023-01-22 19:55:57.089646: step: 916/529, loss: 0.001395003986544907 2023-01-22 19:55:58.143057: step: 920/529, loss: 0.013216778635978699 2023-01-22 19:55:59.185728: step: 924/529, loss: 0.0031609954312443733 2023-01-22 19:56:00.246660: step: 928/529, loss: 0.011521650478243828 2023-01-22 19:56:01.311021: step: 932/529, loss: 0.007828129455447197 2023-01-22 19:56:02.358454: step: 936/529, loss: 0.011576209217309952 2023-01-22 19:56:03.416839: step: 940/529, loss: 0.00382436765357852 2023-01-22 19:56:04.460470: step: 944/529, loss: 0.008218149654567242 2023-01-22 19:56:05.504973: step: 948/529, loss: 0.022023677825927734 2023-01-22 19:56:06.565246: step: 952/529, loss: 0.039256900548934937 2023-01-22 19:56:07.612053: step: 956/529, loss: 0.007159958593547344 2023-01-22 19:56:08.650890: step: 960/529, loss: 0.021554918959736824 2023-01-22 19:56:09.698231: step: 964/529, loss: 0.032055631279945374 2023-01-22 19:56:10.739711: step: 968/529, loss: 0.00902123935520649 2023-01-22 19:56:11.789523: step: 972/529, loss: 0.005437079817056656 2023-01-22 19:56:12.851262: step: 976/529, loss: 0.05399130657315254 2023-01-22 19:56:13.900414: step: 980/529, loss: 0.004657253157347441 2023-01-22 19:56:14.968004: step: 984/529, loss: 0.009438995271921158 2023-01-22 19:56:16.018697: step: 988/529, loss: 0.011192361824214458 2023-01-22 19:56:17.061681: step: 992/529, loss: 0.0027497827541083097 2023-01-22 19:56:18.101222: step: 996/529, loss: 0.0037775139790028334 2023-01-22 19:56:19.152983: step: 1000/529, loss: 0.0052798413671553135 2023-01-22 19:56:20.200223: step: 1004/529, loss: 0.0042763869278132915 2023-01-22 19:56:21.244736: step: 1008/529, loss: 0.06839495152235031 2023-01-22 19:56:22.290271: step: 1012/529, loss: 0.011692789383232594 2023-01-22 19:56:23.328909: step: 1016/529, loss: 0.009035824798047543 2023-01-22 19:56:24.382860: step: 1020/529, loss: 0.012222695164382458 2023-01-22 19:56:25.433079: step: 1024/529, loss: 0.007833332754671574 2023-01-22 19:56:26.484476: step: 1028/529, loss: 0.014396791346371174 2023-01-22 19:56:27.524200: step: 1032/529, loss: 0.002740769414231181 2023-01-22 19:56:28.576891: step: 1036/529, loss: 0.025663260370492935 2023-01-22 19:56:29.625729: step: 1040/529, loss: 0.009062030352652073 2023-01-22 19:56:30.696730: step: 1044/529, loss: 0.00875516515225172 2023-01-22 19:56:31.762103: step: 1048/529, loss: 0.0007383400807157159 2023-01-22 19:56:32.804868: step: 1052/529, loss: 0.01957930438220501 2023-01-22 19:56:33.870724: step: 1056/529, loss: 0.009622927755117416 2023-01-22 19:56:34.917160: step: 1060/529, loss: 0.07894708216190338 2023-01-22 19:56:35.985252: step: 1064/529, loss: 0.0038776430301368237 2023-01-22 19:56:37.055627: step: 1068/529, loss: 0.005884992890059948 2023-01-22 19:56:38.096031: step: 1072/529, loss: 0.009725396521389484 2023-01-22 19:56:39.152217: step: 1076/529, loss: 0.01828581653535366 2023-01-22 19:56:40.192516: step: 1080/529, loss: 0.0029913042671978474 2023-01-22 19:56:41.242490: step: 1084/529, loss: 0.006572544574737549 2023-01-22 19:56:42.288162: step: 1088/529, loss: 0.00037372627411969006 2023-01-22 19:56:43.344149: step: 1092/529, loss: 0.012744470499455929 2023-01-22 19:56:44.388639: step: 1096/529, loss: 0.006190674379467964 2023-01-22 19:56:45.448348: step: 1100/529, loss: 0.005227075889706612 2023-01-22 19:56:46.500973: step: 1104/529, loss: 0.018090782687067986 2023-01-22 19:56:47.541200: step: 1108/529, loss: 0.016637571156024933 2023-01-22 19:56:48.588979: step: 1112/529, loss: 0.008095446974039078 2023-01-22 19:56:49.632142: step: 1116/529, loss: 0.014164726249873638 2023-01-22 19:56:50.692744: step: 1120/529, loss: 0.003821906168013811 2023-01-22 19:56:51.744692: step: 1124/529, loss: 0.004693558905273676 2023-01-22 19:56:52.795127: step: 1128/529, loss: 0.005518096964806318 2023-01-22 19:56:53.839990: step: 1132/529, loss: 0.008891682140529156 2023-01-22 19:56:54.877113: step: 1136/529, loss: 0.010577702894806862 2023-01-22 19:56:55.937401: step: 1140/529, loss: 0.018562953919172287 2023-01-22 19:56:56.987240: step: 1144/529, loss: 0.020861614495515823 2023-01-22 19:56:58.035817: step: 1148/529, loss: 0.0013329458888620138 2023-01-22 19:56:59.084409: step: 1152/529, loss: 0.010229039005935192 2023-01-22 19:57:00.131174: step: 1156/529, loss: 0.014662380330264568 2023-01-22 19:57:01.192387: step: 1160/529, loss: 0.007699093781411648 2023-01-22 19:57:02.235592: step: 1164/529, loss: 0.0010371600510552526 2023-01-22 19:57:03.297232: step: 1168/529, loss: 0.004747601691633463 2023-01-22 19:57:04.350521: step: 1172/529, loss: 0.009238509461283684 2023-01-22 19:57:05.389113: step: 1176/529, loss: 0.035539064556360245 2023-01-22 19:57:06.437733: step: 1180/529, loss: 0.010874062776565552 2023-01-22 19:57:07.486679: step: 1184/529, loss: 0.012060358189046383 2023-01-22 19:57:08.535851: step: 1188/529, loss: 0.0037941443733870983 2023-01-22 19:57:09.587383: step: 1192/529, loss: 0.004162719007581472 2023-01-22 19:57:10.639879: step: 1196/529, loss: 0.011726537719368935 2023-01-22 19:57:11.680551: step: 1200/529, loss: 0.0064646461978554726 2023-01-22 19:57:12.744834: step: 1204/529, loss: 0.00655716098845005 2023-01-22 19:57:13.791022: step: 1208/529, loss: 0.0048755561001598835 2023-01-22 19:57:14.826460: step: 1212/529, loss: 0.006016227882355452 2023-01-22 19:57:15.870975: step: 1216/529, loss: 0.00848670955747366 2023-01-22 19:57:16.914957: step: 1220/529, loss: 0.01083903294056654 2023-01-22 19:57:17.958601: step: 1224/529, loss: 0.015806326642632484 2023-01-22 19:57:19.021077: step: 1228/529, loss: 0.007597902789711952 2023-01-22 19:57:20.061777: step: 1232/529, loss: 0.0015265881083905697 2023-01-22 19:57:21.109819: step: 1236/529, loss: 0.008632718585431576 2023-01-22 19:57:22.158342: step: 1240/529, loss: 0.03639453276991844 2023-01-22 19:57:23.229280: step: 1244/529, loss: 0.014228655956685543 2023-01-22 19:57:24.278971: step: 1248/529, loss: 0.02356448397040367 2023-01-22 19:57:25.331855: step: 1252/529, loss: 0.0028892129193991423 2023-01-22 19:57:26.376631: step: 1256/529, loss: 0.020002085715532303 2023-01-22 19:57:27.418946: step: 1260/529, loss: 0.006404080428183079 2023-01-22 19:57:28.466633: step: 1264/529, loss: 0.03517806529998779 2023-01-22 19:57:29.515116: step: 1268/529, loss: 0.015657609328627586 2023-01-22 19:57:30.568098: step: 1272/529, loss: 0.012968228198587894 2023-01-22 19:57:31.614428: step: 1276/529, loss: 0.007121562957763672 2023-01-22 19:57:32.669097: step: 1280/529, loss: 0.031889598816633224 2023-01-22 19:57:33.719360: step: 1284/529, loss: 0.005741504952311516 2023-01-22 19:57:34.791591: step: 1288/529, loss: 0.024667486548423767 2023-01-22 19:57:35.853607: step: 1292/529, loss: 0.050742294639348984 2023-01-22 19:57:36.910892: step: 1296/529, loss: 0.008235281333327293 2023-01-22 19:57:37.952440: step: 1300/529, loss: 0.00891843717545271 2023-01-22 19:57:39.005226: step: 1304/529, loss: 0.00452026491984725 2023-01-22 19:57:40.067191: step: 1308/529, loss: 0.004861308261752129 2023-01-22 19:57:41.125357: step: 1312/529, loss: 0.01076226681470871 2023-01-22 19:57:42.210198: step: 1316/529, loss: 0.022111037746071815 2023-01-22 19:57:43.264542: step: 1320/529, loss: 0.019388973712921143 2023-01-22 19:57:44.316928: step: 1324/529, loss: 0.005586306098848581 2023-01-22 19:57:45.371068: step: 1328/529, loss: 0.006610429380089045 2023-01-22 19:57:46.417156: step: 1332/529, loss: 0.007396426983177662 2023-01-22 19:57:47.463381: step: 1336/529, loss: 0.003939315211027861 2023-01-22 19:57:48.528148: step: 1340/529, loss: 0.005185093265026808 2023-01-22 19:57:49.587072: step: 1344/529, loss: 0.010334577411413193 2023-01-22 19:57:50.637510: step: 1348/529, loss: 0.00763534102588892 2023-01-22 19:57:51.701083: step: 1352/529, loss: 0.001809293869882822 2023-01-22 19:57:52.739641: step: 1356/529, loss: 0.009515292942523956 2023-01-22 19:57:53.785031: step: 1360/529, loss: 0.0022127137053757906 2023-01-22 19:57:54.834998: step: 1364/529, loss: 0.005330778658390045 2023-01-22 19:57:55.876212: step: 1368/529, loss: 0.013178571127355099 2023-01-22 19:57:56.933513: step: 1372/529, loss: 0.008235168643295765 2023-01-22 19:57:58.006197: step: 1376/529, loss: 0.0032991915941238403 2023-01-22 19:57:59.052106: step: 1380/529, loss: 0.039646901190280914 2023-01-22 19:58:00.125440: step: 1384/529, loss: 0.09910629689693451 2023-01-22 19:58:01.180467: step: 1388/529, loss: 0.006392909213900566 2023-01-22 19:58:02.228350: step: 1392/529, loss: 0.006639172323048115 2023-01-22 19:58:03.282821: step: 1396/529, loss: 0.004099604208022356 2023-01-22 19:58:04.330968: step: 1400/529, loss: 0.04006190970540047 2023-01-22 19:58:05.386898: step: 1404/529, loss: 0.026349922642111778 2023-01-22 19:58:06.426782: step: 1408/529, loss: 0.004725749138742685 2023-01-22 19:58:07.464177: step: 1412/529, loss: 0.02060350403189659 2023-01-22 19:58:08.520085: step: 1416/529, loss: 0.005630532745271921 2023-01-22 19:58:09.582909: step: 1420/529, loss: 0.005727951880544424 2023-01-22 19:58:10.614001: step: 1424/529, loss: 0.006338499952107668 2023-01-22 19:58:11.678685: step: 1428/529, loss: 0.007026389241218567 2023-01-22 19:58:12.736928: step: 1432/529, loss: 0.006613961886614561 2023-01-22 19:58:13.773265: step: 1436/529, loss: 0.007747239898890257 2023-01-22 19:58:14.820534: step: 1440/529, loss: 0.003882375545799732 2023-01-22 19:58:15.872074: step: 1444/529, loss: 0.006315236911177635 2023-01-22 19:58:16.928094: step: 1448/529, loss: 0.01657625287771225 2023-01-22 19:58:17.990279: step: 1452/529, loss: 0.10470747202634811 2023-01-22 19:58:19.037436: step: 1456/529, loss: 0.024988019838929176 2023-01-22 19:58:20.085133: step: 1460/529, loss: 0.00462451484054327 2023-01-22 19:58:21.127833: step: 1464/529, loss: 0.0057892827317118645 2023-01-22 19:58:22.191760: step: 1468/529, loss: 0.0042790574952960014 2023-01-22 19:58:23.252829: step: 1472/529, loss: 0.0398414246737957 2023-01-22 19:58:24.317503: step: 1476/529, loss: 0.004792596213519573 2023-01-22 19:58:25.366430: step: 1480/529, loss: 0.008492857217788696 2023-01-22 19:58:26.419298: step: 1484/529, loss: 0.009868143126368523 2023-01-22 19:58:27.482533: step: 1488/529, loss: 0.003622837597504258 2023-01-22 19:58:28.513570: step: 1492/529, loss: 0.0035666325129568577 2023-01-22 19:58:29.579228: step: 1496/529, loss: 0.003052003914490342 2023-01-22 19:58:30.643725: step: 1500/529, loss: 0.0025895137805491686 2023-01-22 19:58:31.681125: step: 1504/529, loss: 0.004610867239534855 2023-01-22 19:58:32.722799: step: 1508/529, loss: 0.0033706333488225937 2023-01-22 19:58:33.772421: step: 1512/529, loss: 0.0006292694015428424 2023-01-22 19:58:34.823221: step: 1516/529, loss: 0.0029224965255707502 2023-01-22 19:58:35.859727: step: 1520/529, loss: 0.005507317371666431 2023-01-22 19:58:36.910784: step: 1524/529, loss: 0.013604824431240559 2023-01-22 19:58:37.957953: step: 1528/529, loss: 0.009229722432792187 2023-01-22 19:58:39.018055: step: 1532/529, loss: 0.002615570090711117 2023-01-22 19:58:40.059208: step: 1536/529, loss: 0.03387366980314255 2023-01-22 19:58:41.118282: step: 1540/529, loss: 0.03629336133599281 2023-01-22 19:58:42.178293: step: 1544/529, loss: 0.010828995145857334 2023-01-22 19:58:43.234770: step: 1548/529, loss: 0.04535053297877312 2023-01-22 19:58:44.291031: step: 1552/529, loss: 0.004056266508996487 2023-01-22 19:58:45.339865: step: 1556/529, loss: 0.01961745321750641 2023-01-22 19:58:46.401968: step: 1560/529, loss: 0.0023532933555543423 2023-01-22 19:58:47.473037: step: 1564/529, loss: 0.005861486308276653 2023-01-22 19:58:48.527158: step: 1568/529, loss: 0.005907162092626095 2023-01-22 19:58:49.576276: step: 1572/529, loss: 0.038817085325717926 2023-01-22 19:58:50.638278: step: 1576/529, loss: 0.004155512899160385 2023-01-22 19:58:51.685338: step: 1580/529, loss: 0.019145265221595764 2023-01-22 19:58:52.746556: step: 1584/529, loss: 0.01050573494285345 2023-01-22 19:58:53.792952: step: 1588/529, loss: 0.030349288135766983 2023-01-22 19:58:54.849448: step: 1592/529, loss: 0.010635003447532654 2023-01-22 19:58:55.894276: step: 1596/529, loss: 0.0036935261450707912 2023-01-22 19:58:56.916479: step: 1600/529, loss: 0.003251268295571208 2023-01-22 19:58:57.951648: step: 1604/529, loss: 0.005543887615203857 2023-01-22 19:58:59.015053: step: 1608/529, loss: 0.006734075490385294 2023-01-22 19:59:00.071954: step: 1612/529, loss: 0.0661957636475563 2023-01-22 19:59:01.115007: step: 1616/529, loss: 0.028330544009804726 2023-01-22 19:59:02.159506: step: 1620/529, loss: 0.009527577087283134 2023-01-22 19:59:03.191040: step: 1624/529, loss: 0.005371913313865662 2023-01-22 19:59:04.245849: step: 1628/529, loss: 0.015321719460189342 2023-01-22 19:59:05.298549: step: 1632/529, loss: 0.02746753580868244 2023-01-22 19:59:06.338781: step: 1636/529, loss: 0.006988075096160173 2023-01-22 19:59:07.399213: step: 1640/529, loss: 0.07256995886564255 2023-01-22 19:59:08.437837: step: 1644/529, loss: 0.007382863201200962 2023-01-22 19:59:09.481137: step: 1648/529, loss: 0.008030509576201439 2023-01-22 19:59:10.529344: step: 1652/529, loss: 0.013200175948441029 2023-01-22 19:59:11.584917: step: 1656/529, loss: 0.0049641928635537624 2023-01-22 19:59:12.637048: step: 1660/529, loss: 0.014688693918287754 2023-01-22 19:59:13.672190: step: 1664/529, loss: 0.005491009913384914 2023-01-22 19:59:14.716278: step: 1668/529, loss: 0.008575434796512127 2023-01-22 19:59:15.763145: step: 1672/529, loss: 0.0077672600746154785 2023-01-22 19:59:16.822174: step: 1676/529, loss: 0.007272324524819851 2023-01-22 19:59:17.871049: step: 1680/529, loss: 0.003114038612693548 2023-01-22 19:59:18.925848: step: 1684/529, loss: 0.036389756947755814 2023-01-22 19:59:19.990613: step: 1688/529, loss: 0.00475142989307642 2023-01-22 19:59:21.033289: step: 1692/529, loss: 0.0025000954046845436 2023-01-22 19:59:22.073202: step: 1696/529, loss: 0.004743554629385471 2023-01-22 19:59:23.130313: step: 1700/529, loss: 0.0030509463977068663 2023-01-22 19:59:24.182607: step: 1704/529, loss: 0.033920906484127045 2023-01-22 19:59:25.253284: step: 1708/529, loss: 0.012621713802218437 2023-01-22 19:59:26.331129: step: 1712/529, loss: 0.007365634199231863 2023-01-22 19:59:27.375185: step: 1716/529, loss: 0.008016137406229973 2023-01-22 19:59:28.433381: step: 1720/529, loss: 0.0019524346571415663 2023-01-22 19:59:29.473330: step: 1724/529, loss: 0.040085408836603165 2023-01-22 19:59:30.534022: step: 1728/529, loss: 0.007748884614557028 2023-01-22 19:59:31.573782: step: 1732/529, loss: 0.012218799442052841 2023-01-22 19:59:32.606761: step: 1736/529, loss: 0.00045600938028655946 2023-01-22 19:59:33.668799: step: 1740/529, loss: 0.008562753908336163 2023-01-22 19:59:34.716904: step: 1744/529, loss: 0.014135128818452358 2023-01-22 19:59:35.764572: step: 1748/529, loss: 0.037984106689691544 2023-01-22 19:59:36.827087: step: 1752/529, loss: 0.003595915623009205 2023-01-22 19:59:37.866129: step: 1756/529, loss: 0.00888009276241064 2023-01-22 19:59:38.913368: step: 1760/529, loss: 0.0021698069758713245 2023-01-22 19:59:39.947099: step: 1764/529, loss: 0.01275579258799553 2023-01-22 19:59:40.987732: step: 1768/529, loss: 0.020149672403931618 2023-01-22 19:59:42.052775: step: 1772/529, loss: 0.014540082775056362 2023-01-22 19:59:43.114984: step: 1776/529, loss: 0.07143697887659073 2023-01-22 19:59:44.166088: step: 1780/529, loss: 0.001856763381510973 2023-01-22 19:59:45.198956: step: 1784/529, loss: 0.001417514868080616 2023-01-22 19:59:46.237650: step: 1788/529, loss: 0.0062180873937904835 2023-01-22 19:59:47.302908: step: 1792/529, loss: 0.004356327001005411 2023-01-22 19:59:48.360393: step: 1796/529, loss: 0.010803253389894962 2023-01-22 19:59:49.434948: step: 1800/529, loss: 0.01248183473944664 2023-01-22 19:59:50.479611: step: 1804/529, loss: 0.04637638479471207 2023-01-22 19:59:51.527370: step: 1808/529, loss: 0.011410241015255451 2023-01-22 19:59:52.562813: step: 1812/529, loss: 0.0009561876649968326 2023-01-22 19:59:53.618733: step: 1816/529, loss: 0.08122409880161285 2023-01-22 19:59:54.651945: step: 1820/529, loss: 0.021820086985826492 2023-01-22 19:59:55.702075: step: 1824/529, loss: 0.0022133137099444866 2023-01-22 19:59:56.746596: step: 1828/529, loss: 0.032528430223464966 2023-01-22 19:59:57.801229: step: 1832/529, loss: 0.005764440633356571 2023-01-22 19:59:58.848774: step: 1836/529, loss: 0.004100393038243055 2023-01-22 19:59:59.902765: step: 1840/529, loss: 0.007540260907262564 2023-01-22 20:00:00.968195: step: 1844/529, loss: 0.006106378044933081 2023-01-22 20:00:02.034541: step: 1848/529, loss: 0.009126215241849422 2023-01-22 20:00:03.081314: step: 1852/529, loss: 0.04526838660240173 2023-01-22 20:00:04.132930: step: 1856/529, loss: 0.008118906058371067 2023-01-22 20:00:05.180241: step: 1860/529, loss: 0.017415540292859077 2023-01-22 20:00:06.244389: step: 1864/529, loss: 0.018857847899198532 2023-01-22 20:00:07.290849: step: 1868/529, loss: 0.004406822379678488 2023-01-22 20:00:08.345610: step: 1872/529, loss: 0.053060151636600494 2023-01-22 20:00:09.405751: step: 1876/529, loss: 0.0066854082979261875 2023-01-22 20:00:10.453864: step: 1880/529, loss: 0.006910445634275675 2023-01-22 20:00:11.514379: step: 1884/529, loss: 0.011445866897702217 2023-01-22 20:00:12.569424: step: 1888/529, loss: 0.012498559430241585 2023-01-22 20:00:13.601778: step: 1892/529, loss: 0.010547908022999763 2023-01-22 20:00:14.645627: step: 1896/529, loss: 0.019504351541399956 2023-01-22 20:00:15.704879: step: 1900/529, loss: 0.012519661337137222 2023-01-22 20:00:16.750675: step: 1904/529, loss: 0.008370031602680683 2023-01-22 20:00:17.801156: step: 1908/529, loss: 0.0030402480624616146 2023-01-22 20:00:18.866797: step: 1912/529, loss: 0.012232047505676746 2023-01-22 20:00:19.957080: step: 1916/529, loss: 0.001376642961986363 2023-01-22 20:00:21.008907: step: 1920/529, loss: 0.01706608571112156 2023-01-22 20:00:22.057508: step: 1924/529, loss: 0.009613309055566788 2023-01-22 20:00:23.103830: step: 1928/529, loss: 0.0331423357129097 2023-01-22 20:00:24.149901: step: 1932/529, loss: 0.007739585358649492 2023-01-22 20:00:25.201938: step: 1936/529, loss: 0.007921899668872356 2023-01-22 20:00:26.231194: step: 1940/529, loss: 0.007345499936491251 2023-01-22 20:00:27.287966: step: 1944/529, loss: 0.005489768460392952 2023-01-22 20:00:28.328189: step: 1948/529, loss: 0.014905540272593498 2023-01-22 20:00:29.372311: step: 1952/529, loss: 0.01877637952566147 2023-01-22 20:00:30.393431: step: 1956/529, loss: 0.006906555034220219 2023-01-22 20:00:31.443963: step: 1960/529, loss: 0.01362372562289238 2023-01-22 20:00:32.480601: step: 1964/529, loss: 0.020083505660295486 2023-01-22 20:00:33.540552: step: 1968/529, loss: 0.0038358040619641542 2023-01-22 20:00:34.575801: step: 1972/529, loss: 0.0048320540226995945 2023-01-22 20:00:35.643494: step: 1976/529, loss: 0.0037067104130983353 2023-01-22 20:00:36.708954: step: 1980/529, loss: 0.04109601303935051 2023-01-22 20:00:37.761133: step: 1984/529, loss: 0.0031430714298039675 2023-01-22 20:00:38.816750: step: 1988/529, loss: 0.001824588980525732 2023-01-22 20:00:39.879056: step: 1992/529, loss: 0.025527898222208023 2023-01-22 20:00:40.931923: step: 1996/529, loss: 0.025560569018125534 2023-01-22 20:00:41.974020: step: 2000/529, loss: 0.014694432727992535 2023-01-22 20:00:43.029414: step: 2004/529, loss: 0.014217589050531387 2023-01-22 20:00:44.075870: step: 2008/529, loss: 0.006769063416868448 2023-01-22 20:00:45.131607: step: 2012/529, loss: 0.004187212325632572 2023-01-22 20:00:46.169365: step: 2016/529, loss: 0.007873183116316795 2023-01-22 20:00:47.216292: step: 2020/529, loss: 0.007136023137718439 2023-01-22 20:00:48.269423: step: 2024/529, loss: 0.01103308517485857 2023-01-22 20:00:49.318883: step: 2028/529, loss: 0.02309064380824566 2023-01-22 20:00:50.369205: step: 2032/529, loss: 0.003891361178830266 2023-01-22 20:00:51.421950: step: 2036/529, loss: 0.006231565494090319 2023-01-22 20:00:52.458526: step: 2040/529, loss: 0.013575805351138115 2023-01-22 20:00:53.509938: step: 2044/529, loss: 0.01943735033273697 2023-01-22 20:00:54.570104: step: 2048/529, loss: 0.004879387095570564 2023-01-22 20:00:55.629932: step: 2052/529, loss: 0.028755979612469673 2023-01-22 20:00:56.671817: step: 2056/529, loss: 0.0026007622946053743 2023-01-22 20:00:57.709618: step: 2060/529, loss: 0.019823603332042694 2023-01-22 20:00:58.775925: step: 2064/529, loss: 0.009842712432146072 2023-01-22 20:00:59.833558: step: 2068/529, loss: 0.020212415605783463 2023-01-22 20:01:00.883406: step: 2072/529, loss: 0.01061770785599947 2023-01-22 20:01:01.940189: step: 2076/529, loss: 0.027065467089414597 2023-01-22 20:01:02.994556: step: 2080/529, loss: 0.021899422630667686 2023-01-22 20:01:04.049038: step: 2084/529, loss: 0.015626907348632812 2023-01-22 20:01:05.102097: step: 2088/529, loss: 0.00826290063560009 2023-01-22 20:01:06.142474: step: 2092/529, loss: 0.006980634294450283 2023-01-22 20:01:07.194907: step: 2096/529, loss: 0.005161152221262455 2023-01-22 20:01:08.237155: step: 2100/529, loss: 1.7548653659105184e-06 2023-01-22 20:01:09.293378: step: 2104/529, loss: 0.01337167713791132 2023-01-22 20:01:10.340035: step: 2108/529, loss: 0.009191742166876793 2023-01-22 20:01:11.383171: step: 2112/529, loss: 0.01953154243528843 2023-01-22 20:01:12.429309: step: 2116/529, loss: 0.014801283366978168 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3352642423366108, 'r': 0.31427046625101657, 'f1': 0.3244280817126068}, 'combined': 0.23905227073560498, 'stategy': 1, 'epoch': 2} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37769273447697677, 'r': 0.31771011870489146, 'f1': 0.34511449861213805}, 'combined': 0.2427941196266298, 'stategy': 1, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31391853850214063, 'r': 0.3442977519055736, 'f1': 0.3284070864330087}, 'combined': 0.24198416895063798, 'stategy': 1, 'epoch': 2} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37884724166888883, 'r': 0.3296334325942032, 'f1': 0.3525310463268541}, 'combined': 0.2502970428920664, 'stategy': 1, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3297020574534162, 'r': 0.34534257251287614, 'f1': 0.337341122732689}, 'combined': 0.2485671430661919, 'stategy': 1, 'epoch': 2} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37549195557441667, 'r': 0.29751020563836855, 'f1': 0.33198315059549205}, 'combined': 0.23570803692279935, 'stategy': 1, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 2} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3352642423366108, 'r': 0.31427046625101657, 'f1': 0.3244280817126068}, 'combined': 0.23905227073560498, 'stategy': 1, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37769273447697677, 'r': 0.31771011870489146, 'f1': 0.34511449861213805}, 'combined': 0.2427941196266298, 'stategy': 1, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3164259395725866, 'r': 0.3452465185089892, 'f1': 0.3302085576301947}, 'combined': 0.24331156878014343, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3807202674530041, 'r': 0.3315950716526164, 'f1': 0.3544636972838313}, 'combined': 0.25166922507152023, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3297020574534162, 'r': 0.34534257251287614, 'f1': 0.337341122732689}, 'combined': 0.2485671430661919, 'stategy': 1, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37549195557441667, 'r': 0.29751020563836855, 'f1': 0.33198315059549205}, 'combined': 0.23570803692279935, 'stategy': 1, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 20:03:56.144483: step: 4/529, loss: 0.0208442322909832 2023-01-22 20:03:57.187872: step: 8/529, loss: 0.012691561132669449 2023-01-22 20:03:58.235435: step: 12/529, loss: 0.01606355980038643 2023-01-22 20:03:59.271505: step: 16/529, loss: 0.011063763871788979 2023-01-22 20:04:00.327516: step: 20/529, loss: 0.010010536760091782 2023-01-22 20:04:01.356075: step: 24/529, loss: 0.006352666299790144 2023-01-22 20:04:02.391351: step: 28/529, loss: 0.009280327707529068 2023-01-22 20:04:03.420139: step: 32/529, loss: 0.0015990022802725434 2023-01-22 20:04:04.470069: step: 36/529, loss: 0.0035235313698649406 2023-01-22 20:04:05.509815: step: 40/529, loss: 0.020566530525684357 2023-01-22 20:04:06.572079: step: 44/529, loss: 0.014135626144707203 2023-01-22 20:04:07.623464: step: 48/529, loss: 0.01160863321274519 2023-01-22 20:04:08.680645: step: 52/529, loss: 0.005531683564186096 2023-01-22 20:04:09.711675: step: 56/529, loss: 4.929221177008003e-06 2023-01-22 20:04:10.771077: step: 60/529, loss: 0.01231343112885952 2023-01-22 20:04:11.815382: step: 64/529, loss: 0.006655183155089617 2023-01-22 20:04:12.856862: step: 68/529, loss: 0.012532497756183147 2023-01-22 20:04:13.917825: step: 72/529, loss: 0.006677405908703804 2023-01-22 20:04:14.964335: step: 76/529, loss: 0.012505155988037586 2023-01-22 20:04:16.015184: step: 80/529, loss: 0.011351573280990124 2023-01-22 20:04:17.065714: step: 84/529, loss: 0.007938939146697521 2023-01-22 20:04:18.147466: step: 88/529, loss: 0.01606198027729988 2023-01-22 20:04:19.197176: step: 92/529, loss: 0.03394055366516113 2023-01-22 20:04:20.252882: step: 96/529, loss: 0.020595764741301537 2023-01-22 20:04:21.302453: step: 100/529, loss: 0.0067069679498672485 2023-01-22 20:04:22.352739: step: 104/529, loss: 0.005074886605143547 2023-01-22 20:04:23.396534: step: 108/529, loss: 0.006034509278833866 2023-01-22 20:04:24.446505: step: 112/529, loss: 0.004844900220632553 2023-01-22 20:04:25.492129: step: 116/529, loss: 0.023634810000658035 2023-01-22 20:04:26.538608: step: 120/529, loss: 0.006076115649193525 2023-01-22 20:04:27.589516: step: 124/529, loss: 0.02622343599796295 2023-01-22 20:04:28.640431: step: 128/529, loss: 0.005706985015422106 2023-01-22 20:04:29.681680: step: 132/529, loss: 0.024679511785507202 2023-01-22 20:04:30.742517: step: 136/529, loss: 0.010190864093601704 2023-01-22 20:04:31.790558: step: 140/529, loss: 0.004191117361187935 2023-01-22 20:04:32.827809: step: 144/529, loss: 0.013690647669136524 2023-01-22 20:04:33.900038: step: 148/529, loss: 0.008994690142571926 2023-01-22 20:04:34.941627: step: 152/529, loss: 0.01983303762972355 2023-01-22 20:04:35.979893: step: 156/529, loss: 0.0020555160008370876 2023-01-22 20:04:37.019673: step: 160/529, loss: 0.00725891487672925 2023-01-22 20:04:38.070087: step: 164/529, loss: 0.013728871010243893 2023-01-22 20:04:39.129219: step: 168/529, loss: 0.009484872221946716 2023-01-22 20:04:40.184501: step: 172/529, loss: 0.009091203100979328 2023-01-22 20:04:41.229382: step: 176/529, loss: 0.010535717010498047 2023-01-22 20:04:42.265021: step: 180/529, loss: 0.03529338538646698 2023-01-22 20:04:43.309263: step: 184/529, loss: 0.003982928115874529 2023-01-22 20:04:44.361522: step: 188/529, loss: 0.01871403492987156 2023-01-22 20:04:45.410053: step: 192/529, loss: 0.03191760554909706 2023-01-22 20:04:46.489884: step: 196/529, loss: 0.0031718104146420956 2023-01-22 20:04:47.531814: step: 200/529, loss: 0.003036828013136983 2023-01-22 20:04:48.578577: step: 204/529, loss: 0.02409512922167778 2023-01-22 20:04:49.642403: step: 208/529, loss: 0.0219293013215065 2023-01-22 20:04:50.687456: step: 212/529, loss: 0.0011673939879983664 2023-01-22 20:04:51.737215: step: 216/529, loss: 0.001010302104987204 2023-01-22 20:04:52.791057: step: 220/529, loss: 0.002797893015667796 2023-01-22 20:04:53.844757: step: 224/529, loss: 0.0021156298462301493 2023-01-22 20:04:54.886873: step: 228/529, loss: 0.008352869190275669 2023-01-22 20:04:55.944616: step: 232/529, loss: 0.005403296090662479 2023-01-22 20:04:56.985524: step: 236/529, loss: 0.005087476689368486 2023-01-22 20:04:58.041722: step: 240/529, loss: 0.009882204234600067 2023-01-22 20:04:59.100460: step: 244/529, loss: 0.004028747323900461 2023-01-22 20:05:00.140593: step: 248/529, loss: 0.0029030530713498592 2023-01-22 20:05:01.194825: step: 252/529, loss: 0.03338465094566345 2023-01-22 20:05:02.235063: step: 256/529, loss: 0.0006526591023430228 2023-01-22 20:05:03.307681: step: 260/529, loss: 0.008654743432998657 2023-01-22 20:05:04.360863: step: 264/529, loss: 0.009621884673833847 2023-01-22 20:05:05.414892: step: 268/529, loss: 0.005640773568302393 2023-01-22 20:05:06.470310: step: 272/529, loss: 0.002039456507191062 2023-01-22 20:05:07.530715: step: 276/529, loss: 0.028312239795923233 2023-01-22 20:05:08.577677: step: 280/529, loss: 0.0019282049033790827 2023-01-22 20:05:09.617007: step: 284/529, loss: 0.00046667290735058486 2023-01-22 20:05:10.662173: step: 288/529, loss: 0.015825647860765457 2023-01-22 20:05:11.722763: step: 292/529, loss: 0.03494861721992493 2023-01-22 20:05:12.763086: step: 296/529, loss: 0.005160809960216284 2023-01-22 20:05:13.825594: step: 300/529, loss: 0.023768030107021332 2023-01-22 20:05:14.887347: step: 304/529, loss: 0.01773795112967491 2023-01-22 20:05:15.960098: step: 308/529, loss: 0.010129460133612156 2023-01-22 20:05:17.003905: step: 312/529, loss: 0.012912639416754246 2023-01-22 20:05:18.051283: step: 316/529, loss: 0.0030903536826372147 2023-01-22 20:05:19.116624: step: 320/529, loss: 0.003855168353766203 2023-01-22 20:05:20.188602: step: 324/529, loss: 0.000534023973159492 2023-01-22 20:05:21.248406: step: 328/529, loss: 0.0013166150311008096 2023-01-22 20:05:22.298031: step: 332/529, loss: 0.008628039620816708 2023-01-22 20:05:23.349849: step: 336/529, loss: 0.005714814644306898 2023-01-22 20:05:24.409081: step: 340/529, loss: 0.0032538832165300846 2023-01-22 20:05:25.458788: step: 344/529, loss: 0.03416123986244202 2023-01-22 20:05:26.501197: step: 348/529, loss: 0.005888884421437979 2023-01-22 20:05:27.589186: step: 352/529, loss: 0.01538755465298891 2023-01-22 20:05:28.642319: step: 356/529, loss: 0.007395452819764614 2023-01-22 20:05:29.695445: step: 360/529, loss: 0.011833365075290203 2023-01-22 20:05:30.741007: step: 364/529, loss: 0.013952210545539856 2023-01-22 20:05:31.792353: step: 368/529, loss: 0.007630400359630585 2023-01-22 20:05:32.840451: step: 372/529, loss: 0.005060497671365738 2023-01-22 20:05:33.883807: step: 376/529, loss: 0.0363008975982666 2023-01-22 20:05:34.961869: step: 380/529, loss: 0.009673144668340683 2023-01-22 20:05:36.008957: step: 384/529, loss: 0.01499855238944292 2023-01-22 20:05:37.084180: step: 388/529, loss: 0.035699956119060516 2023-01-22 20:05:38.174873: step: 392/529, loss: 0.004454293288290501 2023-01-22 20:05:39.235390: step: 396/529, loss: 0.03605381026864052 2023-01-22 20:05:40.284466: step: 400/529, loss: 0.003094918094575405 2023-01-22 20:05:41.333433: step: 404/529, loss: 0.0022648752201348543 2023-01-22 20:05:42.372984: step: 408/529, loss: 0.000579286424908787 2023-01-22 20:05:43.424984: step: 412/529, loss: 0.004001363646239042 2023-01-22 20:05:44.465261: step: 416/529, loss: 6.269874575082213e-05 2023-01-22 20:05:45.528018: step: 420/529, loss: 0.010995334014296532 2023-01-22 20:05:46.585629: step: 424/529, loss: 0.014665324240922928 2023-01-22 20:05:47.635014: step: 428/529, loss: 0.008718844503164291 2023-01-22 20:05:48.695491: step: 432/529, loss: 0.00610192259773612 2023-01-22 20:05:49.751922: step: 436/529, loss: 0.007937761023640633 2023-01-22 20:05:50.839735: step: 440/529, loss: 0.006486135069280863 2023-01-22 20:05:51.874671: step: 444/529, loss: 0.00448181014508009 2023-01-22 20:05:52.928922: step: 448/529, loss: 0.0032839749474078417 2023-01-22 20:05:53.991811: step: 452/529, loss: 0.022131407633423805 2023-01-22 20:05:55.054431: step: 456/529, loss: 0.002007158938795328 2023-01-22 20:05:56.096244: step: 460/529, loss: 0.008322598412632942 2023-01-22 20:05:57.143146: step: 464/529, loss: 0.030704252421855927 2023-01-22 20:05:58.189929: step: 468/529, loss: 0.003225067863240838 2023-01-22 20:05:59.235887: step: 472/529, loss: 0.005971058737486601 2023-01-22 20:06:00.270879: step: 476/529, loss: 0.003775882301852107 2023-01-22 20:06:01.317357: step: 480/529, loss: 0.008580246940255165 2023-01-22 20:06:02.364269: step: 484/529, loss: 0.0051669105887413025 2023-01-22 20:06:03.413697: step: 488/529, loss: 0.002639737445861101 2023-01-22 20:06:04.459774: step: 492/529, loss: 0.004285009112209082 2023-01-22 20:06:05.516174: step: 496/529, loss: 0.007803808897733688 2023-01-22 20:06:06.564568: step: 500/529, loss: 0.0030957337003201246 2023-01-22 20:06:07.639139: step: 504/529, loss: 0.0022118673659861088 2023-01-22 20:06:08.682242: step: 508/529, loss: 0.013683885335922241 2023-01-22 20:06:09.740923: step: 512/529, loss: 0.005295691546052694 2023-01-22 20:06:10.833215: step: 516/529, loss: 0.00547033129259944 2023-01-22 20:06:11.888460: step: 520/529, loss: 0.011031736619770527 2023-01-22 20:06:12.945335: step: 524/529, loss: 0.007539558690041304 2023-01-22 20:06:13.995895: step: 528/529, loss: 0.0042389435693621635 2023-01-22 20:06:15.043250: step: 532/529, loss: 0.007577598560601473 2023-01-22 20:06:16.079597: step: 536/529, loss: 0.012358488515019417 2023-01-22 20:06:17.119232: step: 540/529, loss: 0.00582906836643815 2023-01-22 20:06:18.178225: step: 544/529, loss: 0.0047879330813884735 2023-01-22 20:06:19.246833: step: 548/529, loss: 0.006215913221240044 2023-01-22 20:06:20.292310: step: 552/529, loss: 0.009544332511723042 2023-01-22 20:06:21.338107: step: 556/529, loss: 0.005756858270615339 2023-01-22 20:06:22.378549: step: 560/529, loss: 0.032766226679086685 2023-01-22 20:06:23.424220: step: 564/529, loss: 0.004622172098606825 2023-01-22 20:06:24.508319: step: 568/529, loss: 0.004793106112629175 2023-01-22 20:06:25.574562: step: 572/529, loss: 0.00674926582723856 2023-01-22 20:06:26.629957: step: 576/529, loss: 0.022429246455430984 2023-01-22 20:06:27.685469: step: 580/529, loss: 0.00271551962941885 2023-01-22 20:06:28.747687: step: 584/529, loss: 0.04102171212434769 2023-01-22 20:06:29.782388: step: 588/529, loss: 0.0070463139563798904 2023-01-22 20:06:30.839659: step: 592/529, loss: 0.017212038859725 2023-01-22 20:06:31.895047: step: 596/529, loss: 0.004826388321816921 2023-01-22 20:06:32.956484: step: 600/529, loss: 0.0104568712413311 2023-01-22 20:06:34.005188: step: 604/529, loss: 0.009162485599517822 2023-01-22 20:06:35.056407: step: 608/529, loss: 0.0017922725528478622 2023-01-22 20:06:36.107536: step: 612/529, loss: 0.003059778129681945 2023-01-22 20:06:37.162739: step: 616/529, loss: 0.008877222426235676 2023-01-22 20:06:38.216195: step: 620/529, loss: 0.000661179656162858 2023-01-22 20:06:39.266602: step: 624/529, loss: 0.021183345466852188 2023-01-22 20:06:40.300446: step: 628/529, loss: 0.007263918872922659 2023-01-22 20:06:41.350645: step: 632/529, loss: 0.05522396042943001 2023-01-22 20:06:42.409300: step: 636/529, loss: 0.001382191781885922 2023-01-22 20:06:43.458111: step: 640/529, loss: 0.00908604171127081 2023-01-22 20:06:44.490701: step: 644/529, loss: 0.0021070127841085196 2023-01-22 20:06:45.531887: step: 648/529, loss: 0.005988290533423424 2023-01-22 20:06:46.592083: step: 652/529, loss: 0.006849177181720734 2023-01-22 20:06:47.648531: step: 656/529, loss: 0.007633228320628405 2023-01-22 20:06:48.691813: step: 660/529, loss: 0.015383109450340271 2023-01-22 20:06:49.760050: step: 664/529, loss: 0.006174037698656321 2023-01-22 20:06:50.829045: step: 668/529, loss: 6.417378699552501e-06 2023-01-22 20:06:51.876226: step: 672/529, loss: 0.006210414692759514 2023-01-22 20:06:52.925927: step: 676/529, loss: 0.015877971425652504 2023-01-22 20:06:53.987712: step: 680/529, loss: 0.0027899490669369698 2023-01-22 20:06:55.048966: step: 684/529, loss: 0.02167505770921707 2023-01-22 20:06:56.108981: step: 688/529, loss: 0.007627988699823618 2023-01-22 20:06:57.158959: step: 692/529, loss: 0.0033301939256489277 2023-01-22 20:06:58.213058: step: 696/529, loss: 0.007913576439023018 2023-01-22 20:06:59.254468: step: 700/529, loss: 0.009650005027651787 2023-01-22 20:07:00.309327: step: 704/529, loss: 0.009845939464867115 2023-01-22 20:07:01.372931: step: 708/529, loss: 0.006602531764656305 2023-01-22 20:07:02.421823: step: 712/529, loss: 0.015772363170981407 2023-01-22 20:07:03.474761: step: 716/529, loss: 0.008152037858963013 2023-01-22 20:07:04.524644: step: 720/529, loss: 0.004063493572175503 2023-01-22 20:07:05.578366: step: 724/529, loss: 0.004320870153605938 2023-01-22 20:07:06.618708: step: 728/529, loss: 0.008242408744990826 2023-01-22 20:07:07.655677: step: 732/529, loss: 0.0031298690009862185 2023-01-22 20:07:08.699117: step: 736/529, loss: 0.004474529065191746 2023-01-22 20:07:09.763862: step: 740/529, loss: 0.012551347725093365 2023-01-22 20:07:10.832051: step: 744/529, loss: 0.01277049258351326 2023-01-22 20:07:11.871497: step: 748/529, loss: 0.00032135413493961096 2023-01-22 20:07:12.938103: step: 752/529, loss: 0.0009411997743882239 2023-01-22 20:07:13.970517: step: 756/529, loss: 0.05965364724397659 2023-01-22 20:07:15.018400: step: 760/529, loss: 0.00435011088848114 2023-01-22 20:07:16.068474: step: 764/529, loss: 0.008131771348416805 2023-01-22 20:07:17.126910: step: 768/529, loss: 0.007096108514815569 2023-01-22 20:07:18.173528: step: 772/529, loss: 0.008854346349835396 2023-01-22 20:07:19.233841: step: 776/529, loss: 0.0010187854059040546 2023-01-22 20:07:20.294025: step: 780/529, loss: 0.00849572941660881 2023-01-22 20:07:21.350833: step: 784/529, loss: 0.013858995400369167 2023-01-22 20:07:22.413228: step: 788/529, loss: 0.009879359975457191 2023-01-22 20:07:23.457323: step: 792/529, loss: 0.006350253242999315 2023-01-22 20:07:24.509479: step: 796/529, loss: 0.023349575698375702 2023-01-22 20:07:25.581854: step: 800/529, loss: 0.006229089573025703 2023-01-22 20:07:26.631872: step: 804/529, loss: 0.021389836445450783 2023-01-22 20:07:27.675475: step: 808/529, loss: 0.003919560927897692 2023-01-22 20:07:28.723164: step: 812/529, loss: 0.006096747703850269 2023-01-22 20:07:29.771998: step: 816/529, loss: 0.03172317519783974 2023-01-22 20:07:30.837093: step: 820/529, loss: 0.0035406118258833885 2023-01-22 20:07:31.867730: step: 824/529, loss: 0.009710850194096565 2023-01-22 20:07:32.912790: step: 828/529, loss: 0.006400146521627903 2023-01-22 20:07:33.957729: step: 832/529, loss: 0.005716054700314999 2023-01-22 20:07:34.998850: step: 836/529, loss: 0.0031776505056768656 2023-01-22 20:07:36.036770: step: 840/529, loss: 0.004783932119607925 2023-01-22 20:07:37.094014: step: 844/529, loss: 0.010583682917058468 2023-01-22 20:07:38.142669: step: 848/529, loss: 0.005367538891732693 2023-01-22 20:07:39.194588: step: 852/529, loss: 0.008427374996244907 2023-01-22 20:07:40.242922: step: 856/529, loss: 0.0007629571482539177 2023-01-22 20:07:41.282434: step: 860/529, loss: 0.010558458045125008 2023-01-22 20:07:42.339942: step: 864/529, loss: 0.003048550570383668 2023-01-22 20:07:43.404692: step: 868/529, loss: 0.002945945132523775 2023-01-22 20:07:44.472140: step: 872/529, loss: 0.007721399422734976 2023-01-22 20:07:45.518232: step: 876/529, loss: 0.01770276203751564 2023-01-22 20:07:46.570067: step: 880/529, loss: 0.00855009350925684 2023-01-22 20:07:47.628810: step: 884/529, loss: 0.0036043853033334017 2023-01-22 20:07:48.691209: step: 888/529, loss: 0.009992311708629131 2023-01-22 20:07:49.732277: step: 892/529, loss: 0.023117542266845703 2023-01-22 20:07:50.788514: step: 896/529, loss: 0.012532321736216545 2023-01-22 20:07:51.831270: step: 900/529, loss: 0.0018556644208729267 2023-01-22 20:07:52.888978: step: 904/529, loss: 0.004856654442846775 2023-01-22 20:07:53.949996: step: 908/529, loss: 0.019890183582901955 2023-01-22 20:07:54.990698: step: 912/529, loss: 0.007285080850124359 2023-01-22 20:07:56.034264: step: 916/529, loss: 0.0059034014120697975 2023-01-22 20:07:57.078960: step: 920/529, loss: 0.00897412933409214 2023-01-22 20:07:58.126587: step: 924/529, loss: 0.010564886964857578 2023-01-22 20:07:59.179045: step: 928/529, loss: 0.002053814474493265 2023-01-22 20:08:00.229628: step: 932/529, loss: 0.0012606256641447544 2023-01-22 20:08:01.293955: step: 936/529, loss: 0.005641499534249306 2023-01-22 20:08:02.348294: step: 940/529, loss: 0.005151058547198772 2023-01-22 20:08:03.409836: step: 944/529, loss: 0.004544899333268404 2023-01-22 20:08:04.466452: step: 948/529, loss: 0.004842931870371103 2023-01-22 20:08:05.508802: step: 952/529, loss: 0.01224144920706749 2023-01-22 20:08:06.555235: step: 956/529, loss: 0.011474881321191788 2023-01-22 20:08:07.612435: step: 960/529, loss: 0.0151266073808074 2023-01-22 20:08:08.669497: step: 964/529, loss: 0.047485072165727615 2023-01-22 20:08:09.715392: step: 968/529, loss: 0.016444802284240723 2023-01-22 20:08:10.754822: step: 972/529, loss: 0.014128783717751503 2023-01-22 20:08:11.813406: step: 976/529, loss: 0.04584663361310959 2023-01-22 20:08:12.863195: step: 980/529, loss: 0.0043280962854623795 2023-01-22 20:08:13.897466: step: 984/529, loss: 0.005949350539594889 2023-01-22 20:08:14.978164: step: 988/529, loss: 0.008175183087587357 2023-01-22 20:08:16.044577: step: 992/529, loss: 0.01832376793026924 2023-01-22 20:08:17.096638: step: 996/529, loss: 0.03893590718507767 2023-01-22 20:08:18.162058: step: 1000/529, loss: 0.006128888577222824 2023-01-22 20:08:19.237084: step: 1004/529, loss: 0.009077107533812523 2023-01-22 20:08:20.280199: step: 1008/529, loss: 0.015400653705000877 2023-01-22 20:08:21.337063: step: 1012/529, loss: 0.004606524482369423 2023-01-22 20:08:22.386463: step: 1016/529, loss: 0.00999111495912075 2023-01-22 20:08:23.435051: step: 1020/529, loss: 0.006618921644985676 2023-01-22 20:08:24.479082: step: 1024/529, loss: 0.03537015616893768 2023-01-22 20:08:25.522538: step: 1028/529, loss: 0.012193504720926285 2023-01-22 20:08:26.560226: step: 1032/529, loss: 0.010620943270623684 2023-01-22 20:08:27.613852: step: 1036/529, loss: 0.0030468408949673176 2023-01-22 20:08:28.670879: step: 1040/529, loss: 0.0024327056016772985 2023-01-22 20:08:29.716114: step: 1044/529, loss: 0.008371617645025253 2023-01-22 20:08:30.754996: step: 1048/529, loss: 0.0028606997802853584 2023-01-22 20:08:31.824233: step: 1052/529, loss: 0.008406925946474075 2023-01-22 20:08:32.874651: step: 1056/529, loss: 0.0023276079446077347 2023-01-22 20:08:33.936243: step: 1060/529, loss: 0.024634700268507004 2023-01-22 20:08:34.974796: step: 1064/529, loss: 0.007429181132465601 2023-01-22 20:08:36.035343: step: 1068/529, loss: 0.03736819699406624 2023-01-22 20:08:37.086510: step: 1072/529, loss: 0.011959219351410866 2023-01-22 20:08:38.139702: step: 1076/529, loss: 0.015632368624210358 2023-01-22 20:08:39.188424: step: 1080/529, loss: 0.0036548601929098368 2023-01-22 20:08:40.253716: step: 1084/529, loss: 0.009078995324671268 2023-01-22 20:08:41.303083: step: 1088/529, loss: 0.006588313262909651 2023-01-22 20:08:42.346202: step: 1092/529, loss: 0.006441383622586727 2023-01-22 20:08:43.395078: step: 1096/529, loss: 0.014562997967004776 2023-01-22 20:08:44.442123: step: 1100/529, loss: 0.005533707328140736 2023-01-22 20:08:45.498272: step: 1104/529, loss: 0.01959068700671196 2023-01-22 20:08:46.530990: step: 1108/529, loss: 0.0026985234580934048 2023-01-22 20:08:47.577856: step: 1112/529, loss: 0.005703798495233059 2023-01-22 20:08:48.614072: step: 1116/529, loss: 0.06922367215156555 2023-01-22 20:08:49.674843: step: 1120/529, loss: 0.015925617888569832 2023-01-22 20:08:50.742566: step: 1124/529, loss: 0.015083757229149342 2023-01-22 20:08:51.790695: step: 1128/529, loss: 0.018662579357624054 2023-01-22 20:08:52.842035: step: 1132/529, loss: 0.01122731901705265 2023-01-22 20:08:53.891629: step: 1136/529, loss: 0.004638632293790579 2023-01-22 20:08:54.945467: step: 1140/529, loss: 0.02509639598429203 2023-01-22 20:08:56.015428: step: 1144/529, loss: 0.09302464872598648 2023-01-22 20:08:57.059868: step: 1148/529, loss: 0.0030289706774055958 2023-01-22 20:08:58.104123: step: 1152/529, loss: 0.00594885041937232 2023-01-22 20:08:59.142875: step: 1156/529, loss: 0.006528529338538647 2023-01-22 20:09:00.181609: step: 1160/529, loss: 0.04342134669423103 2023-01-22 20:09:01.214192: step: 1164/529, loss: 0.02307465299963951 2023-01-22 20:09:02.250105: step: 1168/529, loss: 0.0072258529253304005 2023-01-22 20:09:03.323003: step: 1172/529, loss: 0.0020435911137610674 2023-01-22 20:09:04.370064: step: 1176/529, loss: 0.004534694366157055 2023-01-22 20:09:05.428369: step: 1180/529, loss: 0.016949469223618507 2023-01-22 20:09:06.475597: step: 1184/529, loss: 0.005833040457218885 2023-01-22 20:09:07.530009: step: 1188/529, loss: 0.009445838630199432 2023-01-22 20:09:08.566436: step: 1192/529, loss: 0.00451301783323288 2023-01-22 20:09:09.630854: step: 1196/529, loss: 0.00893206987529993 2023-01-22 20:09:10.699717: step: 1200/529, loss: 0.009515452198684216 2023-01-22 20:09:11.742311: step: 1204/529, loss: 0.0037146389950066805 2023-01-22 20:09:12.793406: step: 1208/529, loss: 0.0010736786061897874 2023-01-22 20:09:13.848854: step: 1212/529, loss: 0.004894861951470375 2023-01-22 20:09:14.896677: step: 1216/529, loss: 0.006374821998178959 2023-01-22 20:09:15.939756: step: 1220/529, loss: 0.005195472855120897 2023-01-22 20:09:16.987580: step: 1224/529, loss: 0.007244238164275885 2023-01-22 20:09:18.045014: step: 1228/529, loss: 0.004765903111547232 2023-01-22 20:09:19.094170: step: 1232/529, loss: 0.038049325346946716 2023-01-22 20:09:20.159307: step: 1236/529, loss: 0.027681902050971985 2023-01-22 20:09:21.219201: step: 1240/529, loss: 0.024872610345482826 2023-01-22 20:09:22.270873: step: 1244/529, loss: 0.007776146288961172 2023-01-22 20:09:23.316739: step: 1248/529, loss: 0.019104285165667534 2023-01-22 20:09:24.376969: step: 1252/529, loss: 0.0027267064433544874 2023-01-22 20:09:25.436032: step: 1256/529, loss: 0.0015529862139374018 2023-01-22 20:09:26.475257: step: 1260/529, loss: 0.004872492514550686 2023-01-22 20:09:27.522250: step: 1264/529, loss: 0.01610831916332245 2023-01-22 20:09:28.585285: step: 1268/529, loss: 0.0039037438109517097 2023-01-22 20:09:29.633823: step: 1272/529, loss: 0.00940401665866375 2023-01-22 20:09:30.706005: step: 1276/529, loss: 0.0041777146980166435 2023-01-22 20:09:31.756208: step: 1280/529, loss: 0.06750694662332535 2023-01-22 20:09:32.817452: step: 1284/529, loss: 0.04656890407204628 2023-01-22 20:09:33.867334: step: 1288/529, loss: 0.006866731680929661 2023-01-22 20:09:34.906608: step: 1292/529, loss: 0.005039165262132883 2023-01-22 20:09:35.977079: step: 1296/529, loss: 0.0035023544915020466 2023-01-22 20:09:37.048065: step: 1300/529, loss: 0.0032049373257905245 2023-01-22 20:09:38.084446: step: 1304/529, loss: 0.007449497934430838 2023-01-22 20:09:39.119107: step: 1308/529, loss: 0.003058676142245531 2023-01-22 20:09:40.172145: step: 1312/529, loss: 0.00487959198653698 2023-01-22 20:09:41.220941: step: 1316/529, loss: 0.002432426670566201 2023-01-22 20:09:42.279131: step: 1320/529, loss: 0.003428333904594183 2023-01-22 20:09:43.330565: step: 1324/529, loss: 0.00448904512450099 2023-01-22 20:09:44.381017: step: 1328/529, loss: 0.047091979533433914 2023-01-22 20:09:45.436145: step: 1332/529, loss: 0.031008722260594368 2023-01-22 20:09:46.487944: step: 1336/529, loss: 0.003948854748159647 2023-01-22 20:09:47.557857: step: 1340/529, loss: 0.01910504326224327 2023-01-22 20:09:48.613145: step: 1344/529, loss: 0.04447054862976074 2023-01-22 20:09:49.682191: step: 1348/529, loss: 0.009323792532086372 2023-01-22 20:09:50.721561: step: 1352/529, loss: 0.008373000659048557 2023-01-22 20:09:51.763475: step: 1356/529, loss: 0.007204152178019285 2023-01-22 20:09:52.837213: step: 1360/529, loss: 0.011526384390890598 2023-01-22 20:09:53.877697: step: 1364/529, loss: 0.04535789415240288 2023-01-22 20:09:54.925037: step: 1368/529, loss: 0.004789196420460939 2023-01-22 20:09:55.966835: step: 1372/529, loss: 0.004318946041166782 2023-01-22 20:09:57.031928: step: 1376/529, loss: 0.001332128420472145 2023-01-22 20:09:58.075291: step: 1380/529, loss: 0.0017280379543080926 2023-01-22 20:09:59.139300: step: 1384/529, loss: 0.01079876720905304 2023-01-22 20:10:00.188148: step: 1388/529, loss: 0.04532318562269211 2023-01-22 20:10:01.236450: step: 1392/529, loss: 0.010674613527953625 2023-01-22 20:10:02.290112: step: 1396/529, loss: 0.0007004258222877979 2023-01-22 20:10:03.338446: step: 1400/529, loss: 0.01211795024573803 2023-01-22 20:10:04.394618: step: 1404/529, loss: 0.0042478106915950775 2023-01-22 20:10:05.467549: step: 1408/529, loss: 0.015085237100720406 2023-01-22 20:10:06.526773: step: 1412/529, loss: 0.0063300905749201775 2023-01-22 20:10:07.583769: step: 1416/529, loss: 0.022326374426484108 2023-01-22 20:10:08.636990: step: 1420/529, loss: 0.011718065477907658 2023-01-22 20:10:09.681656: step: 1424/529, loss: 0.007911349646747112 2023-01-22 20:10:10.729768: step: 1428/529, loss: 0.022973133251070976 2023-01-22 20:10:11.784551: step: 1432/529, loss: 0.009008155204355717 2023-01-22 20:10:12.822087: step: 1436/529, loss: 0.00622814055532217 2023-01-22 20:10:13.859355: step: 1440/529, loss: 0.005001131910830736 2023-01-22 20:10:14.897352: step: 1444/529, loss: 0.02673487178981304 2023-01-22 20:10:15.940969: step: 1448/529, loss: 0.020341381430625916 2023-01-22 20:10:16.979907: step: 1452/529, loss: 0.0009337136289104819 2023-01-22 20:10:18.032121: step: 1456/529, loss: 0.029698602855205536 2023-01-22 20:10:19.083434: step: 1460/529, loss: 0.024077508598566055 2023-01-22 20:10:20.126769: step: 1464/529, loss: 0.023606255650520325 2023-01-22 20:10:21.199583: step: 1468/529, loss: 0.00319235073402524 2023-01-22 20:10:22.244522: step: 1472/529, loss: 0.010188245214521885 2023-01-22 20:10:23.299245: step: 1476/529, loss: 0.00591333257034421 2023-01-22 20:10:24.344189: step: 1480/529, loss: 0.00010252791253151372 2023-01-22 20:10:25.398679: step: 1484/529, loss: 0.05019362270832062 2023-01-22 20:10:26.455033: step: 1488/529, loss: 0.0006311187171377242 2023-01-22 20:10:27.503603: step: 1492/529, loss: 0.02612415701150894 2023-01-22 20:10:28.563097: step: 1496/529, loss: 0.007584640756249428 2023-01-22 20:10:29.616945: step: 1500/529, loss: 0.027187936007976532 2023-01-22 20:10:30.675443: step: 1504/529, loss: 0.004561016336083412 2023-01-22 20:10:31.725526: step: 1508/529, loss: 0.025386089459061623 2023-01-22 20:10:32.764715: step: 1512/529, loss: 0.004865674301981926 2023-01-22 20:10:33.807786: step: 1516/529, loss: 0.022975487634539604 2023-01-22 20:10:34.860792: step: 1520/529, loss: 0.01226430106908083 2023-01-22 20:10:35.917800: step: 1524/529, loss: 0.0039506349712610245 2023-01-22 20:10:36.956477: step: 1528/529, loss: 0.0018974298145622015 2023-01-22 20:10:38.005985: step: 1532/529, loss: 0.015290437266230583 2023-01-22 20:10:39.051537: step: 1536/529, loss: 0.005913300905376673 2023-01-22 20:10:40.104595: step: 1540/529, loss: 0.007190620061010122 2023-01-22 20:10:41.147405: step: 1544/529, loss: 0.010892827063798904 2023-01-22 20:10:42.192996: step: 1548/529, loss: 0.0034412264358252287 2023-01-22 20:10:43.256750: step: 1552/529, loss: 0.027340717613697052 2023-01-22 20:10:44.299791: step: 1556/529, loss: 0.00887272972613573 2023-01-22 20:10:45.341801: step: 1560/529, loss: 0.026076752692461014 2023-01-22 20:10:46.392429: step: 1564/529, loss: 0.01325869932770729 2023-01-22 20:10:47.427822: step: 1568/529, loss: 0.019369937479496002 2023-01-22 20:10:48.473152: step: 1572/529, loss: 0.001958054257556796 2023-01-22 20:10:49.554385: step: 1576/529, loss: 0.0316927470266819 2023-01-22 20:10:50.604289: step: 1580/529, loss: 0.0063720098696649075 2023-01-22 20:10:51.645187: step: 1584/529, loss: 0.01358823012560606 2023-01-22 20:10:52.697604: step: 1588/529, loss: 0.0068548438139259815 2023-01-22 20:10:53.735315: step: 1592/529, loss: 0.006176367402076721 2023-01-22 20:10:54.785991: step: 1596/529, loss: 0.005565831437706947 2023-01-22 20:10:55.845694: step: 1600/529, loss: 0.014127799309790134 2023-01-22 20:10:56.894226: step: 1604/529, loss: 0.002745439065620303 2023-01-22 20:10:57.929822: step: 1608/529, loss: 0.0030092273373156786 2023-01-22 20:10:58.978891: step: 1612/529, loss: 0.03675774484872818 2023-01-22 20:11:00.012894: step: 1616/529, loss: 0.0002944464795291424 2023-01-22 20:11:01.052887: step: 1620/529, loss: 0.005533001385629177 2023-01-22 20:11:02.115550: step: 1624/529, loss: 0.0026450827717781067 2023-01-22 20:11:03.178165: step: 1628/529, loss: 0.0037081041373312473 2023-01-22 20:11:04.226024: step: 1632/529, loss: 0.011961379088461399 2023-01-22 20:11:05.277520: step: 1636/529, loss: 0.03103446587920189 2023-01-22 20:11:06.320762: step: 1640/529, loss: 0.0016517981421202421 2023-01-22 20:11:07.372101: step: 1644/529, loss: 0.02793392539024353 2023-01-22 20:11:08.434812: step: 1648/529, loss: 0.005642566364258528 2023-01-22 20:11:09.501857: step: 1652/529, loss: 0.006860267836600542 2023-01-22 20:11:10.549076: step: 1656/529, loss: 0.006212129257619381 2023-01-22 20:11:11.591635: step: 1660/529, loss: 0.002021106192842126 2023-01-22 20:11:12.640058: step: 1664/529, loss: 0.002543028211221099 2023-01-22 20:11:13.686003: step: 1668/529, loss: 0.006388460751622915 2023-01-22 20:11:14.744326: step: 1672/529, loss: 0.07572527229785919 2023-01-22 20:11:15.807511: step: 1676/529, loss: 0.029526885598897934 2023-01-22 20:11:16.867448: step: 1680/529, loss: 0.0019284343579784036 2023-01-22 20:11:17.932072: step: 1684/529, loss: 0.03118252195417881 2023-01-22 20:11:18.971892: step: 1688/529, loss: 0.004910324700176716 2023-01-22 20:11:20.025344: step: 1692/529, loss: 0.017921822145581245 2023-01-22 20:11:21.077763: step: 1696/529, loss: 0.04709478095173836 2023-01-22 20:11:22.115412: step: 1700/529, loss: 0.0 2023-01-22 20:11:23.164587: step: 1704/529, loss: 0.007378748618066311 2023-01-22 20:11:24.216961: step: 1708/529, loss: 0.07707908749580383 2023-01-22 20:11:25.255679: step: 1712/529, loss: 0.01795400120317936 2023-01-22 20:11:26.318542: step: 1716/529, loss: 0.04505113884806633 2023-01-22 20:11:27.353310: step: 1720/529, loss: 0.0050595165230333805 2023-01-22 20:11:28.410033: step: 1724/529, loss: 0.0020369517151266336 2023-01-22 20:11:29.455468: step: 1728/529, loss: 0.0037170760333538055 2023-01-22 20:11:30.508220: step: 1732/529, loss: 0.0060445088893175125 2023-01-22 20:11:31.545668: step: 1736/529, loss: 0.02983376383781433 2023-01-22 20:11:32.599816: step: 1740/529, loss: 0.03832179307937622 2023-01-22 20:11:33.645894: step: 1744/529, loss: 0.0038015972822904587 2023-01-22 20:11:34.694971: step: 1748/529, loss: 0.0025327997282147408 2023-01-22 20:11:35.749846: step: 1752/529, loss: 0.012353229336440563 2023-01-22 20:11:36.818989: step: 1756/529, loss: 0.0031786933541297913 2023-01-22 20:11:37.888417: step: 1760/529, loss: 0.010260271839797497 2023-01-22 20:11:38.927316: step: 1764/529, loss: 0.022884979844093323 2023-01-22 20:11:39.973658: step: 1768/529, loss: 0.006630528252571821 2023-01-22 20:11:41.011301: step: 1772/529, loss: 0.0006608838448300958 2023-01-22 20:11:42.061111: step: 1776/529, loss: 0.006252211052924395 2023-01-22 20:11:43.101670: step: 1780/529, loss: 0.017570924013853073 2023-01-22 20:11:44.135017: step: 1784/529, loss: 0.001727188704535365 2023-01-22 20:11:45.214955: step: 1788/529, loss: 0.025053316727280617 2023-01-22 20:11:46.268785: step: 1792/529, loss: 0.0077858539298176765 2023-01-22 20:11:47.344270: step: 1796/529, loss: 0.008492161519825459 2023-01-22 20:11:48.392845: step: 1800/529, loss: 0.006147698033601046 2023-01-22 20:11:49.460859: step: 1804/529, loss: 0.0027198914904147387 2023-01-22 20:11:50.524431: step: 1808/529, loss: 0.004067946691066027 2023-01-22 20:11:51.588229: step: 1812/529, loss: 0.011367902159690857 2023-01-22 20:11:52.646788: step: 1816/529, loss: 0.009668429382145405 2023-01-22 20:11:53.698072: step: 1820/529, loss: 0.009239554405212402 2023-01-22 20:11:54.734306: step: 1824/529, loss: 0.028391003608703613 2023-01-22 20:11:55.813271: step: 1828/529, loss: 0.009757833555340767 2023-01-22 20:11:56.874111: step: 1832/529, loss: 0.009194308891892433 2023-01-22 20:11:57.937596: step: 1836/529, loss: 0.03597308695316315 2023-01-22 20:11:58.994438: step: 1840/529, loss: 0.027468081563711166 2023-01-22 20:12:00.049426: step: 1844/529, loss: 0.024140816181898117 2023-01-22 20:12:01.094195: step: 1848/529, loss: 0.0008999168057925999 2023-01-22 20:12:02.157311: step: 1852/529, loss: 0.026558518409729004 2023-01-22 20:12:03.200310: step: 1856/529, loss: 0.023927735164761543 2023-01-22 20:12:04.244250: step: 1860/529, loss: 0.004387508146464825 2023-01-22 20:12:05.303043: step: 1864/529, loss: 0.0365816093981266 2023-01-22 20:12:06.355592: step: 1868/529, loss: 0.007019015494734049 2023-01-22 20:12:07.423484: step: 1872/529, loss: 0.004949501249939203 2023-01-22 20:12:08.466107: step: 1876/529, loss: 0.011920304037630558 2023-01-22 20:12:09.527010: step: 1880/529, loss: 0.002672867150977254 2023-01-22 20:12:10.569313: step: 1884/529, loss: 0.013008982874453068 2023-01-22 20:12:11.626411: step: 1888/529, loss: 0.008184824138879776 2023-01-22 20:12:12.670759: step: 1892/529, loss: 0.0009707360877655447 2023-01-22 20:12:13.719858: step: 1896/529, loss: 0.004644864238798618 2023-01-22 20:12:14.770107: step: 1900/529, loss: 0.0055560884065926075 2023-01-22 20:12:15.808970: step: 1904/529, loss: 0.03194780275225639 2023-01-22 20:12:16.842393: step: 1908/529, loss: 0.01184525154531002 2023-01-22 20:12:17.892497: step: 1912/529, loss: 0.006322196684777737 2023-01-22 20:12:18.950968: step: 1916/529, loss: 0.005896944552659988 2023-01-22 20:12:20.011962: step: 1920/529, loss: 0.004764569457620382 2023-01-22 20:12:21.073476: step: 1924/529, loss: 0.014276344329118729 2023-01-22 20:12:22.129396: step: 1928/529, loss: 0.006889773067086935 2023-01-22 20:12:23.177574: step: 1932/529, loss: 0.03408796712756157 2023-01-22 20:12:24.236751: step: 1936/529, loss: 0.005409834906458855 2023-01-22 20:12:25.293504: step: 1940/529, loss: 0.02871054969727993 2023-01-22 20:12:26.341630: step: 1944/529, loss: 0.0030628531239926815 2023-01-22 20:12:27.380083: step: 1948/529, loss: 0.017044296488165855 2023-01-22 20:12:28.429934: step: 1952/529, loss: 0.004314434714615345 2023-01-22 20:12:29.507564: step: 1956/529, loss: 0.006943211425095797 2023-01-22 20:12:30.539818: step: 1960/529, loss: 0.010337771847844124 2023-01-22 20:12:31.573036: step: 1964/529, loss: 0.022018078714609146 2023-01-22 20:12:32.626031: step: 1968/529, loss: 0.008535580709576607 2023-01-22 20:12:33.672851: step: 1972/529, loss: 0.0002158692805096507 2023-01-22 20:12:34.731867: step: 1976/529, loss: 0.04927452653646469 2023-01-22 20:12:35.767104: step: 1980/529, loss: 0.004697205498814583 2023-01-22 20:12:36.826544: step: 1984/529, loss: 0.04207763075828552 2023-01-22 20:12:37.865892: step: 1988/529, loss: 0.005888705141842365 2023-01-22 20:12:38.915750: step: 1992/529, loss: 0.012034029699862003 2023-01-22 20:12:39.969276: step: 1996/529, loss: 0.016230523586273193 2023-01-22 20:12:41.015344: step: 2000/529, loss: 0.005281147547066212 2023-01-22 20:12:42.053847: step: 2004/529, loss: 0.0016457450110465288 2023-01-22 20:12:43.107990: step: 2008/529, loss: 0.014115831814706326 2023-01-22 20:12:44.163191: step: 2012/529, loss: 0.01586255058646202 2023-01-22 20:12:45.228974: step: 2016/529, loss: 0.006525507662445307 2023-01-22 20:12:46.289185: step: 2020/529, loss: 0.04435554891824722 2023-01-22 20:12:47.328492: step: 2024/529, loss: 0.0064546032808721066 2023-01-22 20:12:48.394034: step: 2028/529, loss: 0.005708738230168819 2023-01-22 20:12:49.461200: step: 2032/529, loss: 0.02517998218536377 2023-01-22 20:12:50.503471: step: 2036/529, loss: 0.01227293349802494 2023-01-22 20:12:51.563625: step: 2040/529, loss: 0.014538360759615898 2023-01-22 20:12:52.617588: step: 2044/529, loss: 0.015914535149931908 2023-01-22 20:12:53.668518: step: 2048/529, loss: 0.0041007110849022865 2023-01-22 20:12:54.718753: step: 2052/529, loss: 0.012785022146999836 2023-01-22 20:12:55.761647: step: 2056/529, loss: 0.003201248124241829 2023-01-22 20:12:56.819748: step: 2060/529, loss: 0.004081238526850939 2023-01-22 20:12:57.867777: step: 2064/529, loss: 0.011463594622910023 2023-01-22 20:12:58.917761: step: 2068/529, loss: 0.0006879451684653759 2023-01-22 20:12:59.973294: step: 2072/529, loss: 0.025968505069613457 2023-01-22 20:13:01.018660: step: 2076/529, loss: 0.013166551478207111 2023-01-22 20:13:02.078788: step: 2080/529, loss: 0.018307577818632126 2023-01-22 20:13:03.114674: step: 2084/529, loss: 0.008127505891025066 2023-01-22 20:13:04.168690: step: 2088/529, loss: 0.05062512308359146 2023-01-22 20:13:05.248824: step: 2092/529, loss: 0.005797439254820347 2023-01-22 20:13:06.306964: step: 2096/529, loss: 0.003359495894983411 2023-01-22 20:13:07.369752: step: 2100/529, loss: 0.0053307414054870605 2023-01-22 20:13:08.408523: step: 2104/529, loss: 0.028980115428566933 2023-01-22 20:13:09.457469: step: 2108/529, loss: 0.005553788039833307 2023-01-22 20:13:10.495243: step: 2112/529, loss: 0.019708609208464622 2023-01-22 20:13:11.543712: step: 2116/529, loss: 0.015630964189767838 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33070958646616544, 'r': 0.31000101653564655, 'f1': 0.3200206380299427}, 'combined': 0.23580468065364196, 'stategy': 1, 'epoch': 3} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37689896711502635, 'r': 0.3196734694902318, 'f1': 0.3459355958789477}, 'combined': 0.2433717759952396, 'stategy': 1, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31426429991985516, 'r': 0.34050268549191137, 'f1': 0.32685776913340125}, 'combined': 0.24084256672987459, 'stategy': 1, 'epoch': 3} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37782261151645236, 'r': 0.33137711175723716, 'f1': 0.3530790034236424}, 'combined': 0.2506860924307861, 'stategy': 1, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3290359112394958, 'r': 0.3396499728923828, 'f1': 0.33425870348139264}, 'combined': 0.24629588677576297, 'stategy': 1, 'epoch': 3} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3765897754522751, 'r': 0.2990372562491888, 'f1': 0.33336254441787005}, 'combined': 0.23668740653668774, 'stategy': 1, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34188034188034183, 'r': 0.38095238095238093, 'f1': 0.36036036036036034}, 'combined': 0.2402402402402402, 'stategy': 1, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3352642423366108, 'r': 0.31427046625101657, 'f1': 0.3244280817126068}, 'combined': 0.23905227073560498, 'stategy': 1, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37769273447697677, 'r': 0.31771011870489146, 'f1': 0.34511449861213805}, 'combined': 0.2427941196266298, 'stategy': 1, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3164259395725866, 'r': 0.3452465185089892, 'f1': 0.3302085576301947}, 'combined': 0.24331156878014343, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3807202674530041, 'r': 0.3315950716526164, 'f1': 0.3544636972838313}, 'combined': 0.25166922507152023, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3297020574534162, 'r': 0.34534257251287614, 'f1': 0.337341122732689}, 'combined': 0.2485671430661919, 'stategy': 1, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37549195557441667, 'r': 0.29751020563836855, 'f1': 0.33198315059549205}, 'combined': 0.23570803692279935, 'stategy': 1, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 20:15:47.302837: step: 4/529, loss: 0.004972977098077536 2023-01-22 20:15:48.364278: step: 8/529, loss: 0.015315771102905273 2023-01-22 20:15:49.397106: step: 12/529, loss: 0.006792586296796799 2023-01-22 20:15:50.427575: step: 16/529, loss: 0.002312940079718828 2023-01-22 20:15:51.486643: step: 20/529, loss: 0.016723699867725372 2023-01-22 20:15:52.519768: step: 24/529, loss: 0.02990327961742878 2023-01-22 20:15:53.572281: step: 28/529, loss: 0.006618340499699116 2023-01-22 20:15:54.612591: step: 32/529, loss: 0.001345322118140757 2023-01-22 20:15:55.651292: step: 36/529, loss: 0.03215472027659416 2023-01-22 20:15:56.712198: step: 40/529, loss: 0.005890315864235163 2023-01-22 20:15:57.748925: step: 44/529, loss: 0.021393928676843643 2023-01-22 20:15:58.786849: step: 48/529, loss: 0.0010949615389108658 2023-01-22 20:15:59.805538: step: 52/529, loss: 0.0029673182871192694 2023-01-22 20:16:00.869988: step: 56/529, loss: 0.002470282604917884 2023-01-22 20:16:01.927328: step: 60/529, loss: 0.006345274392515421 2023-01-22 20:16:02.966412: step: 64/529, loss: 0.008853521198034286 2023-01-22 20:16:04.004044: step: 68/529, loss: 0.009107585996389389 2023-01-22 20:16:05.044040: step: 72/529, loss: 0.0016798933502286673 2023-01-22 20:16:06.069837: step: 76/529, loss: 0.006911927834153175 2023-01-22 20:16:07.124637: step: 80/529, loss: 0.007749161217361689 2023-01-22 20:16:08.170773: step: 84/529, loss: 0.008011680096387863 2023-01-22 20:16:09.211660: step: 88/529, loss: 0.004777104128152132 2023-01-22 20:16:10.256989: step: 92/529, loss: 0.03773088753223419 2023-01-22 20:16:11.293210: step: 96/529, loss: 0.0062218159437179565 2023-01-22 20:16:12.340033: step: 100/529, loss: 0.0026181309949606657 2023-01-22 20:16:13.410424: step: 104/529, loss: 0.0029570963233709335 2023-01-22 20:16:14.451717: step: 108/529, loss: 0.005767789203673601 2023-01-22 20:16:15.489592: step: 112/529, loss: 0.025732675567269325 2023-01-22 20:16:16.540395: step: 116/529, loss: 0.015178782865405083 2023-01-22 20:16:17.605685: step: 120/529, loss: 0.01200809795409441 2023-01-22 20:16:18.657100: step: 124/529, loss: 0.008931677788496017 2023-01-22 20:16:19.706373: step: 128/529, loss: 0.007425300311297178 2023-01-22 20:16:20.748194: step: 132/529, loss: 0.0013094847090542316 2023-01-22 20:16:21.821909: step: 136/529, loss: 0.013010287657380104 2023-01-22 20:16:22.882743: step: 140/529, loss: 0.007465814705938101 2023-01-22 20:16:23.932253: step: 144/529, loss: 0.002438231138512492 2023-01-22 20:16:24.983492: step: 148/529, loss: 0.004434029571712017 2023-01-22 20:16:26.048455: step: 152/529, loss: 0.003251565620303154 2023-01-22 20:16:27.098140: step: 156/529, loss: 0.006463929545134306 2023-01-22 20:16:28.162362: step: 160/529, loss: 0.00255812075920403 2023-01-22 20:16:29.200771: step: 164/529, loss: 0.001769814407452941 2023-01-22 20:16:30.267247: step: 168/529, loss: 0.012185326777398586 2023-01-22 20:16:31.327898: step: 172/529, loss: 0.00596971670165658 2023-01-22 20:16:32.386282: step: 176/529, loss: 0.005892569664865732 2023-01-22 20:16:33.463317: step: 180/529, loss: 0.03773678094148636 2023-01-22 20:16:34.504000: step: 184/529, loss: 0.003641950897872448 2023-01-22 20:16:35.566670: step: 188/529, loss: 0.006376815028488636 2023-01-22 20:16:36.619946: step: 192/529, loss: 0.005442554596811533 2023-01-22 20:16:37.656455: step: 196/529, loss: 0.06592044979333878 2023-01-22 20:16:38.715594: step: 200/529, loss: 0.019574610516428947 2023-01-22 20:16:39.759841: step: 204/529, loss: 0.0016029856633394957 2023-01-22 20:16:40.797709: step: 208/529, loss: 0.005014206748455763 2023-01-22 20:16:41.846925: step: 212/529, loss: 0.012270529754459858 2023-01-22 20:16:42.916472: step: 216/529, loss: 0.010463295504450798 2023-01-22 20:16:43.954850: step: 220/529, loss: 0.011553673073649406 2023-01-22 20:16:45.008303: step: 224/529, loss: 0.009193331934511662 2023-01-22 20:16:46.043425: step: 228/529, loss: 0.003707186784595251 2023-01-22 20:16:47.114944: step: 232/529, loss: 0.00783197209239006 2023-01-22 20:16:48.152917: step: 236/529, loss: 0.02482003904879093 2023-01-22 20:16:49.193802: step: 240/529, loss: 0.0977710410952568 2023-01-22 20:16:50.280658: step: 244/529, loss: 0.09337173402309418 2023-01-22 20:16:51.304718: step: 248/529, loss: 0.005586524028331041 2023-01-22 20:16:52.350012: step: 252/529, loss: 0.009423908777534962 2023-01-22 20:16:53.402381: step: 256/529, loss: 0.04345278441905975 2023-01-22 20:16:54.446459: step: 260/529, loss: 0.0033215321600437164 2023-01-22 20:16:55.489160: step: 264/529, loss: 0.031740445643663406 2023-01-22 20:16:56.529393: step: 268/529, loss: 0.007093391381204128 2023-01-22 20:16:57.576516: step: 272/529, loss: 0.006825506221503019 2023-01-22 20:16:58.620557: step: 276/529, loss: 0.016430441290140152 2023-01-22 20:16:59.678623: step: 280/529, loss: 0.022994298487901688 2023-01-22 20:17:00.732006: step: 284/529, loss: 0.002012432087212801 2023-01-22 20:17:01.785208: step: 288/529, loss: 0.004674348048865795 2023-01-22 20:17:02.838267: step: 292/529, loss: 0.008034372702240944 2023-01-22 20:17:03.893802: step: 296/529, loss: 0.0038768788799643517 2023-01-22 20:17:04.940530: step: 300/529, loss: 0.010274171829223633 2023-01-22 20:17:05.992121: step: 304/529, loss: 0.003792906878516078 2023-01-22 20:17:07.036953: step: 308/529, loss: 0.0043530636467039585 2023-01-22 20:17:08.081826: step: 312/529, loss: 0.005646038800477982 2023-01-22 20:17:09.128234: step: 316/529, loss: 0.013997458852827549 2023-01-22 20:17:10.178240: step: 320/529, loss: 0.027582945302128792 2023-01-22 20:17:11.247134: step: 324/529, loss: 0.03427688404917717 2023-01-22 20:17:12.305964: step: 328/529, loss: 0.0034100592602044344 2023-01-22 20:17:13.372896: step: 332/529, loss: 0.004041348118335009 2023-01-22 20:17:14.424395: step: 336/529, loss: 0.006184133235365152 2023-01-22 20:17:15.512440: step: 340/529, loss: 0.026666175574064255 2023-01-22 20:17:16.580486: step: 344/529, loss: 0.009437326341867447 2023-01-22 20:17:17.631915: step: 348/529, loss: 0.04825510457158089 2023-01-22 20:17:18.689439: step: 352/529, loss: 0.009116174653172493 2023-01-22 20:17:19.742337: step: 356/529, loss: 0.037557389587163925 2023-01-22 20:17:20.778556: step: 360/529, loss: 0.003182884305715561 2023-01-22 20:17:21.845048: step: 364/529, loss: 0.006011029705405235 2023-01-22 20:17:22.913283: step: 368/529, loss: 0.008684770204126835 2023-01-22 20:17:23.958140: step: 372/529, loss: 0.004323802422732115 2023-01-22 20:17:25.005304: step: 376/529, loss: 0.019081899896264076 2023-01-22 20:17:26.059145: step: 380/529, loss: 0.013846813701093197 2023-01-22 20:17:27.115946: step: 384/529, loss: 0.013083360157907009 2023-01-22 20:17:28.168269: step: 388/529, loss: 0.002659061225131154 2023-01-22 20:17:29.206602: step: 392/529, loss: 0.0121193528175354 2023-01-22 20:17:30.258073: step: 396/529, loss: 0.01882922649383545 2023-01-22 20:17:31.303016: step: 400/529, loss: 0.017904605716466904 2023-01-22 20:17:32.382084: step: 404/529, loss: 0.0022688633762300014 2023-01-22 20:17:33.446137: step: 408/529, loss: 0.009439710527658463 2023-01-22 20:17:34.510964: step: 412/529, loss: 0.02439112402498722 2023-01-22 20:17:35.571164: step: 416/529, loss: 0.00419649900868535 2023-01-22 20:17:36.631072: step: 420/529, loss: 0.007332560606300831 2023-01-22 20:17:37.695032: step: 424/529, loss: 0.010715237818658352 2023-01-22 20:17:38.739271: step: 428/529, loss: 0.009345471858978271 2023-01-22 20:17:39.800370: step: 432/529, loss: 0.00828529056161642 2023-01-22 20:17:40.841785: step: 436/529, loss: 0.0039924131706357 2023-01-22 20:17:41.887753: step: 440/529, loss: 0.0009335471550002694 2023-01-22 20:17:42.967040: step: 444/529, loss: 0.024950075894594193 2023-01-22 20:17:44.026202: step: 448/529, loss: 0.003914753906428814 2023-01-22 20:17:45.083316: step: 452/529, loss: 0.011607585474848747 2023-01-22 20:17:46.146192: step: 456/529, loss: 0.010729657486081123 2023-01-22 20:17:47.195949: step: 460/529, loss: 0.016343696042895317 2023-01-22 20:17:48.244627: step: 464/529, loss: 0.00885714404284954 2023-01-22 20:17:49.292195: step: 468/529, loss: 0.03314967453479767 2023-01-22 20:17:50.338062: step: 472/529, loss: 0.008124709129333496 2023-01-22 20:17:51.403529: step: 476/529, loss: 0.0014915474457666278 2023-01-22 20:17:52.469101: step: 480/529, loss: 0.02934328466653824 2023-01-22 20:17:53.519534: step: 484/529, loss: 0.008532838895916939 2023-01-22 20:17:54.571074: step: 488/529, loss: 0.0038052164018154144 2023-01-22 20:17:55.623625: step: 492/529, loss: 0.01491015125066042 2023-01-22 20:17:56.697704: step: 496/529, loss: 0.0035228889901190996 2023-01-22 20:17:57.742706: step: 500/529, loss: 0.0019470122642815113 2023-01-22 20:17:58.793413: step: 504/529, loss: 0.038061466068029404 2023-01-22 20:17:59.835179: step: 508/529, loss: 0.008572950027883053 2023-01-22 20:18:00.898215: step: 512/529, loss: 0.009340458549559116 2023-01-22 20:18:01.961289: step: 516/529, loss: 0.0041932035237550735 2023-01-22 20:18:03.020362: step: 520/529, loss: 0.0030380780808627605 2023-01-22 20:18:04.091496: step: 524/529, loss: 0.008492720313370228 2023-01-22 20:18:05.147111: step: 528/529, loss: 0.003924419637769461 2023-01-22 20:18:06.196634: step: 532/529, loss: 0.0033622782211750746 2023-01-22 20:18:07.263191: step: 536/529, loss: 0.0030823287088423967 2023-01-22 20:18:08.315856: step: 540/529, loss: 0.005688593722879887 2023-01-22 20:18:09.364789: step: 544/529, loss: 0.005316615104675293 2023-01-22 20:18:10.423628: step: 548/529, loss: 0.0007958625792525709 2023-01-22 20:18:11.475832: step: 552/529, loss: 0.0039175511337816715 2023-01-22 20:18:12.525943: step: 556/529, loss: 0.0003252278547734022 2023-01-22 20:18:13.576454: step: 560/529, loss: 0.0011216717539355159 2023-01-22 20:18:14.646628: step: 564/529, loss: 0.008563704788684845 2023-01-22 20:18:15.691262: step: 568/529, loss: 0.004667587578296661 2023-01-22 20:18:16.748309: step: 572/529, loss: 0.0037889331579208374 2023-01-22 20:18:17.795726: step: 576/529, loss: 0.0018241889774799347 2023-01-22 20:18:18.850617: step: 580/529, loss: 0.023664621636271477 2023-01-22 20:18:19.930050: step: 584/529, loss: 0.009895668365061283 2023-01-22 20:18:20.973899: step: 588/529, loss: 0.005173603072762489 2023-01-22 20:18:22.045233: step: 592/529, loss: 0.006426761858165264 2023-01-22 20:18:23.103740: step: 596/529, loss: 0.0012574323918670416 2023-01-22 20:18:24.183640: step: 600/529, loss: 0.011035253293812275 2023-01-22 20:18:25.230897: step: 604/529, loss: 0.049063120037317276 2023-01-22 20:18:26.286075: step: 608/529, loss: 0.01464060042053461 2023-01-22 20:18:27.335062: step: 612/529, loss: 0.0024741431698203087 2023-01-22 20:18:28.386384: step: 616/529, loss: 0.007257701829075813 2023-01-22 20:18:29.441788: step: 620/529, loss: 0.0327448695898056 2023-01-22 20:18:30.483189: step: 624/529, loss: 0.025429610162973404 2023-01-22 20:18:31.517889: step: 628/529, loss: 0.005170813761651516 2023-01-22 20:18:32.578411: step: 632/529, loss: 0.010116888210177422 2023-01-22 20:18:33.627322: step: 636/529, loss: 0.010732799768447876 2023-01-22 20:18:34.680409: step: 640/529, loss: 0.006285341922193766 2023-01-22 20:18:35.742438: step: 644/529, loss: 0.008337733335793018 2023-01-22 20:18:36.808044: step: 648/529, loss: 0.030470702797174454 2023-01-22 20:18:37.875819: step: 652/529, loss: 0.002225441625341773 2023-01-22 20:18:38.925088: step: 656/529, loss: 0.0058744559064507484 2023-01-22 20:18:39.982339: step: 660/529, loss: 0.07836455851793289 2023-01-22 20:18:41.041387: step: 664/529, loss: 0.0032941410318017006 2023-01-22 20:18:42.097940: step: 668/529, loss: 0.006888167932629585 2023-01-22 20:18:43.145188: step: 672/529, loss: 0.0037240704987198114 2023-01-22 20:18:44.188536: step: 676/529, loss: 0.003179890336468816 2023-01-22 20:18:45.238226: step: 680/529, loss: 0.01531173475086689 2023-01-22 20:18:46.293159: step: 684/529, loss: 0.008959028869867325 2023-01-22 20:18:47.350748: step: 688/529, loss: 0.009936016984283924 2023-01-22 20:18:48.405865: step: 692/529, loss: 0.009759964421391487 2023-01-22 20:18:49.462043: step: 696/529, loss: 0.00517621822655201 2023-01-22 20:18:50.520265: step: 700/529, loss: 0.028976377099752426 2023-01-22 20:18:51.566178: step: 704/529, loss: 0.00782828126102686 2023-01-22 20:18:52.637482: step: 708/529, loss: 0.05551927909255028 2023-01-22 20:18:53.701867: step: 712/529, loss: 0.0066059185191988945 2023-01-22 20:18:54.731797: step: 716/529, loss: 0.0003861311124637723 2023-01-22 20:18:55.790139: step: 720/529, loss: 0.000487752549815923 2023-01-22 20:18:56.843160: step: 724/529, loss: 0.008599737659096718 2023-01-22 20:18:57.892933: step: 728/529, loss: 0.019140299409627914 2023-01-22 20:18:58.950583: step: 732/529, loss: 0.02642989344894886 2023-01-22 20:19:00.000717: step: 736/529, loss: 0.010605587624013424 2023-01-22 20:19:01.044887: step: 740/529, loss: 0.0062395790591835976 2023-01-22 20:19:02.102409: step: 744/529, loss: 0.005483039654791355 2023-01-22 20:19:03.164675: step: 748/529, loss: 0.0035146691370755434 2023-01-22 20:19:04.252329: step: 752/529, loss: 0.003327739890664816 2023-01-22 20:19:05.291267: step: 756/529, loss: 0.007802151143550873 2023-01-22 20:19:06.337978: step: 760/529, loss: 0.01196114532649517 2023-01-22 20:19:07.385543: step: 764/529, loss: 0.0493021234869957 2023-01-22 20:19:08.424034: step: 768/529, loss: 0.04037129133939743 2023-01-22 20:19:09.467805: step: 772/529, loss: 0.002169232117012143 2023-01-22 20:19:10.530943: step: 776/529, loss: 0.004256423097103834 2023-01-22 20:19:11.577647: step: 780/529, loss: 0.006131183821707964 2023-01-22 20:19:12.631361: step: 784/529, loss: 0.005016126669943333 2023-01-22 20:19:13.695695: step: 788/529, loss: 0.007721309084445238 2023-01-22 20:19:14.760432: step: 792/529, loss: 0.02674700878560543 2023-01-22 20:19:15.833556: step: 796/529, loss: 0.010915335267782211 2023-01-22 20:19:16.877591: step: 800/529, loss: 0.008327090181410313 2023-01-22 20:19:17.932060: step: 804/529, loss: 0.005092321429401636 2023-01-22 20:19:18.984663: step: 808/529, loss: 0.008589416742324829 2023-01-22 20:19:20.027321: step: 812/529, loss: 0.0205709096044302 2023-01-22 20:19:21.066095: step: 816/529, loss: 0.0023053919430822134 2023-01-22 20:19:22.124283: step: 820/529, loss: 0.0009984575444832444 2023-01-22 20:19:23.172179: step: 824/529, loss: 0.016650976613163948 2023-01-22 20:19:24.212649: step: 828/529, loss: 0.009080648422241211 2023-01-22 20:19:25.268354: step: 832/529, loss: 0.011175430379807949 2023-01-22 20:19:26.322551: step: 836/529, loss: 0.014000511728227139 2023-01-22 20:19:27.371430: step: 840/529, loss: 0.011510957032442093 2023-01-22 20:19:28.427925: step: 844/529, loss: 0.0035364616196602583 2023-01-22 20:19:29.477746: step: 848/529, loss: 0.04261104390025139 2023-01-22 20:19:30.528082: step: 852/529, loss: 0.006573243997991085 2023-01-22 20:19:31.576929: step: 856/529, loss: 0.006428370252251625 2023-01-22 20:19:32.619430: step: 860/529, loss: 0.0022880961187183857 2023-01-22 20:19:33.669830: step: 864/529, loss: 0.01483568549156189 2023-01-22 20:19:34.705817: step: 868/529, loss: 0.04817057400941849 2023-01-22 20:19:35.754551: step: 872/529, loss: 0.007387330289930105 2023-01-22 20:19:36.810714: step: 876/529, loss: 0.009630956687033176 2023-01-22 20:19:37.856084: step: 880/529, loss: 0.007426950614899397 2023-01-22 20:19:38.908209: step: 884/529, loss: 0.010775621049106121 2023-01-22 20:19:39.954821: step: 888/529, loss: 0.004999483469873667 2023-01-22 20:19:41.002734: step: 892/529, loss: 0.061828307807445526 2023-01-22 20:19:42.047503: step: 896/529, loss: 0.00502614863216877 2023-01-22 20:19:43.075514: step: 900/529, loss: 0.0044181994162499905 2023-01-22 20:19:44.126980: step: 904/529, loss: 0.0017552727367728949 2023-01-22 20:19:45.164623: step: 908/529, loss: 0.0025432519614696503 2023-01-22 20:19:46.209330: step: 912/529, loss: 0.002674221294000745 2023-01-22 20:19:47.261653: step: 916/529, loss: 0.022594809532165527 2023-01-22 20:19:48.311939: step: 920/529, loss: 0.0010962142841890454 2023-01-22 20:19:49.356929: step: 924/529, loss: 0.0017544376896694303 2023-01-22 20:19:50.406966: step: 928/529, loss: 0.005362001247704029 2023-01-22 20:19:51.458087: step: 932/529, loss: 0.016685402020812035 2023-01-22 20:19:52.497169: step: 936/529, loss: 0.006691533140838146 2023-01-22 20:19:53.565962: step: 940/529, loss: 0.004304968751966953 2023-01-22 20:19:54.608954: step: 944/529, loss: 0.0039007433224469423 2023-01-22 20:19:55.665290: step: 948/529, loss: 0.06480841338634491 2023-01-22 20:19:56.710280: step: 952/529, loss: 0.02269637957215309 2023-01-22 20:19:57.770581: step: 956/529, loss: 0.009550395421683788 2023-01-22 20:19:58.794901: step: 960/529, loss: 0.005923955235630274 2023-01-22 20:19:59.843769: step: 964/529, loss: 0.006150707136839628 2023-01-22 20:20:00.903199: step: 968/529, loss: 0.003835597075521946 2023-01-22 20:20:01.948094: step: 972/529, loss: 0.012381745502352715 2023-01-22 20:20:03.011294: step: 976/529, loss: 0.004982245620340109 2023-01-22 20:20:04.068717: step: 980/529, loss: 0.0028508882969617844 2023-01-22 20:20:05.130606: step: 984/529, loss: 0.0011175476247444749 2023-01-22 20:20:06.182386: step: 988/529, loss: 0.006891999859362841 2023-01-22 20:20:07.229227: step: 992/529, loss: 0.005373796913772821 2023-01-22 20:20:08.276478: step: 996/529, loss: 0.007324570789933205 2023-01-22 20:20:09.318231: step: 1000/529, loss: 0.013494228944182396 2023-01-22 20:20:10.361484: step: 1004/529, loss: 0.004327763803303242 2023-01-22 20:20:11.408899: step: 1008/529, loss: 0.011314337141811848 2023-01-22 20:20:12.451307: step: 1012/529, loss: 0.009498815052211285 2023-01-22 20:20:13.489626: step: 1016/529, loss: 0.0012131336843594909 2023-01-22 20:20:14.541830: step: 1020/529, loss: 0.0061645167879760265 2023-01-22 20:20:15.591425: step: 1024/529, loss: 0.03723980858922005 2023-01-22 20:20:16.633157: step: 1028/529, loss: 0.012592650018632412 2023-01-22 20:20:17.693329: step: 1032/529, loss: 0.04426978901028633 2023-01-22 20:20:18.746570: step: 1036/529, loss: 0.018988968804478645 2023-01-22 20:20:19.799676: step: 1040/529, loss: 0.004733963869512081 2023-01-22 20:20:20.855131: step: 1044/529, loss: 0.011850671842694283 2023-01-22 20:20:21.902068: step: 1048/529, loss: 0.0020663172472268343 2023-01-22 20:20:22.941297: step: 1052/529, loss: 0.018322918564081192 2023-01-22 20:20:23.996087: step: 1056/529, loss: 0.0638110488653183 2023-01-22 20:20:25.049919: step: 1060/529, loss: 0.005235969088971615 2023-01-22 20:20:26.105157: step: 1064/529, loss: 0.011508745141327381 2023-01-22 20:20:27.168358: step: 1068/529, loss: 0.06403662264347076 2023-01-22 20:20:28.210890: step: 1072/529, loss: 0.00485689053311944 2023-01-22 20:20:29.279962: step: 1076/529, loss: 0.0005276335868984461 2023-01-22 20:20:30.335726: step: 1080/529, loss: 0.009044112637639046 2023-01-22 20:20:31.380074: step: 1084/529, loss: 0.0005016711074858904 2023-01-22 20:20:32.434791: step: 1088/529, loss: 0.018513882532715797 2023-01-22 20:20:33.490538: step: 1092/529, loss: 0.0055494150146842 2023-01-22 20:20:34.560296: step: 1096/529, loss: 0.011405867524445057 2023-01-22 20:20:35.608446: step: 1100/529, loss: 0.02754393219947815 2023-01-22 20:20:36.673426: step: 1104/529, loss: 0.036888349801301956 2023-01-22 20:20:37.725779: step: 1108/529, loss: 0.01456030085682869 2023-01-22 20:20:38.774667: step: 1112/529, loss: 0.004660456441342831 2023-01-22 20:20:39.826978: step: 1116/529, loss: 0.01688554137945175 2023-01-22 20:20:40.874293: step: 1120/529, loss: 0.005367800127714872 2023-01-22 20:20:41.920059: step: 1124/529, loss: 0.0020470386371016502 2023-01-22 20:20:42.975725: step: 1128/529, loss: 0.020189180970191956 2023-01-22 20:20:44.018274: step: 1132/529, loss: 0.006316610146313906 2023-01-22 20:20:45.072082: step: 1136/529, loss: 0.017518723383545876 2023-01-22 20:20:46.125072: step: 1140/529, loss: 0.03667588531970978 2023-01-22 20:20:47.173579: step: 1144/529, loss: 0.007369551341980696 2023-01-22 20:20:48.206733: step: 1148/529, loss: 0.008760740980505943 2023-01-22 20:20:49.291935: step: 1152/529, loss: 0.014112078584730625 2023-01-22 20:20:50.325178: step: 1156/529, loss: 0.0018694648751989007 2023-01-22 20:20:51.378500: step: 1160/529, loss: 0.019291620701551437 2023-01-22 20:20:52.423484: step: 1164/529, loss: 0.01479153148829937 2023-01-22 20:20:53.445936: step: 1168/529, loss: 0.00680797453969717 2023-01-22 20:20:54.502402: step: 1172/529, loss: 0.022763116285204887 2023-01-22 20:20:55.551610: step: 1176/529, loss: 0.005885292775928974 2023-01-22 20:20:56.618154: step: 1180/529, loss: 0.0026538600213825703 2023-01-22 20:20:57.665220: step: 1184/529, loss: 0.0019506238168105483 2023-01-22 20:20:58.700716: step: 1188/529, loss: 0.015076315961778164 2023-01-22 20:20:59.753701: step: 1192/529, loss: 0.030256301164627075 2023-01-22 20:21:00.803254: step: 1196/529, loss: 0.0036665780935436487 2023-01-22 20:21:01.841160: step: 1200/529, loss: 0.010747399181127548 2023-01-22 20:21:02.891173: step: 1204/529, loss: 0.012254535220563412 2023-01-22 20:21:03.944311: step: 1208/529, loss: 0.010111925192177296 2023-01-22 20:21:04.991420: step: 1212/529, loss: 0.00012033485836582258 2023-01-22 20:21:06.057348: step: 1216/529, loss: 0.0429873913526535 2023-01-22 20:21:07.124773: step: 1220/529, loss: 0.0037914959248155355 2023-01-22 20:21:08.177801: step: 1224/529, loss: 0.008630845695734024 2023-01-22 20:21:09.246886: step: 1228/529, loss: 0.04507167637348175 2023-01-22 20:21:10.295280: step: 1232/529, loss: 0.01614045538008213 2023-01-22 20:21:11.338733: step: 1236/529, loss: 0.010540727525949478 2023-01-22 20:21:12.374826: step: 1240/529, loss: 0.005351443309336901 2023-01-22 20:21:13.441919: step: 1244/529, loss: 0.004536953754723072 2023-01-22 20:21:14.483659: step: 1248/529, loss: 0.0011442664545029402 2023-01-22 20:21:15.520954: step: 1252/529, loss: 0.014306227676570415 2023-01-22 20:21:16.565450: step: 1256/529, loss: 0.0005278648459352553 2023-01-22 20:21:17.613561: step: 1260/529, loss: 0.010199516080319881 2023-01-22 20:21:18.685542: step: 1264/529, loss: 0.0035249805077910423 2023-01-22 20:21:19.739774: step: 1268/529, loss: 0.006654068361967802 2023-01-22 20:21:20.790881: step: 1272/529, loss: 0.0036469653714448214 2023-01-22 20:21:21.842066: step: 1276/529, loss: 0.007703786715865135 2023-01-22 20:21:22.885573: step: 1280/529, loss: 0.002827998483553529 2023-01-22 20:21:23.940395: step: 1284/529, loss: 0.0 2023-01-22 20:21:24.984473: step: 1288/529, loss: 0.04221372306346893 2023-01-22 20:21:26.048045: step: 1292/529, loss: 0.004524911288172007 2023-01-22 20:21:27.092836: step: 1296/529, loss: 0.023314034566283226 2023-01-22 20:21:28.164179: step: 1300/529, loss: 0.03874615207314491 2023-01-22 20:21:29.208748: step: 1304/529, loss: 0.04160419479012489 2023-01-22 20:21:30.264515: step: 1308/529, loss: 0.022686084732413292 2023-01-22 20:21:31.312996: step: 1312/529, loss: 0.0058804345317184925 2023-01-22 20:21:32.374158: step: 1316/529, loss: 0.012320570647716522 2023-01-22 20:21:33.419915: step: 1320/529, loss: 0.005599238444119692 2023-01-22 20:21:34.479835: step: 1324/529, loss: 0.00872561801224947 2023-01-22 20:21:35.524690: step: 1328/529, loss: 0.018861278891563416 2023-01-22 20:21:36.598424: step: 1332/529, loss: 0.009698063135147095 2023-01-22 20:21:37.657337: step: 1336/529, loss: 0.024158351123332977 2023-01-22 20:21:38.701537: step: 1340/529, loss: 0.008478447794914246 2023-01-22 20:21:39.766376: step: 1344/529, loss: 0.006360412575304508 2023-01-22 20:21:40.814081: step: 1348/529, loss: 0.042263664305210114 2023-01-22 20:21:41.870057: step: 1352/529, loss: 0.027825692668557167 2023-01-22 20:21:42.915577: step: 1356/529, loss: 0.004278220236301422 2023-01-22 20:21:43.948164: step: 1360/529, loss: 0.0018108503427356482 2023-01-22 20:21:45.004519: step: 1364/529, loss: 0.007207158487290144 2023-01-22 20:21:46.061135: step: 1368/529, loss: 0.012632081285119057 2023-01-22 20:21:47.097197: step: 1372/529, loss: 0.024664968252182007 2023-01-22 20:21:48.143602: step: 1376/529, loss: 0.004429580643773079 2023-01-22 20:21:49.190189: step: 1380/529, loss: 0.0011284881038591266 2023-01-22 20:21:50.259772: step: 1384/529, loss: 0.003701432142406702 2023-01-22 20:21:51.292532: step: 1388/529, loss: 0.004340532701462507 2023-01-22 20:21:52.355664: step: 1392/529, loss: 0.07653127610683441 2023-01-22 20:21:53.407433: step: 1396/529, loss: 0.0027604878414422274 2023-01-22 20:21:54.482362: step: 1400/529, loss: 0.04153519496321678 2023-01-22 20:21:55.524976: step: 1404/529, loss: 0.004973276052623987 2023-01-22 20:21:56.572270: step: 1408/529, loss: 0.01086418330669403 2023-01-22 20:21:57.619873: step: 1412/529, loss: 0.004495950415730476 2023-01-22 20:21:58.658934: step: 1416/529, loss: 0.00830911099910736 2023-01-22 20:21:59.716861: step: 1420/529, loss: 0.008292761631309986 2023-01-22 20:22:00.751400: step: 1424/529, loss: 0.03062346950173378 2023-01-22 20:22:01.805216: step: 1428/529, loss: 0.005511709488928318 2023-01-22 20:22:02.853433: step: 1432/529, loss: 0.002510402351617813 2023-01-22 20:22:03.911162: step: 1436/529, loss: 0.009135331027209759 2023-01-22 20:22:04.967795: step: 1440/529, loss: 0.004969322122633457 2023-01-22 20:22:06.005823: step: 1444/529, loss: 0.0034258062951266766 2023-01-22 20:22:07.069178: step: 1448/529, loss: 0.017483189702033997 2023-01-22 20:22:08.111028: step: 1452/529, loss: 0.013794650323688984 2023-01-22 20:22:09.167742: step: 1456/529, loss: 0.01200064830482006 2023-01-22 20:22:10.239344: step: 1460/529, loss: 0.016609076410531998 2023-01-22 20:22:11.299609: step: 1464/529, loss: 0.010299348272383213 2023-01-22 20:22:12.347171: step: 1468/529, loss: 0.008094343356788158 2023-01-22 20:22:13.395888: step: 1472/529, loss: 0.008975815959274769 2023-01-22 20:22:14.451005: step: 1476/529, loss: 0.011079206131398678 2023-01-22 20:22:15.499812: step: 1480/529, loss: 0.010500152595341206 2023-01-22 20:22:16.550347: step: 1484/529, loss: 0.004410214256495237 2023-01-22 20:22:17.611746: step: 1488/529, loss: 0.006896296050399542 2023-01-22 20:22:18.672647: step: 1492/529, loss: 0.003716336563229561 2023-01-22 20:22:19.718897: step: 1496/529, loss: 0.0036580548621714115 2023-01-22 20:22:20.769324: step: 1500/529, loss: 0.0013570705195888877 2023-01-22 20:22:21.819182: step: 1504/529, loss: 0.01896820217370987 2023-01-22 20:22:22.874201: step: 1508/529, loss: 0.00613460224121809 2023-01-22 20:22:23.939391: step: 1512/529, loss: 0.014554130844771862 2023-01-22 20:22:24.988196: step: 1516/529, loss: 0.021483778953552246 2023-01-22 20:22:26.042304: step: 1520/529, loss: 0.038009241223335266 2023-01-22 20:22:27.094135: step: 1524/529, loss: 0.005554992239922285 2023-01-22 20:22:28.149644: step: 1528/529, loss: 0.006833712104707956 2023-01-22 20:22:29.200250: step: 1532/529, loss: 0.014637970365583897 2023-01-22 20:22:30.252177: step: 1536/529, loss: 0.017603188753128052 2023-01-22 20:22:31.297111: step: 1540/529, loss: 0.006117392331361771 2023-01-22 20:22:32.343307: step: 1544/529, loss: 0.03072996623814106 2023-01-22 20:22:33.387897: step: 1548/529, loss: 0.005408911500126123 2023-01-22 20:22:34.438804: step: 1552/529, loss: 0.0013016500743106008 2023-01-22 20:22:35.476994: step: 1556/529, loss: 0.019840022549033165 2023-01-22 20:22:36.521966: step: 1560/529, loss: 0.004837490618228912 2023-01-22 20:22:37.578901: step: 1564/529, loss: 0.028960296884179115 2023-01-22 20:22:38.628122: step: 1568/529, loss: 0.005580108147114515 2023-01-22 20:22:39.683992: step: 1572/529, loss: 0.0157366544008255 2023-01-22 20:22:40.731558: step: 1576/529, loss: 0.02272208221256733 2023-01-22 20:22:41.799810: step: 1580/529, loss: 0.04798628017306328 2023-01-22 20:22:42.846529: step: 1584/529, loss: 0.007160256151109934 2023-01-22 20:22:43.899684: step: 1588/529, loss: 0.011720363050699234 2023-01-22 20:22:44.942047: step: 1592/529, loss: 0.009463370777666569 2023-01-22 20:22:46.001057: step: 1596/529, loss: 0.0019328281050547957 2023-01-22 20:22:47.073625: step: 1600/529, loss: 0.0050371852703392506 2023-01-22 20:22:48.117164: step: 1604/529, loss: 0.03656010702252388 2023-01-22 20:22:49.168554: step: 1608/529, loss: 0.002126434352248907 2023-01-22 20:22:50.234339: step: 1612/529, loss: 0.0036571999080479145 2023-01-22 20:22:51.292525: step: 1616/529, loss: 0.005796024575829506 2023-01-22 20:22:52.345678: step: 1620/529, loss: 0.0052338275127112865 2023-01-22 20:22:53.399650: step: 1624/529, loss: 0.003084834199398756 2023-01-22 20:22:54.449248: step: 1628/529, loss: 0.024948598816990852 2023-01-22 20:22:55.484703: step: 1632/529, loss: 0.002601920161396265 2023-01-22 20:22:56.537324: step: 1636/529, loss: 0.018423190340399742 2023-01-22 20:22:57.607463: step: 1640/529, loss: 0.0028306390158832073 2023-01-22 20:22:58.659180: step: 1644/529, loss: 0.012982552871108055 2023-01-22 20:22:59.688064: step: 1648/529, loss: 0.002954066963866353 2023-01-22 20:23:00.737110: step: 1652/529, loss: 0.01015490386635065 2023-01-22 20:23:01.774777: step: 1656/529, loss: 0.011678203009068966 2023-01-22 20:23:02.825094: step: 1660/529, loss: 0.0052405777387320995 2023-01-22 20:23:03.874655: step: 1664/529, loss: 0.00795319490134716 2023-01-22 20:23:04.921855: step: 1668/529, loss: 0.03392279893159866 2023-01-22 20:23:05.966871: step: 1672/529, loss: 0.007351542357355356 2023-01-22 20:23:07.015387: step: 1676/529, loss: 0.005623773671686649 2023-01-22 20:23:08.063489: step: 1680/529, loss: 0.008912540972232819 2023-01-22 20:23:09.117942: step: 1684/529, loss: 0.013553430326282978 2023-01-22 20:23:10.158169: step: 1688/529, loss: 0.005385417956858873 2023-01-22 20:23:11.189668: step: 1692/529, loss: 0.009424775838851929 2023-01-22 20:23:12.238538: step: 1696/529, loss: 0.005319713149219751 2023-01-22 20:23:13.289106: step: 1700/529, loss: 0.0015801258850842714 2023-01-22 20:23:14.332178: step: 1704/529, loss: 0.006742890924215317 2023-01-22 20:23:15.372924: step: 1708/529, loss: 0.004740002565085888 2023-01-22 20:23:16.415738: step: 1712/529, loss: 0.013140940107405186 2023-01-22 20:23:17.480930: step: 1716/529, loss: 0.039648085832595825 2023-01-22 20:23:18.548380: step: 1720/529, loss: 0.002026183530688286 2023-01-22 20:23:19.579009: step: 1724/529, loss: 0.004469913896173239 2023-01-22 20:23:20.627263: step: 1728/529, loss: 0.007142144255340099 2023-01-22 20:23:21.677513: step: 1732/529, loss: 0.008009987883269787 2023-01-22 20:23:22.713732: step: 1736/529, loss: 0.0020983973518013954 2023-01-22 20:23:23.756848: step: 1740/529, loss: 0.005713168065994978 2023-01-22 20:23:24.793522: step: 1744/529, loss: 0.0019814029801636934 2023-01-22 20:23:25.834781: step: 1748/529, loss: 0.01931850053369999 2023-01-22 20:23:26.883575: step: 1752/529, loss: 0.02419920451939106 2023-01-22 20:23:27.922228: step: 1756/529, loss: 0.03908029571175575 2023-01-22 20:23:28.968249: step: 1760/529, loss: 0.006510796491056681 2023-01-22 20:23:30.032535: step: 1764/529, loss: 0.015054167248308659 2023-01-22 20:23:31.092078: step: 1768/529, loss: 0.009906504303216934 2023-01-22 20:23:32.145348: step: 1772/529, loss: 0.011733652092516422 2023-01-22 20:23:33.192224: step: 1776/529, loss: 0.011637475341558456 2023-01-22 20:23:34.240960: step: 1780/529, loss: 0.009348433464765549 2023-01-22 20:23:35.304792: step: 1784/529, loss: 0.010028909891843796 2023-01-22 20:23:36.368182: step: 1788/529, loss: 0.0031729049514979124 2023-01-22 20:23:37.424015: step: 1792/529, loss: 0.00335936201736331 2023-01-22 20:23:38.476581: step: 1796/529, loss: 0.009620536118745804 2023-01-22 20:23:39.524615: step: 1800/529, loss: 0.014442645944654942 2023-01-22 20:23:40.585683: step: 1804/529, loss: 0.006247695069760084 2023-01-22 20:23:41.634209: step: 1808/529, loss: 0.0036743278615176678 2023-01-22 20:23:42.660863: step: 1812/529, loss: 0.009299288503825665 2023-01-22 20:23:43.715752: step: 1816/529, loss: 0.010432195849716663 2023-01-22 20:23:44.768161: step: 1820/529, loss: 0.03408021852374077 2023-01-22 20:23:45.821233: step: 1824/529, loss: 0.020112326368689537 2023-01-22 20:23:46.881709: step: 1828/529, loss: 0.03447868674993515 2023-01-22 20:23:47.921553: step: 1832/529, loss: 0.0028231474570930004 2023-01-22 20:23:48.962303: step: 1836/529, loss: 0.006711357273161411 2023-01-22 20:23:50.018164: step: 1840/529, loss: 0.002459987299516797 2023-01-22 20:23:51.075924: step: 1844/529, loss: 0.008084967732429504 2023-01-22 20:23:52.108203: step: 1848/529, loss: 0.004666895139962435 2023-01-22 20:23:53.181122: step: 1852/529, loss: 0.007382987532764673 2023-01-22 20:23:54.236652: step: 1856/529, loss: 0.025518856942653656 2023-01-22 20:23:55.292800: step: 1860/529, loss: 0.005023759324103594 2023-01-22 20:23:56.351366: step: 1864/529, loss: 0.017002597451210022 2023-01-22 20:23:57.404332: step: 1868/529, loss: 0.004340636543929577 2023-01-22 20:23:58.449352: step: 1872/529, loss: 0.02818182483315468 2023-01-22 20:23:59.489260: step: 1876/529, loss: 0.0005230815731920302 2023-01-22 20:24:00.535787: step: 1880/529, loss: 0.06654531508684158 2023-01-22 20:24:01.579706: step: 1884/529, loss: 0.0022801330778747797 2023-01-22 20:24:02.643931: step: 1888/529, loss: 0.007670076563954353 2023-01-22 20:24:03.696975: step: 1892/529, loss: 0.009617668576538563 2023-01-22 20:24:04.746768: step: 1896/529, loss: 0.06711260974407196 2023-01-22 20:24:05.802459: step: 1900/529, loss: 0.0067568388767540455 2023-01-22 20:24:06.868876: step: 1904/529, loss: 0.008602142333984375 2023-01-22 20:24:07.920473: step: 1908/529, loss: 0.003868421074002981 2023-01-22 20:24:08.976051: step: 1912/529, loss: 0.006566599477082491 2023-01-22 20:24:10.017364: step: 1916/529, loss: 0.0032664001919329166 2023-01-22 20:24:11.066292: step: 1920/529, loss: 0.006884898990392685 2023-01-22 20:24:12.112074: step: 1924/529, loss: 0.029060304164886475 2023-01-22 20:24:13.158402: step: 1928/529, loss: 0.006362695246934891 2023-01-22 20:24:14.221458: step: 1932/529, loss: 0.026920685544610023 2023-01-22 20:24:15.265934: step: 1936/529, loss: 0.028686033561825752 2023-01-22 20:24:16.322532: step: 1940/529, loss: 0.010637717321515083 2023-01-22 20:24:17.383950: step: 1944/529, loss: 0.014736822806298733 2023-01-22 20:24:18.423657: step: 1948/529, loss: 0.014455122873187065 2023-01-22 20:24:19.474184: step: 1952/529, loss: 0.0034001150634139776 2023-01-22 20:24:20.524251: step: 1956/529, loss: 0.0005323382210917771 2023-01-22 20:24:21.589127: step: 1960/529, loss: 0.009136514738202095 2023-01-22 20:24:22.641495: step: 1964/529, loss: 0.006039501167833805 2023-01-22 20:24:23.688806: step: 1968/529, loss: 0.0016622879775241017 2023-01-22 20:24:24.736158: step: 1972/529, loss: 0.002139670541509986 2023-01-22 20:24:25.786946: step: 1976/529, loss: 0.005963315721601248 2023-01-22 20:24:26.834475: step: 1980/529, loss: 0.001540248398669064 2023-01-22 20:24:27.879525: step: 1984/529, loss: 0.004051562398672104 2023-01-22 20:24:28.945935: step: 1988/529, loss: 0.047785885632038116 2023-01-22 20:24:29.978570: step: 1992/529, loss: 0.01642420142889023 2023-01-22 20:24:31.022186: step: 1996/529, loss: 0.013428923673927784 2023-01-22 20:24:32.067205: step: 2000/529, loss: 0.006780707743018866 2023-01-22 20:24:33.123974: step: 2004/529, loss: 0.00836184062063694 2023-01-22 20:24:34.176040: step: 2008/529, loss: 0.03754450008273125 2023-01-22 20:24:35.213975: step: 2012/529, loss: 0.0017651290399953723 2023-01-22 20:24:36.264758: step: 2016/529, loss: 0.01928066276013851 2023-01-22 20:24:37.315899: step: 2020/529, loss: 0.011845334433019161 2023-01-22 20:24:38.370047: step: 2024/529, loss: 0.0038715298287570477 2023-01-22 20:24:39.431251: step: 2028/529, loss: 0.007522902451455593 2023-01-22 20:24:40.494464: step: 2032/529, loss: 0.021873516961932182 2023-01-22 20:24:41.545750: step: 2036/529, loss: 0.00497586652636528 2023-01-22 20:24:42.590636: step: 2040/529, loss: 0.03909914568066597 2023-01-22 20:24:43.658284: step: 2044/529, loss: 0.004596895072609186 2023-01-22 20:24:44.692271: step: 2048/529, loss: 0.003683784743770957 2023-01-22 20:24:45.742787: step: 2052/529, loss: 0.004018933046609163 2023-01-22 20:24:46.797144: step: 2056/529, loss: 0.0062164198607206345 2023-01-22 20:24:47.842310: step: 2060/529, loss: 0.01098989974707365 2023-01-22 20:24:48.902346: step: 2064/529, loss: 0.018533045426011086 2023-01-22 20:24:49.976723: step: 2068/529, loss: 0.012348774820566177 2023-01-22 20:24:51.032875: step: 2072/529, loss: 0.030592339113354683 2023-01-22 20:24:52.080848: step: 2076/529, loss: 0.01008901558816433 2023-01-22 20:24:53.140551: step: 2080/529, loss: 0.0062245470471680164 2023-01-22 20:24:54.194617: step: 2084/529, loss: 0.008082900196313858 2023-01-22 20:24:55.245559: step: 2088/529, loss: 0.011140274815261364 2023-01-22 20:24:56.295413: step: 2092/529, loss: 0.0274446289986372 2023-01-22 20:24:57.344216: step: 2096/529, loss: 0.00805787555873394 2023-01-22 20:24:58.402309: step: 2100/529, loss: 0.03877079114317894 2023-01-22 20:24:59.444307: step: 2104/529, loss: 0.005485412199050188 2023-01-22 20:25:00.501623: step: 2108/529, loss: 0.006384048145264387 2023-01-22 20:25:01.559382: step: 2112/529, loss: 0.00483703101053834 2023-01-22 20:25:02.622176: step: 2116/529, loss: 0.0844535157084465 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33426339285714285, 'r': 0.30952663323393875, 'f1': 0.3214197748064743}, 'combined': 0.23683562354161264, 'stategy': 1, 'epoch': 4} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.38270979983055275, 'r': 0.32360017106091243, 'f1': 0.35068160381636465}, 'combined': 0.24671067605171385, 'stategy': 1, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3126226583407672, 'r': 0.33813076898337247, 'f1': 0.324876782596604}, 'combined': 0.23938289243960295, 'stategy': 1, 'epoch': 4} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.382559309945768, 'r': 0.33486447008330517, 'f1': 0.357126496685775}, 'combined': 0.25355981264690025, 'stategy': 1, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32591765873015877, 'r': 0.3339573732718894, 'f1': 0.3298885392957558}, 'combined': 0.24307576579687268, 'stategy': 1, 'epoch': 4} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3797231646574539, 'r': 0.30318210790712946, 'f1': 0.33716321752699696}, 'combined': 0.23938588444416783, 'stategy': 1, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3352642423366108, 'r': 0.31427046625101657, 'f1': 0.3244280817126068}, 'combined': 0.23905227073560498, 'stategy': 1, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37769273447697677, 'r': 0.31771011870489146, 'f1': 0.34511449861213805}, 'combined': 0.2427941196266298, 'stategy': 1, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3164259395725866, 'r': 0.3452465185089892, 'f1': 0.3302085576301947}, 'combined': 0.24331156878014343, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3807202674530041, 'r': 0.3315950716526164, 'f1': 0.3544636972838313}, 'combined': 0.25166922507152023, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3297020574534162, 'r': 0.34534257251287614, 'f1': 0.337341122732689}, 'combined': 0.2485671430661919, 'stategy': 1, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37549195557441667, 'r': 0.29751020563836855, 'f1': 0.33198315059549205}, 'combined': 0.23570803692279935, 'stategy': 1, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 20:27:33.406833: step: 4/529, loss: 0.005364897195249796 2023-01-22 20:27:34.447964: step: 8/529, loss: 0.004931538365781307 2023-01-22 20:27:35.480864: step: 12/529, loss: 0.042458392679691315 2023-01-22 20:27:36.520601: step: 16/529, loss: 0.008594376966357231 2023-01-22 20:27:37.564306: step: 20/529, loss: 0.06183550879359245 2023-01-22 20:27:38.627385: step: 24/529, loss: 0.018887361511588097 2023-01-22 20:27:39.671558: step: 28/529, loss: 0.017884232103824615 2023-01-22 20:27:40.710768: step: 32/529, loss: 0.005103182978928089 2023-01-22 20:27:41.777130: step: 36/529, loss: 0.009733383543789387 2023-01-22 20:27:42.818327: step: 40/529, loss: 0.019029386341571808 2023-01-22 20:27:43.868376: step: 44/529, loss: 0.004970462527126074 2023-01-22 20:27:44.911436: step: 48/529, loss: 0.004223539959639311 2023-01-22 20:27:45.950312: step: 52/529, loss: 0.004985135514289141 2023-01-22 20:27:47.014861: step: 56/529, loss: 0.005571466404944658 2023-01-22 20:27:48.050644: step: 60/529, loss: 0.019706817343831062 2023-01-22 20:27:49.116901: step: 64/529, loss: 0.011472995392978191 2023-01-22 20:27:50.172017: step: 68/529, loss: 0.018539801239967346 2023-01-22 20:27:51.228410: step: 72/529, loss: 0.007469809614121914 2023-01-22 20:27:52.277064: step: 76/529, loss: 0.006544867530465126 2023-01-22 20:27:53.317748: step: 80/529, loss: 0.006753964815288782 2023-01-22 20:27:54.370497: step: 84/529, loss: 0.007025041151791811 2023-01-22 20:27:55.438110: step: 88/529, loss: 0.02341260015964508 2023-01-22 20:27:56.492417: step: 92/529, loss: 0.012081352062523365 2023-01-22 20:27:57.532337: step: 96/529, loss: 0.006929039489477873 2023-01-22 20:27:58.573514: step: 100/529, loss: 0.002449444029480219 2023-01-22 20:27:59.644061: step: 104/529, loss: 0.0008001966052688658 2023-01-22 20:28:00.706182: step: 108/529, loss: 0.004132513888180256 2023-01-22 20:28:01.762860: step: 112/529, loss: 0.02204287238419056 2023-01-22 20:28:02.832080: step: 116/529, loss: 0.007078323047608137 2023-01-22 20:28:03.888816: step: 120/529, loss: 0.04411591589450836 2023-01-22 20:28:04.955771: step: 124/529, loss: 0.00865920353680849 2023-01-22 20:28:06.010375: step: 128/529, loss: 0.007296320516616106 2023-01-22 20:28:07.070136: step: 132/529, loss: 0.025038892403244972 2023-01-22 20:28:08.111566: step: 136/529, loss: 0.01331130601465702 2023-01-22 20:28:09.157684: step: 140/529, loss: 0.004764609970152378 2023-01-22 20:28:10.213399: step: 144/529, loss: 0.1314569115638733 2023-01-22 20:28:11.262664: step: 148/529, loss: 0.012255770154297352 2023-01-22 20:28:12.323938: step: 152/529, loss: 0.025610357522964478 2023-01-22 20:28:13.369206: step: 156/529, loss: 0.001146187772974372 2023-01-22 20:28:14.418581: step: 160/529, loss: 0.026933500543236732 2023-01-22 20:28:15.460706: step: 164/529, loss: 0.006794935557991266 2023-01-22 20:28:16.510886: step: 168/529, loss: 0.011631562374532223 2023-01-22 20:28:17.560117: step: 172/529, loss: 0.009567644447088242 2023-01-22 20:28:18.602551: step: 176/529, loss: 0.008561263792216778 2023-01-22 20:28:19.654064: step: 180/529, loss: 0.007446425501257181 2023-01-22 20:28:20.698029: step: 184/529, loss: 0.0043754479847848415 2023-01-22 20:28:21.758136: step: 188/529, loss: 0.014223598875105381 2023-01-22 20:28:22.791427: step: 192/529, loss: 0.003788086585700512 2023-01-22 20:28:23.842450: step: 196/529, loss: 0.009034320712089539 2023-01-22 20:28:24.901643: step: 200/529, loss: 0.008966310881078243 2023-01-22 20:28:25.966927: step: 204/529, loss: 0.007116635330021381 2023-01-22 20:28:27.036410: step: 208/529, loss: 0.025811906903982162 2023-01-22 20:28:28.095079: step: 212/529, loss: 0.001223785220645368 2023-01-22 20:28:29.154075: step: 216/529, loss: 0.01091198530048132 2023-01-22 20:28:30.197831: step: 220/529, loss: 0.016014914959669113 2023-01-22 20:28:31.247947: step: 224/529, loss: 0.0011714407010003924 2023-01-22 20:28:32.311657: step: 228/529, loss: 0.013587542809545994 2023-01-22 20:28:33.377219: step: 232/529, loss: 0.023932525888085365 2023-01-22 20:28:34.466496: step: 236/529, loss: 0.08474171161651611 2023-01-22 20:28:35.510389: step: 240/529, loss: 0.0032490412704646587 2023-01-22 20:28:36.574086: step: 244/529, loss: 0.01775207184255123 2023-01-22 20:28:37.617409: step: 248/529, loss: 0.032986804842948914 2023-01-22 20:28:38.663442: step: 252/529, loss: 0.0101573895663023 2023-01-22 20:28:39.724272: step: 256/529, loss: 0.007543529849499464 2023-01-22 20:28:40.777112: step: 260/529, loss: 0.0067266845144331455 2023-01-22 20:28:41.847478: step: 264/529, loss: 0.01214275136590004 2023-01-22 20:28:42.893961: step: 268/529, loss: 0.004478552378714085 2023-01-22 20:28:43.941867: step: 272/529, loss: 0.003686230629682541 2023-01-22 20:28:44.984077: step: 276/529, loss: 0.0021067685447633266 2023-01-22 20:28:46.038261: step: 280/529, loss: 0.05271173268556595 2023-01-22 20:28:47.098025: step: 284/529, loss: 0.004390914458781481 2023-01-22 20:28:48.159383: step: 288/529, loss: 0.0009522793116047978 2023-01-22 20:28:49.226404: step: 292/529, loss: 0.0046539995819330215 2023-01-22 20:28:50.269080: step: 296/529, loss: 0.04465093836188316 2023-01-22 20:28:51.315087: step: 300/529, loss: 0.000985415535978973 2023-01-22 20:28:52.368081: step: 304/529, loss: 0.003451620927080512 2023-01-22 20:28:53.425257: step: 308/529, loss: 0.03451988101005554 2023-01-22 20:28:54.467361: step: 312/529, loss: 0.02739393152296543 2023-01-22 20:28:55.521110: step: 316/529, loss: 0.009458549320697784 2023-01-22 20:28:56.578196: step: 320/529, loss: 0.02049271948635578 2023-01-22 20:28:57.624622: step: 324/529, loss: 0.004238644149154425 2023-01-22 20:28:58.680174: step: 328/529, loss: 0.014069757424294949 2023-01-22 20:28:59.740768: step: 332/529, loss: 0.010747433640062809 2023-01-22 20:29:00.800987: step: 336/529, loss: 0.010514998808503151 2023-01-22 20:29:01.846743: step: 340/529, loss: 0.0031810454092919827 2023-01-22 20:29:02.904630: step: 344/529, loss: 0.015154997818171978 2023-01-22 20:29:03.950604: step: 348/529, loss: 0.0017330572009086609 2023-01-22 20:29:05.015644: step: 352/529, loss: 0.02123137190937996 2023-01-22 20:29:06.090043: step: 356/529, loss: 0.010340305045247078 2023-01-22 20:29:07.146509: step: 360/529, loss: 0.04764502868056297 2023-01-22 20:29:08.197532: step: 364/529, loss: 0.009089462459087372 2023-01-22 20:29:09.266323: step: 368/529, loss: 0.004281953442841768 2023-01-22 20:29:10.319640: step: 372/529, loss: 0.009463080205023289 2023-01-22 20:29:11.387752: step: 376/529, loss: 0.0063528623431921005 2023-01-22 20:29:12.451296: step: 380/529, loss: 0.07916349172592163 2023-01-22 20:29:13.500745: step: 384/529, loss: 0.0025667110458016396 2023-01-22 20:29:14.555121: step: 388/529, loss: 0.007856834679841995 2023-01-22 20:29:15.604131: step: 392/529, loss: 0.03807508572936058 2023-01-22 20:29:16.675252: step: 396/529, loss: 0.007178302854299545 2023-01-22 20:29:17.724861: step: 400/529, loss: 0.006062962580472231 2023-01-22 20:29:18.795387: step: 404/529, loss: 0.002284921007230878 2023-01-22 20:29:19.864712: step: 408/529, loss: 0.004220856819301844 2023-01-22 20:29:20.923084: step: 412/529, loss: 0.013605697080492973 2023-01-22 20:29:21.990485: step: 416/529, loss: 0.003448856296017766 2023-01-22 20:29:23.045738: step: 420/529, loss: 0.008349359966814518 2023-01-22 20:29:24.113295: step: 424/529, loss: 0.011480436660349369 2023-01-22 20:29:25.159965: step: 428/529, loss: 0.012101083062589169 2023-01-22 20:29:26.216770: step: 432/529, loss: 0.006895421538501978 2023-01-22 20:29:27.263288: step: 436/529, loss: 0.007857260294258595 2023-01-22 20:29:28.326973: step: 440/529, loss: 0.009770847856998444 2023-01-22 20:29:29.384710: step: 444/529, loss: 0.01389055885374546 2023-01-22 20:29:30.446706: step: 448/529, loss: 0.004073754884302616 2023-01-22 20:29:31.491164: step: 452/529, loss: 0.008267389610409737 2023-01-22 20:29:32.532811: step: 456/529, loss: 0.006309390999376774 2023-01-22 20:29:33.594870: step: 460/529, loss: 0.0031993803568184376 2023-01-22 20:29:34.640890: step: 464/529, loss: 0.006238818634301424 2023-01-22 20:29:35.704752: step: 468/529, loss: 0.002088994951918721 2023-01-22 20:29:36.764770: step: 472/529, loss: 0.00038658682024106383 2023-01-22 20:29:37.828445: step: 476/529, loss: 0.006095749791711569 2023-01-22 20:29:38.895815: step: 480/529, loss: 0.01574626937508583 2023-01-22 20:29:39.959751: step: 484/529, loss: 0.02289753407239914 2023-01-22 20:29:41.017540: step: 488/529, loss: 0.04616186022758484 2023-01-22 20:29:42.076852: step: 492/529, loss: 0.018004965037107468 2023-01-22 20:29:43.121306: step: 496/529, loss: 0.06021159142255783 2023-01-22 20:29:44.190265: step: 500/529, loss: 0.022619400173425674 2023-01-22 20:29:45.229548: step: 504/529, loss: 0.00624881824478507 2023-01-22 20:29:46.280575: step: 508/529, loss: 0.001245412277057767 2023-01-22 20:29:47.313382: step: 512/529, loss: 0.011291474103927612 2023-01-22 20:29:48.354909: step: 516/529, loss: 0.015675190836191177 2023-01-22 20:29:49.396765: step: 520/529, loss: 0.016101239249110222 2023-01-22 20:29:50.429441: step: 524/529, loss: 0.004864449147135019 2023-01-22 20:29:51.478019: step: 528/529, loss: 0.043788231909275055 2023-01-22 20:29:52.513042: step: 532/529, loss: 0.0017816489562392235 2023-01-22 20:29:53.564845: step: 536/529, loss: 0.05425938591361046 2023-01-22 20:29:54.621121: step: 540/529, loss: 0.013950561173260212 2023-01-22 20:29:55.673548: step: 544/529, loss: 0.014342723414301872 2023-01-22 20:29:56.716183: step: 548/529, loss: 0.0007592327892780304 2023-01-22 20:29:57.769832: step: 552/529, loss: 0.013166018761694431 2023-01-22 20:29:58.797276: step: 556/529, loss: 0.002326821442693472 2023-01-22 20:29:59.843649: step: 560/529, loss: 0.004837039392441511 2023-01-22 20:30:00.910568: step: 564/529, loss: 0.01746991276741028 2023-01-22 20:30:01.964561: step: 568/529, loss: 0.02432219497859478 2023-01-22 20:30:03.010393: step: 572/529, loss: 0.04641801491379738 2023-01-22 20:30:04.059049: step: 576/529, loss: 0.01043460052460432 2023-01-22 20:30:05.105835: step: 580/529, loss: 0.0017497893422842026 2023-01-22 20:30:06.141521: step: 584/529, loss: 0.019565477967262268 2023-01-22 20:30:07.195699: step: 588/529, loss: 0.004007019102573395 2023-01-22 20:30:08.244849: step: 592/529, loss: 0.005320222117006779 2023-01-22 20:30:09.303352: step: 596/529, loss: 0.005766451358795166 2023-01-22 20:30:10.348817: step: 600/529, loss: 0.0033813375048339367 2023-01-22 20:30:11.388044: step: 604/529, loss: 0.01414481271058321 2023-01-22 20:30:12.428314: step: 608/529, loss: 0.020853595808148384 2023-01-22 20:30:13.481487: step: 612/529, loss: 0.03695572912693024 2023-01-22 20:30:14.535666: step: 616/529, loss: 0.0061587863601744175 2023-01-22 20:30:15.586885: step: 620/529, loss: 0.020196961238980293 2023-01-22 20:30:16.627628: step: 624/529, loss: 0.0011422814568504691 2023-01-22 20:30:17.679741: step: 628/529, loss: 0.006703149992972612 2023-01-22 20:30:18.738720: step: 632/529, loss: 0.005099339410662651 2023-01-22 20:30:19.797611: step: 636/529, loss: 0.002160494914278388 2023-01-22 20:30:20.852915: step: 640/529, loss: 0.0052924067713320255 2023-01-22 20:30:21.908215: step: 644/529, loss: 0.0031330962665379047 2023-01-22 20:30:22.979564: step: 648/529, loss: 0.024307547137141228 2023-01-22 20:30:24.038797: step: 652/529, loss: 0.0043894401751458645 2023-01-22 20:30:25.085830: step: 656/529, loss: 0.008192530833184719 2023-01-22 20:30:26.129472: step: 660/529, loss: 0.004176565445959568 2023-01-22 20:30:27.169951: step: 664/529, loss: 0.00844507198780775 2023-01-22 20:30:28.229118: step: 668/529, loss: 0.009645390324294567 2023-01-22 20:30:29.286234: step: 672/529, loss: 0.02573040872812271 2023-01-22 20:30:30.340710: step: 676/529, loss: 0.03124813735485077 2023-01-22 20:30:31.405645: step: 680/529, loss: 0.003575752954930067 2023-01-22 20:30:32.458340: step: 684/529, loss: 0.005831555463373661 2023-01-22 20:30:33.492488: step: 688/529, loss: 0.0034943372011184692 2023-01-22 20:30:34.534648: step: 692/529, loss: 0.006965093780308962 2023-01-22 20:30:35.587852: step: 696/529, loss: 0.015788927674293518 2023-01-22 20:30:36.626752: step: 700/529, loss: 4.810214522876777e-05 2023-01-22 20:30:37.678029: step: 704/529, loss: 0.007447210140526295 2023-01-22 20:30:38.740113: step: 708/529, loss: 0.0047540850937366486 2023-01-22 20:30:39.781375: step: 712/529, loss: 0.021725395694375038 2023-01-22 20:30:40.850848: step: 716/529, loss: 0.0037376945838332176 2023-01-22 20:30:41.895357: step: 720/529, loss: 0.0064669400453567505 2023-01-22 20:30:42.945063: step: 724/529, loss: 0.02181701548397541 2023-01-22 20:30:43.997582: step: 728/529, loss: 0.00855700671672821 2023-01-22 20:30:45.055250: step: 732/529, loss: 0.0017172066727653146 2023-01-22 20:30:46.113747: step: 736/529, loss: 0.01525746937841177 2023-01-22 20:30:47.171774: step: 740/529, loss: 0.004627066198736429 2023-01-22 20:30:48.215967: step: 744/529, loss: 0.01301474031060934 2023-01-22 20:30:49.291781: step: 748/529, loss: 0.062107790261507034 2023-01-22 20:30:50.336093: step: 752/529, loss: 0.003088456578552723 2023-01-22 20:30:51.386921: step: 756/529, loss: 0.0012178504839539528 2023-01-22 20:30:52.448948: step: 760/529, loss: 0.011936604976654053 2023-01-22 20:30:53.497643: step: 764/529, loss: 0.0017584498273208737 2023-01-22 20:30:54.544002: step: 768/529, loss: 0.012138997204601765 2023-01-22 20:30:55.591188: step: 772/529, loss: 0.0034058126620948315 2023-01-22 20:30:56.644252: step: 776/529, loss: 0.004693990107625723 2023-01-22 20:30:57.685513: step: 780/529, loss: 0.04076867923140526 2023-01-22 20:30:58.759988: step: 784/529, loss: 0.012388116680085659 2023-01-22 20:30:59.811234: step: 788/529, loss: 0.05503527820110321 2023-01-22 20:31:00.852623: step: 792/529, loss: 0.004318998195230961 2023-01-22 20:31:01.910898: step: 796/529, loss: 0.016699189320206642 2023-01-22 20:31:02.938751: step: 800/529, loss: 0.0 2023-01-22 20:31:03.983113: step: 804/529, loss: 0.0037614901084452868 2023-01-22 20:31:05.052394: step: 808/529, loss: 0.05063679441809654 2023-01-22 20:31:06.124563: step: 812/529, loss: 0.010210197418928146 2023-01-22 20:31:07.162813: step: 816/529, loss: 0.003371186088770628 2023-01-22 20:31:08.211783: step: 820/529, loss: 0.004693231545388699 2023-01-22 20:31:09.262801: step: 824/529, loss: 0.015938010066747665 2023-01-22 20:31:10.320129: step: 828/529, loss: 0.005017869174480438 2023-01-22 20:31:11.370136: step: 832/529, loss: 0.02590348944067955 2023-01-22 20:31:12.417164: step: 836/529, loss: 0.005456249229609966 2023-01-22 20:31:13.474191: step: 840/529, loss: 0.021907538175582886 2023-01-22 20:31:14.530605: step: 844/529, loss: 0.019583018496632576 2023-01-22 20:31:15.581215: step: 848/529, loss: 0.016717204824090004 2023-01-22 20:31:16.627800: step: 852/529, loss: 0.0024689314886927605 2023-01-22 20:31:17.668949: step: 856/529, loss: 0.011851905845105648 2023-01-22 20:31:18.697636: step: 860/529, loss: 0.008563940413296223 2023-01-22 20:31:19.735976: step: 864/529, loss: 0.007139807567000389 2023-01-22 20:31:20.787180: step: 868/529, loss: 0.0045534586533904076 2023-01-22 20:31:21.841412: step: 872/529, loss: 0.012220026925206184 2023-01-22 20:31:22.891668: step: 876/529, loss: 0.009112739004194736 2023-01-22 20:31:23.946837: step: 880/529, loss: 0.006252597086131573 2023-01-22 20:31:25.009333: step: 884/529, loss: 0.0022403374314308167 2023-01-22 20:31:26.053827: step: 888/529, loss: 0.01112377643585205 2023-01-22 20:31:27.100094: step: 892/529, loss: 0.002552672289311886 2023-01-22 20:31:28.148630: step: 896/529, loss: 0.014382297173142433 2023-01-22 20:31:29.211208: step: 900/529, loss: 0.003760137129575014 2023-01-22 20:31:30.270668: step: 904/529, loss: 0.0017142000142484903 2023-01-22 20:31:31.320120: step: 908/529, loss: 0.02293550781905651 2023-01-22 20:31:32.373567: step: 912/529, loss: 0.004898466635495424 2023-01-22 20:31:33.418410: step: 916/529, loss: 0.0018605765653774142 2023-01-22 20:31:34.467839: step: 920/529, loss: 0.002343763131648302 2023-01-22 20:31:35.515692: step: 924/529, loss: 0.005788714624941349 2023-01-22 20:31:36.565966: step: 928/529, loss: 0.002147121587768197 2023-01-22 20:31:37.603580: step: 932/529, loss: 0.009194801561534405 2023-01-22 20:31:38.647941: step: 936/529, loss: 0.03976268693804741 2023-01-22 20:31:39.695566: step: 940/529, loss: 0.009609239175915718 2023-01-22 20:31:40.753837: step: 944/529, loss: 0.02151433378458023 2023-01-22 20:31:41.808159: step: 948/529, loss: 0.005309975706040859 2023-01-22 20:31:42.841209: step: 952/529, loss: 0.004263193812221289 2023-01-22 20:31:43.902360: step: 956/529, loss: 0.011733454652130604 2023-01-22 20:31:44.960022: step: 960/529, loss: 0.004412689711898565 2023-01-22 20:31:46.012397: step: 964/529, loss: 0.00568025466054678 2023-01-22 20:31:47.061366: step: 968/529, loss: 0.0010944223031401634 2023-01-22 20:31:48.106351: step: 972/529, loss: 0.03886739909648895 2023-01-22 20:31:49.153984: step: 976/529, loss: 0.005676777567714453 2023-01-22 20:31:50.217394: step: 980/529, loss: 0.01659751869738102 2023-01-22 20:31:51.263460: step: 984/529, loss: 0.004885291680693626 2023-01-22 20:31:52.311629: step: 988/529, loss: 0.07421057671308517 2023-01-22 20:31:53.357211: step: 992/529, loss: 0.00395796587690711 2023-01-22 20:31:54.402565: step: 996/529, loss: 0.0024158861488103867 2023-01-22 20:31:55.448527: step: 1000/529, loss: 0.007917395792901516 2023-01-22 20:31:56.494525: step: 1004/529, loss: 0.005638586822897196 2023-01-22 20:31:57.567671: step: 1008/529, loss: 0.06871917843818665 2023-01-22 20:31:58.619678: step: 1012/529, loss: 0.002509874990209937 2023-01-22 20:31:59.666750: step: 1016/529, loss: 0.003588253166526556 2023-01-22 20:32:00.707487: step: 1020/529, loss: 0.003243096172809601 2023-01-22 20:32:01.748954: step: 1024/529, loss: 0.009450647979974747 2023-01-22 20:32:02.797822: step: 1028/529, loss: 0.003258467884734273 2023-01-22 20:32:03.838525: step: 1032/529, loss: 6.282091635512188e-05 2023-01-22 20:32:04.894707: step: 1036/529, loss: 0.004367231857031584 2023-01-22 20:32:05.975250: step: 1040/529, loss: 0.0035699144937098026 2023-01-22 20:32:07.021657: step: 1044/529, loss: 0.008112414740025997 2023-01-22 20:32:08.075413: step: 1048/529, loss: 0.019451016560196877 2023-01-22 20:32:09.123856: step: 1052/529, loss: 0.01603654958307743 2023-01-22 20:32:10.173476: step: 1056/529, loss: 0.009122052229940891 2023-01-22 20:32:11.232179: step: 1060/529, loss: 0.018931401893496513 2023-01-22 20:32:12.287698: step: 1064/529, loss: 0.014438833110034466 2023-01-22 20:32:13.342167: step: 1068/529, loss: 0.0016548129497095942 2023-01-22 20:32:14.393668: step: 1072/529, loss: 0.0038174220826476812 2023-01-22 20:32:15.443376: step: 1076/529, loss: 0.012156601995229721 2023-01-22 20:32:16.492624: step: 1080/529, loss: 0.011824763379991055 2023-01-22 20:32:17.553838: step: 1084/529, loss: 0.00810436625033617 2023-01-22 20:32:18.603144: step: 1088/529, loss: 0.00559873366728425 2023-01-22 20:32:19.656830: step: 1092/529, loss: 0.006152109242975712 2023-01-22 20:32:20.703847: step: 1096/529, loss: 0.004729011561721563 2023-01-22 20:32:21.767986: step: 1100/529, loss: 0.01180565357208252 2023-01-22 20:32:22.814153: step: 1104/529, loss: 0.007704509422183037 2023-01-22 20:32:23.867211: step: 1108/529, loss: 0.009104235097765923 2023-01-22 20:32:24.914656: step: 1112/529, loss: 0.009237431921064854 2023-01-22 20:32:25.962291: step: 1116/529, loss: 0.000431711261626333 2023-01-22 20:32:27.028815: step: 1120/529, loss: 0.029774652794003487 2023-01-22 20:32:28.075468: step: 1124/529, loss: 0.01060602255165577 2023-01-22 20:32:29.115313: step: 1128/529, loss: 0.004135953728109598 2023-01-22 20:32:30.169889: step: 1132/529, loss: 0.022009432315826416 2023-01-22 20:32:31.228311: step: 1136/529, loss: 0.007538885809481144 2023-01-22 20:32:32.281059: step: 1140/529, loss: 0.012654234655201435 2023-01-22 20:32:33.334326: step: 1144/529, loss: 0.003313296940177679 2023-01-22 20:32:34.387567: step: 1148/529, loss: 0.010472552850842476 2023-01-22 20:32:35.450404: step: 1152/529, loss: 0.002015798119828105 2023-01-22 20:32:36.498492: step: 1156/529, loss: 0.004793969914317131 2023-01-22 20:32:37.550720: step: 1160/529, loss: 0.008662129752337933 2023-01-22 20:32:38.590586: step: 1164/529, loss: 0.007208022754639387 2023-01-22 20:32:39.641540: step: 1168/529, loss: 0.002850503195077181 2023-01-22 20:32:40.688050: step: 1172/529, loss: 0.009645634330809116 2023-01-22 20:32:41.756449: step: 1176/529, loss: 0.009073411114513874 2023-01-22 20:32:42.803063: step: 1180/529, loss: 0.004340526182204485 2023-01-22 20:32:43.845893: step: 1184/529, loss: 0.0012939763255417347 2023-01-22 20:32:44.893470: step: 1188/529, loss: 0.0013405996141955256 2023-01-22 20:32:45.947653: step: 1192/529, loss: 0.0062379734590649605 2023-01-22 20:32:46.987747: step: 1196/529, loss: 0.019409896805882454 2023-01-22 20:32:48.033049: step: 1200/529, loss: 0.006402980536222458 2023-01-22 20:32:49.088163: step: 1204/529, loss: 0.0 2023-01-22 20:32:50.163938: step: 1208/529, loss: 0.010526749305427074 2023-01-22 20:32:51.213663: step: 1212/529, loss: 0.015439791604876518 2023-01-22 20:32:52.257298: step: 1216/529, loss: 0.03193796053528786 2023-01-22 20:32:53.296990: step: 1220/529, loss: 0.00831770058721304 2023-01-22 20:32:54.344400: step: 1224/529, loss: 0.0038911544252187014 2023-01-22 20:32:55.401228: step: 1228/529, loss: 0.00986575335264206 2023-01-22 20:32:56.453962: step: 1232/529, loss: 0.0018234552117064595 2023-01-22 20:32:57.511134: step: 1236/529, loss: 0.029101356863975525 2023-01-22 20:32:58.559994: step: 1240/529, loss: 0.004143872763961554 2023-01-22 20:32:59.603047: step: 1244/529, loss: 0.004696296527981758 2023-01-22 20:33:00.650282: step: 1248/529, loss: 0.002087387954816222 2023-01-22 20:33:01.703947: step: 1252/529, loss: 0.004218920134007931 2023-01-22 20:33:02.750430: step: 1256/529, loss: 0.01317879930138588 2023-01-22 20:33:03.797397: step: 1260/529, loss: 0.004585915245115757 2023-01-22 20:33:04.847336: step: 1264/529, loss: 0.028849679976701736 2023-01-22 20:33:05.913283: step: 1268/529, loss: 0.004952044226229191 2023-01-22 20:33:06.972353: step: 1272/529, loss: 0.006961579900234938 2023-01-22 20:33:08.019684: step: 1276/529, loss: 0.004382673650979996 2023-01-22 20:33:09.072823: step: 1280/529, loss: 0.009211041033267975 2023-01-22 20:33:10.113960: step: 1284/529, loss: 0.002410503337159753 2023-01-22 20:33:11.171587: step: 1288/529, loss: 0.01267029158771038 2023-01-22 20:33:12.211251: step: 1292/529, loss: 0.003697623498737812 2023-01-22 20:33:13.270854: step: 1296/529, loss: 0.0025258862879127264 2023-01-22 20:33:14.312942: step: 1300/529, loss: 0.0022309906780719757 2023-01-22 20:33:15.380040: step: 1304/529, loss: 0.007378202863037586 2023-01-22 20:33:16.423663: step: 1308/529, loss: 0.0098244184628129 2023-01-22 20:33:17.480354: step: 1312/529, loss: 0.003958418034017086 2023-01-22 20:33:18.527286: step: 1316/529, loss: 0.0542299821972847 2023-01-22 20:33:19.574314: step: 1320/529, loss: 0.008136685006320477 2023-01-22 20:33:20.624840: step: 1324/529, loss: 0.007354076951742172 2023-01-22 20:33:21.673602: step: 1328/529, loss: 0.0011552138021215796 2023-01-22 20:33:22.730314: step: 1332/529, loss: 0.013669280335307121 2023-01-22 20:33:23.782774: step: 1336/529, loss: 0.01054068561643362 2023-01-22 20:33:24.832290: step: 1340/529, loss: 0.010914186015725136 2023-01-22 20:33:25.870747: step: 1344/529, loss: 0.0061391075141727924 2023-01-22 20:33:26.925468: step: 1348/529, loss: 0.015552214346826077 2023-01-22 20:33:27.991172: step: 1352/529, loss: 0.005872183945029974 2023-01-22 20:33:29.055290: step: 1356/529, loss: 0.009975351393222809 2023-01-22 20:33:30.111125: step: 1360/529, loss: 0.02735932543873787 2023-01-22 20:33:31.155364: step: 1364/529, loss: 0.007191893644630909 2023-01-22 20:33:32.205661: step: 1368/529, loss: 0.011145122349262238 2023-01-22 20:33:33.249232: step: 1372/529, loss: 0.0003216253244318068 2023-01-22 20:33:34.309155: step: 1376/529, loss: 0.010975110344588757 2023-01-22 20:33:35.351122: step: 1380/529, loss: 0.0009541421895846725 2023-01-22 20:33:36.403158: step: 1384/529, loss: 0.00790709350258112 2023-01-22 20:33:37.445156: step: 1388/529, loss: 0.0011792776640504599 2023-01-22 20:33:38.497384: step: 1392/529, loss: 0.0034517974127084017 2023-01-22 20:33:39.551261: step: 1396/529, loss: 0.009446525014936924 2023-01-22 20:33:40.603087: step: 1400/529, loss: 0.011488438583910465 2023-01-22 20:33:41.652027: step: 1404/529, loss: 0.0019498568726703525 2023-01-22 20:33:42.698931: step: 1408/529, loss: 0.00443405332043767 2023-01-22 20:33:43.737506: step: 1412/529, loss: 3.3390904718544334e-05 2023-01-22 20:33:44.795558: step: 1416/529, loss: 0.0006125321961008012 2023-01-22 20:33:45.855085: step: 1420/529, loss: 0.007208527065813541 2023-01-22 20:33:46.907765: step: 1424/529, loss: 0.022011684253811836 2023-01-22 20:33:47.964765: step: 1428/529, loss: 0.0076451716013252735 2023-01-22 20:33:49.011826: step: 1432/529, loss: 0.021180637180805206 2023-01-22 20:33:50.063907: step: 1436/529, loss: 0.005078395828604698 2023-01-22 20:33:51.135179: step: 1440/529, loss: 0.0032672728411853313 2023-01-22 20:33:52.187893: step: 1444/529, loss: 0.0034984382800757885 2023-01-22 20:33:53.241966: step: 1448/529, loss: 0.0012936186976730824 2023-01-22 20:33:54.312515: step: 1452/529, loss: 0.004346802830696106 2023-01-22 20:33:55.363526: step: 1456/529, loss: 0.012945122085511684 2023-01-22 20:33:56.415894: step: 1460/529, loss: 0.02515929564833641 2023-01-22 20:33:57.449542: step: 1464/529, loss: 0.008811013773083687 2023-01-22 20:33:58.504531: step: 1468/529, loss: 0.016155611723661423 2023-01-22 20:33:59.534761: step: 1472/529, loss: 0.00851103849709034 2023-01-22 20:34:00.581411: step: 1476/529, loss: 0.023528946563601494 2023-01-22 20:34:01.622660: step: 1480/529, loss: 0.006576701533049345 2023-01-22 20:34:02.680326: step: 1484/529, loss: 0.004462636541575193 2023-01-22 20:34:03.725400: step: 1488/529, loss: 0.0058347429148852825 2023-01-22 20:34:04.770767: step: 1492/529, loss: 0.00028820225270465016 2023-01-22 20:34:05.820060: step: 1496/529, loss: 0.013267778791487217 2023-01-22 20:34:06.864030: step: 1500/529, loss: 0.0006704726256430149 2023-01-22 20:34:07.921820: step: 1504/529, loss: 0.008622322231531143 2023-01-22 20:34:08.979922: step: 1508/529, loss: 0.005056119058281183 2023-01-22 20:34:10.028287: step: 1512/529, loss: 0.005860959179699421 2023-01-22 20:34:11.076677: step: 1516/529, loss: 0.005031220149248838 2023-01-22 20:34:12.144410: step: 1520/529, loss: 0.003332293126732111 2023-01-22 20:34:13.200906: step: 1524/529, loss: 0.005705941002815962 2023-01-22 20:34:14.245993: step: 1528/529, loss: 0.0161629356443882 2023-01-22 20:34:15.292792: step: 1532/529, loss: 0.009504578076303005 2023-01-22 20:34:16.329084: step: 1536/529, loss: 0.018351538106799126 2023-01-22 20:34:17.360351: step: 1540/529, loss: 0.01501642819494009 2023-01-22 20:34:18.418481: step: 1544/529, loss: 0.009062089025974274 2023-01-22 20:34:19.459900: step: 1548/529, loss: 0.005738650448620319 2023-01-22 20:34:20.507126: step: 1552/529, loss: 0.007332745473831892 2023-01-22 20:34:21.582392: step: 1556/529, loss: 0.010884511284530163 2023-01-22 20:34:22.656204: step: 1560/529, loss: 0.0022634186316281557 2023-01-22 20:34:23.708512: step: 1564/529, loss: 0.0089864581823349 2023-01-22 20:34:24.775987: step: 1568/529, loss: 0.054286062717437744 2023-01-22 20:34:25.829174: step: 1572/529, loss: 0.004494091030210257 2023-01-22 20:34:26.897824: step: 1576/529, loss: 0.0038285499904304743 2023-01-22 20:34:27.932157: step: 1580/529, loss: 0.0014065966242924333 2023-01-22 20:34:28.980079: step: 1584/529, loss: 0.011168353259563446 2023-01-22 20:34:30.023889: step: 1588/529, loss: 0.002279081381857395 2023-01-22 20:34:31.097879: step: 1592/529, loss: 0.0026932191103696823 2023-01-22 20:34:32.132386: step: 1596/529, loss: 0.00128878653049469 2023-01-22 20:34:33.197521: step: 1600/529, loss: 0.006494956091046333 2023-01-22 20:34:34.231329: step: 1604/529, loss: 0.01833772473037243 2023-01-22 20:34:35.271922: step: 1608/529, loss: 0.01639338582754135 2023-01-22 20:34:36.338488: step: 1612/529, loss: 0.08848409354686737 2023-01-22 20:34:37.385553: step: 1616/529, loss: 0.005832102615386248 2023-01-22 20:34:38.431377: step: 1620/529, loss: 0.002370870439335704 2023-01-22 20:34:39.483308: step: 1624/529, loss: 0.02834174782037735 2023-01-22 20:34:40.531096: step: 1628/529, loss: 0.000945900974329561 2023-01-22 20:34:41.589393: step: 1632/529, loss: 0.0153167974203825 2023-01-22 20:34:42.638957: step: 1636/529, loss: 0.003908591810613871 2023-01-22 20:34:43.682083: step: 1640/529, loss: 0.017594778910279274 2023-01-22 20:34:44.737589: step: 1644/529, loss: 0.002155149821192026 2023-01-22 20:34:45.772950: step: 1648/529, loss: 0.04294760897755623 2023-01-22 20:34:46.815484: step: 1652/529, loss: 0.0 2023-01-22 20:34:47.866998: step: 1656/529, loss: 0.0072180768474936485 2023-01-22 20:34:48.911519: step: 1660/529, loss: 0.007787659298628569 2023-01-22 20:34:49.964557: step: 1664/529, loss: 0.006282668095082045 2023-01-22 20:34:51.016392: step: 1668/529, loss: 0.007096918765455484 2023-01-22 20:34:52.060170: step: 1672/529, loss: 0.014727005735039711 2023-01-22 20:34:53.121566: step: 1676/529, loss: 0.0067855981178581715 2023-01-22 20:34:54.165456: step: 1680/529, loss: 0.001603766344487667 2023-01-22 20:34:55.205955: step: 1684/529, loss: 0.004314210265874863 2023-01-22 20:34:56.266312: step: 1688/529, loss: 0.008225671015679836 2023-01-22 20:34:57.315440: step: 1692/529, loss: 0.004177814815193415 2023-01-22 20:34:58.361003: step: 1696/529, loss: 0.0005804678658023477 2023-01-22 20:34:59.421670: step: 1700/529, loss: 0.020535405725240707 2023-01-22 20:35:00.468269: step: 1704/529, loss: 0.009242628701031208 2023-01-22 20:35:01.513815: step: 1708/529, loss: 0.004243496339768171 2023-01-22 20:35:02.563331: step: 1712/529, loss: 0.012518382631242275 2023-01-22 20:35:03.617833: step: 1716/529, loss: 0.004606115166097879 2023-01-22 20:35:04.676768: step: 1720/529, loss: 0.007482695858925581 2023-01-22 20:35:05.734005: step: 1724/529, loss: 0.06701914966106415 2023-01-22 20:35:06.784566: step: 1728/529, loss: 0.05648639798164368 2023-01-22 20:35:07.843418: step: 1732/529, loss: 0.017292285338044167 2023-01-22 20:35:08.892856: step: 1736/529, loss: 0.02491857297718525 2023-01-22 20:35:09.944924: step: 1740/529, loss: 0.014872078783810139 2023-01-22 20:35:10.987111: step: 1744/529, loss: 0.029166337102651596 2023-01-22 20:35:12.056013: step: 1748/529, loss: 0.012679904699325562 2023-01-22 20:35:13.111915: step: 1752/529, loss: 0.009680034592747688 2023-01-22 20:35:14.139428: step: 1756/529, loss: 0.013375414535403252 2023-01-22 20:35:15.194416: step: 1760/529, loss: 0.004014882724732161 2023-01-22 20:35:16.239579: step: 1764/529, loss: 0.004619982559233904 2023-01-22 20:35:17.279902: step: 1768/529, loss: 0.00829849299043417 2023-01-22 20:35:18.336290: step: 1772/529, loss: 0.005871128290891647 2023-01-22 20:35:19.394747: step: 1776/529, loss: 0.002497660694643855 2023-01-22 20:35:20.452436: step: 1780/529, loss: 0.016429323703050613 2023-01-22 20:35:21.502105: step: 1784/529, loss: 0.016005050390958786 2023-01-22 20:35:22.549193: step: 1788/529, loss: 0.011435303837060928 2023-01-22 20:35:23.583980: step: 1792/529, loss: 0.003906558267772198 2023-01-22 20:35:24.651808: step: 1796/529, loss: 0.019485201686620712 2023-01-22 20:35:25.701978: step: 1800/529, loss: 0.004587591625750065 2023-01-22 20:35:26.740308: step: 1804/529, loss: 0.0027694660238921642 2023-01-22 20:35:27.783976: step: 1808/529, loss: 0.007504927460104227 2023-01-22 20:35:28.827622: step: 1812/529, loss: 0.004087294451892376 2023-01-22 20:35:29.870879: step: 1816/529, loss: 0.007184195797890425 2023-01-22 20:35:30.937630: step: 1820/529, loss: 0.007019483018666506 2023-01-22 20:35:31.985467: step: 1824/529, loss: 0.0346452035009861 2023-01-22 20:35:33.027039: step: 1828/529, loss: 0.0024064257740974426 2023-01-22 20:35:34.052538: step: 1832/529, loss: 0.0026801915373653173 2023-01-22 20:35:35.091470: step: 1836/529, loss: 4.24527024733834e-05 2023-01-22 20:35:36.149734: step: 1840/529, loss: 0.038760650902986526 2023-01-22 20:35:37.195587: step: 1844/529, loss: 0.012975029647350311 2023-01-22 20:35:38.250312: step: 1848/529, loss: 0.010356522165238857 2023-01-22 20:35:39.302970: step: 1852/529, loss: 0.008925179950892925 2023-01-22 20:35:40.368639: step: 1856/529, loss: 0.010802081786096096 2023-01-22 20:35:41.437250: step: 1860/529, loss: 0.00594439497217536 2023-01-22 20:35:42.485320: step: 1864/529, loss: 0.016758378595113754 2023-01-22 20:35:43.527587: step: 1868/529, loss: 0.0022920190822333097 2023-01-22 20:35:44.583120: step: 1872/529, loss: 0.01095986645668745 2023-01-22 20:35:45.632887: step: 1876/529, loss: 0.018821481615304947 2023-01-22 20:35:46.667481: step: 1880/529, loss: 0.054840877652168274 2023-01-22 20:35:47.709011: step: 1884/529, loss: 0.007524373009800911 2023-01-22 20:35:48.744324: step: 1888/529, loss: 0.032600287348032 2023-01-22 20:35:49.786426: step: 1892/529, loss: 0.009916742332279682 2023-01-22 20:35:50.824913: step: 1896/529, loss: 0.013087676838040352 2023-01-22 20:35:51.873617: step: 1900/529, loss: 0.006917009595781565 2023-01-22 20:35:52.942260: step: 1904/529, loss: 0.0009078065631911159 2023-01-22 20:35:53.986133: step: 1908/529, loss: 0.010523991659283638 2023-01-22 20:35:55.038858: step: 1912/529, loss: 0.001266071223653853 2023-01-22 20:35:56.084665: step: 1916/529, loss: 0.003956879023462534 2023-01-22 20:35:57.132849: step: 1920/529, loss: 0.044897980988025665 2023-01-22 20:35:58.177586: step: 1924/529, loss: 0.00875797402113676 2023-01-22 20:35:59.239854: step: 1928/529, loss: 0.0034518318716436625 2023-01-22 20:36:00.283893: step: 1932/529, loss: 0.00888245552778244 2023-01-22 20:36:01.340188: step: 1936/529, loss: 0.012659145519137383 2023-01-22 20:36:02.406696: step: 1940/529, loss: 0.023738525807857513 2023-01-22 20:36:03.446834: step: 1944/529, loss: 0.0008952609496191144 2023-01-22 20:36:04.503086: step: 1948/529, loss: 0.02853865921497345 2023-01-22 20:36:05.555884: step: 1952/529, loss: 0.0011694193817675114 2023-01-22 20:36:06.595263: step: 1956/529, loss: 0.008649631403386593 2023-01-22 20:36:07.643421: step: 1960/529, loss: 0.0089822793379426 2023-01-22 20:36:08.680281: step: 1964/529, loss: 0.005365021526813507 2023-01-22 20:36:09.738763: step: 1968/529, loss: 0.07763940840959549 2023-01-22 20:36:10.781313: step: 1972/529, loss: 0.01008632406592369 2023-01-22 20:36:11.846596: step: 1976/529, loss: 0.0007939829956740141 2023-01-22 20:36:12.908720: step: 1980/529, loss: 0.014864037744700909 2023-01-22 20:36:13.960084: step: 1984/529, loss: 0.013313878327608109 2023-01-22 20:36:15.004974: step: 1988/529, loss: 0.010894953273236752 2023-01-22 20:36:16.064152: step: 1992/529, loss: 0.004349092021584511 2023-01-22 20:36:17.113871: step: 1996/529, loss: 0.009638441726565361 2023-01-22 20:36:18.162076: step: 2000/529, loss: 0.0061707524582743645 2023-01-22 20:36:19.230274: step: 2004/529, loss: 0.002053322969004512 2023-01-22 20:36:20.248879: step: 2008/529, loss: 0.0005394183099269867 2023-01-22 20:36:21.296200: step: 2012/529, loss: 0.009594489820301533 2023-01-22 20:36:22.348946: step: 2016/529, loss: 0.0015609830152243376 2023-01-22 20:36:23.403196: step: 2020/529, loss: 0.009851885959506035 2023-01-22 20:36:24.454827: step: 2024/529, loss: 0.023001283407211304 2023-01-22 20:36:25.509877: step: 2028/529, loss: 0.0032469695433974266 2023-01-22 20:36:26.595145: step: 2032/529, loss: 0.02607772871851921 2023-01-22 20:36:27.628255: step: 2036/529, loss: 0.011506660841405392 2023-01-22 20:36:28.670566: step: 2040/529, loss: 0.00343112600967288 2023-01-22 20:36:29.723181: step: 2044/529, loss: 0.0008999130222946405 2023-01-22 20:36:30.776198: step: 2048/529, loss: 0.007807687856256962 2023-01-22 20:36:31.824579: step: 2052/529, loss: 0.02372725121676922 2023-01-22 20:36:32.871138: step: 2056/529, loss: 0.03458522632718086 2023-01-22 20:36:33.916060: step: 2060/529, loss: 0.003015015507116914 2023-01-22 20:36:34.979285: step: 2064/529, loss: 0.008491916581988335 2023-01-22 20:36:36.034456: step: 2068/529, loss: 0.00469997338950634 2023-01-22 20:36:37.106333: step: 2072/529, loss: 0.0129986722022295 2023-01-22 20:36:38.151763: step: 2076/529, loss: 0.035609424114227295 2023-01-22 20:36:39.207726: step: 2080/529, loss: 0.05366597697138786 2023-01-22 20:36:40.249015: step: 2084/529, loss: 0.004191091284155846 2023-01-22 20:36:41.300524: step: 2088/529, loss: 0.007715316955000162 2023-01-22 20:36:42.342335: step: 2092/529, loss: 0.004350960720330477 2023-01-22 20:36:43.409502: step: 2096/529, loss: 0.043105389922857285 2023-01-22 20:36:44.469145: step: 2100/529, loss: 0.007859050296247005 2023-01-22 20:36:45.520640: step: 2104/529, loss: 0.003910794388502836 2023-01-22 20:36:46.571673: step: 2108/529, loss: 0.01939493604004383 2023-01-22 20:36:47.633271: step: 2112/529, loss: 0.006374196149408817 2023-01-22 20:36:48.687303: step: 2116/529, loss: 0.005078229587525129 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33494976532707543, 'r': 0.30952663323393875, 'f1': 0.32173675683291064}, 'combined': 0.23706918924530257, 'stategy': 1, 'epoch': 5} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3838514065159933, 'r': 0.3205460698392719, 'f1': 0.34935406185050466}, 'combined': 0.2457767269300033, 'stategy': 1, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3136114503589407, 'r': 0.33860515228508026, 'f1': 0.3256294073982432}, 'combined': 0.23993745808291603, 'stategy': 1, 'epoch': 5} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38381147339287264, 'r': 0.3309411919664787, 'f1': 0.35542092433104033}, 'combined': 0.2523488562750386, 'stategy': 1, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3247150105429626, 'r': 0.3339573732718894, 'f1': 0.32927134838968325}, 'combined': 0.2426209935502929, 'stategy': 1, 'epoch': 5} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3793561762268988, 'r': 0.29990985659822894, 'f1': 0.33498703280854814}, 'combined': 0.23784079329406915, 'stategy': 1, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3352642423366108, 'r': 0.31427046625101657, 'f1': 0.3244280817126068}, 'combined': 0.23905227073560498, 'stategy': 1, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37769273447697677, 'r': 0.31771011870489146, 'f1': 0.34511449861213805}, 'combined': 0.2427941196266298, 'stategy': 1, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3164259395725866, 'r': 0.3452465185089892, 'f1': 0.3302085576301947}, 'combined': 0.24331156878014343, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3807202674530041, 'r': 0.3315950716526164, 'f1': 0.3544636972838313}, 'combined': 0.25166922507152023, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3297020574534162, 'r': 0.34534257251287614, 'f1': 0.337341122732689}, 'combined': 0.2485671430661919, 'stategy': 1, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37549195557441667, 'r': 0.29751020563836855, 'f1': 0.33198315059549205}, 'combined': 0.23570803692279935, 'stategy': 1, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 2} ****************************** Epoch: 6 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 20:39:13.946242: step: 4/529, loss: 0.004742167890071869 2023-01-22 20:39:14.994654: step: 8/529, loss: 0.0036565768532454967 2023-01-22 20:39:16.031193: step: 12/529, loss: 0.0018223716178908944 2023-01-22 20:39:17.084399: step: 16/529, loss: 0.005090882536023855 2023-01-22 20:39:18.133857: step: 20/529, loss: 0.0199209526181221 2023-01-22 20:39:19.162596: step: 24/529, loss: 0.0003567904932424426 2023-01-22 20:39:20.214489: step: 28/529, loss: 0.02007969468832016 2023-01-22 20:39:21.257239: step: 32/529, loss: 0.00613051000982523 2023-01-22 20:39:22.299938: step: 36/529, loss: 0.003461189568042755 2023-01-22 20:39:23.357199: step: 40/529, loss: 0.0017779492773115635 2023-01-22 20:39:24.404485: step: 44/529, loss: 0.008025181479752064 2023-01-22 20:39:25.449176: step: 48/529, loss: 0.01793014630675316 2023-01-22 20:39:26.497549: step: 52/529, loss: 0.009517989121377468 2023-01-22 20:39:27.543276: step: 56/529, loss: 0.0061738030053675175 2023-01-22 20:39:28.597491: step: 60/529, loss: 0.0035525846760720015 2023-01-22 20:39:29.626733: step: 64/529, loss: 0.004170773550868034 2023-01-22 20:39:30.688976: step: 68/529, loss: 0.010141300968825817 2023-01-22 20:39:31.742534: step: 72/529, loss: 0.001489673973992467 2023-01-22 20:39:32.797575: step: 76/529, loss: 0.00888165831565857 2023-01-22 20:39:33.841302: step: 80/529, loss: 0.003602228593081236 2023-01-22 20:39:34.898558: step: 84/529, loss: 0.00792065542191267 2023-01-22 20:39:35.949431: step: 88/529, loss: 0.02555099129676819 2023-01-22 20:39:37.000489: step: 92/529, loss: 0.007738249376416206 2023-01-22 20:39:38.051860: step: 96/529, loss: 0.011598734185099602 2023-01-22 20:39:39.114365: step: 100/529, loss: 0.0032476165797561407 2023-01-22 20:39:40.169195: step: 104/529, loss: 0.0058266338892281055 2023-01-22 20:39:41.217994: step: 108/529, loss: 0.006213184911757708 2023-01-22 20:39:42.252248: step: 112/529, loss: 0.008658098056912422 2023-01-22 20:39:43.309040: step: 116/529, loss: 0.03862125054001808 2023-01-22 20:39:44.362333: step: 120/529, loss: 0.028483223170042038 2023-01-22 20:39:45.419002: step: 124/529, loss: 0.012792219407856464 2023-01-22 20:39:46.462561: step: 128/529, loss: 0.0007580016390420496 2023-01-22 20:39:47.508008: step: 132/529, loss: 0.07374080270528793 2023-01-22 20:39:48.551838: step: 136/529, loss: 0.010961364023387432 2023-01-22 20:39:49.606125: step: 140/529, loss: 0.011940453201532364 2023-01-22 20:39:50.676663: step: 144/529, loss: 0.013146958313882351 2023-01-22 20:39:51.739042: step: 148/529, loss: 0.010877873748540878 2023-01-22 20:39:52.796473: step: 152/529, loss: 0.0020709247328341007 2023-01-22 20:39:53.847160: step: 156/529, loss: 0.0031474612187594175 2023-01-22 20:39:54.894422: step: 160/529, loss: 0.02551279217004776 2023-01-22 20:39:55.942152: step: 164/529, loss: 0.012841667979955673 2023-01-22 20:39:56.991375: step: 168/529, loss: 0.0032924795523285866 2023-01-22 20:39:58.039951: step: 172/529, loss: 0.0016286119353026152 2023-01-22 20:39:59.090242: step: 176/529, loss: 0.011194484308362007 2023-01-22 20:40:00.149114: step: 180/529, loss: 0.007908947765827179 2023-01-22 20:40:01.183873: step: 184/529, loss: 0.0104463379830122 2023-01-22 20:40:02.241418: step: 188/529, loss: 0.0012408499605953693 2023-01-22 20:40:03.276732: step: 192/529, loss: 0.005134171340614557 2023-01-22 20:40:04.324371: step: 196/529, loss: 0.031151393428444862 2023-01-22 20:40:05.378411: step: 200/529, loss: 0.006274307146668434 2023-01-22 20:40:06.438508: step: 204/529, loss: 0.008539692498743534 2023-01-22 20:40:07.499772: step: 208/529, loss: 0.004239651840180159 2023-01-22 20:40:08.565260: step: 212/529, loss: 0.029458889737725258 2023-01-22 20:40:09.617054: step: 216/529, loss: 0.0017249897355213761 2023-01-22 20:40:10.681095: step: 220/529, loss: 0.007696910761296749 2023-01-22 20:40:11.739369: step: 224/529, loss: 0.004568984732031822 2023-01-22 20:40:12.792458: step: 228/529, loss: 0.028919141739606857 2023-01-22 20:40:13.853719: step: 232/529, loss: 0.008101379498839378 2023-01-22 20:40:14.891731: step: 236/529, loss: 0.0009102600743062794 2023-01-22 20:40:15.950565: step: 240/529, loss: 0.002067000837996602 2023-01-22 20:40:17.005680: step: 244/529, loss: 0.009263182058930397 2023-01-22 20:40:18.057081: step: 248/529, loss: 0.0115694310516119 2023-01-22 20:40:19.117612: step: 252/529, loss: 0.18416275084018707 2023-01-22 20:40:20.197482: step: 256/529, loss: 0.0518353171646595 2023-01-22 20:40:21.253513: step: 260/529, loss: 0.0029434156604111195 2023-01-22 20:40:22.309329: step: 264/529, loss: 0.013119274750351906 2023-01-22 20:40:23.380094: step: 268/529, loss: 0.005223182495683432 2023-01-22 20:40:24.428186: step: 272/529, loss: 0.005790745373815298 2023-01-22 20:40:25.478348: step: 276/529, loss: 0.0026379176415503025 2023-01-22 20:40:26.523019: step: 280/529, loss: 0.0037465891800820827 2023-01-22 20:40:27.595801: step: 284/529, loss: 0.013140812516212463 2023-01-22 20:40:28.640772: step: 288/529, loss: 0.011482231318950653 2023-01-22 20:40:29.698056: step: 292/529, loss: 0.016089053824543953 2023-01-22 20:40:30.755351: step: 296/529, loss: 0.00786222517490387 2023-01-22 20:40:31.796465: step: 300/529, loss: 0.012310792692005634 2023-01-22 20:40:32.846446: step: 304/529, loss: 0.016517577692866325 2023-01-22 20:40:33.909542: step: 308/529, loss: 0.02212962880730629 2023-01-22 20:40:34.944026: step: 312/529, loss: 0.0016913588624447584 2023-01-22 20:40:35.999725: step: 316/529, loss: 0.0044386680237948895 2023-01-22 20:40:37.047740: step: 320/529, loss: 0.041396837681531906 2023-01-22 20:40:38.099539: step: 324/529, loss: 0.0 2023-01-22 20:40:39.151936: step: 328/529, loss: 0.005812098737806082 2023-01-22 20:40:40.204837: step: 332/529, loss: 0.008507197722792625 2023-01-22 20:40:41.258192: step: 336/529, loss: 0.022704802453517914 2023-01-22 20:40:42.310481: step: 340/529, loss: 0.00018631767306942493 2023-01-22 20:40:43.359479: step: 344/529, loss: 0.014528080821037292 2023-01-22 20:40:44.411299: step: 348/529, loss: 0.02608819305896759 2023-01-22 20:40:45.472324: step: 352/529, loss: 0.006677224300801754 2023-01-22 20:40:46.513632: step: 356/529, loss: 0.006541381124407053 2023-01-22 20:40:47.578944: step: 360/529, loss: 0.000843782676383853 2023-01-22 20:40:48.632140: step: 364/529, loss: 0.0013773947721347213 2023-01-22 20:40:49.712128: step: 368/529, loss: 0.004629874601960182 2023-01-22 20:40:50.754638: step: 372/529, loss: 0.023446664214134216 2023-01-22 20:40:51.809778: step: 376/529, loss: 0.017254117876291275 2023-01-22 20:40:52.857532: step: 380/529, loss: 0.011426403187215328 2023-01-22 20:40:53.894629: step: 384/529, loss: 0.002016189508140087 2023-01-22 20:40:54.945063: step: 388/529, loss: 0.011657757684588432 2023-01-22 20:40:55.989344: step: 392/529, loss: 0.005400735419243574 2023-01-22 20:40:57.053366: step: 396/529, loss: 0.010577062144875526 2023-01-22 20:40:58.100630: step: 400/529, loss: 0.005472579505294561 2023-01-22 20:40:59.150474: step: 404/529, loss: 0.007579367142170668 2023-01-22 20:41:00.203729: step: 408/529, loss: 0.0029342707712203264 2023-01-22 20:41:01.250461: step: 412/529, loss: 0.0026867901906371117 2023-01-22 20:41:02.305837: step: 416/529, loss: 0.009467937052249908 2023-01-22 20:41:03.347775: step: 420/529, loss: 0.007822707295417786 2023-01-22 20:41:04.409964: step: 424/529, loss: 0.00530764227733016 2023-01-22 20:41:05.456186: step: 428/529, loss: 0.012376729398965836 2023-01-22 20:41:06.508018: step: 432/529, loss: 0.012746045365929604 2023-01-22 20:41:07.548527: step: 436/529, loss: 0.012784531340003014 2023-01-22 20:41:08.593517: step: 440/529, loss: 0.011401951313018799 2023-01-22 20:41:09.640355: step: 444/529, loss: 0.014958926476538181 2023-01-22 20:41:10.690831: step: 448/529, loss: 0.007821457460522652 2023-01-22 20:41:11.736165: step: 452/529, loss: 4.247689503245056e-05 2023-01-22 20:41:12.794676: step: 456/529, loss: 0.011198826134204865 2023-01-22 20:41:13.845635: step: 460/529, loss: 0.005356063600629568 2023-01-22 20:41:14.896505: step: 464/529, loss: 0.015696559101343155 2023-01-22 20:41:15.952360: step: 468/529, loss: 0.019132040441036224 2023-01-22 20:41:17.006073: step: 472/529, loss: 0.003525756299495697 2023-01-22 20:41:18.040228: step: 476/529, loss: 0.02598177269101143 2023-01-22 20:41:19.091927: step: 480/529, loss: 0.005338669288903475 2023-01-22 20:41:20.186977: step: 484/529, loss: 0.00499527296051383 2023-01-22 20:41:21.234260: step: 488/529, loss: 0.05169805511832237 2023-01-22 20:41:22.266577: step: 492/529, loss: 0.012880816124379635 2023-01-22 20:41:23.303722: step: 496/529, loss: 0.0283990316092968 2023-01-22 20:41:24.352095: step: 500/529, loss: 0.0036583489272743464 2023-01-22 20:41:25.403969: step: 504/529, loss: 0.0064158630557358265 2023-01-22 20:41:26.444851: step: 508/529, loss: 0.0031435575801879168 2023-01-22 20:41:27.492481: step: 512/529, loss: 0.004737897776067257 2023-01-22 20:41:28.546988: step: 516/529, loss: 0.0034656536299735308 2023-01-22 20:41:29.597151: step: 520/529, loss: 0.00832302961498499 2023-01-22 20:41:30.658605: step: 524/529, loss: 0.0 2023-01-22 20:41:31.716125: step: 528/529, loss: 0.0032608206383883953 2023-01-22 20:41:32.767769: step: 532/529, loss: 0.003181769512593746 2023-01-22 20:41:33.804048: step: 536/529, loss: 0.0018491375958546996 2023-01-22 20:41:34.841724: step: 540/529, loss: 0.003867859486490488 2023-01-22 20:41:35.880853: step: 544/529, loss: 9.876215335680172e-05 2023-01-22 20:41:36.935217: step: 548/529, loss: 0.006455567199736834 2023-01-22 20:41:37.994282: step: 552/529, loss: 0.012330739758908749 2023-01-22 20:41:39.050915: step: 556/529, loss: 0.0028007859364151955 2023-01-22 20:41:40.097048: step: 560/529, loss: 0.024414516985416412 2023-01-22 20:41:41.156129: step: 564/529, loss: 0.0048380568623542786 2023-01-22 20:41:42.204510: step: 568/529, loss: 0.0009671932784840465 2023-01-22 20:41:43.266906: step: 572/529, loss: 0.007043642457574606 2023-01-22 20:41:44.336416: step: 576/529, loss: 0.012031720019876957 2023-01-22 20:41:45.381405: step: 580/529, loss: 0.007606689352542162 2023-01-22 20:41:46.429044: step: 584/529, loss: 0.005316408816725016 2023-01-22 20:41:47.464831: step: 588/529, loss: 0.018632540479302406 2023-01-22 20:41:48.514798: step: 592/529, loss: 0.0007140995585359633 2023-01-22 20:41:49.563844: step: 596/529, loss: 0.01693444326519966 2023-01-22 20:41:50.642915: step: 600/529, loss: 0.002484208205714822 2023-01-22 20:41:51.690991: step: 604/529, loss: 0.012808961793780327 2023-01-22 20:41:52.745847: step: 608/529, loss: 0.023313501849770546 2023-01-22 20:41:53.798276: step: 612/529, loss: 0.0005394491017796099 2023-01-22 20:41:54.841498: step: 616/529, loss: 0.0213242806494236 2023-01-22 20:41:55.897512: step: 620/529, loss: 0.0038967744912952185 2023-01-22 20:41:56.964559: step: 624/529, loss: 0.0005699424655176699 2023-01-22 20:41:58.016300: step: 628/529, loss: 0.0024028411135077477 2023-01-22 20:41:59.054668: step: 632/529, loss: 0.005883142817765474 2023-01-22 20:42:00.089169: step: 636/529, loss: 0.0005097048124298453 2023-01-22 20:42:01.133740: step: 640/529, loss: 0.005305488593876362 2023-01-22 20:42:02.182714: step: 644/529, loss: 0.007276056334376335 2023-01-22 20:42:03.223094: step: 648/529, loss: 0.020804421976208687 2023-01-22 20:42:04.269097: step: 652/529, loss: 0.005687142256647348 2023-01-22 20:42:05.334027: step: 656/529, loss: 7.923376688268036e-05 2023-01-22 20:42:06.370094: step: 660/529, loss: 0.013029214926064014 2023-01-22 20:42:07.412453: step: 664/529, loss: 0.003912067040801048 2023-01-22 20:42:08.463255: step: 668/529, loss: 0.034013353288173676 2023-01-22 20:42:09.499460: step: 672/529, loss: 0.008656191639602184 2023-01-22 20:42:10.558358: step: 676/529, loss: 0.005629325285553932 2023-01-22 20:42:11.623129: step: 680/529, loss: 0.0034782651346176863 2023-01-22 20:42:12.680586: step: 684/529, loss: 0.00882573239505291 2023-01-22 20:42:13.726227: step: 688/529, loss: 0.0016626326832920313 2023-01-22 20:42:14.769040: step: 692/529, loss: 2.3558945031254552e-05 2023-01-22 20:42:15.832342: step: 696/529, loss: 0.019673597067594528 2023-01-22 20:42:16.891471: step: 700/529, loss: 0.009327026084065437 2023-01-22 20:42:17.945900: step: 704/529, loss: 0.0001285663020098582 2023-01-22 20:42:18.986716: step: 708/529, loss: 0.00439535453915596 2023-01-22 20:42:20.050215: step: 712/529, loss: 0.003424061695113778 2023-01-22 20:42:21.093832: step: 716/529, loss: 0.009291629306972027 2023-01-22 20:42:22.166432: step: 720/529, loss: 0.007102983072400093 2023-01-22 20:42:23.223173: step: 724/529, loss: 0.00016753238742239773 2023-01-22 20:42:24.301987: step: 728/529, loss: 0.00897889118641615 2023-01-22 20:42:25.361202: step: 732/529, loss: 0.019379105418920517 2023-01-22 20:42:26.423780: step: 736/529, loss: 0.0031782547011971474 2023-01-22 20:42:27.469733: step: 740/529, loss: 0.0006825546734035015 2023-01-22 20:42:28.535269: step: 744/529, loss: 0.04118768125772476 2023-01-22 20:42:29.584151: step: 748/529, loss: 0.009162982925772667 2023-01-22 20:42:30.636994: step: 752/529, loss: 0.02648012712597847 2023-01-22 20:42:31.673966: step: 756/529, loss: 0.00011568747140699998 2023-01-22 20:42:32.713301: step: 760/529, loss: 0.009785100817680359 2023-01-22 20:42:33.783364: step: 764/529, loss: 0.02606017328798771 2023-01-22 20:42:34.825940: step: 768/529, loss: 0.0034594100434333086 2023-01-22 20:42:35.877719: step: 772/529, loss: 0.005184119567275047 2023-01-22 20:42:36.939630: step: 776/529, loss: 0.024212410673499107 2023-01-22 20:42:37.977052: step: 780/529, loss: 0.02686261385679245 2023-01-22 20:42:39.032548: step: 784/529, loss: 0.015509597025811672 2023-01-22 20:42:40.084875: step: 788/529, loss: 0.014809778891503811 2023-01-22 20:42:41.147654: step: 792/529, loss: 0.0032797246240079403 2023-01-22 20:42:42.198694: step: 796/529, loss: 0.0006011223886162043 2023-01-22 20:42:43.241988: step: 800/529, loss: 0.005283440928906202 2023-01-22 20:42:44.292964: step: 804/529, loss: 0.011103903874754906 2023-01-22 20:42:45.337164: step: 808/529, loss: 0.004357552621513605 2023-01-22 20:42:46.393036: step: 812/529, loss: 0.00954181607812643 2023-01-22 20:42:47.432045: step: 816/529, loss: 0.008148825727403164 2023-01-22 20:42:48.486596: step: 820/529, loss: 0.006697388365864754 2023-01-22 20:42:49.544160: step: 824/529, loss: 0.0010361068416386843 2023-01-22 20:42:50.606396: step: 828/529, loss: 0.003057020017877221 2023-01-22 20:42:51.665097: step: 832/529, loss: 0.09200213104486465 2023-01-22 20:42:52.705123: step: 836/529, loss: 0.002408974803984165 2023-01-22 20:42:53.749453: step: 840/529, loss: 0.005381615832448006 2023-01-22 20:42:54.799579: step: 844/529, loss: 0.002756470814347267 2023-01-22 20:42:55.837445: step: 848/529, loss: 0.005640943069010973 2023-01-22 20:42:56.866981: step: 852/529, loss: 0.002816244261339307 2023-01-22 20:42:57.906144: step: 856/529, loss: 0.017023885622620583 2023-01-22 20:42:58.964762: step: 860/529, loss: 0.0034948564134538174 2023-01-22 20:43:00.012381: step: 864/529, loss: 0.012284182012081146 2023-01-22 20:43:01.070163: step: 868/529, loss: 0.007817283272743225 2023-01-22 20:43:02.132060: step: 872/529, loss: 0.005652410443872213 2023-01-22 20:43:03.202944: step: 876/529, loss: 0.0035922580864280462 2023-01-22 20:43:04.255475: step: 880/529, loss: 0.009137238375842571 2023-01-22 20:43:05.307014: step: 884/529, loss: 0.0075898319482803345 2023-01-22 20:43:06.360849: step: 888/529, loss: 0.005378589499741793 2023-01-22 20:43:07.408768: step: 892/529, loss: 0.0027653842698782682 2023-01-22 20:43:08.465342: step: 896/529, loss: 0.009672717191278934 2023-01-22 20:43:09.516286: step: 900/529, loss: 0.013271757401525974 2023-01-22 20:43:10.556483: step: 904/529, loss: 0.01503598690032959 2023-01-22 20:43:11.625694: step: 908/529, loss: 0.006287833210080862 2023-01-22 20:43:12.681250: step: 912/529, loss: 0.0029748398810625076 2023-01-22 20:43:13.743186: step: 916/529, loss: 0.008829597383737564 2023-01-22 20:43:14.782459: step: 920/529, loss: 0.007881560362875462 2023-01-22 20:43:15.832892: step: 924/529, loss: 0.00848472025245428 2023-01-22 20:43:16.870001: step: 928/529, loss: 0.043218858540058136 2023-01-22 20:43:17.920065: step: 932/529, loss: 0.0205976665019989 2023-01-22 20:43:18.955258: step: 936/529, loss: 0.001519115292467177 2023-01-22 20:43:20.013556: step: 940/529, loss: 0.0715329498052597 2023-01-22 20:43:21.053933: step: 944/529, loss: 0.01387772336602211 2023-01-22 20:43:22.126005: step: 948/529, loss: 0.005131635349243879 2023-01-22 20:43:23.176156: step: 952/529, loss: 0.006119557190686464 2023-01-22 20:43:24.240805: step: 956/529, loss: 0.013821277767419815 2023-01-22 20:43:25.302577: step: 960/529, loss: 0.011609617620706558 2023-01-22 20:43:26.354057: step: 964/529, loss: 0.0023277131840586662 2023-01-22 20:43:27.401739: step: 968/529, loss: 0.04275273531675339 2023-01-22 20:43:28.443190: step: 972/529, loss: 0.007000874727964401 2023-01-22 20:43:29.514039: step: 976/529, loss: 0.05131024122238159 2023-01-22 20:43:30.565796: step: 980/529, loss: 0.027935031801462173 2023-01-22 20:43:31.613353: step: 984/529, loss: 0.006313640158623457 2023-01-22 20:43:32.670546: step: 988/529, loss: 0.013393081724643707 2023-01-22 20:43:33.709348: step: 992/529, loss: 0.0013849545503035188 2023-01-22 20:43:34.764332: step: 996/529, loss: 0.0034446455538272858 2023-01-22 20:43:35.813185: step: 1000/529, loss: 0.0019654599018394947 2023-01-22 20:43:36.854675: step: 1004/529, loss: 0.008411969058215618 2023-01-22 20:43:37.924474: step: 1008/529, loss: 0.012529555708169937 2023-01-22 20:43:38.982468: step: 1012/529, loss: 0.01764804497361183 2023-01-22 20:43:40.037308: step: 1016/529, loss: 0.004600695800036192 2023-01-22 20:43:41.076803: step: 1020/529, loss: 0.007580797653645277 2023-01-22 20:43:42.131152: step: 1024/529, loss: 0.005055199842900038 2023-01-22 20:43:43.177336: step: 1028/529, loss: 0.009149763733148575 2023-01-22 20:43:44.239330: step: 1032/529, loss: 0.04893635958433151 2023-01-22 20:43:45.284213: step: 1036/529, loss: 0.008037861436605453 2023-01-22 20:43:46.322139: step: 1040/529, loss: 0.0021698998752981424 2023-01-22 20:43:47.375517: step: 1044/529, loss: 0.009808819741010666 2023-01-22 20:43:48.437064: step: 1048/529, loss: 0.022708097472786903 2023-01-22 20:43:49.482941: step: 1052/529, loss: 0.007051931694149971 2023-01-22 20:43:50.528624: step: 1056/529, loss: 0.01182352565228939 2023-01-22 20:43:51.584711: step: 1060/529, loss: 0.005562472157180309 2023-01-22 20:43:52.633551: step: 1064/529, loss: 0.005953978281468153 2023-01-22 20:43:53.680242: step: 1068/529, loss: 0.008256226778030396 2023-01-22 20:43:54.725644: step: 1072/529, loss: 0.008127371780574322 2023-01-22 20:43:55.756079: step: 1076/529, loss: 0.008761660195887089 2023-01-22 20:43:56.819126: step: 1080/529, loss: 0.005018271040171385 2023-01-22 20:43:57.872417: step: 1084/529, loss: 0.018078407272696495 2023-01-22 20:43:58.921378: step: 1088/529, loss: 0.006192586850374937 2023-01-22 20:43:59.966196: step: 1092/529, loss: 0.009030920453369617 2023-01-22 20:44:01.016224: step: 1096/529, loss: 0.0027431806083768606 2023-01-22 20:44:02.074210: step: 1100/529, loss: 0.0041840458288788795 2023-01-22 20:44:03.140380: step: 1104/529, loss: 0.004774863366037607 2023-01-22 20:44:04.179105: step: 1108/529, loss: 0.0007985194679349661 2023-01-22 20:44:05.230972: step: 1112/529, loss: 0.02322738990187645 2023-01-22 20:44:06.273297: step: 1116/529, loss: 0.000559560488909483 2023-01-22 20:44:07.330208: step: 1120/529, loss: 0.0015954470727592707 2023-01-22 20:44:08.381553: step: 1124/529, loss: 0.0021640071645379066 2023-01-22 20:44:09.447120: step: 1128/529, loss: 0.03130905702710152 2023-01-22 20:44:10.510592: step: 1132/529, loss: 0.0035858768969774246 2023-01-22 20:44:11.557840: step: 1136/529, loss: 0.00625680573284626 2023-01-22 20:44:12.620619: step: 1140/529, loss: 0.005452013574540615 2023-01-22 20:44:13.678745: step: 1144/529, loss: 0.007115437649190426 2023-01-22 20:44:14.734943: step: 1148/529, loss: 0.004546810407191515 2023-01-22 20:44:15.795201: step: 1152/529, loss: 0.0034040322061628103 2023-01-22 20:44:16.839606: step: 1156/529, loss: 0.006536812521517277 2023-01-22 20:44:17.882398: step: 1160/529, loss: 0.006710847374051809 2023-01-22 20:44:18.934617: step: 1164/529, loss: 0.00020382300135679543 2023-01-22 20:44:19.977950: step: 1168/529, loss: 0.01146881002932787 2023-01-22 20:44:21.018731: step: 1172/529, loss: 0.0019391977693885565 2023-01-22 20:44:22.089111: step: 1176/529, loss: 0.04001364856958389 2023-01-22 20:44:23.141990: step: 1180/529, loss: 0.003103894181549549 2023-01-22 20:44:24.168679: step: 1184/529, loss: 0.000752914696931839 2023-01-22 20:44:25.216172: step: 1188/529, loss: 0.029446875676512718 2023-01-22 20:44:26.297822: step: 1192/529, loss: 0.010021678172051907 2023-01-22 20:44:27.358911: step: 1196/529, loss: 0.007594669237732887 2023-01-22 20:44:28.409210: step: 1200/529, loss: 0.012734473682940006 2023-01-22 20:44:29.453976: step: 1204/529, loss: 0.005931871943175793 2023-01-22 20:44:30.506984: step: 1208/529, loss: 0.013309831731021404 2023-01-22 20:44:31.568116: step: 1212/529, loss: 0.006066114641726017 2023-01-22 20:44:32.608022: step: 1216/529, loss: 0.003518663579598069 2023-01-22 20:44:33.660661: step: 1220/529, loss: 0.011779836378991604 2023-01-22 20:44:34.720350: step: 1224/529, loss: 0.005570013076066971 2023-01-22 20:44:35.770556: step: 1228/529, loss: 0.0026514141354709864 2023-01-22 20:44:36.863292: step: 1232/529, loss: 0.0028603507671505213 2023-01-22 20:44:37.907721: step: 1236/529, loss: 0.002618933329358697 2023-01-22 20:44:38.947973: step: 1240/529, loss: 0.015570678748190403 2023-01-22 20:44:39.986695: step: 1244/529, loss: 0.012244373559951782 2023-01-22 20:44:41.032651: step: 1248/529, loss: 0.0038620950654149055 2023-01-22 20:44:42.094124: step: 1252/529, loss: 0.005834421142935753 2023-01-22 20:44:43.148263: step: 1256/529, loss: 0.007925285957753658 2023-01-22 20:44:44.207959: step: 1260/529, loss: 0.0010500873904675245 2023-01-22 20:44:45.262989: step: 1264/529, loss: 0.0038320235908031464 2023-01-22 20:44:46.305386: step: 1268/529, loss: 0.0023174425587058067 2023-01-22 20:44:47.374898: step: 1272/529, loss: 0.0249689519405365 2023-01-22 20:44:48.418971: step: 1276/529, loss: 0.01003737561404705 2023-01-22 20:44:49.485469: step: 1280/529, loss: 0.008079884573817253 2023-01-22 20:44:50.528175: step: 1284/529, loss: 0.0019671660847961903 2023-01-22 20:44:51.567172: step: 1288/529, loss: 0.005430907476693392 2023-01-22 20:44:52.608498: step: 1292/529, loss: 0.004998387303203344 2023-01-22 20:44:53.661375: step: 1296/529, loss: 0.010008488781750202 2023-01-22 20:44:54.718592: step: 1300/529, loss: 0.007138208486139774 2023-01-22 20:44:55.787662: step: 1304/529, loss: 0.004537483677268028 2023-01-22 20:44:56.854103: step: 1308/529, loss: 0.0026460825465619564 2023-01-22 20:44:57.902065: step: 1312/529, loss: 0.008456207811832428 2023-01-22 20:44:58.939661: step: 1316/529, loss: 0.003798689227551222 2023-01-22 20:44:59.986127: step: 1320/529, loss: 0.0051660300232470036 2023-01-22 20:45:01.026281: step: 1324/529, loss: 0.0005838748766109347 2023-01-22 20:45:02.093778: step: 1328/529, loss: 0.03099573403596878 2023-01-22 20:45:03.152264: step: 1332/529, loss: 0.00020527427841443568 2023-01-22 20:45:04.193262: step: 1336/529, loss: 6.52045855531469e-05 2023-01-22 20:45:05.243827: step: 1340/529, loss: 0.010828651487827301 2023-01-22 20:45:06.303644: step: 1344/529, loss: 0.008284438401460648 2023-01-22 20:45:07.363757: step: 1348/529, loss: 0.004853402730077505 2023-01-22 20:45:08.419137: step: 1352/529, loss: 0.01000240258872509 2023-01-22 20:45:09.470045: step: 1356/529, loss: 0.00013289348862599581 2023-01-22 20:45:10.520810: step: 1360/529, loss: 0.006068001501262188 2023-01-22 20:45:11.574334: step: 1364/529, loss: 0.023817013949155807 2023-01-22 20:45:12.625869: step: 1368/529, loss: 0.020465966314077377 2023-01-22 20:45:13.665970: step: 1372/529, loss: 0.008516632951796055 2023-01-22 20:45:14.720670: step: 1376/529, loss: 0.005069859325885773 2023-01-22 20:45:15.772093: step: 1380/529, loss: 0.035662490874528885 2023-01-22 20:45:16.834494: step: 1384/529, loss: 0.0018937982385978103 2023-01-22 20:45:17.882983: step: 1388/529, loss: 0.0024043733719736338 2023-01-22 20:45:18.945527: step: 1392/529, loss: 0.004089605063199997 2023-01-22 20:45:19.991594: step: 1396/529, loss: 0.02496258169412613 2023-01-22 20:45:21.033664: step: 1400/529, loss: 0.004224271513521671 2023-01-22 20:45:22.078537: step: 1404/529, loss: 0.002199976472184062 2023-01-22 20:45:23.123198: step: 1408/529, loss: 0.00746961822733283 2023-01-22 20:45:24.178462: step: 1412/529, loss: 0.00982865784317255 2023-01-22 20:45:25.215712: step: 1416/529, loss: 0.0016633948544040322 2023-01-22 20:45:26.267295: step: 1420/529, loss: 0.004381684586405754 2023-01-22 20:45:27.343316: step: 1424/529, loss: 0.03226260840892792 2023-01-22 20:45:28.423599: step: 1428/529, loss: 0.008671775460243225 2023-01-22 20:45:29.476030: step: 1432/529, loss: 0.003400318557396531 2023-01-22 20:45:30.508071: step: 1436/529, loss: 0.007719533052295446 2023-01-22 20:45:31.553181: step: 1440/529, loss: 0.01135760173201561 2023-01-22 20:45:32.597359: step: 1444/529, loss: 0.009906083345413208 2023-01-22 20:45:33.658665: step: 1448/529, loss: 0.006750479340553284 2023-01-22 20:45:34.717096: step: 1452/529, loss: 0.008141616359353065 2023-01-22 20:45:35.768438: step: 1456/529, loss: 0.0022892772685736418 2023-01-22 20:45:36.829014: step: 1460/529, loss: 0.007740786299109459 2023-01-22 20:45:37.868478: step: 1464/529, loss: 0.0022128659766167402 2023-01-22 20:45:38.916272: step: 1468/529, loss: 0.003114899154752493 2023-01-22 20:45:39.977188: step: 1472/529, loss: 0.0039520529098808765 2023-01-22 20:45:41.066427: step: 1476/529, loss: 0.004174373112618923 2023-01-22 20:45:42.099423: step: 1480/529, loss: 0.005985192954540253 2023-01-22 20:45:43.146981: step: 1484/529, loss: 0.010510027408599854 2023-01-22 20:45:44.191954: step: 1488/529, loss: 0.002910297829657793 2023-01-22 20:45:45.258185: step: 1492/529, loss: 0.004397583659738302 2023-01-22 20:45:46.314710: step: 1496/529, loss: 0.009632998146116734 2023-01-22 20:45:47.364339: step: 1500/529, loss: 0.02090447209775448 2023-01-22 20:45:48.400285: step: 1504/529, loss: 0.0013201754773035645 2023-01-22 20:45:49.462054: step: 1508/529, loss: 0.015047918073832989 2023-01-22 20:45:50.513569: step: 1512/529, loss: 0.017057547345757484 2023-01-22 20:45:51.569567: step: 1516/529, loss: 0.0014086196897551417 2023-01-22 20:45:52.612269: step: 1520/529, loss: 0.005536028183996677 2023-01-22 20:45:53.655220: step: 1524/529, loss: 0.007469982840120792 2023-01-22 20:45:54.703554: step: 1528/529, loss: 0.00017267375369556248 2023-01-22 20:45:55.743592: step: 1532/529, loss: 0.067486472427845 2023-01-22 20:45:56.802181: step: 1536/529, loss: 0.003867912571877241 2023-01-22 20:45:57.850288: step: 1540/529, loss: 0.0032956181094050407 2023-01-22 20:45:58.900915: step: 1544/529, loss: 0.015459376387298107 2023-01-22 20:45:59.935932: step: 1548/529, loss: 0.004179155919700861 2023-01-22 20:46:00.998635: step: 1552/529, loss: 0.005018417723476887 2023-01-22 20:46:02.048174: step: 1556/529, loss: 0.0007420636829920113 2023-01-22 20:46:03.106238: step: 1560/529, loss: 0.008511663414537907 2023-01-22 20:46:04.198197: step: 1564/529, loss: 0.008958940394222736 2023-01-22 20:46:05.243756: step: 1568/529, loss: 0.006533031817525625 2023-01-22 20:46:06.297002: step: 1572/529, loss: 0.0024251060094684362 2023-01-22 20:46:07.352668: step: 1576/529, loss: 0.015983154997229576 2023-01-22 20:46:08.402167: step: 1580/529, loss: 0.004558037035167217 2023-01-22 20:46:09.466368: step: 1584/529, loss: 0.0016301891300827265 2023-01-22 20:46:10.526425: step: 1588/529, loss: 0.003920048475265503 2023-01-22 20:46:11.565828: step: 1592/529, loss: 0.00460980786010623 2023-01-22 20:46:12.609898: step: 1596/529, loss: 0.0022013839334249496 2023-01-22 20:46:13.649784: step: 1600/529, loss: 0.0028462756890803576 2023-01-22 20:46:14.694664: step: 1604/529, loss: 0.011513668112456799 2023-01-22 20:46:15.764795: step: 1608/529, loss: 0.00922415778040886 2023-01-22 20:46:16.819172: step: 1612/529, loss: 0.027424195781350136 2023-01-22 20:46:17.859204: step: 1616/529, loss: 0.0104572344571352 2023-01-22 20:46:18.915780: step: 1620/529, loss: 0.007106281351298094 2023-01-22 20:46:19.967494: step: 1624/529, loss: 0.01389244757592678 2023-01-22 20:46:21.021392: step: 1628/529, loss: 0.01727847009897232 2023-01-22 20:46:22.081762: step: 1632/529, loss: 0.016096945852041245 2023-01-22 20:46:23.140674: step: 1636/529, loss: 0.0060960156843066216 2023-01-22 20:46:24.215992: step: 1640/529, loss: 0.030349215492606163 2023-01-22 20:46:25.267380: step: 1644/529, loss: 0.026542028412222862 2023-01-22 20:46:26.318080: step: 1648/529, loss: 0.006606078706681728 2023-01-22 20:46:27.361714: step: 1652/529, loss: 0.006048164330422878 2023-01-22 20:46:28.405455: step: 1656/529, loss: 0.040182679891586304 2023-01-22 20:46:29.443619: step: 1660/529, loss: 0.012790779583156109 2023-01-22 20:46:30.503219: step: 1664/529, loss: 0.0005048942985013127 2023-01-22 20:46:31.552411: step: 1668/529, loss: 0.007903759367763996 2023-01-22 20:46:32.588186: step: 1672/529, loss: 0.002809855854138732 2023-01-22 20:46:33.630027: step: 1676/529, loss: 0.013880029320716858 2023-01-22 20:46:34.678160: step: 1680/529, loss: 0.030947690829634666 2023-01-22 20:46:35.727763: step: 1684/529, loss: 0.006410995963960886 2023-01-22 20:46:36.761255: step: 1688/529, loss: 0.003931113518774509 2023-01-22 20:46:37.802580: step: 1692/529, loss: 0.0059612346813082695 2023-01-22 20:46:38.846526: step: 1696/529, loss: 0.014973499812185764 2023-01-22 20:46:39.901601: step: 1700/529, loss: 0.006248392630368471 2023-01-22 20:46:40.938422: step: 1704/529, loss: 0.0033808392472565174 2023-01-22 20:46:41.987347: step: 1708/529, loss: 0.003967809025198221 2023-01-22 20:46:43.030644: step: 1712/529, loss: 0.005686058662831783 2023-01-22 20:46:44.060241: step: 1716/529, loss: 0.008055990561842918 2023-01-22 20:46:45.091628: step: 1720/529, loss: 0.017830757424235344 2023-01-22 20:46:46.152229: step: 1724/529, loss: 0.026042364537715912 2023-01-22 20:46:47.195799: step: 1728/529, loss: 0.009538806043565273 2023-01-22 20:46:48.242833: step: 1732/529, loss: 0.02040978893637657 2023-01-22 20:46:49.291881: step: 1736/529, loss: 0.005226320121437311 2023-01-22 20:46:50.328910: step: 1740/529, loss: 0.01148279570043087 2023-01-22 20:46:51.381022: step: 1744/529, loss: 0.002418922958895564 2023-01-22 20:46:52.415009: step: 1748/529, loss: 0.0027232333086431026 2023-01-22 20:46:53.470710: step: 1752/529, loss: 0.0008075407240539789 2023-01-22 20:46:54.532141: step: 1756/529, loss: 0.015472984872758389 2023-01-22 20:46:55.591932: step: 1760/529, loss: 0.0021383606363087893 2023-01-22 20:46:56.650200: step: 1764/529, loss: 0.0044258409179747105 2023-01-22 20:46:57.708417: step: 1768/529, loss: 0.028770407661795616 2023-01-22 20:46:58.751486: step: 1772/529, loss: 0.003733373247087002 2023-01-22 20:46:59.809274: step: 1776/529, loss: 0.01202247105538845 2023-01-22 20:47:00.856791: step: 1780/529, loss: 0.0035271558444947004 2023-01-22 20:47:01.922632: step: 1784/529, loss: 0.018062949180603027 2023-01-22 20:47:02.974940: step: 1788/529, loss: 4.362559047876857e-05 2023-01-22 20:47:04.031790: step: 1792/529, loss: 0.017346929758787155 2023-01-22 20:47:05.091809: step: 1796/529, loss: 0.01283997017890215 2023-01-22 20:47:06.146547: step: 1800/529, loss: 0.041725918650627136 2023-01-22 20:47:07.196912: step: 1804/529, loss: 0.0027287781704217196 2023-01-22 20:47:08.251185: step: 1808/529, loss: 0.01752067171037197 2023-01-22 20:47:09.295628: step: 1812/529, loss: 0.004307575523853302 2023-01-22 20:47:10.329516: step: 1816/529, loss: 0.00557295884937048 2023-01-22 20:47:11.356564: step: 1820/529, loss: 0.00024069013306871057 2023-01-22 20:47:12.402582: step: 1824/529, loss: 0.008719326928257942 2023-01-22 20:47:13.449249: step: 1828/529, loss: 0.005578347481787205 2023-01-22 20:47:14.506644: step: 1832/529, loss: 0.015396920032799244 2023-01-22 20:47:15.564523: step: 1836/529, loss: 0.008813274092972279 2023-01-22 20:47:16.610391: step: 1840/529, loss: 0.01484970934689045 2023-01-22 20:47:17.659186: step: 1844/529, loss: 0.0043105692602694035 2023-01-22 20:47:18.694193: step: 1848/529, loss: 0.027481095865368843 2023-01-22 20:47:19.749411: step: 1852/529, loss: 0.008960974402725697 2023-01-22 20:47:20.812650: step: 1856/529, loss: 0.007280465215444565 2023-01-22 20:47:21.855091: step: 1860/529, loss: 0.003491520183160901 2023-01-22 20:47:22.942823: step: 1864/529, loss: 0.06849480420351028 2023-01-22 20:47:23.997020: step: 1868/529, loss: 3.203741272272964e-08 2023-01-22 20:47:25.037470: step: 1872/529, loss: 0.028073083609342575 2023-01-22 20:47:26.090110: step: 1876/529, loss: 0.0019630317110568285 2023-01-22 20:47:27.145383: step: 1880/529, loss: 0.010865608230233192 2023-01-22 20:47:28.200341: step: 1884/529, loss: 0.008823062293231487 2023-01-22 20:47:29.249154: step: 1888/529, loss: 0.01453643012791872 2023-01-22 20:47:30.323929: step: 1892/529, loss: 0.008397593162953854 2023-01-22 20:47:31.381443: step: 1896/529, loss: 0.009981879964470863 2023-01-22 20:47:32.437650: step: 1900/529, loss: 0.015491724014282227 2023-01-22 20:47:33.479037: step: 1904/529, loss: 0.01963350549340248 2023-01-22 20:47:34.526819: step: 1908/529, loss: 0.006219983100891113 2023-01-22 20:47:35.586739: step: 1912/529, loss: 0.0020787317771464586 2023-01-22 20:47:36.638356: step: 1916/529, loss: 0.01209577638655901 2023-01-22 20:47:37.702881: step: 1920/529, loss: 0.01098283939063549 2023-01-22 20:47:38.778840: step: 1924/529, loss: 0.020216599106788635 2023-01-22 20:47:39.818843: step: 1928/529, loss: 0.001231981790624559 2023-01-22 20:47:40.863522: step: 1932/529, loss: 0.013211369514465332 2023-01-22 20:47:41.902143: step: 1936/529, loss: 0.004367074929177761 2023-01-22 20:47:42.941858: step: 1940/529, loss: 0.006536758970469236 2023-01-22 20:47:43.990607: step: 1944/529, loss: 0.015867548063397408 2023-01-22 20:47:45.059890: step: 1948/529, loss: 0.01573462225496769 2023-01-22 20:47:46.089245: step: 1952/529, loss: 0.02607549913227558 2023-01-22 20:47:47.127753: step: 1956/529, loss: 0.006091012619435787 2023-01-22 20:47:48.168620: step: 1960/529, loss: 0.0029211300425231457 2023-01-22 20:47:49.229518: step: 1964/529, loss: 0.0058385953307151794 2023-01-22 20:47:50.261886: step: 1968/529, loss: 0.0616777129471302 2023-01-22 20:47:51.307074: step: 1972/529, loss: 0.0019854146521538496 2023-01-22 20:47:52.353516: step: 1976/529, loss: 0.025478098541498184 2023-01-22 20:47:53.396950: step: 1980/529, loss: 0.02005152218043804 2023-01-22 20:47:54.455277: step: 1984/529, loss: 0.012796014547348022 2023-01-22 20:47:55.512148: step: 1988/529, loss: 0.03704017400741577 2023-01-22 20:47:56.573928: step: 1992/529, loss: 0.012510542757809162 2023-01-22 20:47:57.630827: step: 1996/529, loss: 0.008263529278337955 2023-01-22 20:47:58.681084: step: 2000/529, loss: 0.007862303406000137 2023-01-22 20:47:59.737004: step: 2004/529, loss: 0.0045217182487249374 2023-01-22 20:48:00.816053: step: 2008/529, loss: 0.005549720488488674 2023-01-22 20:48:01.860340: step: 2012/529, loss: 0.006743513513356447 2023-01-22 20:48:02.913326: step: 2016/529, loss: 0.006827748380601406 2023-01-22 20:48:03.971725: step: 2020/529, loss: 0.014867347665131092 2023-01-22 20:48:05.044869: step: 2024/529, loss: 0.037739746272563934 2023-01-22 20:48:06.087420: step: 2028/529, loss: 0.010698607191443443 2023-01-22 20:48:07.130713: step: 2032/529, loss: 0.013002397492527962 2023-01-22 20:48:08.184804: step: 2036/529, loss: 0.004617432598024607 2023-01-22 20:48:09.228354: step: 2040/529, loss: 0.012744104489684105 2023-01-22 20:48:10.285173: step: 2044/529, loss: 0.006440219469368458 2023-01-22 20:48:11.328480: step: 2048/529, loss: 0.02469000592827797 2023-01-22 20:48:12.376797: step: 2052/529, loss: 0.0010574222542345524 2023-01-22 20:48:13.423862: step: 2056/529, loss: 0.0030768837314099073 2023-01-22 20:48:14.484082: step: 2060/529, loss: 0.003656399203464389 2023-01-22 20:48:15.547894: step: 2064/529, loss: 0.001612451276741922 2023-01-22 20:48:16.593771: step: 2068/529, loss: 0.004286501090973616 2023-01-22 20:48:17.651401: step: 2072/529, loss: 0.008329802192747593 2023-01-22 20:48:18.700172: step: 2076/529, loss: 0.004642146173864603 2023-01-22 20:48:19.774795: step: 2080/529, loss: 0.0008475060458295047 2023-01-22 20:48:20.823195: step: 2084/529, loss: 0.012219317257404327 2023-01-22 20:48:21.849932: step: 2088/529, loss: 0.0028849546797573566 2023-01-22 20:48:22.883973: step: 2092/529, loss: 0.0019038283498957753 2023-01-22 20:48:23.939567: step: 2096/529, loss: 0.0038213240914046764 2023-01-22 20:48:24.992582: step: 2100/529, loss: 0.007835282944142818 2023-01-22 20:48:26.050083: step: 2104/529, loss: 0.001253671245649457 2023-01-22 20:48:27.107676: step: 2108/529, loss: 0.0258998554199934 2023-01-22 20:48:28.155949: step: 2112/529, loss: 0.004032108001410961 2023-01-22 20:48:29.200347: step: 2116/529, loss: 0.008642514236271381 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 7 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 20:51:25.531260: step: 4/529, loss: 0.0023045274429023266 2023-01-22 20:51:26.565198: step: 8/529, loss: 0.011228933930397034 2023-01-22 20:51:27.601869: step: 12/529, loss: 0.004253579769283533 2023-01-22 20:51:28.649010: step: 16/529, loss: 0.021976569667458534 2023-01-22 20:51:29.691491: step: 20/529, loss: 0.006206500809639692 2023-01-22 20:51:30.732753: step: 24/529, loss: 0.006819066591560841 2023-01-22 20:51:31.794977: step: 28/529, loss: 0.0112458486109972 2023-01-22 20:51:32.834126: step: 32/529, loss: 0.011133863590657711 2023-01-22 20:51:33.881432: step: 36/529, loss: 0.03443567082285881 2023-01-22 20:51:34.919148: step: 40/529, loss: 0.02194317616522312 2023-01-22 20:51:35.963634: step: 44/529, loss: 0.0016771041555330157 2023-01-22 20:51:37.006847: step: 48/529, loss: 0.007127740420401096 2023-01-22 20:51:38.050785: step: 52/529, loss: 0.0048787579871714115 2023-01-22 20:51:39.107599: step: 56/529, loss: 0.00648889783769846 2023-01-22 20:51:40.163664: step: 60/529, loss: 0.06373632699251175 2023-01-22 20:51:41.215789: step: 64/529, loss: 0.009561716578900814 2023-01-22 20:51:42.257845: step: 68/529, loss: 0.0013499922351911664 2023-01-22 20:51:43.300799: step: 72/529, loss: 0.0023670061491429806 2023-01-22 20:51:44.346701: step: 76/529, loss: 0.003257190575823188 2023-01-22 20:51:45.405700: step: 80/529, loss: 0.007145741954445839 2023-01-22 20:51:46.469225: step: 84/529, loss: 0.010281720198690891 2023-01-22 20:51:47.516854: step: 88/529, loss: 0.007999295368790627 2023-01-22 20:51:48.569410: step: 92/529, loss: 0.000205404227017425 2023-01-22 20:51:49.621959: step: 96/529, loss: 0.00958223920315504 2023-01-22 20:51:50.677977: step: 100/529, loss: 0.008379525505006313 2023-01-22 20:51:51.745883: step: 104/529, loss: 0.031177092343568802 2023-01-22 20:51:52.793408: step: 108/529, loss: 0.001502710161730647 2023-01-22 20:51:53.846254: step: 112/529, loss: 0.004969471134245396 2023-01-22 20:51:54.907181: step: 116/529, loss: 0.008130726404488087 2023-01-22 20:51:55.961114: step: 120/529, loss: 0.007696308195590973 2023-01-22 20:51:57.002139: step: 124/529, loss: 0.004909994546324015 2023-01-22 20:51:58.056474: step: 128/529, loss: 0.006238368805497885 2023-01-22 20:51:59.110333: step: 132/529, loss: 0.004564911127090454 2023-01-22 20:52:00.149789: step: 136/529, loss: 0.001993659185245633 2023-01-22 20:52:01.201923: step: 140/529, loss: 0.006207111291587353 2023-01-22 20:52:02.247196: step: 144/529, loss: 0.0020644920878112316 2023-01-22 20:52:03.308554: step: 148/529, loss: 0.006921760272234678 2023-01-22 20:52:04.356717: step: 152/529, loss: 7.124265539459884e-05 2023-01-22 20:52:05.426855: step: 156/529, loss: 0.06694398820400238 2023-01-22 20:52:06.486945: step: 160/529, loss: 0.0016502841608598828 2023-01-22 20:52:07.537361: step: 164/529, loss: 0.004898045677691698 2023-01-22 20:52:08.577309: step: 168/529, loss: 0.004016496241092682 2023-01-22 20:52:09.633204: step: 172/529, loss: 0.000222889706492424 2023-01-22 20:52:10.696785: step: 176/529, loss: 0.005828389432281256 2023-01-22 20:52:11.768127: step: 180/529, loss: 0.005710930563509464 2023-01-22 20:52:12.837067: step: 184/529, loss: 0.0019523096270859241 2023-01-22 20:52:13.885943: step: 188/529, loss: 0.0022768601775169373 2023-01-22 20:52:14.928023: step: 192/529, loss: 0.03230486065149307 2023-01-22 20:52:15.967824: step: 196/529, loss: 0.01586371660232544 2023-01-22 20:52:17.015099: step: 200/529, loss: 0.014680094085633755 2023-01-22 20:52:18.077968: step: 204/529, loss: 0.005278198514133692 2023-01-22 20:52:19.128245: step: 208/529, loss: 0.010319733060896397 2023-01-22 20:52:20.183707: step: 212/529, loss: 0.003149011405184865 2023-01-22 20:52:21.237242: step: 216/529, loss: 0.005597005132585764 2023-01-22 20:52:22.294095: step: 220/529, loss: 0.0010876570595428348 2023-01-22 20:52:23.341161: step: 224/529, loss: 0.00404005404561758 2023-01-22 20:52:24.375094: step: 228/529, loss: 0.004743700847029686 2023-01-22 20:52:25.419036: step: 232/529, loss: 0.017000112682580948 2023-01-22 20:52:26.481357: step: 236/529, loss: 0.00542871467769146 2023-01-22 20:52:27.542937: step: 240/529, loss: 0.003282651538029313 2023-01-22 20:52:28.603833: step: 244/529, loss: 0.0023716045543551445 2023-01-22 20:52:29.674744: step: 248/529, loss: 0.0062157646752893925 2023-01-22 20:52:30.732210: step: 252/529, loss: 0.0042026755400002 2023-01-22 20:52:31.791770: step: 256/529, loss: 0.009019405581057072 2023-01-22 20:52:32.861513: step: 260/529, loss: 0.00553872948512435 2023-01-22 20:52:33.932236: step: 264/529, loss: 0.03417680412530899 2023-01-22 20:52:35.008171: step: 268/529, loss: 0.010676387697458267 2023-01-22 20:52:36.061366: step: 272/529, loss: 0.003809739602729678 2023-01-22 20:52:37.108081: step: 276/529, loss: 0.01024115364998579 2023-01-22 20:52:38.165208: step: 280/529, loss: 0.003652864834293723 2023-01-22 20:52:39.218757: step: 284/529, loss: 0.008994110859930515 2023-01-22 20:52:40.277116: step: 288/529, loss: 0.0377771221101284 2023-01-22 20:52:41.328447: step: 292/529, loss: 0.014945383183658123 2023-01-22 20:52:42.388663: step: 296/529, loss: 0.009220863692462444 2023-01-22 20:52:43.437910: step: 300/529, loss: 0.0026840995997190475 2023-01-22 20:52:44.491219: step: 304/529, loss: 0.0006836710963398218 2023-01-22 20:52:45.542358: step: 308/529, loss: 0.004090073984116316 2023-01-22 20:52:46.600576: step: 312/529, loss: 0.001996321137994528 2023-01-22 20:52:47.649043: step: 316/529, loss: 0.01451816689223051 2023-01-22 20:52:48.702656: step: 320/529, loss: 0.0025968351401388645 2023-01-22 20:52:49.778622: step: 324/529, loss: 0.003326504025608301 2023-01-22 20:52:50.847394: step: 328/529, loss: 0.0036721371579915285 2023-01-22 20:52:51.898321: step: 332/529, loss: 0.0005125876050442457 2023-01-22 20:52:52.976692: step: 336/529, loss: 0.009359709918498993 2023-01-22 20:52:54.028921: step: 340/529, loss: 0.006428618915379047 2023-01-22 20:52:55.093669: step: 344/529, loss: 0.01610073819756508 2023-01-22 20:52:56.155465: step: 348/529, loss: 0.001758598256856203 2023-01-22 20:52:57.207260: step: 352/529, loss: 0.014601621776819229 2023-01-22 20:52:58.269960: step: 356/529, loss: 0.020786141976714134 2023-01-22 20:52:59.343991: step: 360/529, loss: 0.004753530956804752 2023-01-22 20:53:00.401701: step: 364/529, loss: 0.0029584027361124754 2023-01-22 20:53:01.465818: step: 368/529, loss: 0.0018564896890893579 2023-01-22 20:53:02.515202: step: 372/529, loss: 0.002674866234883666 2023-01-22 20:53:03.558198: step: 376/529, loss: 0.0014381998917087913 2023-01-22 20:53:04.602827: step: 380/529, loss: 0.0005801094812341034 2023-01-22 20:53:05.652915: step: 384/529, loss: 0.007998351939022541 2023-01-22 20:53:06.727144: step: 388/529, loss: 0.0054602669551968575 2023-01-22 20:53:07.771083: step: 392/529, loss: 0.012484036386013031 2023-01-22 20:53:08.825023: step: 396/529, loss: 0.003845102619379759 2023-01-22 20:53:09.864344: step: 400/529, loss: 0.000901897728908807 2023-01-22 20:53:10.906139: step: 404/529, loss: 0.010156513191759586 2023-01-22 20:53:11.950537: step: 408/529, loss: 0.053882233798503876 2023-01-22 20:53:13.010751: step: 412/529, loss: 0.0032782733906060457 2023-01-22 20:53:14.071054: step: 416/529, loss: 0.013057449832558632 2023-01-22 20:53:15.131731: step: 420/529, loss: 0.018242230638861656 2023-01-22 20:53:16.188846: step: 424/529, loss: 0.0039817881770431995 2023-01-22 20:53:17.236681: step: 428/529, loss: 0.010982389561831951 2023-01-22 20:53:18.290498: step: 432/529, loss: 0.022366533055901527 2023-01-22 20:53:19.335388: step: 436/529, loss: 0.006050330586731434 2023-01-22 20:53:20.392064: step: 440/529, loss: 0.0035406271927058697 2023-01-22 20:53:21.440236: step: 444/529, loss: 0.001523448503576219 2023-01-22 20:53:22.504265: step: 448/529, loss: 0.012140117585659027 2023-01-22 20:53:23.567884: step: 452/529, loss: 0.006151004694402218 2023-01-22 20:53:24.634927: step: 456/529, loss: 0.009581267833709717 2023-01-22 20:53:25.697637: step: 460/529, loss: 0.014867383986711502 2023-01-22 20:53:26.739978: step: 464/529, loss: 0.0007373564876616001 2023-01-22 20:53:27.795530: step: 468/529, loss: 0.007127965800464153 2023-01-22 20:53:28.863398: step: 472/529, loss: 0.032232873141765594 2023-01-22 20:53:29.915397: step: 476/529, loss: 0.0012849037302657962 2023-01-22 20:53:30.959915: step: 480/529, loss: 0.0035016729962080717 2023-01-22 20:53:32.011793: step: 484/529, loss: 0.004301457665860653 2023-01-22 20:53:33.062551: step: 488/529, loss: 0.012563398107886314 2023-01-22 20:53:34.116641: step: 492/529, loss: 0.030823377892374992 2023-01-22 20:53:35.169450: step: 496/529, loss: 0.04405725374817848 2023-01-22 20:53:36.221810: step: 500/529, loss: 0.036730095744132996 2023-01-22 20:53:37.285041: step: 504/529, loss: 0.01858927123248577 2023-01-22 20:53:38.340675: step: 508/529, loss: 0.015251416712999344 2023-01-22 20:53:39.401896: step: 512/529, loss: 0.002448983723297715 2023-01-22 20:53:40.457425: step: 516/529, loss: 0.01166907325387001 2023-01-22 20:53:41.504986: step: 520/529, loss: 0.003254219191148877 2023-01-22 20:53:42.570553: step: 524/529, loss: 0.0464460551738739 2023-01-22 20:53:43.618638: step: 528/529, loss: 0.0031164067331701517 2023-01-22 20:53:44.666389: step: 532/529, loss: 0.009115656837821007 2023-01-22 20:53:45.726935: step: 536/529, loss: 0.009100779891014099 2023-01-22 20:53:46.767595: step: 540/529, loss: 0.0 2023-01-22 20:53:47.811774: step: 544/529, loss: 0.010497533716261387 2023-01-22 20:53:48.882772: step: 548/529, loss: 0.02581813745200634 2023-01-22 20:53:49.935447: step: 552/529, loss: 0.011672892607748508 2023-01-22 20:53:50.986558: step: 556/529, loss: 0.001962442649528384 2023-01-22 20:53:52.027209: step: 560/529, loss: 0.008429953828454018 2023-01-22 20:53:53.074391: step: 564/529, loss: 0.0024303190875798464 2023-01-22 20:53:54.118150: step: 568/529, loss: 0.00038723612669855356 2023-01-22 20:53:55.160534: step: 572/529, loss: 0.013444959186017513 2023-01-22 20:53:56.224450: step: 576/529, loss: 0.01430303230881691 2023-01-22 20:53:57.269972: step: 580/529, loss: 0.009034675545990467 2023-01-22 20:53:58.350516: step: 584/529, loss: 0.006245921831578016 2023-01-22 20:53:59.396134: step: 588/529, loss: 0.00422328058630228 2023-01-22 20:54:00.441670: step: 592/529, loss: 0.0333227813243866 2023-01-22 20:54:01.490025: step: 596/529, loss: 0.003813460934907198 2023-01-22 20:54:02.546584: step: 600/529, loss: 0.00518802460283041 2023-01-22 20:54:03.596575: step: 604/529, loss: 0.004532285500317812 2023-01-22 20:54:04.651781: step: 608/529, loss: 0.004152851644903421 2023-01-22 20:54:05.694481: step: 612/529, loss: 0.002888813614845276 2023-01-22 20:54:06.749071: step: 616/529, loss: 0.0029874832835048437 2023-01-22 20:54:07.803251: step: 620/529, loss: 0.012224213220179081 2023-01-22 20:54:08.855894: step: 624/529, loss: 0.005523674190044403 2023-01-22 20:54:09.914485: step: 628/529, loss: 0.011892084032297134 2023-01-22 20:54:10.962493: step: 632/529, loss: 0.004358323756605387 2023-01-22 20:54:12.023063: step: 636/529, loss: 0.0020290189422667027 2023-01-22 20:54:13.074531: step: 640/529, loss: 0.005568331573158503 2023-01-22 20:54:14.131245: step: 644/529, loss: 0.00047043198719620705 2023-01-22 20:54:15.179498: step: 648/529, loss: 0.015411232598125935 2023-01-22 20:54:16.239113: step: 652/529, loss: 0.0385340191423893 2023-01-22 20:54:17.279857: step: 656/529, loss: 0.006036559119820595 2023-01-22 20:54:18.324386: step: 660/529, loss: 0.0030251743737608194 2023-01-22 20:54:19.410257: step: 664/529, loss: 0.0003360217378940433 2023-01-22 20:54:20.463156: step: 668/529, loss: 0.0020887404680252075 2023-01-22 20:54:21.527370: step: 672/529, loss: 0.005592403933405876 2023-01-22 20:54:22.575527: step: 676/529, loss: 3.7154324672883376e-05 2023-01-22 20:54:23.634265: step: 680/529, loss: 0.0002183835895266384 2023-01-22 20:54:24.690752: step: 684/529, loss: 0.03913566470146179 2023-01-22 20:54:25.748528: step: 688/529, loss: 0.0005605136975646019 2023-01-22 20:54:26.809131: step: 692/529, loss: 0.023040948435664177 2023-01-22 20:54:27.859186: step: 696/529, loss: 0.003610490122810006 2023-01-22 20:54:28.916907: step: 700/529, loss: 0.0033817924559116364 2023-01-22 20:54:29.964168: step: 704/529, loss: 0.0011157958069816232 2023-01-22 20:54:31.015794: step: 708/529, loss: 0.004889626521617174 2023-01-22 20:54:32.061461: step: 712/529, loss: 0.008227786980569363 2023-01-22 20:54:33.125515: step: 716/529, loss: 0.009387004189193249 2023-01-22 20:54:34.167059: step: 720/529, loss: 0.00011768360127462074 2023-01-22 20:54:35.209555: step: 724/529, loss: 0.0030078573618084192 2023-01-22 20:54:36.252140: step: 728/529, loss: 7.887894025770947e-05 2023-01-22 20:54:37.312237: step: 732/529, loss: 0.0013173844199627638 2023-01-22 20:54:38.368747: step: 736/529, loss: 0.004169007297605276 2023-01-22 20:54:39.436926: step: 740/529, loss: 0.006584996823221445 2023-01-22 20:54:40.499965: step: 744/529, loss: 0.003529854817315936 2023-01-22 20:54:41.567473: step: 748/529, loss: 0.004300139844417572 2023-01-22 20:54:42.638525: step: 752/529, loss: 0.0019124459940940142 2023-01-22 20:54:43.703728: step: 756/529, loss: 0.00455786008387804 2023-01-22 20:54:44.763936: step: 760/529, loss: 0.016308844089508057 2023-01-22 20:54:45.828389: step: 764/529, loss: 0.026593182235956192 2023-01-22 20:54:46.869963: step: 768/529, loss: 0.030140027403831482 2023-01-22 20:54:47.918518: step: 772/529, loss: 0.004473466891795397 2023-01-22 20:54:48.971859: step: 776/529, loss: 0.01213291846215725 2023-01-22 20:54:50.009594: step: 780/529, loss: 0.013113019056618214 2023-01-22 20:54:51.084401: step: 784/529, loss: 0.0051805018447339535 2023-01-22 20:54:52.122740: step: 788/529, loss: 0.0035665922332555056 2023-01-22 20:54:53.175469: step: 792/529, loss: 0.005198049359023571 2023-01-22 20:54:54.238185: step: 796/529, loss: 0.00770610012114048 2023-01-22 20:54:55.287044: step: 800/529, loss: 0.008671020157635212 2023-01-22 20:54:56.349016: step: 804/529, loss: 0.015641216188669205 2023-01-22 20:54:57.394378: step: 808/529, loss: 0.003296887269243598 2023-01-22 20:54:58.457221: step: 812/529, loss: 0.003373671555891633 2023-01-22 20:54:59.518553: step: 816/529, loss: 0.019740156829357147 2023-01-22 20:55:00.566368: step: 820/529, loss: 0.011013539507985115 2023-01-22 20:55:01.619890: step: 824/529, loss: 0.008105099201202393 2023-01-22 20:55:02.693526: step: 828/529, loss: 0.007988348603248596 2023-01-22 20:55:03.742192: step: 832/529, loss: 0.002680855104699731 2023-01-22 20:55:04.792480: step: 836/529, loss: 0.0035361938644200563 2023-01-22 20:55:05.849538: step: 840/529, loss: 0.002939994214102626 2023-01-22 20:55:06.912694: step: 844/529, loss: 0.002155761234462261 2023-01-22 20:55:07.985989: step: 848/529, loss: 0.0012092282995581627 2023-01-22 20:55:09.041275: step: 852/529, loss: 0.007570883724838495 2023-01-22 20:55:10.097128: step: 856/529, loss: 0.002645100699737668 2023-01-22 20:55:11.166184: step: 860/529, loss: 0.017419008538126945 2023-01-22 20:55:12.213101: step: 864/529, loss: 0.0007256052922457457 2023-01-22 20:55:13.263487: step: 868/529, loss: 0.003953471779823303 2023-01-22 20:55:14.295038: step: 872/529, loss: 0.00542035885155201 2023-01-22 20:55:15.348986: step: 876/529, loss: 0.0033736240584403276 2023-01-22 20:55:16.403904: step: 880/529, loss: 0.006717555690556765 2023-01-22 20:55:17.445378: step: 884/529, loss: 0.002057659672573209 2023-01-22 20:55:18.507653: step: 888/529, loss: 0.038611918687820435 2023-01-22 20:55:19.562599: step: 892/529, loss: 0.012965199537575245 2023-01-22 20:55:20.598064: step: 896/529, loss: 0.01855887845158577 2023-01-22 20:55:21.653294: step: 900/529, loss: 0.014985074289143085 2023-01-22 20:55:22.712937: step: 904/529, loss: 0.030968019738793373 2023-01-22 20:55:23.779418: step: 908/529, loss: 0.033924926072359085 2023-01-22 20:55:24.839267: step: 912/529, loss: 0.003549429355189204 2023-01-22 20:55:25.927653: step: 916/529, loss: 0.0009133667917922139 2023-01-22 20:55:26.982167: step: 920/529, loss: 0.006067577749490738 2023-01-22 20:55:28.038700: step: 924/529, loss: 0.0076386407017707825 2023-01-22 20:55:29.091536: step: 928/529, loss: 0.005393982399255037 2023-01-22 20:55:30.131676: step: 932/529, loss: 0.0028873516712337732 2023-01-22 20:55:31.189188: step: 936/529, loss: 7.591808389406651e-05 2023-01-22 20:55:32.242858: step: 940/529, loss: 0.0061194030568003654 2023-01-22 20:55:33.295941: step: 944/529, loss: 0.0017075904179364443 2023-01-22 20:55:34.350934: step: 948/529, loss: 0.013637788593769073 2023-01-22 20:55:35.380882: step: 952/529, loss: 0.0040211803279817104 2023-01-22 20:55:36.424035: step: 956/529, loss: 0.008782819844782352 2023-01-22 20:55:37.479910: step: 960/529, loss: 0.009358215145766735 2023-01-22 20:55:38.527412: step: 964/529, loss: 0.004064645618200302 2023-01-22 20:55:39.569661: step: 968/529, loss: 0.0026523994747549295 2023-01-22 20:55:40.621505: step: 972/529, loss: 0.00479916762560606 2023-01-22 20:55:41.682229: step: 976/529, loss: 0.012865317054092884 2023-01-22 20:55:42.721193: step: 980/529, loss: 0.00043983873911201954 2023-01-22 20:55:43.748227: step: 984/529, loss: 0.0053704543970525265 2023-01-22 20:55:44.808411: step: 988/529, loss: 0.006389323156327009 2023-01-22 20:55:45.872046: step: 992/529, loss: 0.00329760042950511 2023-01-22 20:55:46.918971: step: 996/529, loss: 0.015907973051071167 2023-01-22 20:55:47.957000: step: 1000/529, loss: 0.0013318161945790052 2023-01-22 20:55:49.013109: step: 1004/529, loss: 0.01142899040132761 2023-01-22 20:55:50.069705: step: 1008/529, loss: 0.017719736322760582 2023-01-22 20:55:51.111363: step: 1012/529, loss: 0.001215964788571 2023-01-22 20:55:52.164877: step: 1016/529, loss: 0.0021219556219875813 2023-01-22 20:55:53.217814: step: 1020/529, loss: 0.006792683620005846 2023-01-22 20:55:54.282405: step: 1024/529, loss: 0.01315910741686821 2023-01-22 20:55:55.326565: step: 1028/529, loss: 0.003544391365721822 2023-01-22 20:55:56.383591: step: 1032/529, loss: 0.01260813232511282 2023-01-22 20:55:57.437536: step: 1036/529, loss: 0.01115341205149889 2023-01-22 20:55:58.501975: step: 1040/529, loss: 0.0018189814873039722 2023-01-22 20:55:59.552541: step: 1044/529, loss: 0.0248299241065979 2023-01-22 20:56:00.597333: step: 1048/529, loss: 0.0036285428795963526 2023-01-22 20:56:01.653444: step: 1052/529, loss: 0.005281612277030945 2023-01-22 20:56:02.705214: step: 1056/529, loss: 0.007375138811767101 2023-01-22 20:56:03.741322: step: 1060/529, loss: 0.0008645905181765556 2023-01-22 20:56:04.781993: step: 1064/529, loss: 7.84682561061345e-05 2023-01-22 20:56:05.826343: step: 1068/529, loss: 0.005328230559825897 2023-01-22 20:56:06.883753: step: 1072/529, loss: 0.00221210322342813 2023-01-22 20:56:07.916352: step: 1076/529, loss: 0.008061792701482773 2023-01-22 20:56:08.980071: step: 1080/529, loss: 0.010331466794013977 2023-01-22 20:56:10.033207: step: 1084/529, loss: 0.004379634745419025 2023-01-22 20:56:11.078388: step: 1088/529, loss: 0.0032090258318930864 2023-01-22 20:56:12.150634: step: 1092/529, loss: 0.002273597987368703 2023-01-22 20:56:13.202083: step: 1096/529, loss: 0.011950631625950336 2023-01-22 20:56:14.255384: step: 1100/529, loss: 0.0032040292862802744 2023-01-22 20:56:15.303523: step: 1104/529, loss: 0.02441319450736046 2023-01-22 20:56:16.363531: step: 1108/529, loss: 0.0013557999627664685 2023-01-22 20:56:17.414573: step: 1112/529, loss: 0.015338200144469738 2023-01-22 20:56:18.477801: step: 1116/529, loss: 0.012637415900826454 2023-01-22 20:56:19.530025: step: 1120/529, loss: 0.010589070618152618 2023-01-22 20:56:20.595458: step: 1124/529, loss: 0.00889888871461153 2023-01-22 20:56:21.683003: step: 1128/529, loss: 0.04252238571643829 2023-01-22 20:56:22.752698: step: 1132/529, loss: 0.014491250738501549 2023-01-22 20:56:23.796177: step: 1136/529, loss: 0.02215750329196453 2023-01-22 20:56:24.860282: step: 1140/529, loss: 0.003690396435558796 2023-01-22 20:56:25.908111: step: 1144/529, loss: 0.008793325163424015 2023-01-22 20:56:26.974626: step: 1148/529, loss: 0.0024294236209243536 2023-01-22 20:56:28.027704: step: 1152/529, loss: 0.006633859593421221 2023-01-22 20:56:29.075284: step: 1156/529, loss: 0.002428903244435787 2023-01-22 20:56:30.114772: step: 1160/529, loss: 0.00016320333816111088 2023-01-22 20:56:31.165561: step: 1164/529, loss: 0.01386997476220131 2023-01-22 20:56:32.202965: step: 1168/529, loss: 0.00738599942997098 2023-01-22 20:56:33.245932: step: 1172/529, loss: 0.006210622377693653 2023-01-22 20:56:34.298469: step: 1176/529, loss: 0.005919476505368948 2023-01-22 20:56:35.341955: step: 1180/529, loss: 0.003125750692561269 2023-01-22 20:56:36.404537: step: 1184/529, loss: 0.008433100767433643 2023-01-22 20:56:37.460512: step: 1188/529, loss: 0.0072503420524299145 2023-01-22 20:56:38.533179: step: 1192/529, loss: 0.007994864135980606 2023-01-22 20:56:39.588154: step: 1196/529, loss: 0.027764689177274704 2023-01-22 20:56:40.643399: step: 1200/529, loss: 0.0020462661050260067 2023-01-22 20:56:41.700171: step: 1204/529, loss: 3.4006596251856536e-05 2023-01-22 20:56:42.749243: step: 1208/529, loss: 0.01064429059624672 2023-01-22 20:56:43.824414: step: 1212/529, loss: 0.003012238536030054 2023-01-22 20:56:44.878305: step: 1216/529, loss: 0.03419587388634682 2023-01-22 20:56:45.931989: step: 1220/529, loss: 0.0023410115391016006 2023-01-22 20:56:46.971105: step: 1224/529, loss: 0.015048685483634472 2023-01-22 20:56:48.061275: step: 1228/529, loss: 0.016846148297190666 2023-01-22 20:56:49.119261: step: 1232/529, loss: 0.00884309969842434 2023-01-22 20:56:50.183719: step: 1236/529, loss: 0.0032399767078459263 2023-01-22 20:56:51.229482: step: 1240/529, loss: 0.0198514387011528 2023-01-22 20:56:52.271926: step: 1244/529, loss: 0.001148194307461381 2023-01-22 20:56:53.329210: step: 1248/529, loss: 0.015299823135137558 2023-01-22 20:56:54.379676: step: 1252/529, loss: 0.004690001253038645 2023-01-22 20:56:55.451353: step: 1256/529, loss: 0.006144427694380283 2023-01-22 20:56:56.501008: step: 1260/529, loss: 0.008631566539406776 2023-01-22 20:56:57.550834: step: 1264/529, loss: 0.020800327882170677 2023-01-22 20:56:58.605338: step: 1268/529, loss: 0.003588703228160739 2023-01-22 20:56:59.663540: step: 1272/529, loss: 0.0070841144770383835 2023-01-22 20:57:00.704244: step: 1276/529, loss: 0.0013627351727336645 2023-01-22 20:57:01.763916: step: 1280/529, loss: 0.010242646560072899 2023-01-22 20:57:02.832886: step: 1284/529, loss: 0.012464502826333046 2023-01-22 20:57:03.885204: step: 1288/529, loss: 0.026520315557718277 2023-01-22 20:57:04.937587: step: 1292/529, loss: 0.002117802621796727 2023-01-22 20:57:05.981889: step: 1296/529, loss: 0.014670378528535366 2023-01-22 20:57:07.026629: step: 1300/529, loss: 0.008451445028185844 2023-01-22 20:57:08.085674: step: 1304/529, loss: 0.05057818815112114 2023-01-22 20:57:09.132103: step: 1308/529, loss: 0.0013881870545446873 2023-01-22 20:57:10.184045: step: 1312/529, loss: 0.0060692536644637585 2023-01-22 20:57:11.224985: step: 1316/529, loss: 0.010907678864896297 2023-01-22 20:57:12.279329: step: 1320/529, loss: 0.003996334038674831 2023-01-22 20:57:13.334918: step: 1324/529, loss: 0.03376008942723274 2023-01-22 20:57:14.404137: step: 1328/529, loss: 0.005806329194456339 2023-01-22 20:57:15.444393: step: 1332/529, loss: 0.0014413440367206931 2023-01-22 20:57:16.487523: step: 1336/529, loss: 0.0020342892967164516 2023-01-22 20:57:17.519602: step: 1340/529, loss: 0.0023035078775137663 2023-01-22 20:57:18.560755: step: 1344/529, loss: 0.023279035463929176 2023-01-22 20:57:19.613135: step: 1348/529, loss: 0.006513782311230898 2023-01-22 20:57:20.682950: step: 1352/529, loss: 0.011667712591588497 2023-01-22 20:57:21.743948: step: 1356/529, loss: 0.008554453030228615 2023-01-22 20:57:22.803118: step: 1360/529, loss: 0.002800234593451023 2023-01-22 20:57:23.849106: step: 1364/529, loss: 0.010815789923071861 2023-01-22 20:57:24.909571: step: 1368/529, loss: 0.008118579164147377 2023-01-22 20:57:25.963495: step: 1372/529, loss: 0.001998700201511383 2023-01-22 20:57:27.020835: step: 1376/529, loss: 0.0020198209676891565 2023-01-22 20:57:28.065735: step: 1380/529, loss: 0.013016052544116974 2023-01-22 20:57:29.122799: step: 1384/529, loss: 0.03426613658666611 2023-01-22 20:57:30.187320: step: 1388/529, loss: 0.0 2023-01-22 20:57:31.232418: step: 1392/529, loss: 0.0031165643595159054 2023-01-22 20:57:32.284373: step: 1396/529, loss: 0.009783484973013401 2023-01-22 20:57:33.337154: step: 1400/529, loss: 0.011780040338635445 2023-01-22 20:57:34.391744: step: 1404/529, loss: 0.02556476928293705 2023-01-22 20:57:35.438619: step: 1408/529, loss: 0.0045912424102425575 2023-01-22 20:57:36.487901: step: 1412/529, loss: 0.0018574637360870838 2023-01-22 20:57:37.541755: step: 1416/529, loss: 0.006926057860255241 2023-01-22 20:57:38.573980: step: 1420/529, loss: 0.0006686334381811321 2023-01-22 20:57:39.629789: step: 1424/529, loss: 0.003066191216930747 2023-01-22 20:57:40.672659: step: 1428/529, loss: 0.000542367110028863 2023-01-22 20:57:41.729824: step: 1432/529, loss: 0.017975715920329094 2023-01-22 20:57:42.786947: step: 1436/529, loss: 0.0035078623332083225 2023-01-22 20:57:43.838129: step: 1440/529, loss: 0.008245199918746948 2023-01-22 20:57:44.914315: step: 1444/529, loss: 0.010481682606041431 2023-01-22 20:57:45.970397: step: 1448/529, loss: 0.006614771671593189 2023-01-22 20:57:47.015407: step: 1452/529, loss: 0.002646519336849451 2023-01-22 20:57:48.072783: step: 1456/529, loss: 0.010859166271984577 2023-01-22 20:57:49.137145: step: 1460/529, loss: 0.005819415673613548 2023-01-22 20:57:50.174141: step: 1464/529, loss: 0.007664044853299856 2023-01-22 20:57:51.217535: step: 1468/529, loss: 0.003635488450527191 2023-01-22 20:57:52.259056: step: 1472/529, loss: 0.013249932788312435 2023-01-22 20:57:53.321851: step: 1476/529, loss: 0.024490676820278168 2023-01-22 20:57:54.382595: step: 1480/529, loss: 0.0032691149972379208 2023-01-22 20:57:55.432875: step: 1484/529, loss: 0.00745146069675684 2023-01-22 20:57:56.498642: step: 1488/529, loss: 0.009740645065903664 2023-01-22 20:57:57.546092: step: 1492/529, loss: 0.006744786631315947 2023-01-22 20:57:58.600708: step: 1496/529, loss: 0.004934070631861687 2023-01-22 20:57:59.658082: step: 1500/529, loss: 0.0036018043756484985 2023-01-22 20:58:00.719033: step: 1504/529, loss: 0.008820726536214352 2023-01-22 20:58:01.756282: step: 1508/529, loss: 0.0027645002119243145 2023-01-22 20:58:02.811601: step: 1512/529, loss: 0.006532615050673485 2023-01-22 20:58:03.863652: step: 1516/529, loss: 0.018036650493741035 2023-01-22 20:58:04.933289: step: 1520/529, loss: 0.008637567982077599 2023-01-22 20:58:05.994937: step: 1524/529, loss: 0.00607710424810648 2023-01-22 20:58:07.052567: step: 1528/529, loss: 0.0010983337415382266 2023-01-22 20:58:08.104078: step: 1532/529, loss: 3.8286190829239786e-05 2023-01-22 20:58:09.183741: step: 1536/529, loss: 0.0023076815996319056 2023-01-22 20:58:10.233759: step: 1540/529, loss: 0.002279708394780755 2023-01-22 20:58:11.307739: step: 1544/529, loss: 0.0029560986440628767 2023-01-22 20:58:12.369267: step: 1548/529, loss: 0.0011503908317536116 2023-01-22 20:58:13.423465: step: 1552/529, loss: 0.004796003922820091 2023-01-22 20:58:14.479752: step: 1556/529, loss: 0.003004387952387333 2023-01-22 20:58:15.539642: step: 1560/529, loss: 0.011867834255099297 2023-01-22 20:58:16.567763: step: 1564/529, loss: 0.0016365665942430496 2023-01-22 20:58:17.620674: step: 1568/529, loss: 0.010986359789967537 2023-01-22 20:58:18.670881: step: 1572/529, loss: 0.0015012421645224094 2023-01-22 20:58:19.720944: step: 1576/529, loss: 0.012717810459434986 2023-01-22 20:58:20.801681: step: 1580/529, loss: 0.07196919620037079 2023-01-22 20:58:21.858467: step: 1584/529, loss: 0.0015860494459047914 2023-01-22 20:58:22.902033: step: 1588/529, loss: 0.00582316005602479 2023-01-22 20:58:23.955635: step: 1592/529, loss: 0.022280210629105568 2023-01-22 20:58:24.997909: step: 1596/529, loss: 0.005692686419934034 2023-01-22 20:58:26.076010: step: 1600/529, loss: 0.019194211810827255 2023-01-22 20:58:27.131921: step: 1604/529, loss: 0.007020164746791124 2023-01-22 20:58:28.204992: step: 1608/529, loss: 0.008333721198141575 2023-01-22 20:58:29.286629: step: 1612/529, loss: 0.008188854902982712 2023-01-22 20:58:30.345210: step: 1616/529, loss: 0.006952732801437378 2023-01-22 20:58:31.398444: step: 1620/529, loss: 0.004966230597347021 2023-01-22 20:58:32.442271: step: 1624/529, loss: 0.04598314315080643 2023-01-22 20:58:33.497723: step: 1628/529, loss: 0.001166617264971137 2023-01-22 20:58:34.539389: step: 1632/529, loss: 0.003217097371816635 2023-01-22 20:58:35.589728: step: 1636/529, loss: 0.007968607358634472 2023-01-22 20:58:36.645604: step: 1640/529, loss: 0.004410277586430311 2023-01-22 20:58:37.694221: step: 1644/529, loss: 0.006269694305956364 2023-01-22 20:58:38.736270: step: 1648/529, loss: 0.0014677905710414052 2023-01-22 20:58:39.789989: step: 1652/529, loss: 0.07598873972892761 2023-01-22 20:58:40.844448: step: 1656/529, loss: 0.005206728354096413 2023-01-22 20:58:41.917675: step: 1660/529, loss: 0.005225660279393196 2023-01-22 20:58:42.961922: step: 1664/529, loss: 0.004242300521582365 2023-01-22 20:58:44.004561: step: 1668/529, loss: 0.05876302719116211 2023-01-22 20:58:45.044285: step: 1672/529, loss: 0.03848068788647652 2023-01-22 20:58:46.096707: step: 1676/529, loss: 0.002204981166869402 2023-01-22 20:58:47.144934: step: 1680/529, loss: 0.007873247377574444 2023-01-22 20:58:48.215024: step: 1684/529, loss: 0.01370218675583601 2023-01-22 20:58:49.292164: step: 1688/529, loss: 0.01081059966236353 2023-01-22 20:58:50.335370: step: 1692/529, loss: 0.0035049791913479567 2023-01-22 20:58:51.385710: step: 1696/529, loss: 0.015782542526721954 2023-01-22 20:58:52.446865: step: 1700/529, loss: 0.02889157459139824 2023-01-22 20:58:53.486269: step: 1704/529, loss: 0.00474894093349576 2023-01-22 20:58:54.553533: step: 1708/529, loss: 0.04621373489499092 2023-01-22 20:58:55.610276: step: 1712/529, loss: 0.04079722613096237 2023-01-22 20:58:56.664256: step: 1716/529, loss: 0.0030744050163775682 2023-01-22 20:58:57.732593: step: 1720/529, loss: 0.007096126675605774 2023-01-22 20:58:58.790447: step: 1724/529, loss: 0.008257281966507435 2023-01-22 20:58:59.844408: step: 1728/529, loss: 0.001956286607310176 2023-01-22 20:59:00.913672: step: 1732/529, loss: 0.012075965292751789 2023-01-22 20:59:01.955329: step: 1736/529, loss: 0.0040674954652786255 2023-01-22 20:59:03.003713: step: 1740/529, loss: 0.0010538293281570077 2023-01-22 20:59:04.082689: step: 1744/529, loss: 0.006394412834197283 2023-01-22 20:59:05.137573: step: 1748/529, loss: 0.005157122388482094 2023-01-22 20:59:06.199015: step: 1752/529, loss: 0.0071691726334393024 2023-01-22 20:59:07.256083: step: 1756/529, loss: 0.00511873047798872 2023-01-22 20:59:08.318044: step: 1760/529, loss: 0.004206851590424776 2023-01-22 20:59:09.379153: step: 1764/529, loss: 0.0030038170516490936 2023-01-22 20:59:10.421815: step: 1768/529, loss: 0.006952269468456507 2023-01-22 20:59:11.454506: step: 1772/529, loss: 0.03365598991513252 2023-01-22 20:59:12.509890: step: 1776/529, loss: 0.007901431992650032 2023-01-22 20:59:13.546056: step: 1780/529, loss: 0.03756910189986229 2023-01-22 20:59:14.612754: step: 1784/529, loss: 0.0027143415063619614 2023-01-22 20:59:15.672535: step: 1788/529, loss: 0.007896346040070057 2023-01-22 20:59:16.732168: step: 1792/529, loss: 0.003454769728705287 2023-01-22 20:59:17.800062: step: 1796/529, loss: 0.008012359961867332 2023-01-22 20:59:18.859733: step: 1800/529, loss: 0.007863893173635006 2023-01-22 20:59:19.934881: step: 1804/529, loss: 0.002791995881125331 2023-01-22 20:59:20.992111: step: 1808/529, loss: 0.0037006670609116554 2023-01-22 20:59:22.037601: step: 1812/529, loss: 0.0075706117786467075 2023-01-22 20:59:23.091736: step: 1816/529, loss: 0.002864219481125474 2023-01-22 20:59:24.147272: step: 1820/529, loss: 0.0026690769009292126 2023-01-22 20:59:25.193430: step: 1824/529, loss: 0.004721864592283964 2023-01-22 20:59:26.254500: step: 1828/529, loss: 0.004358608741313219 2023-01-22 20:59:27.305933: step: 1832/529, loss: 0.006964650470763445 2023-01-22 20:59:28.348694: step: 1836/529, loss: 0.003265456762164831 2023-01-22 20:59:29.417676: step: 1840/529, loss: 0.02012091502547264 2023-01-22 20:59:30.475177: step: 1844/529, loss: 0.039580173790454865 2023-01-22 20:59:31.520055: step: 1848/529, loss: 0.005878944415599108 2023-01-22 20:59:32.587794: step: 1852/529, loss: 0.042363233864307404 2023-01-22 20:59:33.634445: step: 1856/529, loss: 0.0034193145111203194 2023-01-22 20:59:34.688920: step: 1860/529, loss: 0.010416567325592041 2023-01-22 20:59:35.734964: step: 1864/529, loss: 0.00946347787976265 2023-01-22 20:59:36.782548: step: 1868/529, loss: 0.015299217775464058 2023-01-22 20:59:37.838606: step: 1872/529, loss: 0.007803606800734997 2023-01-22 20:59:38.882435: step: 1876/529, loss: 0.034077953547239304 2023-01-22 20:59:39.936290: step: 1880/529, loss: 0.018398471176624298 2023-01-22 20:59:40.984929: step: 1884/529, loss: 0.025905737653374672 2023-01-22 20:59:42.065119: step: 1888/529, loss: 0.002401346806436777 2023-01-22 20:59:43.105832: step: 1892/529, loss: 0.014000258408486843 2023-01-22 20:59:44.156558: step: 1896/529, loss: 0.011593669652938843 2023-01-22 20:59:45.233449: step: 1900/529, loss: 0.018433650955557823 2023-01-22 20:59:46.284026: step: 1904/529, loss: 0.00021729448053520173 2023-01-22 20:59:47.322210: step: 1908/529, loss: 0.0025396591518074274 2023-01-22 20:59:48.394673: step: 1912/529, loss: 0.01734977960586548 2023-01-22 20:59:49.469492: step: 1916/529, loss: 0.021788204088807106 2023-01-22 20:59:50.529255: step: 1920/529, loss: 0.010209101252257824 2023-01-22 20:59:51.556097: step: 1924/529, loss: 0.0027999449521303177 2023-01-22 20:59:52.627625: step: 1928/529, loss: 0.006282568909227848 2023-01-22 20:59:53.655434: step: 1932/529, loss: 0.0023444530088454485 2023-01-22 20:59:54.714492: step: 1936/529, loss: 0.0015021865256130695 2023-01-22 20:59:55.767675: step: 1940/529, loss: 0.0232656579464674 2023-01-22 20:59:56.807359: step: 1944/529, loss: 0.0429142564535141 2023-01-22 20:59:57.858269: step: 1948/529, loss: 0.00369791011326015 2023-01-22 20:59:58.933007: step: 1952/529, loss: 0.004101696889847517 2023-01-22 20:59:59.984765: step: 1956/529, loss: 0.006419582292437553 2023-01-22 21:00:01.032491: step: 1960/529, loss: 0.009770194999873638 2023-01-22 21:00:02.083419: step: 1964/529, loss: 0.01119499932974577 2023-01-22 21:00:03.137990: step: 1968/529, loss: 0.0066248755902051926 2023-01-22 21:00:04.192852: step: 1972/529, loss: 0.014222013764083385 2023-01-22 21:00:05.255366: step: 1976/529, loss: 0.0012636489700526 2023-01-22 21:00:06.289314: step: 1980/529, loss: 0.0044588493183255196 2023-01-22 21:00:07.324459: step: 1984/529, loss: 0.006572291254997253 2023-01-22 21:00:08.377239: step: 1988/529, loss: 0.011034170165657997 2023-01-22 21:00:09.413188: step: 1992/529, loss: 0.013087132014334202 2023-01-22 21:00:10.453918: step: 1996/529, loss: 0.002118147676810622 2023-01-22 21:00:11.501407: step: 2000/529, loss: 0.0017166564939543605 2023-01-22 21:00:12.548374: step: 2004/529, loss: 0.0007918201736174524 2023-01-22 21:00:13.605716: step: 2008/529, loss: 0.0058188652619719505 2023-01-22 21:00:14.651637: step: 2012/529, loss: 0.004463243763893843 2023-01-22 21:00:15.703885: step: 2016/529, loss: 0.01546636875718832 2023-01-22 21:00:16.730012: step: 2020/529, loss: 0.006115283817052841 2023-01-22 21:00:17.779143: step: 2024/529, loss: 0.0038313865661621094 2023-01-22 21:00:18.819196: step: 2028/529, loss: 0.010081807151436806 2023-01-22 21:00:19.880739: step: 2032/529, loss: 0.011871806345880032 2023-01-22 21:00:20.930134: step: 2036/529, loss: 0.004678971599787474 2023-01-22 21:00:21.988605: step: 2040/529, loss: 0.005616115406155586 2023-01-22 21:00:23.030586: step: 2044/529, loss: 0.02291368879377842 2023-01-22 21:00:24.073703: step: 2048/529, loss: 0.0019425592618063092 2023-01-22 21:00:25.138395: step: 2052/529, loss: 0.0016388616058975458 2023-01-22 21:00:26.187682: step: 2056/529, loss: 0.01724652759730816 2023-01-22 21:00:27.234218: step: 2060/529, loss: 0.004556211177259684 2023-01-22 21:00:28.299718: step: 2064/529, loss: 0.0035861663054674864 2023-01-22 21:00:29.341826: step: 2068/529, loss: 0.0020745142828673124 2023-01-22 21:00:30.390551: step: 2072/529, loss: 0.004656584933400154 2023-01-22 21:00:31.438752: step: 2076/529, loss: 0.008734665811061859 2023-01-22 21:00:32.490113: step: 2080/529, loss: 0.003935365937650204 2023-01-22 21:00:33.530552: step: 2084/529, loss: 0.008014556020498276 2023-01-22 21:00:34.578096: step: 2088/529, loss: 0.0054819961078464985 2023-01-22 21:00:35.621113: step: 2092/529, loss: 0.003953464329242706 2023-01-22 21:00:36.675881: step: 2096/529, loss: 0.003167897928506136 2023-01-22 21:00:37.729315: step: 2100/529, loss: 0.0013746214099228382 2023-01-22 21:00:38.776481: step: 2104/529, loss: 0.0053943064995110035 2023-01-22 21:00:39.821282: step: 2108/529, loss: 0.00604413915425539 2023-01-22 21:00:40.851052: step: 2112/529, loss: 0.00036190840182825923 2023-01-22 21:00:41.915711: step: 2116/529, loss: 0.01194050908088684 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3374405961981567, 'r': 0.317591149362971, 'f1': 0.32721512358609134}, 'combined': 0.24110588053711993, 'stategy': 1, 'epoch': 7} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3785966798298504, 'r': 0.31880086914119166, 'f1': 0.3461352875753725}, 'combined': 0.24351226261583997, 'stategy': 1, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3113233827927981, 'r': 0.3414514520953269, 'f1': 0.32569215430631177}, 'combined': 0.23998369264675604, 'stategy': 1, 'epoch': 7} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38172126845400506, 'r': 0.33246691123413347, 'f1': 0.3553956637330392}, 'combined': 0.2523309212504578, 'stategy': 1, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32603922716627637, 'r': 0.3396499728923828, 'f1': 0.3327054567180032}, 'combined': 0.24515138916063395, 'stategy': 1, 'epoch': 7} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37727408963948444, 'r': 0.29990985659822894, 'f1': 0.3341727716690037}, 'combined': 0.23726266788499262, 'stategy': 1, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34188034188034183, 'r': 0.38095238095238093, 'f1': 0.36036036036036034}, 'combined': 0.2402402402402402, 'stategy': 1, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3629032258064516, 'r': 0.4891304347826087, 'f1': 0.41666666666666663}, 'combined': 0.20833333333333331, 'stategy': 1, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 8 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 21:03:14.729669: step: 4/529, loss: 0.00418679416179657 2023-01-22 21:03:15.758917: step: 8/529, loss: 0.00024320787633769214 2023-01-22 21:03:16.786191: step: 12/529, loss: 0.0021170873660594225 2023-01-22 21:03:17.846635: step: 16/529, loss: 0.0034182819072157145 2023-01-22 21:03:18.903761: step: 20/529, loss: 0.015641946345567703 2023-01-22 21:03:19.927803: step: 24/529, loss: 0.0004319738072808832 2023-01-22 21:03:20.995242: step: 28/529, loss: 0.0016258681425824761 2023-01-22 21:03:22.054493: step: 32/529, loss: 0.005644837860018015 2023-01-22 21:03:23.105556: step: 36/529, loss: 0.0015868940390646458 2023-01-22 21:03:24.144413: step: 40/529, loss: 0.0026029327418655157 2023-01-22 21:03:25.191641: step: 44/529, loss: 0.0011572610819712281 2023-01-22 21:03:26.232040: step: 48/529, loss: 0.09177444875240326 2023-01-22 21:03:27.298585: step: 52/529, loss: 0.006089744623750448 2023-01-22 21:03:28.352925: step: 56/529, loss: 0.006039205472916365 2023-01-22 21:03:29.389578: step: 60/529, loss: 0.003792577888816595 2023-01-22 21:03:30.425813: step: 64/529, loss: 0.008594024926424026 2023-01-22 21:03:31.457414: step: 68/529, loss: 0.020279861986637115 2023-01-22 21:03:32.499042: step: 72/529, loss: 0.00022051918494980782 2023-01-22 21:03:33.546715: step: 76/529, loss: 0.005525319371372461 2023-01-22 21:03:34.594992: step: 80/529, loss: 0.004667379893362522 2023-01-22 21:03:35.665499: step: 84/529, loss: 0.009687199257314205 2023-01-22 21:03:36.743726: step: 88/529, loss: 0.004570450633764267 2023-01-22 21:03:37.802459: step: 92/529, loss: 0.005088334903120995 2023-01-22 21:03:38.877349: step: 96/529, loss: 0.006963053252547979 2023-01-22 21:03:39.932310: step: 100/529, loss: 0.01978379860520363 2023-01-22 21:03:40.983345: step: 104/529, loss: 0.004413002170622349 2023-01-22 21:03:42.046935: step: 108/529, loss: 0.002835626946762204 2023-01-22 21:03:43.112609: step: 112/529, loss: 0.0014767611864954233 2023-01-22 21:03:44.152449: step: 116/529, loss: 0.004016049671918154 2023-01-22 21:03:45.193520: step: 120/529, loss: 0.034448012709617615 2023-01-22 21:03:46.238341: step: 124/529, loss: 0.0014202462043613195 2023-01-22 21:03:47.301659: step: 128/529, loss: 0.0018970968667417765 2023-01-22 21:03:48.355231: step: 132/529, loss: 0.0013371362583711743 2023-01-22 21:03:49.438604: step: 136/529, loss: 0.005217510275542736 2023-01-22 21:03:50.483367: step: 140/529, loss: 0.0025468477979302406 2023-01-22 21:03:51.521912: step: 144/529, loss: 0.0036807500291615725 2023-01-22 21:03:52.575992: step: 148/529, loss: 0.00315466639585793 2023-01-22 21:03:53.653626: step: 152/529, loss: 0.019548948854207993 2023-01-22 21:03:54.725466: step: 156/529, loss: 0.019182687625288963 2023-01-22 21:03:55.766649: step: 160/529, loss: 0.00523687107488513 2023-01-22 21:03:56.819128: step: 164/529, loss: 0.026018721982836723 2023-01-22 21:03:57.874053: step: 168/529, loss: 0.002533954568207264 2023-01-22 21:03:58.908981: step: 172/529, loss: 0.0016875425353646278 2023-01-22 21:03:59.956654: step: 176/529, loss: 0.009932790882885456 2023-01-22 21:04:01.008345: step: 180/529, loss: 0.01693861186504364 2023-01-22 21:04:02.056852: step: 184/529, loss: 0.00642986036837101 2023-01-22 21:04:03.100818: step: 188/529, loss: 0.007867802865803242 2023-01-22 21:04:04.161349: step: 192/529, loss: 0.017002422362565994 2023-01-22 21:04:05.211132: step: 196/529, loss: 0.0017728491220623255 2023-01-22 21:04:06.266086: step: 200/529, loss: 0.003194941207766533 2023-01-22 21:04:07.319553: step: 204/529, loss: 0.003827095264568925 2023-01-22 21:04:08.357077: step: 208/529, loss: 0.00481391279026866 2023-01-22 21:04:09.401267: step: 212/529, loss: 0.004151469562202692 2023-01-22 21:04:10.441165: step: 216/529, loss: 0.0036621044855564833 2023-01-22 21:04:11.489492: step: 220/529, loss: 0.012713721953332424 2023-01-22 21:04:12.568207: step: 224/529, loss: 0.011146667413413525 2023-01-22 21:04:13.637792: step: 228/529, loss: 0.012993094511330128 2023-01-22 21:04:14.695017: step: 232/529, loss: 0.02289563976228237 2023-01-22 21:04:15.752236: step: 236/529, loss: 0.00946480967104435 2023-01-22 21:04:16.805666: step: 240/529, loss: 0.0014833833556622267 2023-01-22 21:04:17.871316: step: 244/529, loss: 0.004948390647768974 2023-01-22 21:04:18.925240: step: 248/529, loss: 0.004082354251295328 2023-01-22 21:04:19.990485: step: 252/529, loss: 0.0009998567402362823 2023-01-22 21:04:21.045423: step: 256/529, loss: 0.005059545394033194 2023-01-22 21:04:22.097873: step: 260/529, loss: 0.0012378237443044782 2023-01-22 21:04:23.146841: step: 264/529, loss: 0.04017799347639084 2023-01-22 21:04:24.192800: step: 268/529, loss: 0.004544986877590418 2023-01-22 21:04:25.248443: step: 272/529, loss: 0.0021344891283661127 2023-01-22 21:04:26.309293: step: 276/529, loss: 0.01584240049123764 2023-01-22 21:04:27.356264: step: 280/529, loss: 0.012545171193778515 2023-01-22 21:04:28.427544: step: 284/529, loss: 0.012593216262757778 2023-01-22 21:04:29.484293: step: 288/529, loss: 0.003506703535094857 2023-01-22 21:04:30.521512: step: 292/529, loss: 0.015691058710217476 2023-01-22 21:04:31.573978: step: 296/529, loss: 0.007280253805220127 2023-01-22 21:04:32.644544: step: 300/529, loss: 0.0170859694480896 2023-01-22 21:04:33.700226: step: 304/529, loss: 0.005679363384842873 2023-01-22 21:04:34.736916: step: 308/529, loss: 0.0002973276423290372 2023-01-22 21:04:35.770526: step: 312/529, loss: 0.004358048550784588 2023-01-22 21:04:36.838299: step: 316/529, loss: 0.008698815479874611 2023-01-22 21:04:37.897380: step: 320/529, loss: 0.005490819923579693 2023-01-22 21:04:38.963263: step: 324/529, loss: 0.01282102894037962 2023-01-22 21:04:40.017764: step: 328/529, loss: 0.010726699605584145 2023-01-22 21:04:41.077958: step: 332/529, loss: 0.004520405549556017 2023-01-22 21:04:42.112288: step: 336/529, loss: 0.005283476784825325 2023-01-22 21:04:43.163849: step: 340/529, loss: 0.0027138590812683105 2023-01-22 21:04:44.219567: step: 344/529, loss: 0.003003682941198349 2023-01-22 21:04:45.282717: step: 348/529, loss: 0.011491017416119576 2023-01-22 21:04:46.343670: step: 352/529, loss: 0.007983622141182423 2023-01-22 21:04:47.399196: step: 356/529, loss: 0.011500740423798561 2023-01-22 21:04:48.462118: step: 360/529, loss: 0.031198058277368546 2023-01-22 21:04:49.548810: step: 364/529, loss: 0.0029806382954120636 2023-01-22 21:04:50.622122: step: 368/529, loss: 0.019409112632274628 2023-01-22 21:04:51.672489: step: 372/529, loss: 0.005313187837600708 2023-01-22 21:04:52.732679: step: 376/529, loss: 0.030536234378814697 2023-01-22 21:04:53.780392: step: 380/529, loss: 0.002520354464650154 2023-01-22 21:04:54.857165: step: 384/529, loss: 0.0021227183751761913 2023-01-22 21:04:55.917502: step: 388/529, loss: 0.010852312669157982 2023-01-22 21:04:56.987195: step: 392/529, loss: 0.02787388116121292 2023-01-22 21:04:58.041985: step: 396/529, loss: 0.005178820341825485 2023-01-22 21:04:59.078417: step: 400/529, loss: 0.006441417150199413 2023-01-22 21:05:00.119843: step: 404/529, loss: 0.003956621512770653 2023-01-22 21:05:01.171275: step: 408/529, loss: 0.0022857231087982655 2023-01-22 21:05:02.219641: step: 412/529, loss: 0.002254004590213299 2023-01-22 21:05:03.273044: step: 416/529, loss: 0.005189592484384775 2023-01-22 21:05:04.318333: step: 420/529, loss: 0.003928902093321085 2023-01-22 21:05:05.383529: step: 424/529, loss: 0.003885621204972267 2023-01-22 21:05:06.423541: step: 428/529, loss: 0.04855571687221527 2023-01-22 21:05:07.462370: step: 432/529, loss: 0.01071907952427864 2023-01-22 21:05:08.510050: step: 436/529, loss: 0.0030133952386677265 2023-01-22 21:05:09.556045: step: 440/529, loss: 0.00532871950417757 2023-01-22 21:05:10.609702: step: 444/529, loss: 0.00952321756631136 2023-01-22 21:05:11.659948: step: 448/529, loss: 0.00878421775996685 2023-01-22 21:05:12.714199: step: 452/529, loss: 0.006957669742405415 2023-01-22 21:05:13.762226: step: 456/529, loss: 0.0015822151908650994 2023-01-22 21:05:14.801334: step: 460/529, loss: 0.010986107401549816 2023-01-22 21:05:15.860716: step: 464/529, loss: 0.0009427077602595091 2023-01-22 21:05:16.894363: step: 468/529, loss: 0.0008773641893640161 2023-01-22 21:05:17.941811: step: 472/529, loss: 0.019628306850790977 2023-01-22 21:05:18.989313: step: 476/529, loss: 0.012755539268255234 2023-01-22 21:05:20.035780: step: 480/529, loss: 0.006823853589594364 2023-01-22 21:05:21.085486: step: 484/529, loss: 0.006882214453071356 2023-01-22 21:05:22.126517: step: 488/529, loss: 0.012533913366496563 2023-01-22 21:05:23.173532: step: 492/529, loss: 0.004268737509846687 2023-01-22 21:05:24.218880: step: 496/529, loss: 0.02475816383957863 2023-01-22 21:05:25.287169: step: 500/529, loss: 0.0008665351197123528 2023-01-22 21:05:26.322447: step: 504/529, loss: 0.0058772689662873745 2023-01-22 21:05:27.372223: step: 508/529, loss: 0.006087447050958872 2023-01-22 21:05:28.429072: step: 512/529, loss: 0.0039688218384981155 2023-01-22 21:05:29.486131: step: 516/529, loss: 0.002285595517605543 2023-01-22 21:05:30.531608: step: 520/529, loss: 0.003919811919331551 2023-01-22 21:05:31.576296: step: 524/529, loss: 0.0034155845642089844 2023-01-22 21:05:32.622003: step: 528/529, loss: 0.016393642872571945 2023-01-22 21:05:33.667756: step: 532/529, loss: 0.005018963012844324 2023-01-22 21:05:34.738718: step: 536/529, loss: 0.009512615390121937 2023-01-22 21:05:35.786787: step: 540/529, loss: 0.013742680661380291 2023-01-22 21:05:36.839939: step: 544/529, loss: 0.006120592355728149 2023-01-22 21:05:37.893063: step: 548/529, loss: 0.0055603631772100925 2023-01-22 21:05:38.931852: step: 552/529, loss: 0.0033476371318101883 2023-01-22 21:05:39.993024: step: 556/529, loss: 0.012790605425834656 2023-01-22 21:05:41.048953: step: 560/529, loss: 0.0031049575190991163 2023-01-22 21:05:42.087414: step: 564/529, loss: 8.340136264450848e-05 2023-01-22 21:05:43.138670: step: 568/529, loss: 0.044959839433431625 2023-01-22 21:05:44.169143: step: 572/529, loss: 0.002194754546508193 2023-01-22 21:05:45.227964: step: 576/529, loss: 0.0023691949900239706 2023-01-22 21:05:46.270902: step: 580/529, loss: 0.0014328605029731989 2023-01-22 21:05:47.332530: step: 584/529, loss: 0.009362118318676949 2023-01-22 21:05:48.371921: step: 588/529, loss: 0.005044514779001474 2023-01-22 21:05:49.438938: step: 592/529, loss: 0.0028894487768411636 2023-01-22 21:05:50.474492: step: 596/529, loss: 0.0012223867233842611 2023-01-22 21:05:51.535991: step: 600/529, loss: 0.0030365968123078346 2023-01-22 21:05:52.587381: step: 604/529, loss: 0.006186846178025007 2023-01-22 21:05:53.629493: step: 608/529, loss: 0.011325540021061897 2023-01-22 21:05:54.679237: step: 612/529, loss: 0.000886776892002672 2023-01-22 21:05:55.713083: step: 616/529, loss: 0.0035998898092657328 2023-01-22 21:05:56.774746: step: 620/529, loss: 0.0025763448793441057 2023-01-22 21:05:57.819619: step: 624/529, loss: 0.004772564861923456 2023-01-22 21:05:58.879600: step: 628/529, loss: 0.0006545006181113422 2023-01-22 21:05:59.944899: step: 632/529, loss: 0.0658249482512474 2023-01-22 21:06:00.995183: step: 636/529, loss: 0.0037241396494209766 2023-01-22 21:06:02.039196: step: 640/529, loss: 0.007055758032947779 2023-01-22 21:06:03.110885: step: 644/529, loss: 0.014152373187243938 2023-01-22 21:06:04.171903: step: 648/529, loss: 0.031136812642216682 2023-01-22 21:06:05.215878: step: 652/529, loss: 0.01173525582998991 2023-01-22 21:06:06.274338: step: 656/529, loss: 0.003205682383850217 2023-01-22 21:06:07.323921: step: 660/529, loss: 0.013180609792470932 2023-01-22 21:06:08.395309: step: 664/529, loss: 0.0017198617570102215 2023-01-22 21:06:09.470138: step: 668/529, loss: 0.010948908515274525 2023-01-22 21:06:10.525543: step: 672/529, loss: 0.004982938524335623 2023-01-22 21:06:11.588763: step: 676/529, loss: 0.011175957508385181 2023-01-22 21:06:12.639553: step: 680/529, loss: 0.004167577251791954 2023-01-22 21:06:13.696110: step: 684/529, loss: 0.004932687617838383 2023-01-22 21:06:14.742601: step: 688/529, loss: 0.0017282726475968957 2023-01-22 21:06:15.791398: step: 692/529, loss: 0.003486054250970483 2023-01-22 21:06:16.834531: step: 696/529, loss: 0.021378057077527046 2023-01-22 21:06:17.875942: step: 700/529, loss: 0.0025207463186234236 2023-01-22 21:06:18.915008: step: 704/529, loss: 0.0016067586839199066 2023-01-22 21:06:19.961981: step: 708/529, loss: 0.0031255383510142565 2023-01-22 21:06:20.996476: step: 712/529, loss: 0.01948780193924904 2023-01-22 21:06:22.054050: step: 716/529, loss: 0.0009144945070147514 2023-01-22 21:06:23.098099: step: 720/529, loss: 0.00979230459779501 2023-01-22 21:06:24.157465: step: 724/529, loss: 0.00207799905911088 2023-01-22 21:06:25.194995: step: 728/529, loss: 0.001519421930424869 2023-01-22 21:06:26.230851: step: 732/529, loss: 0.0032010162249207497 2023-01-22 21:06:27.288769: step: 736/529, loss: 0.02200601063668728 2023-01-22 21:06:28.345322: step: 740/529, loss: 0.005108694080263376 2023-01-22 21:06:29.409943: step: 744/529, loss: 0.006854139268398285 2023-01-22 21:06:30.453275: step: 748/529, loss: 0.0009267330169677734 2023-01-22 21:06:31.493270: step: 752/529, loss: 0.012000516057014465 2023-01-22 21:06:32.533650: step: 756/529, loss: 0.0005193682154640555 2023-01-22 21:06:33.592917: step: 760/529, loss: 0.028842847794294357 2023-01-22 21:06:34.643492: step: 764/529, loss: 0.00018473275122232735 2023-01-22 21:06:35.712686: step: 768/529, loss: 0.0016618078807368875 2023-01-22 21:06:36.764993: step: 772/529, loss: 0.0046205357648432255 2023-01-22 21:06:37.813673: step: 776/529, loss: 0.002604840788990259 2023-01-22 21:06:38.844462: step: 780/529, loss: 0.0010007359087467194 2023-01-22 21:06:39.908336: step: 784/529, loss: 0.005958912428468466 2023-01-22 21:06:40.968081: step: 788/529, loss: 0.002130286069586873 2023-01-22 21:06:42.017462: step: 792/529, loss: 0.0007109068683348596 2023-01-22 21:06:43.072474: step: 796/529, loss: 0.001822280348278582 2023-01-22 21:06:44.136284: step: 800/529, loss: 0.005782265681773424 2023-01-22 21:06:45.200184: step: 804/529, loss: 0.010541227646172047 2023-01-22 21:06:46.242812: step: 808/529, loss: 0.0004067984991706908 2023-01-22 21:06:47.288813: step: 812/529, loss: 0.0033398461528122425 2023-01-22 21:06:48.327148: step: 816/529, loss: 0.0020836712792515755 2023-01-22 21:06:49.375719: step: 820/529, loss: 0.00021697222837246954 2023-01-22 21:06:50.432611: step: 824/529, loss: 0.004366052802652121 2023-01-22 21:06:51.467099: step: 828/529, loss: 0.0027180283796042204 2023-01-22 21:06:52.517864: step: 832/529, loss: 0.005855072755366564 2023-01-22 21:06:53.562320: step: 836/529, loss: 0.0015405826270580292 2023-01-22 21:06:54.608630: step: 840/529, loss: 0.0014148568734526634 2023-01-22 21:06:55.655613: step: 844/529, loss: 0.0060127414762973785 2023-01-22 21:06:56.694727: step: 848/529, loss: 0.0026360999327152967 2023-01-22 21:06:57.735178: step: 852/529, loss: 0.006164724472910166 2023-01-22 21:06:58.787768: step: 856/529, loss: 0.0005568200722336769 2023-01-22 21:06:59.839873: step: 860/529, loss: 0.0031530912965536118 2023-01-22 21:07:00.876973: step: 864/529, loss: 0.00020890013547614217 2023-01-22 21:07:01.943830: step: 868/529, loss: 0.010337824001908302 2023-01-22 21:07:02.994417: step: 872/529, loss: 0.003372880630195141 2023-01-22 21:07:04.047794: step: 876/529, loss: 0.003235666314139962 2023-01-22 21:07:05.108174: step: 880/529, loss: 0.01767680235207081 2023-01-22 21:07:06.156422: step: 884/529, loss: 0.004162862431257963 2023-01-22 21:07:07.197783: step: 888/529, loss: 0.01351142767816782 2023-01-22 21:07:08.256674: step: 892/529, loss: 0.0017237719148397446 2023-01-22 21:07:09.296652: step: 896/529, loss: 0.004608328454196453 2023-01-22 21:07:10.341424: step: 900/529, loss: 0.006700914818793535 2023-01-22 21:07:11.388025: step: 904/529, loss: 0.0010789984371513128 2023-01-22 21:07:12.436865: step: 908/529, loss: 0.0014517439994961023 2023-01-22 21:07:13.478566: step: 912/529, loss: 0.00187535653822124 2023-01-22 21:07:14.532234: step: 916/529, loss: 0.11291077733039856 2023-01-22 21:07:15.571955: step: 920/529, loss: 0.003345996607095003 2023-01-22 21:07:16.624124: step: 924/529, loss: 0.037647683173418045 2023-01-22 21:07:17.655552: step: 928/529, loss: 0.0003483584150671959 2023-01-22 21:07:18.703240: step: 932/529, loss: 0.0035272075328975916 2023-01-22 21:07:19.740488: step: 936/529, loss: 0.0019465115619823337 2023-01-22 21:07:20.774574: step: 940/529, loss: 0.0008050024043768644 2023-01-22 21:07:21.828194: step: 944/529, loss: 0.010214627720415592 2023-01-22 21:07:22.869717: step: 948/529, loss: 0.011353747919201851 2023-01-22 21:07:23.930598: step: 952/529, loss: 0.004682374652475119 2023-01-22 21:07:24.964481: step: 956/529, loss: 0.029736792668700218 2023-01-22 21:07:26.013816: step: 960/529, loss: 0.06589806079864502 2023-01-22 21:07:27.063225: step: 964/529, loss: 0.018441027030348778 2023-01-22 21:07:28.119098: step: 968/529, loss: 0.010233191773295403 2023-01-22 21:07:29.179212: step: 972/529, loss: 0.010023259557783604 2023-01-22 21:07:30.240100: step: 976/529, loss: 0.003923496697098017 2023-01-22 21:07:31.283066: step: 980/529, loss: 0.0017873788019642234 2023-01-22 21:07:32.311739: step: 984/529, loss: 0.002645807806402445 2023-01-22 21:07:33.365105: step: 988/529, loss: 0.0050481995567679405 2023-01-22 21:07:34.422829: step: 992/529, loss: 0.0012396216625347733 2023-01-22 21:07:35.473528: step: 996/529, loss: 0.006174897309392691 2023-01-22 21:07:36.533161: step: 1000/529, loss: 0.005880600307136774 2023-01-22 21:07:37.600038: step: 1004/529, loss: 0.010341616347432137 2023-01-22 21:07:38.683571: step: 1008/529, loss: 0.005105757620185614 2023-01-22 21:07:39.729011: step: 1012/529, loss: 0.010343586094677448 2023-01-22 21:07:40.773126: step: 1016/529, loss: 0.0034047835506498814 2023-01-22 21:07:41.817637: step: 1020/529, loss: 0.00567442923784256 2023-01-22 21:07:42.879136: step: 1024/529, loss: 0.010222043842077255 2023-01-22 21:07:43.928452: step: 1028/529, loss: 0.0197332501411438 2023-01-22 21:07:44.977260: step: 1032/529, loss: 0.003273306880146265 2023-01-22 21:07:46.017500: step: 1036/529, loss: 0.00402007857337594 2023-01-22 21:07:47.062250: step: 1040/529, loss: 0.003292643465101719 2023-01-22 21:07:48.093705: step: 1044/529, loss: 0.006321679335087538 2023-01-22 21:07:49.147326: step: 1048/529, loss: 0.0028350367210805416 2023-01-22 21:07:50.202211: step: 1052/529, loss: 0.0015393736539408565 2023-01-22 21:07:51.237111: step: 1056/529, loss: 0.00930643081665039 2023-01-22 21:07:52.274715: step: 1060/529, loss: 0.009996904991567135 2023-01-22 21:07:53.320230: step: 1064/529, loss: 1.867491846496705e-05 2023-01-22 21:07:54.380290: step: 1068/529, loss: 0.013489971868693829 2023-01-22 21:07:55.446226: step: 1072/529, loss: 0.010666463524103165 2023-01-22 21:07:56.492659: step: 1076/529, loss: 0.0023364105727523565 2023-01-22 21:07:57.538249: step: 1080/529, loss: 0.004974940791726112 2023-01-22 21:07:58.605003: step: 1084/529, loss: 0.010700489394366741 2023-01-22 21:07:59.639754: step: 1088/529, loss: 0.006985725834965706 2023-01-22 21:08:00.684271: step: 1092/529, loss: 0.004594434984028339 2023-01-22 21:08:01.743525: step: 1096/529, loss: 0.004355976358056068 2023-01-22 21:08:02.800675: step: 1100/529, loss: 0.0022873496636748314 2023-01-22 21:08:03.862052: step: 1104/529, loss: 0.09679481387138367 2023-01-22 21:08:04.906936: step: 1108/529, loss: 0.0036758319474756718 2023-01-22 21:08:05.968544: step: 1112/529, loss: 0.001414563157595694 2023-01-22 21:08:07.021755: step: 1116/529, loss: 0.005843575578182936 2023-01-22 21:08:08.064606: step: 1120/529, loss: 0.003531578229740262 2023-01-22 21:08:09.128634: step: 1124/529, loss: 0.0021109702065587044 2023-01-22 21:08:10.162081: step: 1128/529, loss: 0.0007224337314255536 2023-01-22 21:08:11.210668: step: 1132/529, loss: 0.021974900737404823 2023-01-22 21:08:12.254164: step: 1136/529, loss: 0.0061231935396790504 2023-01-22 21:08:13.284977: step: 1140/529, loss: 0.0019948554690927267 2023-01-22 21:08:14.348447: step: 1144/529, loss: 0.0036585237830877304 2023-01-22 21:08:15.406631: step: 1148/529, loss: 0.016825184226036072 2023-01-22 21:08:16.472801: step: 1152/529, loss: 0.021752147004008293 2023-01-22 21:08:17.521518: step: 1156/529, loss: 0.006467336788773537 2023-01-22 21:08:18.560035: step: 1160/529, loss: 0.001560449949465692 2023-01-22 21:08:19.610220: step: 1164/529, loss: 0.005150810815393925 2023-01-22 21:08:20.651975: step: 1168/529, loss: 0.002788701094686985 2023-01-22 21:08:21.697019: step: 1172/529, loss: 0.00016972224693745375 2023-01-22 21:08:22.746753: step: 1176/529, loss: 0.0017187473131343722 2023-01-22 21:08:23.795407: step: 1180/529, loss: 0.002455262467265129 2023-01-22 21:08:24.854299: step: 1184/529, loss: 0.0024022443685680628 2023-01-22 21:08:25.909970: step: 1188/529, loss: 0.005293928552418947 2023-01-22 21:08:26.935567: step: 1192/529, loss: 0.010014212690293789 2023-01-22 21:08:27.990221: step: 1196/529, loss: 0.0057226428762078285 2023-01-22 21:08:29.043051: step: 1200/529, loss: 0.015261152759194374 2023-01-22 21:08:30.084083: step: 1204/529, loss: 0.0036069059278815985 2023-01-22 21:08:31.142608: step: 1208/529, loss: 0.022153861820697784 2023-01-22 21:08:32.197648: step: 1212/529, loss: 0.020718999207019806 2023-01-22 21:08:33.249876: step: 1216/529, loss: 0.02209906093776226 2023-01-22 21:08:34.287760: step: 1220/529, loss: 0.0646977424621582 2023-01-22 21:08:35.334314: step: 1224/529, loss: 0.004512893036007881 2023-01-22 21:08:36.369187: step: 1228/529, loss: 0.001222978695295751 2023-01-22 21:08:37.418950: step: 1232/529, loss: 0.010980901308357716 2023-01-22 21:08:38.472492: step: 1236/529, loss: 0.005596710368990898 2023-01-22 21:08:39.519774: step: 1240/529, loss: 0.0060578021220862865 2023-01-22 21:08:40.563975: step: 1244/529, loss: 0.006224739830940962 2023-01-22 21:08:41.606699: step: 1248/529, loss: 0.009836849756538868 2023-01-22 21:08:42.671420: step: 1252/529, loss: 0.007444403599947691 2023-01-22 21:08:43.715704: step: 1256/529, loss: 0.007615205831825733 2023-01-22 21:08:44.778667: step: 1260/529, loss: 0.009487012401223183 2023-01-22 21:08:45.831503: step: 1264/529, loss: 0.0037185424007475376 2023-01-22 21:08:46.889801: step: 1268/529, loss: 0.00032172995270229876 2023-01-22 21:08:47.957868: step: 1272/529, loss: 0.0013614734634757042 2023-01-22 21:08:48.995517: step: 1276/529, loss: 0.0034371695946902037 2023-01-22 21:08:50.038888: step: 1280/529, loss: 0.007634851615875959 2023-01-22 21:08:51.092298: step: 1284/529, loss: 0.039624497294425964 2023-01-22 21:08:52.137700: step: 1288/529, loss: 0.005870536435395479 2023-01-22 21:08:53.195106: step: 1292/529, loss: 0.0066833700984716415 2023-01-22 21:08:54.240015: step: 1296/529, loss: 0.009734535589814186 2023-01-22 21:08:55.287013: step: 1300/529, loss: 0.004231844563037157 2023-01-22 21:08:56.339802: step: 1304/529, loss: 0.01276403944939375 2023-01-22 21:08:57.407919: step: 1308/529, loss: 0.00589427724480629 2023-01-22 21:08:58.457173: step: 1312/529, loss: 0.01865006797015667 2023-01-22 21:08:59.493166: step: 1316/529, loss: 0.010553350672125816 2023-01-22 21:09:00.539981: step: 1320/529, loss: 0.0034456339199095964 2023-01-22 21:09:01.592167: step: 1324/529, loss: 0.004795698449015617 2023-01-22 21:09:02.623648: step: 1328/529, loss: 0.0005342481308616698 2023-01-22 21:09:03.668144: step: 1332/529, loss: 0.0017165472963824868 2023-01-22 21:09:04.700478: step: 1336/529, loss: 0.007456735707819462 2023-01-22 21:09:05.777512: step: 1340/529, loss: 0.023134509101510048 2023-01-22 21:09:06.849187: step: 1344/529, loss: 0.005263397470116615 2023-01-22 21:09:07.893585: step: 1348/529, loss: 0.0059796771965920925 2023-01-22 21:09:08.950097: step: 1352/529, loss: 0.005494790151715279 2023-01-22 21:09:10.007284: step: 1356/529, loss: 0.01765313744544983 2023-01-22 21:09:11.059510: step: 1360/529, loss: 0.010995949618518353 2023-01-22 21:09:12.113066: step: 1364/529, loss: 0.0022110093850642443 2023-01-22 21:09:13.171065: step: 1368/529, loss: 0.02863430045545101 2023-01-22 21:09:14.242591: step: 1372/529, loss: 0.005511537194252014 2023-01-22 21:09:15.293210: step: 1376/529, loss: 0.004279316868633032 2023-01-22 21:09:16.332945: step: 1380/529, loss: 0.009396612644195557 2023-01-22 21:09:17.382381: step: 1384/529, loss: 0.004264641087502241 2023-01-22 21:09:18.429426: step: 1388/529, loss: 0.003186908783391118 2023-01-22 21:09:19.475037: step: 1392/529, loss: 0.00893084704875946 2023-01-22 21:09:20.533690: step: 1396/529, loss: 0.005201453343033791 2023-01-22 21:09:21.574461: step: 1400/529, loss: 0.004537770990282297 2023-01-22 21:09:22.610864: step: 1404/529, loss: 0.0027724585961550474 2023-01-22 21:09:23.657379: step: 1408/529, loss: 0.013007668778300285 2023-01-22 21:09:24.742143: step: 1412/529, loss: 0.0039587197825312614 2023-01-22 21:09:25.812576: step: 1416/529, loss: 0.0021988931111991405 2023-01-22 21:09:26.866953: step: 1420/529, loss: 0.007401087321341038 2023-01-22 21:09:27.923614: step: 1424/529, loss: 0.002529607620090246 2023-01-22 21:09:28.969773: step: 1428/529, loss: 0.004105293191969395 2023-01-22 21:09:30.037009: step: 1432/529, loss: 0.00038521201349794865 2023-01-22 21:09:31.083587: step: 1436/529, loss: 0.0028108262922614813 2023-01-22 21:09:32.124926: step: 1440/529, loss: 0.02009866014122963 2023-01-22 21:09:33.175164: step: 1444/529, loss: 0.005186332389712334 2023-01-22 21:09:34.228714: step: 1448/529, loss: 0.00943546462804079 2023-01-22 21:09:35.275131: step: 1452/529, loss: 0.0028669829480350018 2023-01-22 21:09:36.317091: step: 1456/529, loss: 0.0006739003001712263 2023-01-22 21:09:37.364460: step: 1460/529, loss: 0.002959716599434614 2023-01-22 21:09:38.412023: step: 1464/529, loss: 0.025490200147032738 2023-01-22 21:09:39.452819: step: 1468/529, loss: 0.0006220805807970464 2023-01-22 21:09:40.491012: step: 1472/529, loss: 0.0045493426732718945 2023-01-22 21:09:41.555307: step: 1476/529, loss: 0.007422023452818394 2023-01-22 21:09:42.607167: step: 1480/529, loss: 0.0003434363752603531 2023-01-22 21:09:43.654222: step: 1484/529, loss: 0.002908052410930395 2023-01-22 21:09:44.699438: step: 1488/529, loss: 0.008059931918978691 2023-01-22 21:09:45.755445: step: 1492/529, loss: 0.009029661305248737 2023-01-22 21:09:46.789701: step: 1496/529, loss: 0.011172882281243801 2023-01-22 21:09:47.849909: step: 1500/529, loss: 0.007685962598770857 2023-01-22 21:09:48.907412: step: 1504/529, loss: 0.00531420623883605 2023-01-22 21:09:49.952684: step: 1508/529, loss: 0.00048667757073417306 2023-01-22 21:09:51.024020: step: 1512/529, loss: 0.012706358917057514 2023-01-22 21:09:52.065376: step: 1516/529, loss: 0.002237442648038268 2023-01-22 21:09:53.127177: step: 1520/529, loss: 0.0040430352091789246 2023-01-22 21:09:54.181834: step: 1524/529, loss: 0.03662671893835068 2023-01-22 21:09:55.243353: step: 1528/529, loss: 0.0010449369437992573 2023-01-22 21:09:56.286579: step: 1532/529, loss: 0.00566144660115242 2023-01-22 21:09:57.335176: step: 1536/529, loss: 0.0002811216691043228 2023-01-22 21:09:58.380578: step: 1540/529, loss: 0.0010364975314587355 2023-01-22 21:09:59.435778: step: 1544/529, loss: 0.0019227811135351658 2023-01-22 21:10:00.494232: step: 1548/529, loss: 0.01226894836872816 2023-01-22 21:10:01.536162: step: 1552/529, loss: 0.0028614425100386143 2023-01-22 21:10:02.585560: step: 1556/529, loss: 0.0017028783913701773 2023-01-22 21:10:03.639573: step: 1560/529, loss: 0.005646460223942995 2023-01-22 21:10:04.669030: step: 1564/529, loss: 0.011266736313700676 2023-01-22 21:10:05.711808: step: 1568/529, loss: 0.0068841492757201195 2023-01-22 21:10:06.770503: step: 1572/529, loss: 0.022557435557246208 2023-01-22 21:10:07.807771: step: 1576/529, loss: 0.007782240863889456 2023-01-22 21:10:08.862390: step: 1580/529, loss: 0.0022903589997440577 2023-01-22 21:10:09.941984: step: 1584/529, loss: 0.016455868259072304 2023-01-22 21:10:10.992026: step: 1588/529, loss: 0.0071045998483896255 2023-01-22 21:10:12.042706: step: 1592/529, loss: 0.018862931057810783 2023-01-22 21:10:13.091004: step: 1596/529, loss: 0.009078999049961567 2023-01-22 21:10:14.131724: step: 1600/529, loss: 0.004598978441208601 2023-01-22 21:10:15.176983: step: 1604/529, loss: 0.0017273937119171023 2023-01-22 21:10:16.224059: step: 1608/529, loss: 0.0034220723900943995 2023-01-22 21:10:17.256246: step: 1612/529, loss: 0.002577882958576083 2023-01-22 21:10:18.290406: step: 1616/529, loss: 0.0023255664855241776 2023-01-22 21:10:19.349119: step: 1620/529, loss: 0.005078338086605072 2023-01-22 21:10:20.393440: step: 1624/529, loss: 0.0017022796673700213 2023-01-22 21:10:21.437644: step: 1628/529, loss: 0.004401816986501217 2023-01-22 21:10:22.470081: step: 1632/529, loss: 0.006114621181041002 2023-01-22 21:10:23.496580: step: 1636/529, loss: 0.003404750255867839 2023-01-22 21:10:24.546577: step: 1640/529, loss: 0.0029521449469029903 2023-01-22 21:10:25.600044: step: 1644/529, loss: 0.01737884245812893 2023-01-22 21:10:26.648677: step: 1648/529, loss: 0.003609592327848077 2023-01-22 21:10:27.682512: step: 1652/529, loss: 0.0006018771673552692 2023-01-22 21:10:28.735539: step: 1656/529, loss: 0.00840011890977621 2023-01-22 21:10:29.778052: step: 1660/529, loss: 0.027620581910014153 2023-01-22 21:10:30.816599: step: 1664/529, loss: 0.0013584413100033998 2023-01-22 21:10:31.866974: step: 1668/529, loss: 0.020687013864517212 2023-01-22 21:10:32.947131: step: 1672/529, loss: 0.0036367462016642094 2023-01-22 21:10:34.000551: step: 1676/529, loss: 0.002350281924009323 2023-01-22 21:10:35.047795: step: 1680/529, loss: 0.0041205789893865585 2023-01-22 21:10:36.080862: step: 1684/529, loss: 0.00909779965877533 2023-01-22 21:10:37.140449: step: 1688/529, loss: 0.0005393131286837161 2023-01-22 21:10:38.183570: step: 1692/529, loss: 0.001936891465447843 2023-01-22 21:10:39.237928: step: 1696/529, loss: 0.025961333885788918 2023-01-22 21:10:40.282814: step: 1700/529, loss: 0.022636907175183296 2023-01-22 21:10:41.324994: step: 1704/529, loss: 0.004636785946786404 2023-01-22 21:10:42.367788: step: 1708/529, loss: 0.004595298320055008 2023-01-22 21:10:43.404378: step: 1712/529, loss: 0.00046826706966385245 2023-01-22 21:10:44.440571: step: 1716/529, loss: 0.006994291208684444 2023-01-22 21:10:45.485449: step: 1720/529, loss: 0.005971079226583242 2023-01-22 21:10:46.520161: step: 1724/529, loss: 0.009745530784130096 2023-01-22 21:10:47.573935: step: 1728/529, loss: 0.00013580379891209304 2023-01-22 21:10:48.616517: step: 1732/529, loss: 0.004373595584183931 2023-01-22 21:10:49.691001: step: 1736/529, loss: 0.02308701165020466 2023-01-22 21:10:50.731159: step: 1740/529, loss: 0.001067155273631215 2023-01-22 21:10:51.778512: step: 1744/529, loss: 0.0004653588111978024 2023-01-22 21:10:52.842841: step: 1748/529, loss: 0.005620166193693876 2023-01-22 21:10:53.890310: step: 1752/529, loss: 0.005924263037741184 2023-01-22 21:10:54.929773: step: 1756/529, loss: 0.00017855333862826228 2023-01-22 21:10:55.985138: step: 1760/529, loss: 0.002827636431902647 2023-01-22 21:10:57.026415: step: 1764/529, loss: 0.0012677439954131842 2023-01-22 21:10:58.081273: step: 1768/529, loss: 0.006064257584512234 2023-01-22 21:10:59.127999: step: 1772/529, loss: 0.010223768651485443 2023-01-22 21:11:00.193150: step: 1776/529, loss: 0.0016981661319732666 2023-01-22 21:11:01.247300: step: 1780/529, loss: 0.0022718484979122877 2023-01-22 21:11:02.302653: step: 1784/529, loss: 0.007074328605085611 2023-01-22 21:11:03.352325: step: 1788/529, loss: 0.004383916035294533 2023-01-22 21:11:04.398726: step: 1792/529, loss: 0.01516591664403677 2023-01-22 21:11:05.433901: step: 1796/529, loss: 0.01723382994532585 2023-01-22 21:11:06.488088: step: 1800/529, loss: 0.002176165347918868 2023-01-22 21:11:07.544183: step: 1804/529, loss: 0.0010999891674146056 2023-01-22 21:11:08.584881: step: 1808/529, loss: 0.00583680858835578 2023-01-22 21:11:09.633153: step: 1812/529, loss: 0.0063547235913574696 2023-01-22 21:11:10.670863: step: 1816/529, loss: 0.01315275114029646 2023-01-22 21:11:11.701213: step: 1820/529, loss: 0.005829112138599157 2023-01-22 21:11:12.739891: step: 1824/529, loss: 0.0037796262186020613 2023-01-22 21:11:13.779015: step: 1828/529, loss: 0.0016116027254611254 2023-01-22 21:11:14.821967: step: 1832/529, loss: 0.004495531786233187 2023-01-22 21:11:15.865117: step: 1836/529, loss: 0.0033119108993560076 2023-01-22 21:11:16.921213: step: 1840/529, loss: 0.015245331451296806 2023-01-22 21:11:17.968061: step: 1844/529, loss: 0.011795220896601677 2023-01-22 21:11:19.016177: step: 1848/529, loss: 0.00022787578927818686 2023-01-22 21:11:20.069462: step: 1852/529, loss: 0.0005150833167135715 2023-01-22 21:11:21.109790: step: 1856/529, loss: 0.006519637070596218 2023-01-22 21:11:22.142608: step: 1860/529, loss: 6.537542276419117e-07 2023-01-22 21:11:23.180913: step: 1864/529, loss: 0.0013407135847955942 2023-01-22 21:11:24.238327: step: 1868/529, loss: 0.005626714788377285 2023-01-22 21:11:25.303810: step: 1872/529, loss: 0.008508286438882351 2023-01-22 21:11:26.363552: step: 1876/529, loss: 0.0024089752696454525 2023-01-22 21:11:27.419701: step: 1880/529, loss: 0.008103289641439915 2023-01-22 21:11:28.457889: step: 1884/529, loss: 0.036085814237594604 2023-01-22 21:11:29.505684: step: 1888/529, loss: 0.020635241642594337 2023-01-22 21:11:30.555128: step: 1892/529, loss: 0.003302657278254628 2023-01-22 21:11:31.600598: step: 1896/529, loss: 0.004418402444571257 2023-01-22 21:11:32.646078: step: 1900/529, loss: 0.011185157112777233 2023-01-22 21:11:33.698641: step: 1904/529, loss: 0.014876248314976692 2023-01-22 21:11:34.754835: step: 1908/529, loss: 0.00410253182053566 2023-01-22 21:11:35.791217: step: 1912/529, loss: 0.0025743674486875534 2023-01-22 21:11:36.832644: step: 1916/529, loss: 5.1235416322015226e-05 2023-01-22 21:11:37.884204: step: 1920/529, loss: 0.01078212633728981 2023-01-22 21:11:38.926020: step: 1924/529, loss: 0.0040357462130486965 2023-01-22 21:11:39.972746: step: 1928/529, loss: 0.011944519355893135 2023-01-22 21:11:41.029889: step: 1932/529, loss: 0.014269320294260979 2023-01-22 21:11:42.072879: step: 1936/529, loss: 0.0864657610654831 2023-01-22 21:11:43.124239: step: 1940/529, loss: 0.006890103220939636 2023-01-22 21:11:44.134160: step: 1944/529, loss: 0.020429786294698715 2023-01-22 21:11:45.190388: step: 1948/529, loss: 0.015258668921887875 2023-01-22 21:11:46.228044: step: 1952/529, loss: 0.005719974637031555 2023-01-22 21:11:47.283309: step: 1956/529, loss: 0.016125567257404327 2023-01-22 21:11:48.327185: step: 1960/529, loss: 0.0015064983163028955 2023-01-22 21:11:49.369684: step: 1964/529, loss: 0.02629963494837284 2023-01-22 21:11:50.422669: step: 1968/529, loss: 0.0013904025545343757 2023-01-22 21:11:51.480276: step: 1972/529, loss: 0.001332998275756836 2023-01-22 21:11:52.539059: step: 1976/529, loss: 0.01769978180527687 2023-01-22 21:11:53.584623: step: 1980/529, loss: 0.028006859123706818 2023-01-22 21:11:54.640393: step: 1984/529, loss: 0.002977078314870596 2023-01-22 21:11:55.690808: step: 1988/529, loss: 0.3105734884738922 2023-01-22 21:11:56.730795: step: 1992/529, loss: 0.014351844787597656 2023-01-22 21:11:57.780282: step: 1996/529, loss: 0.008118154481053352 2023-01-22 21:11:58.825711: step: 2000/529, loss: 0.009803789667785168 2023-01-22 21:11:59.873204: step: 2004/529, loss: 0.011053507216274738 2023-01-22 21:12:00.906735: step: 2008/529, loss: 0.0022000197786837816 2023-01-22 21:12:01.965267: step: 2012/529, loss: 0.04004212096333504 2023-01-22 21:12:03.034709: step: 2016/529, loss: 0.003663303330540657 2023-01-22 21:12:04.082690: step: 2020/529, loss: 0.003477126592770219 2023-01-22 21:12:05.131381: step: 2024/529, loss: 0.0024025526363402605 2023-01-22 21:12:06.169794: step: 2028/529, loss: 0.0015577829908579588 2023-01-22 21:12:07.223297: step: 2032/529, loss: 0.002356215612962842 2023-01-22 21:12:08.265258: step: 2036/529, loss: 0.004957434721291065 2023-01-22 21:12:09.321348: step: 2040/529, loss: 0.01695277914404869 2023-01-22 21:12:10.373948: step: 2044/529, loss: 0.0 2023-01-22 21:12:11.435167: step: 2048/529, loss: 0.0058385892771184444 2023-01-22 21:12:12.475326: step: 2052/529, loss: 0.025018231943249702 2023-01-22 21:12:13.516419: step: 2056/529, loss: 0.001199639169499278 2023-01-22 21:12:14.575019: step: 2060/529, loss: 0.004526037257164717 2023-01-22 21:12:15.629334: step: 2064/529, loss: 0.016652587801218033 2023-01-22 21:12:16.707013: step: 2068/529, loss: 0.006676653865724802 2023-01-22 21:12:17.750499: step: 2072/529, loss: 0.004255416803061962 2023-01-22 21:12:18.799234: step: 2076/529, loss: 0.005319209769368172 2023-01-22 21:12:19.841588: step: 2080/529, loss: 0.008580082096159458 2023-01-22 21:12:20.899606: step: 2084/529, loss: 0.034373655915260315 2023-01-22 21:12:21.964562: step: 2088/529, loss: 0.0027881034184247255 2023-01-22 21:12:23.008271: step: 2092/529, loss: 0.04488471895456314 2023-01-22 21:12:24.057084: step: 2096/529, loss: 0.002458750270307064 2023-01-22 21:12:25.093637: step: 2100/529, loss: 0.011102106422185898 2023-01-22 21:12:26.123180: step: 2104/529, loss: 0.002054554643109441 2023-01-22 21:12:27.182723: step: 2108/529, loss: 0.01581074297428131 2023-01-22 21:12:28.227817: step: 2112/529, loss: 0.007924499921500683 2023-01-22 21:12:29.275698: step: 2116/529, loss: 0.02802487276494503 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3379446284562212, 'r': 0.3180655326646788, 'f1': 0.327703882139366}, 'combined': 0.2414660184184802, 'stategy': 1, 'epoch': 8} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3818138388799645, 'r': 0.32250942062461224, 'f1': 0.3496648969118313}, 'combined': 0.24599540486259489, 'stategy': 1, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31402931586522925, 'r': 0.3438233686038658, 'f1': 0.3282516580692704}, 'combined': 0.24186964278788345, 'stategy': 1, 'epoch': 8} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37958954718555105, 'r': 0.3309411919664787, 'f1': 0.35359995080163115}, 'combined': 0.2510559650691581, 'stategy': 1, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3278904632505176, 'r': 0.34344503930604503, 'f1': 0.33548755461406066}, 'combined': 0.2472013560314131, 'stategy': 1, 'epoch': 8} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37767690991419073, 'r': 0.30121875712178914, 'f1': 0.33514242297239843}, 'combined': 0.23795112031040289, 'stategy': 1, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 9 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 21:14:59.971034: step: 4/529, loss: 0.006300678476691246 2023-01-22 21:15:00.993966: step: 8/529, loss: 0.0010124027030542493 2023-01-22 21:15:02.031250: step: 12/529, loss: 0.005624172277748585 2023-01-22 21:15:03.069061: step: 16/529, loss: 0.0018110325327143073 2023-01-22 21:15:04.093801: step: 20/529, loss: 0.01202508807182312 2023-01-22 21:15:05.127292: step: 24/529, loss: 0.01973545178771019 2023-01-22 21:15:06.171448: step: 28/529, loss: 0.002924524713307619 2023-01-22 21:15:07.206607: step: 32/529, loss: 0.004013107158243656 2023-01-22 21:15:08.246109: step: 36/529, loss: 0.0044119092635810375 2023-01-22 21:15:09.274260: step: 40/529, loss: 0.00377178774215281 2023-01-22 21:15:10.299560: step: 44/529, loss: 0.005118925590068102 2023-01-22 21:15:11.340283: step: 48/529, loss: 1.1169495337526314e-05 2023-01-22 21:15:12.370593: step: 52/529, loss: 0.0015382913406938314 2023-01-22 21:15:13.425954: step: 56/529, loss: 0.003531418740749359 2023-01-22 21:15:14.481236: step: 60/529, loss: 0.003769496688619256 2023-01-22 21:15:15.518861: step: 64/529, loss: 0.004782876465469599 2023-01-22 21:15:16.549619: step: 68/529, loss: 0.0048364801332354546 2023-01-22 21:15:17.592350: step: 72/529, loss: 0.013067131862044334 2023-01-22 21:15:18.643690: step: 76/529, loss: 0.03310670703649521 2023-01-22 21:15:19.691052: step: 80/529, loss: 0.02008361928164959 2023-01-22 21:15:20.732960: step: 84/529, loss: 0.00011589369387365878 2023-01-22 21:15:21.768447: step: 88/529, loss: 0.002125108614563942 2023-01-22 21:15:22.810716: step: 92/529, loss: 0.010413751937448978 2023-01-22 21:15:23.871910: step: 96/529, loss: 0.003419365268200636 2023-01-22 21:15:24.926025: step: 100/529, loss: 0.006584096699953079 2023-01-22 21:15:25.972688: step: 104/529, loss: 0.008205682039260864 2023-01-22 21:15:27.011040: step: 108/529, loss: 0.0008537423564121127 2023-01-22 21:15:28.049970: step: 112/529, loss: 0.0020073417108505964 2023-01-22 21:15:29.112923: step: 116/529, loss: 0.004572881385684013 2023-01-22 21:15:30.167751: step: 120/529, loss: 0.003769422648474574 2023-01-22 21:15:31.234241: step: 124/529, loss: 0.0068070776760578156 2023-01-22 21:15:32.312231: step: 128/529, loss: 0.004832706414163113 2023-01-22 21:15:33.361984: step: 132/529, loss: 5.883812627871521e-05 2023-01-22 21:15:34.404291: step: 136/529, loss: 0.005368275102227926 2023-01-22 21:15:35.450232: step: 140/529, loss: 0.025845011696219444 2023-01-22 21:15:36.483122: step: 144/529, loss: 0.0010977952042594552 2023-01-22 21:15:37.537087: step: 148/529, loss: 0.009125487878918648 2023-01-22 21:15:38.588649: step: 152/529, loss: 0.017263898625969887 2023-01-22 21:15:39.629704: step: 156/529, loss: 0.04713668301701546 2023-01-22 21:15:40.675994: step: 160/529, loss: 0.0034220970701426268 2023-01-22 21:15:41.729176: step: 164/529, loss: 0.0039162649773061275 2023-01-22 21:15:42.775286: step: 168/529, loss: 0.00571163697168231 2023-01-22 21:15:43.826436: step: 172/529, loss: 0.002208835445344448 2023-01-22 21:15:44.868936: step: 176/529, loss: 0.01873951405286789 2023-01-22 21:15:45.921022: step: 180/529, loss: 1.901809810078703e-05 2023-01-22 21:15:46.966375: step: 184/529, loss: 0.0003114901774097234 2023-01-22 21:15:48.011791: step: 188/529, loss: 0.0011132288491353393 2023-01-22 21:15:49.063720: step: 192/529, loss: 0.015212882310152054 2023-01-22 21:15:50.120766: step: 196/529, loss: 0.006722830235958099 2023-01-22 21:15:51.154177: step: 200/529, loss: 0.006900318432599306 2023-01-22 21:15:52.198451: step: 204/529, loss: 0.0025597952771931887 2023-01-22 21:15:53.255779: step: 208/529, loss: 0.016573838889598846 2023-01-22 21:15:54.310585: step: 212/529, loss: 0.007509148679673672 2023-01-22 21:15:55.383738: step: 216/529, loss: 0.0031382490415126085 2023-01-22 21:15:56.421014: step: 220/529, loss: 0.0022234388161450624 2023-01-22 21:15:57.469693: step: 224/529, loss: 0.19986917078495026 2023-01-22 21:15:58.536933: step: 228/529, loss: 0.005614585243165493 2023-01-22 21:15:59.604271: step: 232/529, loss: 0.0059786103665828705 2023-01-22 21:16:00.650476: step: 236/529, loss: 0.015377702191472054 2023-01-22 21:16:01.694845: step: 240/529, loss: 0.0018340423703193665 2023-01-22 21:16:02.750713: step: 244/529, loss: 0.004900996573269367 2023-01-22 21:16:03.799147: step: 248/529, loss: 0.004624504595994949 2023-01-22 21:16:04.843402: step: 252/529, loss: 0.005510708317160606 2023-01-22 21:16:05.888975: step: 256/529, loss: 0.015011359006166458 2023-01-22 21:16:06.917870: step: 260/529, loss: 0.004912584088742733 2023-01-22 21:16:07.976295: step: 264/529, loss: 0.004095816984772682 2023-01-22 21:16:09.022312: step: 268/529, loss: 0.006231776904314756 2023-01-22 21:16:10.077213: step: 272/529, loss: 0.006075344048440456 2023-01-22 21:16:11.120083: step: 276/529, loss: 0.009774001315236092 2023-01-22 21:16:12.167450: step: 280/529, loss: 0.008760093711316586 2023-01-22 21:16:13.233584: step: 284/529, loss: 0.016454629600048065 2023-01-22 21:16:14.279849: step: 288/529, loss: 0.003196598496288061 2023-01-22 21:16:15.343538: step: 292/529, loss: 0.01105580572038889 2023-01-22 21:16:16.392032: step: 296/529, loss: 0.012824133038520813 2023-01-22 21:16:17.459925: step: 300/529, loss: 0.00654851458966732 2023-01-22 21:16:18.498117: step: 304/529, loss: 0.0002847735886462033 2023-01-22 21:16:19.540993: step: 308/529, loss: 0.006640063598752022 2023-01-22 21:16:20.587994: step: 312/529, loss: 0.0031307535246014595 2023-01-22 21:16:21.640502: step: 316/529, loss: 0.0006091123213991523 2023-01-22 21:16:22.683843: step: 320/529, loss: 0.006259743589907885 2023-01-22 21:16:23.740319: step: 324/529, loss: 0.0014743837527930737 2023-01-22 21:16:24.789788: step: 328/529, loss: 0.002671916503459215 2023-01-22 21:16:25.838482: step: 332/529, loss: 0.00605043675750494 2023-01-22 21:16:26.891357: step: 336/529, loss: 0.008138896897435188 2023-01-22 21:16:27.943450: step: 340/529, loss: 0.007081998512148857 2023-01-22 21:16:29.009434: step: 344/529, loss: 0.0178579930216074 2023-01-22 21:16:30.067475: step: 348/529, loss: 0.04688217490911484 2023-01-22 21:16:31.123661: step: 352/529, loss: 0.0034469827078282833 2023-01-22 21:16:32.165249: step: 356/529, loss: 0.002957022748887539 2023-01-22 21:16:33.226654: step: 360/529, loss: 0.0022730641067028046 2023-01-22 21:16:34.272636: step: 364/529, loss: 0.024246053770184517 2023-01-22 21:16:35.328663: step: 368/529, loss: 0.0020896270871162415 2023-01-22 21:16:36.384695: step: 372/529, loss: 0.005090031307190657 2023-01-22 21:16:37.465570: step: 376/529, loss: 0.016175616532564163 2023-01-22 21:16:38.525268: step: 380/529, loss: 0.011649226769804955 2023-01-22 21:16:39.602714: step: 384/529, loss: 0.001625675125978887 2023-01-22 21:16:40.652949: step: 388/529, loss: 0.0021939226426184177 2023-01-22 21:16:41.716520: step: 392/529, loss: 0.011116301640868187 2023-01-22 21:16:42.769781: step: 396/529, loss: 0.009499894455075264 2023-01-22 21:16:43.814490: step: 400/529, loss: 0.005800594110041857 2023-01-22 21:16:44.869725: step: 404/529, loss: 3.698108412208967e-05 2023-01-22 21:16:45.928342: step: 408/529, loss: 0.021780554205179214 2023-01-22 21:16:46.968358: step: 412/529, loss: 0.006811901926994324 2023-01-22 21:16:48.029407: step: 416/529, loss: 0.0037943851202726364 2023-01-22 21:16:49.083640: step: 420/529, loss: 0.005671271588653326 2023-01-22 21:16:50.127947: step: 424/529, loss: 0.005850177723914385 2023-01-22 21:16:51.199466: step: 428/529, loss: 0.0018757515354081988 2023-01-22 21:16:52.252696: step: 432/529, loss: 0.004273649770766497 2023-01-22 21:16:53.304030: step: 436/529, loss: 0.058385588228702545 2023-01-22 21:16:54.351230: step: 440/529, loss: 0.0034359463024884462 2023-01-22 21:16:55.392828: step: 444/529, loss: 0.00020473668701015413 2023-01-22 21:16:56.445340: step: 448/529, loss: 0.0007670792983844876 2023-01-22 21:16:57.525507: step: 452/529, loss: 0.0024094106629490852 2023-01-22 21:16:58.582044: step: 456/529, loss: 0.004597185179591179 2023-01-22 21:16:59.626763: step: 460/529, loss: 0.0015014943201094866 2023-01-22 21:17:00.670613: step: 464/529, loss: 0.00015867469483055174 2023-01-22 21:17:01.730580: step: 468/529, loss: 0.0029733991250395775 2023-01-22 21:17:02.769836: step: 472/529, loss: 0.006710219196975231 2023-01-22 21:17:03.814152: step: 476/529, loss: 0.0032519851811230183 2023-01-22 21:17:04.858644: step: 480/529, loss: 0.0009933942928910255 2023-01-22 21:17:05.916524: step: 484/529, loss: 0.01579311676323414 2023-01-22 21:17:06.959500: step: 488/529, loss: 0.0030558560974895954 2023-01-22 21:17:08.004386: step: 492/529, loss: 0.004293438978493214 2023-01-22 21:17:09.061890: step: 496/529, loss: 0.006788470782339573 2023-01-22 21:17:10.118058: step: 500/529, loss: 0.00423421198502183 2023-01-22 21:17:11.189705: step: 504/529, loss: 0.0215873084962368 2023-01-22 21:17:12.236593: step: 508/529, loss: 0.002454339526593685 2023-01-22 21:17:13.285195: step: 512/529, loss: 0.0126581359654665 2023-01-22 21:17:14.329386: step: 516/529, loss: 0.002481881296262145 2023-01-22 21:17:15.374038: step: 520/529, loss: 0.00029030904988758266 2023-01-22 21:17:16.417417: step: 524/529, loss: 0.0014340935740619898 2023-01-22 21:17:17.444375: step: 528/529, loss: 0.001938837580382824 2023-01-22 21:17:18.506758: step: 532/529, loss: 0.02064271830022335 2023-01-22 21:17:19.549474: step: 536/529, loss: 0.017089057713747025 2023-01-22 21:17:20.596499: step: 540/529, loss: 0.008119807578623295 2023-01-22 21:17:21.646314: step: 544/529, loss: 0.00399622181430459 2023-01-22 21:17:22.696179: step: 548/529, loss: 0.007835932075977325 2023-01-22 21:17:23.743930: step: 552/529, loss: 0.00044475361937657 2023-01-22 21:17:24.806077: step: 556/529, loss: 0.007072226610034704 2023-01-22 21:17:25.855719: step: 560/529, loss: 0.0001500154030509293 2023-01-22 21:17:26.912698: step: 564/529, loss: 0.006587793584913015 2023-01-22 21:17:27.964312: step: 568/529, loss: 0.00090974842896685 2023-01-22 21:17:29.028822: step: 572/529, loss: 0.0008496007067151368 2023-01-22 21:17:30.089235: step: 576/529, loss: 0.01644875481724739 2023-01-22 21:17:31.125042: step: 580/529, loss: 0.0014555059606209397 2023-01-22 21:17:32.190855: step: 584/529, loss: 0.01223946362733841 2023-01-22 21:17:33.239865: step: 588/529, loss: 0.013972879387438297 2023-01-22 21:17:34.307911: step: 592/529, loss: 0.0011880487436428666 2023-01-22 21:17:35.357735: step: 596/529, loss: 0.010912670753896236 2023-01-22 21:17:36.411580: step: 600/529, loss: 0.005654783919453621 2023-01-22 21:17:37.472587: step: 604/529, loss: 0.01036121416836977 2023-01-22 21:17:38.510052: step: 608/529, loss: 0.001505812630057335 2023-01-22 21:17:39.543493: step: 612/529, loss: 0.000928484951145947 2023-01-22 21:17:40.594883: step: 616/529, loss: 0.003394001629203558 2023-01-22 21:17:41.635992: step: 620/529, loss: 0.0040245638228952885 2023-01-22 21:17:42.690430: step: 624/529, loss: 0.0036833074409514666 2023-01-22 21:17:43.739146: step: 628/529, loss: 0.018184546381235123 2023-01-22 21:17:44.799773: step: 632/529, loss: 0.010472928173840046 2023-01-22 21:17:45.852693: step: 636/529, loss: 0.004791125655174255 2023-01-22 21:17:46.909159: step: 640/529, loss: 0.0016424853820353746 2023-01-22 21:17:47.957785: step: 644/529, loss: 0.00336257042363286 2023-01-22 21:17:49.012210: step: 648/529, loss: 0.008741194382309914 2023-01-22 21:17:50.061756: step: 652/529, loss: 0.00500513194128871 2023-01-22 21:17:51.108579: step: 656/529, loss: 0.0025286588352173567 2023-01-22 21:17:52.159565: step: 660/529, loss: 0.006148525048047304 2023-01-22 21:17:53.207442: step: 664/529, loss: 0.006512647494673729 2023-01-22 21:17:54.260152: step: 668/529, loss: 0.005718899425119162 2023-01-22 21:17:55.305896: step: 672/529, loss: 0.0015654120361432433 2023-01-22 21:17:56.348182: step: 676/529, loss: 0.002474777866154909 2023-01-22 21:17:57.404928: step: 680/529, loss: 0.00628057774156332 2023-01-22 21:17:58.448193: step: 684/529, loss: 0.0046486700884997845 2023-01-22 21:17:59.507370: step: 688/529, loss: 0.0025023389607667923 2023-01-22 21:18:00.602115: step: 692/529, loss: 0.0046284496784210205 2023-01-22 21:18:01.649307: step: 696/529, loss: 0.004663518629968166 2023-01-22 21:18:02.718305: step: 700/529, loss: 0.002358801197260618 2023-01-22 21:18:03.768212: step: 704/529, loss: 0.0038717612624168396 2023-01-22 21:18:04.831103: step: 708/529, loss: 0.01180274412035942 2023-01-22 21:18:05.886204: step: 712/529, loss: 0.0002772000734694302 2023-01-22 21:18:06.943659: step: 716/529, loss: 0.0019642598927021027 2023-01-22 21:18:08.012795: step: 720/529, loss: 0.00062557467026636 2023-01-22 21:18:09.054615: step: 724/529, loss: 0.003036304609850049 2023-01-22 21:18:10.106661: step: 728/529, loss: 0.0039842030964791775 2023-01-22 21:18:11.160622: step: 732/529, loss: 0.0019089869456365705 2023-01-22 21:18:12.214253: step: 736/529, loss: 0.0006355307996273041 2023-01-22 21:18:13.296770: step: 740/529, loss: 0.003295942209661007 2023-01-22 21:18:14.363595: step: 744/529, loss: 0.0062166741117835045 2023-01-22 21:18:15.407092: step: 748/529, loss: 0.0016474102158099413 2023-01-22 21:18:16.467466: step: 752/529, loss: 0.0015284394612535834 2023-01-22 21:18:17.530419: step: 756/529, loss: 0.005231295246630907 2023-01-22 21:18:18.572281: step: 760/529, loss: 0.010162388905882835 2023-01-22 21:18:19.639745: step: 764/529, loss: 0.0011594555107876658 2023-01-22 21:18:20.675091: step: 768/529, loss: 0.0014539504190906882 2023-01-22 21:18:21.740225: step: 772/529, loss: 0.005979025736451149 2023-01-22 21:18:22.807582: step: 776/529, loss: 0.004823142662644386 2023-01-22 21:18:23.860228: step: 780/529, loss: 0.017872508615255356 2023-01-22 21:18:24.910256: step: 784/529, loss: 0.0021699729841202497 2023-01-22 21:18:25.947123: step: 788/529, loss: 0.010397344827651978 2023-01-22 21:18:26.986352: step: 792/529, loss: 0.004523996729403734 2023-01-22 21:18:28.028174: step: 796/529, loss: 0.006840874440968037 2023-01-22 21:18:29.063255: step: 800/529, loss: 0.00152712466660887 2023-01-22 21:18:30.106882: step: 804/529, loss: 0.001005453639663756 2023-01-22 21:18:31.169088: step: 808/529, loss: 0.005728109274059534 2023-01-22 21:18:32.205497: step: 812/529, loss: 0.005101579707115889 2023-01-22 21:18:33.249075: step: 816/529, loss: 0.010346205905079842 2023-01-22 21:18:34.299753: step: 820/529, loss: 0.004148328211158514 2023-01-22 21:18:35.350487: step: 824/529, loss: 0.0022742159198969603 2023-01-22 21:18:36.391503: step: 828/529, loss: 0.005398695822805166 2023-01-22 21:18:37.442277: step: 832/529, loss: 0.00025242235278710723 2023-01-22 21:18:38.491903: step: 836/529, loss: 0.011434529908001423 2023-01-22 21:18:39.538526: step: 840/529, loss: 0.010000054724514484 2023-01-22 21:18:40.571152: step: 844/529, loss: 0.008968652226030827 2023-01-22 21:18:41.610049: step: 848/529, loss: 0.003606243757531047 2023-01-22 21:18:42.664274: step: 852/529, loss: 0.004035185556858778 2023-01-22 21:18:43.713531: step: 856/529, loss: 0.017740394920110703 2023-01-22 21:18:44.779114: step: 860/529, loss: 0.002499544760212302 2023-01-22 21:18:45.826389: step: 864/529, loss: 0.003376048058271408 2023-01-22 21:18:46.853034: step: 868/529, loss: 0.006387442350387573 2023-01-22 21:18:47.900856: step: 872/529, loss: 0.00011014998017344624 2023-01-22 21:18:48.954659: step: 876/529, loss: 0.004113172180950642 2023-01-22 21:18:50.031591: step: 880/529, loss: 0.006529481150209904 2023-01-22 21:18:51.078058: step: 884/529, loss: 0.007111255079507828 2023-01-22 21:18:52.121778: step: 888/529, loss: 0.010991730727255344 2023-01-22 21:18:53.181726: step: 892/529, loss: 0.015675922855734825 2023-01-22 21:18:54.236312: step: 896/529, loss: 0.004603234585374594 2023-01-22 21:18:55.311227: step: 900/529, loss: 0.0019323823507875204 2023-01-22 21:18:56.367846: step: 904/529, loss: 0.011816330254077911 2023-01-22 21:18:57.407013: step: 908/529, loss: 0.00700808921828866 2023-01-22 21:18:58.443505: step: 912/529, loss: 0.0396735705435276 2023-01-22 21:18:59.487110: step: 916/529, loss: 0.009099606424570084 2023-01-22 21:19:00.544436: step: 920/529, loss: 0.011520998552441597 2023-01-22 21:19:01.586374: step: 924/529, loss: 0.0048079960979521275 2023-01-22 21:19:02.627574: step: 928/529, loss: 0.0005959143745712936 2023-01-22 21:19:03.672305: step: 932/529, loss: 0.011329959146678448 2023-01-22 21:19:04.720547: step: 936/529, loss: 0.0014785476960241795 2023-01-22 21:19:05.780372: step: 940/529, loss: 0.0021816124208271503 2023-01-22 21:19:06.825247: step: 944/529, loss: 0.008383725769817829 2023-01-22 21:19:07.870653: step: 948/529, loss: 0.012099064886569977 2023-01-22 21:19:08.915524: step: 952/529, loss: 0.010949932038784027 2023-01-22 21:19:09.971219: step: 956/529, loss: 0.0030963753815740347 2023-01-22 21:19:11.018838: step: 960/529, loss: 0.00010512776498217136 2023-01-22 21:19:12.067207: step: 964/529, loss: 0.0034836141858249903 2023-01-22 21:19:13.109723: step: 968/529, loss: 0.009435310959815979 2023-01-22 21:19:14.178712: step: 972/529, loss: 0.009063382633030415 2023-01-22 21:19:15.220959: step: 976/529, loss: 0.001009809784591198 2023-01-22 21:19:16.266183: step: 980/529, loss: 0.007951056584715843 2023-01-22 21:19:17.323147: step: 984/529, loss: 0.009092065505683422 2023-01-22 21:19:18.383861: step: 988/529, loss: 0.003922322764992714 2023-01-22 21:19:19.451554: step: 992/529, loss: 0.0006192208966240287 2023-01-22 21:19:20.487098: step: 996/529, loss: 0.004097390454262495 2023-01-22 21:19:21.533764: step: 1000/529, loss: 0.012018872424960136 2023-01-22 21:19:22.594246: step: 1004/529, loss: 0.03923084959387779 2023-01-22 21:19:23.654267: step: 1008/529, loss: 0.008151457644999027 2023-01-22 21:19:24.708166: step: 1012/529, loss: 0.000528968230355531 2023-01-22 21:19:25.759795: step: 1016/529, loss: 0.007962619885802269 2023-01-22 21:19:26.792916: step: 1020/529, loss: 2.5195267880917527e-05 2023-01-22 21:19:27.838778: step: 1024/529, loss: 0.0011565190507099032 2023-01-22 21:19:28.896372: step: 1028/529, loss: 0.004009074065834284 2023-01-22 21:19:29.951727: step: 1032/529, loss: 0.0020806114189326763 2023-01-22 21:19:31.009095: step: 1036/529, loss: 0.004213140346109867 2023-01-22 21:19:32.060342: step: 1040/529, loss: 0.005793469026684761 2023-01-22 21:19:33.104053: step: 1044/529, loss: 8.711584086995572e-05 2023-01-22 21:19:34.143044: step: 1048/529, loss: 0.0008608251810073853 2023-01-22 21:19:35.194837: step: 1052/529, loss: 0.00797390379011631 2023-01-22 21:19:36.247535: step: 1056/529, loss: 0.006473200861364603 2023-01-22 21:19:37.297116: step: 1060/529, loss: 0.009393997490406036 2023-01-22 21:19:38.338921: step: 1064/529, loss: 0.000984341953881085 2023-01-22 21:19:39.397251: step: 1068/529, loss: 0.008125782944262028 2023-01-22 21:19:40.450871: step: 1072/529, loss: 0.022238871082663536 2023-01-22 21:19:41.506900: step: 1076/529, loss: 0.0008515786030329764 2023-01-22 21:19:42.565351: step: 1080/529, loss: 0.07729467004537582 2023-01-22 21:19:43.615392: step: 1084/529, loss: 0.009878535754978657 2023-01-22 21:19:44.662306: step: 1088/529, loss: 0.0023015232291072607 2023-01-22 21:19:45.717403: step: 1092/529, loss: 0.00883267167955637 2023-01-22 21:19:46.774779: step: 1096/529, loss: 0.00739073334261775 2023-01-22 21:19:47.820114: step: 1100/529, loss: 0.016659047454595566 2023-01-22 21:19:48.867271: step: 1104/529, loss: 0.0016267206519842148 2023-01-22 21:19:49.942974: step: 1108/529, loss: 0.0017009723233059049 2023-01-22 21:19:50.999304: step: 1112/529, loss: 0.00375702534802258 2023-01-22 21:19:52.043980: step: 1116/529, loss: 0.016666973009705544 2023-01-22 21:19:53.096220: step: 1120/529, loss: 0.0037795458920300007 2023-01-22 21:19:54.154898: step: 1124/529, loss: 0.004482579883188009 2023-01-22 21:19:55.198397: step: 1128/529, loss: 0.008286291733384132 2023-01-22 21:19:56.247300: step: 1132/529, loss: 0.0054707773961126804 2023-01-22 21:19:57.305812: step: 1136/529, loss: 0.0034230626188218594 2023-01-22 21:19:58.350723: step: 1140/529, loss: 0.01532284077256918 2023-01-22 21:19:59.413258: step: 1144/529, loss: 0.0024429569020867348 2023-01-22 21:20:00.467606: step: 1148/529, loss: 0.0010498602641746402 2023-01-22 21:20:01.525642: step: 1152/529, loss: 0.02562536858022213 2023-01-22 21:20:02.567164: step: 1156/529, loss: 0.005093762651085854 2023-01-22 21:20:03.613110: step: 1160/529, loss: 0.000866380927618593 2023-01-22 21:20:04.659672: step: 1164/529, loss: 0.0005144627066329122 2023-01-22 21:20:05.728507: step: 1168/529, loss: 0.0027241436764597893 2023-01-22 21:20:06.782190: step: 1172/529, loss: 0.0005300686461851001 2023-01-22 21:20:07.819697: step: 1176/529, loss: 0.0008234158158302307 2023-01-22 21:20:08.856120: step: 1180/529, loss: 0.00796295702457428 2023-01-22 21:20:09.916672: step: 1184/529, loss: 0.0014958082465454936 2023-01-22 21:20:10.972561: step: 1188/529, loss: 0.0001174230346805416 2023-01-22 21:20:12.021540: step: 1192/529, loss: 0.009548977948725224 2023-01-22 21:20:13.073527: step: 1196/529, loss: 0.004236704204231501 2023-01-22 21:20:14.133205: step: 1200/529, loss: 0.008820915594696999 2023-01-22 21:20:15.176505: step: 1204/529, loss: 0.001488731475546956 2023-01-22 21:20:16.225545: step: 1208/529, loss: 0.008006872609257698 2023-01-22 21:20:17.268769: step: 1212/529, loss: 0.0036577205173671246 2023-01-22 21:20:18.304663: step: 1216/529, loss: 0.006865944713354111 2023-01-22 21:20:19.354145: step: 1220/529, loss: 0.09012701362371445 2023-01-22 21:20:20.395465: step: 1224/529, loss: 0.006028877571225166 2023-01-22 21:20:21.434128: step: 1228/529, loss: 0.003818704979494214 2023-01-22 21:20:22.481226: step: 1232/529, loss: 0.0022012065164744854 2023-01-22 21:20:23.527092: step: 1236/529, loss: 0.05324241518974304 2023-01-22 21:20:24.576356: step: 1240/529, loss: 0.004673610907047987 2023-01-22 21:20:25.621715: step: 1244/529, loss: 0.003439712105318904 2023-01-22 21:20:26.663905: step: 1248/529, loss: 0.0005930070765316486 2023-01-22 21:20:27.722431: step: 1252/529, loss: 0.01084781251847744 2023-01-22 21:20:28.784841: step: 1256/529, loss: 0.03378957509994507 2023-01-22 21:20:29.842250: step: 1260/529, loss: 0.0139120789244771 2023-01-22 21:20:30.895453: step: 1264/529, loss: 0.0071626571007072926 2023-01-22 21:20:31.947211: step: 1268/529, loss: 0.0032292359974235296 2023-01-22 21:20:33.003503: step: 1272/529, loss: 0.009731153026223183 2023-01-22 21:20:34.050687: step: 1276/529, loss: 0.011153255589306355 2023-01-22 21:20:35.102444: step: 1280/529, loss: 0.011540660634636879 2023-01-22 21:20:36.150615: step: 1284/529, loss: 0.0007765796617604792 2023-01-22 21:20:37.199202: step: 1288/529, loss: 0.015370507724583149 2023-01-22 21:20:38.272604: step: 1292/529, loss: 0.0027697307523339987 2023-01-22 21:20:39.314723: step: 1296/529, loss: 0.0029579424299299717 2023-01-22 21:20:40.339833: step: 1300/529, loss: 0.0027856891974806786 2023-01-22 21:20:41.396193: step: 1304/529, loss: 0.0014508719323202968 2023-01-22 21:20:42.440029: step: 1308/529, loss: 0.0056475563906133175 2023-01-22 21:20:43.500997: step: 1312/529, loss: 0.009944622404873371 2023-01-22 21:20:44.539677: step: 1316/529, loss: 0.008630159310996532 2023-01-22 21:20:45.593639: step: 1320/529, loss: 0.011891843751072884 2023-01-22 21:20:46.644166: step: 1324/529, loss: 0.0006453021196648479 2023-01-22 21:20:47.700002: step: 1328/529, loss: 0.002190463710576296 2023-01-22 21:20:48.745583: step: 1332/529, loss: 0.010509279556572437 2023-01-22 21:20:49.799410: step: 1336/529, loss: 0.00253860279917717 2023-01-22 21:20:50.855210: step: 1340/529, loss: 0.0028235469944775105 2023-01-22 21:20:51.908797: step: 1344/529, loss: 0.005779411178082228 2023-01-22 21:20:52.950521: step: 1348/529, loss: 0.008151914924383163 2023-01-22 21:20:54.001588: step: 1352/529, loss: 0.0002389306464465335 2023-01-22 21:20:55.058905: step: 1356/529, loss: 0.019289448857307434 2023-01-22 21:20:56.103248: step: 1360/529, loss: 0.006370015442371368 2023-01-22 21:20:57.151630: step: 1364/529, loss: 0.0014786451356485486 2023-01-22 21:20:58.190566: step: 1368/529, loss: 0.002718487521633506 2023-01-22 21:20:59.233172: step: 1372/529, loss: 0.0013724368764087558 2023-01-22 21:21:00.291372: step: 1376/529, loss: 0.01049088966101408 2023-01-22 21:21:01.348603: step: 1380/529, loss: 0.00016955556930042803 2023-01-22 21:21:02.400748: step: 1384/529, loss: 0.008403759449720383 2023-01-22 21:21:03.445887: step: 1388/529, loss: 0.007500526495277882 2023-01-22 21:21:04.485018: step: 1392/529, loss: 0.00339939771220088 2023-01-22 21:21:05.519849: step: 1396/529, loss: 0.0010053713340312243 2023-01-22 21:21:06.552028: step: 1400/529, loss: 0.004347257316112518 2023-01-22 21:21:07.609997: step: 1404/529, loss: 0.02101157233119011 2023-01-22 21:21:08.650234: step: 1408/529, loss: 0.00039704336086288095 2023-01-22 21:21:09.703839: step: 1412/529, loss: 0.013309722766280174 2023-01-22 21:21:10.746764: step: 1416/529, loss: 0.003684376133605838 2023-01-22 21:21:11.804455: step: 1420/529, loss: 0.011502178385853767 2023-01-22 21:21:12.848053: step: 1424/529, loss: 0.0029402689542621374 2023-01-22 21:21:13.903941: step: 1428/529, loss: 0.011624012142419815 2023-01-22 21:21:14.989136: step: 1432/529, loss: 0.0010776517447084188 2023-01-22 21:21:16.034988: step: 1436/529, loss: 0.015498697757720947 2023-01-22 21:21:17.093372: step: 1440/529, loss: 0.005593031644821167 2023-01-22 21:21:18.143298: step: 1444/529, loss: 0.00390962278470397 2023-01-22 21:21:19.189389: step: 1448/529, loss: 0.005855011288076639 2023-01-22 21:21:20.243796: step: 1452/529, loss: 0.003248876892030239 2023-01-22 21:21:21.279771: step: 1456/529, loss: 0.0023326303344219923 2023-01-22 21:21:22.317813: step: 1460/529, loss: 1.2782409612555057e-05 2023-01-22 21:21:23.362045: step: 1464/529, loss: 0.0012349772732704878 2023-01-22 21:21:24.404702: step: 1468/529, loss: 0.0027779163792729378 2023-01-22 21:21:25.457206: step: 1472/529, loss: 0.0056990971788764 2023-01-22 21:21:26.517495: step: 1476/529, loss: 0.0035232354421168566 2023-01-22 21:21:27.566403: step: 1480/529, loss: 0.0039021545089781284 2023-01-22 21:21:28.600579: step: 1484/529, loss: 0.027668049558997154 2023-01-22 21:21:29.637878: step: 1488/529, loss: 0.0005954480729997158 2023-01-22 21:21:30.708445: step: 1492/529, loss: 0.0011912855552509427 2023-01-22 21:21:31.758722: step: 1496/529, loss: 0.00047708695637993515 2023-01-22 21:21:32.797600: step: 1500/529, loss: 0.004602633882313967 2023-01-22 21:21:33.842201: step: 1504/529, loss: 0.0015028327470645308 2023-01-22 21:21:34.887025: step: 1508/529, loss: 0.010026974603533745 2023-01-22 21:21:35.963935: step: 1512/529, loss: 0.003305347403511405 2023-01-22 21:21:37.026484: step: 1516/529, loss: 0.004152633249759674 2023-01-22 21:21:38.078890: step: 1520/529, loss: 0.0019243067363277078 2023-01-22 21:21:39.147768: step: 1524/529, loss: 0.0026344864163547754 2023-01-22 21:21:40.183206: step: 1528/529, loss: 0.0036074661184102297 2023-01-22 21:21:41.225207: step: 1532/529, loss: 0.0037625590339303017 2023-01-22 21:21:42.271076: step: 1536/529, loss: 0.0075498102232813835 2023-01-22 21:21:43.305146: step: 1540/529, loss: 0.008727450855076313 2023-01-22 21:21:44.376571: step: 1544/529, loss: 0.011576127260923386 2023-01-22 21:21:45.417439: step: 1548/529, loss: 0.003296859096735716 2023-01-22 21:21:46.469777: step: 1552/529, loss: 0.037110909819602966 2023-01-22 21:21:47.512948: step: 1556/529, loss: 0.005566664971411228 2023-01-22 21:21:48.570108: step: 1560/529, loss: 0.005826834589242935 2023-01-22 21:21:49.627357: step: 1564/529, loss: 0.024887530133128166 2023-01-22 21:21:50.673181: step: 1568/529, loss: 0.006921121384948492 2023-01-22 21:21:51.712309: step: 1572/529, loss: 0.00469944067299366 2023-01-22 21:21:52.741376: step: 1576/529, loss: 0.005027053412050009 2023-01-22 21:21:53.793362: step: 1580/529, loss: 0.0029274122789502144 2023-01-22 21:21:54.848765: step: 1584/529, loss: 0.0025618746876716614 2023-01-22 21:21:55.881111: step: 1588/529, loss: 0.02697928622364998 2023-01-22 21:21:56.924400: step: 1592/529, loss: 0.005197387654334307 2023-01-22 21:21:57.957009: step: 1596/529, loss: 1.45377816807013e-05 2023-01-22 21:21:59.016046: step: 1600/529, loss: 0.009968889877200127 2023-01-22 21:22:00.067974: step: 1604/529, loss: 0.002264374867081642 2023-01-22 21:22:01.132760: step: 1608/529, loss: 0.013011283241212368 2023-01-22 21:22:02.186188: step: 1612/529, loss: 0.0013504907255992293 2023-01-22 21:22:03.232074: step: 1616/529, loss: 0.008070433512330055 2023-01-22 21:22:04.304319: step: 1620/529, loss: 0.007053612265735865 2023-01-22 21:22:05.344807: step: 1624/529, loss: 0.007642920594662428 2023-01-22 21:22:06.385793: step: 1628/529, loss: 0.003725780174136162 2023-01-22 21:22:07.425841: step: 1632/529, loss: 0.007207238115370274 2023-01-22 21:22:08.478336: step: 1636/529, loss: 0.004764213226735592 2023-01-22 21:22:09.525385: step: 1640/529, loss: 0.004099587444216013 2023-01-22 21:22:10.556999: step: 1644/529, loss: 0.02866634726524353 2023-01-22 21:22:11.613850: step: 1648/529, loss: 0.00864456593990326 2023-01-22 21:22:12.658498: step: 1652/529, loss: 0.0008208337821997702 2023-01-22 21:22:13.707988: step: 1656/529, loss: 0.0026163270231336355 2023-01-22 21:22:14.750512: step: 1660/529, loss: 0.0034363132435828447 2023-01-22 21:22:15.803514: step: 1664/529, loss: 0.0016965939430519938 2023-01-22 21:22:16.854005: step: 1668/529, loss: 0.0014859935035929084 2023-01-22 21:22:17.901019: step: 1672/529, loss: 0.00024222467618528754 2023-01-22 21:22:18.943651: step: 1676/529, loss: 0.008612056262791157 2023-01-22 21:22:19.995681: step: 1680/529, loss: 0.0019694555085152388 2023-01-22 21:22:21.046014: step: 1684/529, loss: 0.009304205887019634 2023-01-22 21:22:22.102611: step: 1688/529, loss: 0.002786159748211503 2023-01-22 21:22:23.150235: step: 1692/529, loss: 0.002137355040758848 2023-01-22 21:22:24.206747: step: 1696/529, loss: 0.005678205285221338 2023-01-22 21:22:25.256150: step: 1700/529, loss: 0.0030227606184780598 2023-01-22 21:22:26.316833: step: 1704/529, loss: 0.0007980649243108928 2023-01-22 21:22:27.364430: step: 1708/529, loss: 0.007143289782106876 2023-01-22 21:22:28.411228: step: 1712/529, loss: 0.005251258611679077 2023-01-22 21:22:29.464332: step: 1716/529, loss: 0.009905372746288776 2023-01-22 21:22:30.520814: step: 1720/529, loss: 0.0006169564439915121 2023-01-22 21:22:31.564656: step: 1724/529, loss: 0.0006739806267432868 2023-01-22 21:22:32.604490: step: 1728/529, loss: 0.0003520239260978997 2023-01-22 21:22:33.641147: step: 1732/529, loss: 0.0025270460173487663 2023-01-22 21:22:34.682414: step: 1736/529, loss: 0.007693251129239798 2023-01-22 21:22:35.740815: step: 1740/529, loss: 0.003914014436304569 2023-01-22 21:22:36.794362: step: 1744/529, loss: 0.010403660126030445 2023-01-22 21:22:37.840450: step: 1748/529, loss: 1.3354620023164898e-06 2023-01-22 21:22:38.885101: step: 1752/529, loss: 0.0007473742589354515 2023-01-22 21:22:39.937141: step: 1756/529, loss: 0.004681863822042942 2023-01-22 21:22:40.979379: step: 1760/529, loss: 0.005493064410984516 2023-01-22 21:22:42.036230: step: 1764/529, loss: 0.00654172757640481 2023-01-22 21:22:43.084938: step: 1768/529, loss: 0.010084450244903564 2023-01-22 21:22:44.138795: step: 1772/529, loss: 0.004706752020865679 2023-01-22 21:22:45.175031: step: 1776/529, loss: 0.0009514093981124461 2023-01-22 21:22:46.211550: step: 1780/529, loss: 0.0014096301747485995 2023-01-22 21:22:47.251619: step: 1784/529, loss: 0.00012572364357765764 2023-01-22 21:22:48.307696: step: 1788/529, loss: 0.0232260599732399 2023-01-22 21:22:49.359618: step: 1792/529, loss: 0.0031046848744153976 2023-01-22 21:22:50.408875: step: 1796/529, loss: 0.006571891251951456 2023-01-22 21:22:51.447783: step: 1800/529, loss: 0.00016387851792387664 2023-01-22 21:22:52.505233: step: 1804/529, loss: 0.004842417314648628 2023-01-22 21:22:53.551173: step: 1808/529, loss: 0.005944418720901012 2023-01-22 21:22:54.613186: step: 1812/529, loss: 0.017155833542346954 2023-01-22 21:22:55.651525: step: 1816/529, loss: 0.011897356249392033 2023-01-22 21:22:56.689769: step: 1820/529, loss: 0.005222734529525042 2023-01-22 21:22:57.720662: step: 1824/529, loss: 0.005071532912552357 2023-01-22 21:22:58.753231: step: 1828/529, loss: 0.008446223102509975 2023-01-22 21:22:59.805477: step: 1832/529, loss: 0.004176684655249119 2023-01-22 21:23:00.870398: step: 1836/529, loss: 0.006557229906320572 2023-01-22 21:23:01.913953: step: 1840/529, loss: 0.0024884988088160753 2023-01-22 21:23:02.977338: step: 1844/529, loss: 0.00580362556502223 2023-01-22 21:23:04.029408: step: 1848/529, loss: 0.0016856566071510315 2023-01-22 21:23:05.094798: step: 1852/529, loss: 0.01191211398690939 2023-01-22 21:23:06.150160: step: 1856/529, loss: 0.0036614499986171722 2023-01-22 21:23:07.195346: step: 1860/529, loss: 0.03037346713244915 2023-01-22 21:23:08.243749: step: 1864/529, loss: 0.0011730212718248367 2023-01-22 21:23:09.293922: step: 1868/529, loss: 0.006767342798411846 2023-01-22 21:23:10.352314: step: 1872/529, loss: 0.006049699150025845 2023-01-22 21:23:11.401657: step: 1876/529, loss: 0.0009717896464280784 2023-01-22 21:23:12.467263: step: 1880/529, loss: 0.0024402965791523457 2023-01-22 21:23:13.527722: step: 1884/529, loss: 0.007370056584477425 2023-01-22 21:23:14.584153: step: 1888/529, loss: 0.007271936628967524 2023-01-22 21:23:15.631538: step: 1892/529, loss: 0.0024247579276561737 2023-01-22 21:23:16.680324: step: 1896/529, loss: 0.0014247093349695206 2023-01-22 21:23:17.735344: step: 1900/529, loss: 0.00316026178188622 2023-01-22 21:23:18.772884: step: 1904/529, loss: 0.0006484553450718522 2023-01-22 21:23:19.816361: step: 1908/529, loss: 0.003457282204180956 2023-01-22 21:23:20.855128: step: 1912/529, loss: 0.017518872395157814 2023-01-22 21:23:21.898444: step: 1916/529, loss: 0.01227415632456541 2023-01-22 21:23:22.961189: step: 1920/529, loss: 0.001206504413858056 2023-01-22 21:23:24.026049: step: 1924/529, loss: 0.003585570491850376 2023-01-22 21:23:25.080390: step: 1928/529, loss: 0.0017001867527142167 2023-01-22 21:23:26.139063: step: 1932/529, loss: 0.01086181029677391 2023-01-22 21:23:27.184283: step: 1936/529, loss: 0.01084099430590868 2023-01-22 21:23:28.249675: step: 1940/529, loss: 0.002988401334732771 2023-01-22 21:23:29.287779: step: 1944/529, loss: 0.0008111881325021386 2023-01-22 21:23:30.339632: step: 1948/529, loss: 0.0033058254048228264 2023-01-22 21:23:31.389346: step: 1952/529, loss: 0.006359463557600975 2023-01-22 21:23:32.443697: step: 1956/529, loss: 0.006494003813713789 2023-01-22 21:23:33.492900: step: 1960/529, loss: 0.0013034224975854158 2023-01-22 21:23:34.545837: step: 1964/529, loss: 0.0004844609065912664 2023-01-22 21:23:35.591466: step: 1968/529, loss: 0.0025299948174506426 2023-01-22 21:23:36.625054: step: 1972/529, loss: 0.004244665149599314 2023-01-22 21:23:37.688034: step: 1976/529, loss: 0.0024343086406588554 2023-01-22 21:23:38.740940: step: 1980/529, loss: 0.006696697324514389 2023-01-22 21:23:39.789426: step: 1984/529, loss: 0.001419520704075694 2023-01-22 21:23:40.842150: step: 1988/529, loss: 0.018003270030021667 2023-01-22 21:23:41.899468: step: 1992/529, loss: 0.017761848866939545 2023-01-22 21:23:42.933535: step: 1996/529, loss: 0.005436298903077841 2023-01-22 21:23:43.986635: step: 2000/529, loss: 0.001219843397848308 2023-01-22 21:23:45.014560: step: 2004/529, loss: 0.0013173733605071902 2023-01-22 21:23:46.044647: step: 2008/529, loss: 0.0013252663193270564 2023-01-22 21:23:47.110244: step: 2012/529, loss: 0.004386176820844412 2023-01-22 21:23:48.165144: step: 2016/529, loss: 0.003439849941059947 2023-01-22 21:23:49.236419: step: 2020/529, loss: 0.009806548245251179 2023-01-22 21:23:50.283479: step: 2024/529, loss: 0.022853875532746315 2023-01-22 21:23:51.331971: step: 2028/529, loss: 9.54199640545994e-05 2023-01-22 21:23:52.386590: step: 2032/529, loss: 0.007101565599441528 2023-01-22 21:23:53.453392: step: 2036/529, loss: 0.014761477708816528 2023-01-22 21:23:54.490192: step: 2040/529, loss: 0.002811864949762821 2023-01-22 21:23:55.546229: step: 2044/529, loss: 0.0033866928424686193 2023-01-22 21:23:56.593298: step: 2048/529, loss: 0.017494065687060356 2023-01-22 21:23:57.643478: step: 2052/529, loss: 0.001159417093731463 2023-01-22 21:23:58.699009: step: 2056/529, loss: 0.04199489578604698 2023-01-22 21:23:59.737982: step: 2060/529, loss: 0.00893386173993349 2023-01-22 21:24:00.808936: step: 2064/529, loss: 0.007701272610574961 2023-01-22 21:24:01.844755: step: 2068/529, loss: 0.00024471365031786263 2023-01-22 21:24:02.909858: step: 2072/529, loss: 0.01347952801734209 2023-01-22 21:24:03.931174: step: 2076/529, loss: 0.001283814199268818 2023-01-22 21:24:04.984324: step: 2080/529, loss: 0.006390150170773268 2023-01-22 21:24:06.023222: step: 2084/529, loss: 0.014116287231445312 2023-01-22 21:24:07.068504: step: 2088/529, loss: 0.0037055735010653734 2023-01-22 21:24:08.123051: step: 2092/529, loss: 0.008043169975280762 2023-01-22 21:24:09.159587: step: 2096/529, loss: 0.0025078430771827698 2023-01-22 21:24:10.215106: step: 2100/529, loss: 0.009062597528100014 2023-01-22 21:24:11.263100: step: 2104/529, loss: 0.004181451629847288 2023-01-22 21:24:12.300317: step: 2108/529, loss: 0.0007406818913295865 2023-01-22 21:24:13.347489: step: 2112/529, loss: 0.0006511376122944057 2023-01-22 21:24:14.418999: step: 2116/529, loss: 0.02515660598874092 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33662710511033683, 'r': 0.31427046625101657, 'f1': 0.32506483947848036}, 'combined': 0.23952146066835395, 'stategy': 1, 'epoch': 9} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37988189869266353, 'r': 0.3205460698392719, 'f1': 0.3477007061389547}, 'combined': 0.24461356210780735, 'stategy': 1, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3134928837181834, 'r': 0.3414514520953269, 'f1': 0.3268754137225019}, 'combined': 0.240855568006054, 'stategy': 1, 'epoch': 9} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37858954718555105, 'r': 0.3300693523849617, 'f1': 0.35266841843088126}, 'combined': 0.25039457708592566, 'stategy': 1, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.329059480282058, 'r': 0.3415475060992139, 'f1': 0.3351872173450386}, 'combined': 0.2469800548858179, 'stategy': 1, 'epoch': 9} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3753523501222019, 'r': 0.2990372562491888, 'f1': 0.3328768292001654}, 'combined': 0.23634254873211744, 'stategy': 1, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 10 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 21:26:43.939820: step: 4/529, loss: 0.01954532414674759 2023-01-22 21:26:44.971503: step: 8/529, loss: 0.004827527794986963 2023-01-22 21:26:46.024324: step: 12/529, loss: 0.0023530549369752407 2023-01-22 21:26:47.062373: step: 16/529, loss: 0.007843287661671638 2023-01-22 21:26:48.092303: step: 20/529, loss: 0.00231068953871727 2023-01-22 21:26:49.139082: step: 24/529, loss: 0.00043287378503009677 2023-01-22 21:26:50.189312: step: 28/529, loss: 0.0011379376519471407 2023-01-22 21:26:51.223743: step: 32/529, loss: 0.002764686942100525 2023-01-22 21:26:52.265889: step: 36/529, loss: 0.001859096810221672 2023-01-22 21:26:53.308381: step: 40/529, loss: 0.003255079500377178 2023-01-22 21:26:54.338035: step: 44/529, loss: 0.005087681580334902 2023-01-22 21:26:55.376299: step: 48/529, loss: 0.0029920516535639763 2023-01-22 21:26:56.406714: step: 52/529, loss: 0.0067216381430625916 2023-01-22 21:26:57.446909: step: 56/529, loss: 0.004546578042209148 2023-01-22 21:26:58.502731: step: 60/529, loss: 0.0018207207322120667 2023-01-22 21:26:59.538387: step: 64/529, loss: 6.823855073889717e-05 2023-01-22 21:27:00.578965: step: 68/529, loss: 0.0031718455720692873 2023-01-22 21:27:01.609609: step: 72/529, loss: 0.006877357605844736 2023-01-22 21:27:02.667906: step: 76/529, loss: 0.013448301702737808 2023-01-22 21:27:03.692100: step: 80/529, loss: 0.007238812744617462 2023-01-22 21:27:04.751959: step: 84/529, loss: 0.014285692013800144 2023-01-22 21:27:05.825681: step: 88/529, loss: 0.0036882429849356413 2023-01-22 21:27:06.861529: step: 92/529, loss: 0.0005338211194612086 2023-01-22 21:27:07.918915: step: 96/529, loss: 0.0038916023913770914 2023-01-22 21:27:08.956896: step: 100/529, loss: 0.00026086208526976407 2023-01-22 21:27:09.988536: step: 104/529, loss: 0.002933319192379713 2023-01-22 21:27:11.018199: step: 108/529, loss: 0.003995558712631464 2023-01-22 21:27:12.049712: step: 112/529, loss: 0.005591453984379768 2023-01-22 21:27:13.086118: step: 116/529, loss: 0.011964301578700542 2023-01-22 21:27:14.117808: step: 120/529, loss: 0.0051795560866594315 2023-01-22 21:27:15.168665: step: 124/529, loss: 0.007419218774884939 2023-01-22 21:27:16.212813: step: 128/529, loss: 0.023781759664416313 2023-01-22 21:27:17.250681: step: 132/529, loss: 0.003751203417778015 2023-01-22 21:27:18.290824: step: 136/529, loss: 0.004550854209810495 2023-01-22 21:27:19.338889: step: 140/529, loss: 0.02266247384250164 2023-01-22 21:27:20.383021: step: 144/529, loss: 0.010759777389466763 2023-01-22 21:27:21.462384: step: 148/529, loss: 0.002957502380013466 2023-01-22 21:27:22.511799: step: 152/529, loss: 0.0010958747006952763 2023-01-22 21:27:23.545413: step: 156/529, loss: 0.003890435677021742 2023-01-22 21:27:24.588850: step: 160/529, loss: 0.003994130529463291 2023-01-22 21:27:25.623269: step: 164/529, loss: 0.0005350682185962796 2023-01-22 21:27:26.681126: step: 168/529, loss: 0.01354897953569889 2023-01-22 21:27:27.738847: step: 172/529, loss: 0.000780853966716677 2023-01-22 21:27:28.786000: step: 176/529, loss: 0.00374949281103909 2023-01-22 21:27:29.841278: step: 180/529, loss: 0.00015729459119029343 2023-01-22 21:27:30.904550: step: 184/529, loss: 0.0020107519812881947 2023-01-22 21:27:31.970903: step: 188/529, loss: 0.00917111523449421 2023-01-22 21:27:33.015922: step: 192/529, loss: 0.00017682556062936783 2023-01-22 21:27:34.064995: step: 196/529, loss: 0.002421912271529436 2023-01-22 21:27:35.116475: step: 200/529, loss: 0.01361052505671978 2023-01-22 21:27:36.156950: step: 204/529, loss: 0.0035274813417345285 2023-01-22 21:27:37.194837: step: 208/529, loss: 0.011465724557638168 2023-01-22 21:27:38.239523: step: 212/529, loss: 0.005470445845276117 2023-01-22 21:27:39.286940: step: 216/529, loss: 0.0017092960188165307 2023-01-22 21:27:40.335751: step: 220/529, loss: 0.0019453064305707812 2023-01-22 21:27:41.388751: step: 224/529, loss: 0.0016606864519417286 2023-01-22 21:27:42.448195: step: 228/529, loss: 0.0024217041209340096 2023-01-22 21:27:43.488939: step: 232/529, loss: 0.0025438994634896517 2023-01-22 21:27:44.528778: step: 236/529, loss: 0.010617089457809925 2023-01-22 21:27:45.559268: step: 240/529, loss: 0.0002892845368478447 2023-01-22 21:27:46.616423: step: 244/529, loss: 0.0010042962385341525 2023-01-22 21:27:47.673644: step: 248/529, loss: 0.0031687342561781406 2023-01-22 21:27:48.725071: step: 252/529, loss: 0.0027466958854347467 2023-01-22 21:27:49.775796: step: 256/529, loss: 0.004958385601639748 2023-01-22 21:27:50.834624: step: 260/529, loss: 0.005573937203735113 2023-01-22 21:27:51.900283: step: 264/529, loss: 0.012827573344111443 2023-01-22 21:27:52.948874: step: 268/529, loss: 0.0027617281302809715 2023-01-22 21:27:53.983648: step: 272/529, loss: 0.0012532523833215237 2023-01-22 21:27:55.044155: step: 276/529, loss: 0.003218661993741989 2023-01-22 21:27:56.080870: step: 280/529, loss: 0.01514330692589283 2023-01-22 21:27:57.139161: step: 284/529, loss: 0.005460134241729975 2023-01-22 21:27:58.207369: step: 288/529, loss: 0.008205071091651917 2023-01-22 21:27:59.244640: step: 292/529, loss: 0.024040840566158295 2023-01-22 21:28:00.297696: step: 296/529, loss: 0.00874184537678957 2023-01-22 21:28:01.342658: step: 300/529, loss: 0.0034786139149218798 2023-01-22 21:28:02.396631: step: 304/529, loss: 0.0006200448842719197 2023-01-22 21:28:03.447981: step: 308/529, loss: 0.0003087203367613256 2023-01-22 21:28:04.513541: step: 312/529, loss: 0.02923998236656189 2023-01-22 21:28:05.565330: step: 316/529, loss: 0.0016877205343917012 2023-01-22 21:28:06.626452: step: 320/529, loss: 0.004156186711043119 2023-01-22 21:28:07.663873: step: 324/529, loss: 0.0034036983270198107 2023-01-22 21:28:08.700846: step: 328/529, loss: 0.007906085811555386 2023-01-22 21:28:09.754922: step: 332/529, loss: 0.003128611948341131 2023-01-22 21:28:10.796153: step: 336/529, loss: 0.0002056318917311728 2023-01-22 21:28:11.831562: step: 340/529, loss: 0.0006218391354195774 2023-01-22 21:28:12.895232: step: 344/529, loss: 0.00024410530750174075 2023-01-22 21:28:13.933754: step: 348/529, loss: 0.009633667767047882 2023-01-22 21:28:14.983584: step: 352/529, loss: 0.005283441860228777 2023-01-22 21:28:16.034258: step: 356/529, loss: 0.01262367982417345 2023-01-22 21:28:17.097296: step: 360/529, loss: 0.0043320441618561745 2023-01-22 21:28:18.166977: step: 364/529, loss: 0.006589141208678484 2023-01-22 21:28:19.235909: step: 368/529, loss: 0.0113912895321846 2023-01-22 21:28:20.281935: step: 372/529, loss: 0.008942551910877228 2023-01-22 21:28:21.345287: step: 376/529, loss: 0.014269191771745682 2023-01-22 21:28:22.388795: step: 380/529, loss: 0.0004190189065411687 2023-01-22 21:28:23.449691: step: 384/529, loss: 0.005326719954609871 2023-01-22 21:28:24.508748: step: 388/529, loss: 0.004991291556507349 2023-01-22 21:28:25.567912: step: 392/529, loss: 0.001971410820260644 2023-01-22 21:28:26.617476: step: 396/529, loss: 0.002824721857905388 2023-01-22 21:28:27.673205: step: 400/529, loss: 0.0025639294181019068 2023-01-22 21:28:28.709573: step: 404/529, loss: 3.207637200830504e-05 2023-01-22 21:28:29.757358: step: 408/529, loss: 0.00017237129213754088 2023-01-22 21:28:30.798489: step: 412/529, loss: 0.0005034393398091197 2023-01-22 21:28:31.860222: step: 416/529, loss: 0.0036588930524885654 2023-01-22 21:28:32.903751: step: 420/529, loss: 0.013937288895249367 2023-01-22 21:28:33.965472: step: 424/529, loss: 0.005364018492400646 2023-01-22 21:28:35.010747: step: 428/529, loss: 0.009514844976365566 2023-01-22 21:28:36.076232: step: 432/529, loss: 0.004260318353772163 2023-01-22 21:28:37.117565: step: 436/529, loss: 0.013584223575890064 2023-01-22 21:28:38.161095: step: 440/529, loss: 0.0012761547695845366 2023-01-22 21:28:39.226661: step: 444/529, loss: 0.004958345089107752 2023-01-22 21:28:40.274209: step: 448/529, loss: 0.0019658866804093122 2023-01-22 21:28:41.320520: step: 452/529, loss: 0.0008602796588093042 2023-01-22 21:28:42.394711: step: 456/529, loss: 0.005384955555200577 2023-01-22 21:28:43.465417: step: 460/529, loss: 0.0273515023291111 2023-01-22 21:28:44.534986: step: 464/529, loss: 0.0008273616549558938 2023-01-22 21:28:45.577534: step: 468/529, loss: 0.0031173808965831995 2023-01-22 21:28:46.623025: step: 472/529, loss: 0.0018533933907747269 2023-01-22 21:28:47.679883: step: 476/529, loss: 0.009084579534828663 2023-01-22 21:28:48.737324: step: 480/529, loss: 0.012417135760188103 2023-01-22 21:28:49.803280: step: 484/529, loss: 0.0009216720936819911 2023-01-22 21:28:50.847308: step: 488/529, loss: 0.0036223020870238543 2023-01-22 21:28:51.885037: step: 492/529, loss: 0.005514702759683132 2023-01-22 21:28:52.926142: step: 496/529, loss: 0.0013914547162130475 2023-01-22 21:28:53.982558: step: 500/529, loss: 0.0010951007716357708 2023-01-22 21:28:55.045156: step: 504/529, loss: 0.006260767113417387 2023-01-22 21:28:56.095617: step: 508/529, loss: 0.0001973892649402842 2023-01-22 21:28:57.144660: step: 512/529, loss: 0.005953501909971237 2023-01-22 21:28:58.204582: step: 516/529, loss: 0.009086262434720993 2023-01-22 21:28:59.259660: step: 520/529, loss: 0.0066679841838777065 2023-01-22 21:29:00.302078: step: 524/529, loss: 0.0023904310073703527 2023-01-22 21:29:01.361308: step: 528/529, loss: 0.0009538081358186901 2023-01-22 21:29:02.420087: step: 532/529, loss: 0.00764401163905859 2023-01-22 21:29:03.473775: step: 536/529, loss: 0.013999774120748043 2023-01-22 21:29:04.521572: step: 540/529, loss: 0.0009262578678317368 2023-01-22 21:29:05.569965: step: 544/529, loss: 0.001954246312379837 2023-01-22 21:29:06.617640: step: 548/529, loss: 0.0017167457845062017 2023-01-22 21:29:07.669331: step: 552/529, loss: 0.0032756426371634007 2023-01-22 21:29:08.725530: step: 556/529, loss: 0.008779548108577728 2023-01-22 21:29:09.771610: step: 560/529, loss: 0.006169681437313557 2023-01-22 21:29:10.822144: step: 564/529, loss: 0.005783047992736101 2023-01-22 21:29:11.879753: step: 568/529, loss: 0.0008857750217430294 2023-01-22 21:29:12.929618: step: 572/529, loss: 0.001680071814917028 2023-01-22 21:29:13.978796: step: 576/529, loss: 0.016182927414774895 2023-01-22 21:29:15.023032: step: 580/529, loss: 0.00239744083955884 2023-01-22 21:29:16.083544: step: 584/529, loss: 0.004633632488548756 2023-01-22 21:29:17.146671: step: 588/529, loss: 0.0033443255815654993 2023-01-22 21:29:18.179741: step: 592/529, loss: 0.0024107503704726696 2023-01-22 21:29:19.241993: step: 596/529, loss: 0.003631486790254712 2023-01-22 21:29:20.282998: step: 600/529, loss: 0.005317643750458956 2023-01-22 21:29:21.329525: step: 604/529, loss: 0.0025396232958883047 2023-01-22 21:29:22.379849: step: 608/529, loss: 0.003428045194596052 2023-01-22 21:29:23.422709: step: 612/529, loss: 0.0021843810100108385 2023-01-22 21:29:24.470353: step: 616/529, loss: 0.0038149873726069927 2023-01-22 21:29:25.514867: step: 620/529, loss: 0.0032608467154204845 2023-01-22 21:29:26.564404: step: 624/529, loss: 0.014715909957885742 2023-01-22 21:29:27.616649: step: 628/529, loss: 0.008677640929818153 2023-01-22 21:29:28.667435: step: 632/529, loss: 0.0063589587807655334 2023-01-22 21:29:29.721516: step: 636/529, loss: 0.00017495227803010494 2023-01-22 21:29:30.781132: step: 640/529, loss: 0.005503390915691853 2023-01-22 21:29:31.839507: step: 644/529, loss: 0.0024252599105238914 2023-01-22 21:29:32.892345: step: 648/529, loss: 0.007494138553738594 2023-01-22 21:29:33.949244: step: 652/529, loss: 0.010659885592758656 2023-01-22 21:29:35.006847: step: 656/529, loss: 0.00572338979691267 2023-01-22 21:29:36.053332: step: 660/529, loss: 0.01918092370033264 2023-01-22 21:29:37.105006: step: 664/529, loss: 0.003935312386602163 2023-01-22 21:29:38.162605: step: 668/529, loss: 0.004943212028592825 2023-01-22 21:29:39.206552: step: 672/529, loss: 0.0056266882456839085 2023-01-22 21:29:40.246661: step: 676/529, loss: 0.005428919568657875 2023-01-22 21:29:41.287217: step: 680/529, loss: 0.001886368845589459 2023-01-22 21:29:42.341924: step: 684/529, loss: 0.0029110547620803118 2023-01-22 21:29:43.380347: step: 688/529, loss: 0.00042290231795050204 2023-01-22 21:29:44.415248: step: 692/529, loss: 0.000785257900133729 2023-01-22 21:29:45.491510: step: 696/529, loss: 0.020851055160164833 2023-01-22 21:29:46.542585: step: 700/529, loss: 0.02555830404162407 2023-01-22 21:29:47.576924: step: 704/529, loss: 0.0007838797755539417 2023-01-22 21:29:48.630867: step: 708/529, loss: 3.426442344789393e-05 2023-01-22 21:29:49.688860: step: 712/529, loss: 0.0012816892703995109 2023-01-22 21:29:50.738396: step: 716/529, loss: 0.01079493761062622 2023-01-22 21:29:51.797526: step: 720/529, loss: 0.004079950973391533 2023-01-22 21:29:52.852679: step: 724/529, loss: 0.0009261576342396438 2023-01-22 21:29:53.901353: step: 728/529, loss: 0.0036834871862083673 2023-01-22 21:29:54.948260: step: 732/529, loss: 0.004398300778120756 2023-01-22 21:29:55.996751: step: 736/529, loss: 0.006478363182395697 2023-01-22 21:29:57.044328: step: 740/529, loss: 0.01300923153758049 2023-01-22 21:29:58.099012: step: 744/529, loss: 0.0057972632348537445 2023-01-22 21:29:59.169809: step: 748/529, loss: 0.0016906349919736385 2023-01-22 21:30:00.241841: step: 752/529, loss: 0.027488330379128456 2023-01-22 21:30:01.294153: step: 756/529, loss: 0.0013829410308972 2023-01-22 21:30:02.329724: step: 760/529, loss: 1.2820759366150014e-05 2023-01-22 21:30:03.389864: step: 764/529, loss: 0.0007968873251229525 2023-01-22 21:30:04.439910: step: 768/529, loss: 0.010744359344244003 2023-01-22 21:30:05.484041: step: 772/529, loss: 0.0016503616934642196 2023-01-22 21:30:06.538560: step: 776/529, loss: 0.004419537261128426 2023-01-22 21:30:07.585303: step: 780/529, loss: 0.00807860679924488 2023-01-22 21:30:08.640026: step: 784/529, loss: 0.012245913036167622 2023-01-22 21:30:09.692926: step: 788/529, loss: 0.00864124670624733 2023-01-22 21:30:10.742865: step: 792/529, loss: 5.279907782096416e-05 2023-01-22 21:30:11.788013: step: 796/529, loss: 0.0022962461225688457 2023-01-22 21:30:12.838191: step: 800/529, loss: 0.008585786446928978 2023-01-22 21:30:13.893616: step: 804/529, loss: 0.0009987863013520837 2023-01-22 21:30:14.942651: step: 808/529, loss: 0.0019152145832777023 2023-01-22 21:30:15.983563: step: 812/529, loss: 0.002717895433306694 2023-01-22 21:30:17.056235: step: 816/529, loss: 0.0072733149863779545 2023-01-22 21:30:18.109424: step: 820/529, loss: 0.0013716627145186067 2023-01-22 21:30:19.157409: step: 824/529, loss: 0.003835399867966771 2023-01-22 21:30:20.212823: step: 828/529, loss: 0.0007232326315715909 2023-01-22 21:30:21.261689: step: 832/529, loss: 0.002630772301927209 2023-01-22 21:30:22.317252: step: 836/529, loss: 0.004273257218301296 2023-01-22 21:30:23.366356: step: 840/529, loss: 0.0020624990575015545 2023-01-22 21:30:24.411932: step: 844/529, loss: 0.000740404415410012 2023-01-22 21:30:25.473945: step: 848/529, loss: 0.0037550434935837984 2023-01-22 21:30:26.528532: step: 852/529, loss: 0.004778649192303419 2023-01-22 21:30:27.572629: step: 856/529, loss: 0.0029009883292019367 2023-01-22 21:30:28.607936: step: 860/529, loss: 0.0007351295789703727 2023-01-22 21:30:29.670990: step: 864/529, loss: 5.348044214770198e-05 2023-01-22 21:30:30.731252: step: 868/529, loss: 0.0013033861760050058 2023-01-22 21:30:31.774009: step: 872/529, loss: 0.0012773633934557438 2023-01-22 21:30:32.834462: step: 876/529, loss: 0.0026880817022174597 2023-01-22 21:30:33.885030: step: 880/529, loss: 0.0012941344175487757 2023-01-22 21:30:34.923161: step: 884/529, loss: 0.003138786880299449 2023-01-22 21:30:35.954513: step: 888/529, loss: 0.0030836386140435934 2023-01-22 21:30:36.997671: step: 892/529, loss: 0.0 2023-01-22 21:30:38.050625: step: 896/529, loss: 0.006097411271184683 2023-01-22 21:30:39.094341: step: 900/529, loss: 0.030221397057175636 2023-01-22 21:30:40.136656: step: 904/529, loss: 0.005850756540894508 2023-01-22 21:30:41.169145: step: 908/529, loss: 0.00020152056822553277 2023-01-22 21:30:42.209530: step: 912/529, loss: 0.0044646733440458775 2023-01-22 21:30:43.264891: step: 916/529, loss: 0.006392831448465586 2023-01-22 21:30:44.314033: step: 920/529, loss: 0.0030963122844696045 2023-01-22 21:30:45.368021: step: 924/529, loss: 0.00047330569941550493 2023-01-22 21:30:46.432784: step: 928/529, loss: 0.006304592825472355 2023-01-22 21:30:47.458574: step: 932/529, loss: 0.002993023255839944 2023-01-22 21:30:48.493260: step: 936/529, loss: 0.00042874671635217965 2023-01-22 21:30:49.547524: step: 940/529, loss: 0.0001371793623548001 2023-01-22 21:30:50.592376: step: 944/529, loss: 3.6573783290805295e-05 2023-01-22 21:30:51.633078: step: 948/529, loss: 0.00013529634452424943 2023-01-22 21:30:52.676968: step: 952/529, loss: 0.019869500771164894 2023-01-22 21:30:53.717654: step: 956/529, loss: 0.038167525082826614 2023-01-22 21:30:54.778823: step: 960/529, loss: 0.0019290340133011341 2023-01-22 21:30:55.830103: step: 964/529, loss: 0.012385385110974312 2023-01-22 21:30:56.878865: step: 968/529, loss: 0.0015146546065807343 2023-01-22 21:30:57.932702: step: 972/529, loss: 0.0008462521946057677 2023-01-22 21:30:58.981414: step: 976/529, loss: 0.0033068947959691286 2023-01-22 21:31:00.024979: step: 980/529, loss: 0.0017208864446729422 2023-01-22 21:31:01.070387: step: 984/529, loss: 0.0017936790827661753 2023-01-22 21:31:02.137244: step: 988/529, loss: 0.0028875700663775206 2023-01-22 21:31:03.198424: step: 992/529, loss: 0.004011997953057289 2023-01-22 21:31:04.259209: step: 996/529, loss: 0.010539746843278408 2023-01-22 21:31:05.307231: step: 1000/529, loss: 0.015139748342335224 2023-01-22 21:31:06.350043: step: 1004/529, loss: 0.008691334165632725 2023-01-22 21:31:07.404369: step: 1008/529, loss: 0.00022248673485592008 2023-01-22 21:31:08.452230: step: 1012/529, loss: 0.011865814216434956 2023-01-22 21:31:09.492493: step: 1016/529, loss: 0.003201371291652322 2023-01-22 21:31:10.542341: step: 1020/529, loss: 0.003212942508980632 2023-01-22 21:31:11.595840: step: 1024/529, loss: 0.0038315316196531057 2023-01-22 21:31:12.630284: step: 1028/529, loss: 0.004290254786610603 2023-01-22 21:31:13.676017: step: 1032/529, loss: 0.0040176319889724255 2023-01-22 21:31:14.717180: step: 1036/529, loss: 0.005949924234300852 2023-01-22 21:31:15.755117: step: 1040/529, loss: 0.0007018258911557496 2023-01-22 21:31:16.795817: step: 1044/529, loss: 0.0027438232209533453 2023-01-22 21:31:17.848996: step: 1048/529, loss: 0.0018250832799822092 2023-01-22 21:31:18.894298: step: 1052/529, loss: 0.005582096055150032 2023-01-22 21:31:19.932047: step: 1056/529, loss: 0.004173826426267624 2023-01-22 21:31:20.975115: step: 1060/529, loss: 0.001151932985521853 2023-01-22 21:31:22.011753: step: 1064/529, loss: 0.0037320926785469055 2023-01-22 21:31:23.061431: step: 1068/529, loss: 0.00966629572212696 2023-01-22 21:31:24.133909: step: 1072/529, loss: 0.002055887598544359 2023-01-22 21:31:25.180003: step: 1076/529, loss: 0.01772480458021164 2023-01-22 21:31:26.230249: step: 1080/529, loss: 0.0003332009073346853 2023-01-22 21:31:27.276279: step: 1084/529, loss: 0.03068283572793007 2023-01-22 21:31:28.320226: step: 1088/529, loss: 0.003818134544417262 2023-01-22 21:31:29.361365: step: 1092/529, loss: 0.0016117250779643655 2023-01-22 21:31:30.395464: step: 1096/529, loss: 0.0033208210952579975 2023-01-22 21:31:31.434226: step: 1100/529, loss: 0.003668887075036764 2023-01-22 21:31:32.472112: step: 1104/529, loss: 0.003003299469128251 2023-01-22 21:31:33.516968: step: 1108/529, loss: 0.006399332545697689 2023-01-22 21:31:34.572157: step: 1112/529, loss: 0.0035741664469242096 2023-01-22 21:31:35.615986: step: 1116/529, loss: 0.0029484916012734175 2023-01-22 21:31:36.655846: step: 1120/529, loss: 0.005300041288137436 2023-01-22 21:31:37.701986: step: 1124/529, loss: 0.0008431575843133032 2023-01-22 21:31:38.747317: step: 1128/529, loss: 0.017666339874267578 2023-01-22 21:31:39.800107: step: 1132/529, loss: 0.0021952930837869644 2023-01-22 21:31:40.855357: step: 1136/529, loss: 0.0014462918043136597 2023-01-22 21:31:41.913404: step: 1140/529, loss: 0.00015663470549043268 2023-01-22 21:31:42.950198: step: 1144/529, loss: 0.025589918717741966 2023-01-22 21:31:44.006511: step: 1148/529, loss: 0.004201234318315983 2023-01-22 21:31:45.067886: step: 1152/529, loss: 0.013504664413630962 2023-01-22 21:31:46.119759: step: 1156/529, loss: 0.0031026527285575867 2023-01-22 21:31:47.163927: step: 1160/529, loss: 0.0063041215762495995 2023-01-22 21:31:48.201552: step: 1164/529, loss: 1.8157057638745755e-05 2023-01-22 21:31:49.292621: step: 1168/529, loss: 0.013643822632730007 2023-01-22 21:31:50.334288: step: 1172/529, loss: 0.0004492891894187778 2023-01-22 21:31:51.381251: step: 1176/529, loss: 0.002873055636882782 2023-01-22 21:31:52.419102: step: 1180/529, loss: 0.0022310272324830294 2023-01-22 21:31:53.475990: step: 1184/529, loss: 0.003860749304294586 2023-01-22 21:31:54.531473: step: 1188/529, loss: 0.0006902585737407207 2023-01-22 21:31:55.576423: step: 1192/529, loss: 0.03136343136429787 2023-01-22 21:31:56.636608: step: 1196/529, loss: 0.0003888942883349955 2023-01-22 21:31:57.674662: step: 1200/529, loss: 0.0014790548011660576 2023-01-22 21:31:58.717474: step: 1204/529, loss: 0.03887784853577614 2023-01-22 21:31:59.771127: step: 1208/529, loss: 0.007573306560516357 2023-01-22 21:32:00.828781: step: 1212/529, loss: 0.001519863959401846 2023-01-22 21:32:01.873843: step: 1216/529, loss: 0.00047163767158053815 2023-01-22 21:32:02.930209: step: 1220/529, loss: 0.01812143065035343 2023-01-22 21:32:03.986758: step: 1224/529, loss: 0.005998395383358002 2023-01-22 21:32:05.037976: step: 1228/529, loss: 0.003092394443228841 2023-01-22 21:32:06.112922: step: 1232/529, loss: 0.000265609472990036 2023-01-22 21:32:07.151523: step: 1236/529, loss: 0.0027293041348457336 2023-01-22 21:32:08.202403: step: 1240/529, loss: 0.01495912205427885 2023-01-22 21:32:09.260340: step: 1244/529, loss: 0.0053753117099404335 2023-01-22 21:32:10.321972: step: 1248/529, loss: 0.00598317664116621 2023-01-22 21:32:11.377165: step: 1252/529, loss: 0.0014641213929280639 2023-01-22 21:32:12.415876: step: 1256/529, loss: 0.008569150231778622 2023-01-22 21:32:13.465068: step: 1260/529, loss: 0.00345128052867949 2023-01-22 21:32:14.507200: step: 1264/529, loss: 0.000868515926413238 2023-01-22 21:32:15.561591: step: 1268/529, loss: 0.0023537895176559687 2023-01-22 21:32:16.616416: step: 1272/529, loss: 0.0015392887871712446 2023-01-22 21:32:17.664310: step: 1276/529, loss: 0.007391153369098902 2023-01-22 21:32:18.703303: step: 1280/529, loss: 0.0016162912361323833 2023-01-22 21:32:19.773190: step: 1284/529, loss: 0.0026112550403922796 2023-01-22 21:32:20.823481: step: 1288/529, loss: 0.0003828108892776072 2023-01-22 21:32:21.870835: step: 1292/529, loss: 0.003627361264079809 2023-01-22 21:32:22.927139: step: 1296/529, loss: 0.007039359770715237 2023-01-22 21:32:23.986926: step: 1300/529, loss: 0.00841111596673727 2023-01-22 21:32:25.033913: step: 1304/529, loss: 0.004582188557833433 2023-01-22 21:32:26.089657: step: 1308/529, loss: 0.0029177479445934296 2023-01-22 21:32:27.130422: step: 1312/529, loss: 0.0015576387522742152 2023-01-22 21:32:28.190578: step: 1316/529, loss: 0.035095054656267166 2023-01-22 21:32:29.242674: step: 1320/529, loss: 0.002325756009668112 2023-01-22 21:32:30.302603: step: 1324/529, loss: 0.001977218547835946 2023-01-22 21:32:31.372323: step: 1328/529, loss: 0.0022204043343663216 2023-01-22 21:32:32.422882: step: 1332/529, loss: 0.005964240524917841 2023-01-22 21:32:33.494728: step: 1336/529, loss: 0.004625619389116764 2023-01-22 21:32:34.544676: step: 1340/529, loss: 0.02371775358915329 2023-01-22 21:32:35.611087: step: 1344/529, loss: 0.011852601543068886 2023-01-22 21:32:36.664204: step: 1348/529, loss: 0.00196265522390604 2023-01-22 21:32:37.713410: step: 1352/529, loss: 0.007200157269835472 2023-01-22 21:32:38.758343: step: 1356/529, loss: 0.0016054463339969516 2023-01-22 21:32:39.791821: step: 1360/529, loss: 0.0001437036698916927 2023-01-22 21:32:40.839551: step: 1364/529, loss: 0.0014729555696249008 2023-01-22 21:32:41.904576: step: 1368/529, loss: 0.0012370975455269217 2023-01-22 21:32:42.940537: step: 1372/529, loss: 0.0005450923927128315 2023-01-22 21:32:43.997531: step: 1376/529, loss: 0.0024576110299676657 2023-01-22 21:32:45.041607: step: 1380/529, loss: 0.004145471844822168 2023-01-22 21:32:46.108173: step: 1384/529, loss: 0.0034017690923064947 2023-01-22 21:32:47.145779: step: 1388/529, loss: 0.0026113372296094894 2023-01-22 21:32:48.194068: step: 1392/529, loss: 0.018207484856247902 2023-01-22 21:32:49.246948: step: 1396/529, loss: 0.002914435463026166 2023-01-22 21:32:50.284478: step: 1400/529, loss: 0.026484588161110878 2023-01-22 21:32:51.322185: step: 1404/529, loss: 0.0011438442161306739 2023-01-22 21:32:52.351046: step: 1408/529, loss: 0.0011647769715636969 2023-01-22 21:32:53.394877: step: 1412/529, loss: 0.014256704598665237 2023-01-22 21:32:54.432598: step: 1416/529, loss: 0.008479629643261433 2023-01-22 21:32:55.485063: step: 1420/529, loss: 0.026539508253335953 2023-01-22 21:32:56.535321: step: 1424/529, loss: 0.003622222924605012 2023-01-22 21:32:57.585641: step: 1428/529, loss: 0.006273642648011446 2023-01-22 21:32:58.646120: step: 1432/529, loss: 0.008784571662545204 2023-01-22 21:32:59.687667: step: 1436/529, loss: 0.0009311490575782955 2023-01-22 21:33:00.731238: step: 1440/529, loss: 0.0015597260789945722 2023-01-22 21:33:01.792617: step: 1444/529, loss: 0.005850150249898434 2023-01-22 21:33:02.837527: step: 1448/529, loss: 0.0019786760676652193 2023-01-22 21:33:03.866466: step: 1452/529, loss: 0.0026978172827512026 2023-01-22 21:33:04.954518: step: 1456/529, loss: 0.006016007624566555 2023-01-22 21:33:05.992782: step: 1460/529, loss: 0.0029158438555896282 2023-01-22 21:33:07.048662: step: 1464/529, loss: 0.003299807431176305 2023-01-22 21:33:08.093696: step: 1468/529, loss: 0.00104052503593266 2023-01-22 21:33:09.141655: step: 1472/529, loss: 0.020689290016889572 2023-01-22 21:33:10.200370: step: 1476/529, loss: 0.0032323915511369705 2023-01-22 21:33:11.248710: step: 1480/529, loss: 0.011013713665306568 2023-01-22 21:33:12.293570: step: 1484/529, loss: 0.004391280468553305 2023-01-22 21:33:13.345113: step: 1488/529, loss: 0.0005535929813049734 2023-01-22 21:33:14.385922: step: 1492/529, loss: 0.005308902822434902 2023-01-22 21:33:15.455700: step: 1496/529, loss: 0.004690089263021946 2023-01-22 21:33:16.504503: step: 1500/529, loss: 0.009551119059324265 2023-01-22 21:33:17.547716: step: 1504/529, loss: 0.017965057864785194 2023-01-22 21:33:18.591661: step: 1508/529, loss: 0.009646509774029255 2023-01-22 21:33:19.634767: step: 1512/529, loss: 0.0019925751257687807 2023-01-22 21:33:20.689854: step: 1516/529, loss: 0.0031494018621742725 2023-01-22 21:33:21.746742: step: 1520/529, loss: 0.0037305722944438457 2023-01-22 21:33:22.803600: step: 1524/529, loss: 0.0010255238739773631 2023-01-22 21:33:23.855007: step: 1528/529, loss: 0.01110782753676176 2023-01-22 21:33:24.897758: step: 1532/529, loss: 0.007464790251106024 2023-01-22 21:33:25.974195: step: 1536/529, loss: 0.0172900203615427 2023-01-22 21:33:27.018125: step: 1540/529, loss: 0.0004540151567198336 2023-01-22 21:33:28.055916: step: 1544/529, loss: 0.0010561620583757758 2023-01-22 21:33:29.111064: step: 1548/529, loss: 0.0011781761422753334 2023-01-22 21:33:30.157874: step: 1552/529, loss: 0.010241216979920864 2023-01-22 21:33:31.221150: step: 1556/529, loss: 0.031845126301050186 2023-01-22 21:33:32.272000: step: 1560/529, loss: 0.04116947203874588 2023-01-22 21:33:33.311852: step: 1564/529, loss: 0.00023168123152572662 2023-01-22 21:33:34.373908: step: 1568/529, loss: 0.0013503329828381538 2023-01-22 21:33:35.414681: step: 1572/529, loss: 0.0028104600496590137 2023-01-22 21:33:36.458671: step: 1576/529, loss: 0.0056841871701180935 2023-01-22 21:33:37.507152: step: 1580/529, loss: 0.004517252091318369 2023-01-22 21:33:38.554984: step: 1584/529, loss: 0.004436878953129053 2023-01-22 21:33:39.615275: step: 1588/529, loss: 0.021682292222976685 2023-01-22 21:33:40.655844: step: 1592/529, loss: 0.012700929306447506 2023-01-22 21:33:41.707610: step: 1596/529, loss: 0.0085939671844244 2023-01-22 21:33:42.760789: step: 1600/529, loss: 0.0023251515813171864 2023-01-22 21:33:43.814237: step: 1604/529, loss: 0.00247582676820457 2023-01-22 21:33:44.855083: step: 1608/529, loss: 0.003466186812147498 2023-01-22 21:33:45.890296: step: 1612/529, loss: 0.020121384412050247 2023-01-22 21:33:46.936922: step: 1616/529, loss: 0.045501064509153366 2023-01-22 21:33:47.972233: step: 1620/529, loss: 0.0024784705601632595 2023-01-22 21:33:49.018958: step: 1624/529, loss: 0.005667181685566902 2023-01-22 21:33:50.069352: step: 1628/529, loss: 0.016882605850696564 2023-01-22 21:33:51.121893: step: 1632/529, loss: 0.004980574361979961 2023-01-22 21:33:52.177114: step: 1636/529, loss: 0.0016548606799915433 2023-01-22 21:33:53.236065: step: 1640/529, loss: 0.004596744664013386 2023-01-22 21:33:54.265835: step: 1644/529, loss: 0.0037330964114516973 2023-01-22 21:33:55.304358: step: 1648/529, loss: 0.0028481564950197935 2023-01-22 21:33:56.361131: step: 1652/529, loss: 0.0036452983040362597 2023-01-22 21:33:57.410795: step: 1656/529, loss: 0.0010336849372833967 2023-01-22 21:33:58.453098: step: 1660/529, loss: 0.0020581153221428394 2023-01-22 21:33:59.493675: step: 1664/529, loss: 0.0674237385392189 2023-01-22 21:34:00.548441: step: 1668/529, loss: 0.0003872031229548156 2023-01-22 21:34:01.595038: step: 1672/529, loss: 0.0003695247578434646 2023-01-22 21:34:02.663991: step: 1676/529, loss: 0.005342777818441391 2023-01-22 21:34:03.734751: step: 1680/529, loss: 0.006996373645961285 2023-01-22 21:34:04.784125: step: 1684/529, loss: 0.0035497688222676516 2023-01-22 21:34:05.860016: step: 1688/529, loss: 0.008472017012536526 2023-01-22 21:34:06.937637: step: 1692/529, loss: 0.10133443772792816 2023-01-22 21:34:07.988001: step: 1696/529, loss: 0.007926572114229202 2023-01-22 21:34:09.040230: step: 1700/529, loss: 0.004285447299480438 2023-01-22 21:34:10.078426: step: 1704/529, loss: 0.001144508132711053 2023-01-22 21:34:11.133201: step: 1708/529, loss: 0.0031161054503172636 2023-01-22 21:34:12.161237: step: 1712/529, loss: 0.002313500503078103 2023-01-22 21:34:13.219874: step: 1716/529, loss: 0.004640093073248863 2023-01-22 21:34:14.271815: step: 1720/529, loss: 0.00805331114679575 2023-01-22 21:34:15.329343: step: 1724/529, loss: 0.010073693469166756 2023-01-22 21:34:16.368406: step: 1728/529, loss: 0.0038662077859044075 2023-01-22 21:34:17.414008: step: 1732/529, loss: 0.0020495520438998938 2023-01-22 21:34:18.463785: step: 1736/529, loss: 0.003315746784210205 2023-01-22 21:34:19.496037: step: 1740/529, loss: 0.0011245302157476544 2023-01-22 21:34:20.544354: step: 1744/529, loss: 0.001619907096028328 2023-01-22 21:34:21.621582: step: 1748/529, loss: 0.0073272655718028545 2023-01-22 21:34:22.669491: step: 1752/529, loss: 0.0023567990865558386 2023-01-22 21:34:23.723943: step: 1756/529, loss: 0.0016487013781443238 2023-01-22 21:34:24.768622: step: 1760/529, loss: 0.010578070767223835 2023-01-22 21:34:25.830785: step: 1764/529, loss: 0.00725053995847702 2023-01-22 21:34:26.873111: step: 1768/529, loss: 0.013981441967189312 2023-01-22 21:34:27.908510: step: 1772/529, loss: 0.004138213116675615 2023-01-22 21:34:28.957032: step: 1776/529, loss: 0.002512231469154358 2023-01-22 21:34:30.005179: step: 1780/529, loss: 0.0007638219394721091 2023-01-22 21:34:31.068656: step: 1784/529, loss: 0.0032695638947188854 2023-01-22 21:34:32.121821: step: 1788/529, loss: 0.011093172244727612 2023-01-22 21:34:33.164646: step: 1792/529, loss: 0.023647598922252655 2023-01-22 21:34:34.218269: step: 1796/529, loss: 0.003556785173714161 2023-01-22 21:34:35.274235: step: 1800/529, loss: 0.00263601029291749 2023-01-22 21:34:36.310302: step: 1804/529, loss: 0.0054610725492239 2023-01-22 21:34:37.334772: step: 1808/529, loss: 0.002868467476218939 2023-01-22 21:34:38.374078: step: 1812/529, loss: 0.0035832358989864588 2023-01-22 21:34:39.429445: step: 1816/529, loss: 0.00465819425880909 2023-01-22 21:34:40.475001: step: 1820/529, loss: 0.00011471866309875622 2023-01-22 21:34:41.534318: step: 1824/529, loss: 0.0043467143550515175 2023-01-22 21:34:42.565093: step: 1828/529, loss: 0.007312522269785404 2023-01-22 21:34:43.614585: step: 1832/529, loss: 0.010814406909048557 2023-01-22 21:34:44.664071: step: 1836/529, loss: 0.0018458807608112693 2023-01-22 21:34:45.703401: step: 1840/529, loss: 0.006079292390495539 2023-01-22 21:34:46.748800: step: 1844/529, loss: 0.0033890395425260067 2023-01-22 21:34:47.793241: step: 1848/529, loss: 0.002275952138006687 2023-01-22 21:34:48.851994: step: 1852/529, loss: 0.0033465127926319838 2023-01-22 21:34:49.924014: step: 1856/529, loss: 0.007121488451957703 2023-01-22 21:34:50.982098: step: 1860/529, loss: 0.0057334741577506065 2023-01-22 21:34:52.041688: step: 1864/529, loss: 0.001228256500326097 2023-01-22 21:34:53.092030: step: 1868/529, loss: 0.0007331818924285471 2023-01-22 21:34:54.137609: step: 1872/529, loss: 0.006026719696819782 2023-01-22 21:34:55.187693: step: 1876/529, loss: 0.0013718903064727783 2023-01-22 21:34:56.234510: step: 1880/529, loss: 0.0017088564345613122 2023-01-22 21:34:57.283024: step: 1884/529, loss: 0.0013164597330614924 2023-01-22 21:34:58.330415: step: 1888/529, loss: 0.0020321484189480543 2023-01-22 21:34:59.396754: step: 1892/529, loss: 0.0026107255835086107 2023-01-22 21:35:00.437200: step: 1896/529, loss: 0.003549418644979596 2023-01-22 21:35:01.483189: step: 1900/529, loss: 0.0066976300440728664 2023-01-22 21:35:02.539361: step: 1904/529, loss: 0.006736681796610355 2023-01-22 21:35:03.591395: step: 1908/529, loss: 0.003880217904224992 2023-01-22 21:35:04.655289: step: 1912/529, loss: 0.0009326647268608212 2023-01-22 21:35:05.694372: step: 1916/529, loss: 0.0010575373889878392 2023-01-22 21:35:06.737676: step: 1920/529, loss: 0.0017090090550482273 2023-01-22 21:35:07.785661: step: 1924/529, loss: 0.004903963766992092 2023-01-22 21:35:08.828452: step: 1928/529, loss: 0.00022782431915402412 2023-01-22 21:35:09.863068: step: 1932/529, loss: 0.0030873375944793224 2023-01-22 21:35:10.919867: step: 1936/529, loss: 0.0012864961754530668 2023-01-22 21:35:11.998221: step: 1940/529, loss: 0.042002640664577484 2023-01-22 21:35:13.042787: step: 1944/529, loss: 0.004080172162503004 2023-01-22 21:35:14.097800: step: 1948/529, loss: 0.0033888269681483507 2023-01-22 21:35:15.159731: step: 1952/529, loss: 0.0010484391823410988 2023-01-22 21:35:16.217346: step: 1956/529, loss: 0.023458587005734444 2023-01-22 21:35:17.277598: step: 1960/529, loss: 0.0014475197531282902 2023-01-22 21:35:18.322733: step: 1964/529, loss: 0.026408446952700615 2023-01-22 21:35:19.374043: step: 1968/529, loss: 0.0014669850934296846 2023-01-22 21:35:20.412269: step: 1972/529, loss: 0.004631843883544207 2023-01-22 21:35:21.473637: step: 1976/529, loss: 0.0020155864767730236 2023-01-22 21:35:22.504133: step: 1980/529, loss: 0.0070532942190766335 2023-01-22 21:35:23.568977: step: 1984/529, loss: 0.016619721427559853 2023-01-22 21:35:24.611992: step: 1988/529, loss: 0.025262262672185898 2023-01-22 21:35:25.644278: step: 1992/529, loss: 0.02170470356941223 2023-01-22 21:35:26.697751: step: 1996/529, loss: 0.0022279764525592327 2023-01-22 21:35:27.777319: step: 2000/529, loss: 0.007930046878755093 2023-01-22 21:35:28.841473: step: 2004/529, loss: 0.0034217883367091417 2023-01-22 21:35:29.897820: step: 2008/529, loss: 0.001281469827517867 2023-01-22 21:35:30.964757: step: 2012/529, loss: 0.003413531230762601 2023-01-22 21:35:32.021425: step: 2016/529, loss: 0.002780960639938712 2023-01-22 21:35:33.068630: step: 2020/529, loss: 0.015387630090117455 2023-01-22 21:35:34.098186: step: 2024/529, loss: 0.015134617686271667 2023-01-22 21:35:35.150832: step: 2028/529, loss: 0.004267601296305656 2023-01-22 21:35:36.206481: step: 2032/529, loss: 0.001568476902320981 2023-01-22 21:35:37.264906: step: 2036/529, loss: 0.003909156192094088 2023-01-22 21:35:38.320214: step: 2040/529, loss: 0.010754327289760113 2023-01-22 21:35:39.370430: step: 2044/529, loss: 0.004229620564728975 2023-01-22 21:35:40.417949: step: 2048/529, loss: 0.00015167212404776365 2023-01-22 21:35:41.466726: step: 2052/529, loss: 0.001203554798848927 2023-01-22 21:35:42.513149: step: 2056/529, loss: 0.0020384385716170073 2023-01-22 21:35:43.556488: step: 2060/529, loss: 0.027280915528535843 2023-01-22 21:35:44.610884: step: 2064/529, loss: 0.01512991264462471 2023-01-22 21:35:45.650709: step: 2068/529, loss: 0.0004443250654730946 2023-01-22 21:35:46.701130: step: 2072/529, loss: 0.005818449892103672 2023-01-22 21:35:47.744684: step: 2076/529, loss: 0.002622608793899417 2023-01-22 21:35:48.782086: step: 2080/529, loss: 0.018039708957076073 2023-01-22 21:35:49.852912: step: 2084/529, loss: 0.026779957115650177 2023-01-22 21:35:50.911513: step: 2088/529, loss: 0.001887702033855021 2023-01-22 21:35:51.949196: step: 2092/529, loss: 0.001718476414680481 2023-01-22 21:35:53.001808: step: 2096/529, loss: 0.06352747976779938 2023-01-22 21:35:54.052790: step: 2100/529, loss: 0.00918897520750761 2023-01-22 21:35:55.096799: step: 2104/529, loss: 0.014546978287398815 2023-01-22 21:35:56.164138: step: 2108/529, loss: 0.016607722267508507 2023-01-22 21:35:57.203810: step: 2112/529, loss: 0.0009315998759120703 2023-01-22 21:35:58.248227: step: 2116/529, loss: 0.004374222829937935 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3281477305789933, 'r': 0.3057315668202765, 'f1': 0.3165432921695201}, 'combined': 0.23324242580912005, 'stategy': 1, 'epoch': 10} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.38143308793775144, 'r': 0.3218549703628321, 'f1': 0.34912048843900195}, 'combined': 0.24561240392693606, 'stategy': 1, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3125129864347413, 'r': 0.33860515228508026, 'f1': 0.32503627550862896}, 'combined': 0.23950041353267396, 'stategy': 1, 'epoch': 10} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37931366519438475, 'r': 0.33268487112951267, 'f1': 0.3544724079754306}, 'combined': 0.25167540966255575, 'stategy': 1, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32352120535714285, 'r': 0.3339573732718894, 'f1': 0.328656462585034}, 'combined': 0.24216791979949873, 'stategy': 1, 'epoch': 10} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3768563409864008, 'r': 0.300564306860009, 'f1': 0.3344142676325926}, 'combined': 0.2374341300191407, 'stategy': 1, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3508771929824561, 'r': 0.38095238095238093, 'f1': 0.365296803652968}, 'combined': 0.243531202435312, 'stategy': 1, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 11 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 21:38:29.324584: step: 4/529, loss: 0.0037140233907848597 2023-01-22 21:38:30.344030: step: 8/529, loss: 0.007363326847553253 2023-01-22 21:38:31.391589: step: 12/529, loss: 0.004608663730323315 2023-01-22 21:38:32.417517: step: 16/529, loss: 0.0027266694232821465 2023-01-22 21:38:33.458569: step: 20/529, loss: 0.0044874330051243305 2023-01-22 21:38:34.504042: step: 24/529, loss: 0.0015738230431452394 2023-01-22 21:38:35.532925: step: 28/529, loss: 0.004707221407443285 2023-01-22 21:38:36.580757: step: 32/529, loss: 0.03244788572192192 2023-01-22 21:38:37.617875: step: 36/529, loss: 0.00859522819519043 2023-01-22 21:38:38.658714: step: 40/529, loss: 0.0037811105139553547 2023-01-22 21:38:39.689164: step: 44/529, loss: 0.0032610762864351273 2023-01-22 21:38:40.715329: step: 48/529, loss: 0.012759698554873466 2023-01-22 21:38:41.766929: step: 52/529, loss: 0.00409624632447958 2023-01-22 21:38:42.809459: step: 56/529, loss: 0.002068314701318741 2023-01-22 21:38:43.868152: step: 60/529, loss: 0.01663852483034134 2023-01-22 21:38:44.911365: step: 64/529, loss: 0.005965459160506725 2023-01-22 21:38:45.940990: step: 68/529, loss: 0.008445071056485176 2023-01-22 21:38:46.981485: step: 72/529, loss: 0.0005764389061369002 2023-01-22 21:38:48.018053: step: 76/529, loss: 0.010759158059954643 2023-01-22 21:38:49.053554: step: 80/529, loss: 0.00017509213648736477 2023-01-22 21:38:50.096762: step: 84/529, loss: 0.0007360644522123039 2023-01-22 21:38:51.132525: step: 88/529, loss: 0.004500131588429213 2023-01-22 21:38:52.181257: step: 92/529, loss: 0.008626696653664112 2023-01-22 21:38:53.223916: step: 96/529, loss: 0.0027312820311635733 2023-01-22 21:38:54.267388: step: 100/529, loss: 0.012191029265522957 2023-01-22 21:38:55.293473: step: 104/529, loss: 0.007185385096818209 2023-01-22 21:38:56.350571: step: 108/529, loss: 0.004905566107481718 2023-01-22 21:38:57.395333: step: 112/529, loss: 0.004030782263725996 2023-01-22 21:38:58.429010: step: 116/529, loss: 0.016415033489465714 2023-01-22 21:38:59.474301: step: 120/529, loss: 0.0008366347174160182 2023-01-22 21:39:00.536686: step: 124/529, loss: 0.0015930411173030734 2023-01-22 21:39:01.568845: step: 128/529, loss: 0.002372258808463812 2023-01-22 21:39:02.619022: step: 132/529, loss: 0.009112424217164516 2023-01-22 21:39:03.669084: step: 136/529, loss: 0.0010882836068049073 2023-01-22 21:39:04.710495: step: 140/529, loss: 0.0032116733491420746 2023-01-22 21:39:05.759505: step: 144/529, loss: 0.004124924074858427 2023-01-22 21:39:06.816538: step: 148/529, loss: 0.0026849282439798117 2023-01-22 21:39:07.875301: step: 152/529, loss: 0.012590489350259304 2023-01-22 21:39:08.919131: step: 156/529, loss: 0.003016457427293062 2023-01-22 21:39:09.976261: step: 160/529, loss: 0.005077538546174765 2023-01-22 21:39:11.008675: step: 164/529, loss: 0.0031695785000920296 2023-01-22 21:39:12.057444: step: 168/529, loss: 0.0020259791053831577 2023-01-22 21:39:13.105140: step: 172/529, loss: 0.004039300139993429 2023-01-22 21:39:14.168630: step: 176/529, loss: 3.323805867694318e-05 2023-01-22 21:39:15.209679: step: 180/529, loss: 0.003353292355313897 2023-01-22 21:39:16.267438: step: 184/529, loss: 0.005855282302945852 2023-01-22 21:39:17.313773: step: 188/529, loss: 0.006960767786949873 2023-01-22 21:39:18.350502: step: 192/529, loss: 7.998461660463363e-05 2023-01-22 21:39:19.396666: step: 196/529, loss: 0.002480238676071167 2023-01-22 21:39:20.466106: step: 200/529, loss: 0.0027573422994464636 2023-01-22 21:39:21.504699: step: 204/529, loss: 9.641732322052121e-05 2023-01-22 21:39:22.560949: step: 208/529, loss: 0.00012682353553827852 2023-01-22 21:39:23.605608: step: 212/529, loss: 0.004122037906199694 2023-01-22 21:39:24.660773: step: 216/529, loss: 0.006666763219982386 2023-01-22 21:39:25.732960: step: 220/529, loss: 0.004897987935692072 2023-01-22 21:39:26.781340: step: 224/529, loss: 0.004491490311920643 2023-01-22 21:39:27.825828: step: 228/529, loss: 0.003970337100327015 2023-01-22 21:39:28.867953: step: 232/529, loss: 0.0026997982058674097 2023-01-22 21:39:29.915008: step: 236/529, loss: 0.0018035128014162183 2023-01-22 21:39:30.985653: step: 240/529, loss: 0.044095251709222794 2023-01-22 21:39:32.044935: step: 244/529, loss: 0.0046372017823159695 2023-01-22 21:39:33.089065: step: 248/529, loss: 0.0023649989161640406 2023-01-22 21:39:34.143798: step: 252/529, loss: 0.0108381612226367 2023-01-22 21:39:35.188502: step: 256/529, loss: 0.014285038225352764 2023-01-22 21:39:36.231971: step: 260/529, loss: 0.007839308120310307 2023-01-22 21:39:37.296599: step: 264/529, loss: 0.003131538862362504 2023-01-22 21:39:38.350055: step: 268/529, loss: 0.0014012412866577506 2023-01-22 21:39:39.403946: step: 272/529, loss: 1.2645472452277318e-05 2023-01-22 21:39:40.449830: step: 276/529, loss: 0.000384706596378237 2023-01-22 21:39:41.498394: step: 280/529, loss: 0.0059829652309417725 2023-01-22 21:39:42.568138: step: 284/529, loss: 0.00219951244071126 2023-01-22 21:39:43.614282: step: 288/529, loss: 0.003878541523590684 2023-01-22 21:39:44.662755: step: 292/529, loss: 0.0031933258287608624 2023-01-22 21:39:45.710479: step: 296/529, loss: 0.0010053592268377542 2023-01-22 21:39:46.763764: step: 300/529, loss: 0.011579213663935661 2023-01-22 21:39:47.798187: step: 304/529, loss: 0.013161268085241318 2023-01-22 21:39:48.848336: step: 308/529, loss: 0.0021102421451359987 2023-01-22 21:39:49.906403: step: 312/529, loss: 0.0006987681845203042 2023-01-22 21:39:50.946461: step: 316/529, loss: 0.022646820172667503 2023-01-22 21:39:51.995353: step: 320/529, loss: 0.01387560274451971 2023-01-22 21:39:53.046636: step: 324/529, loss: 0.004615884739905596 2023-01-22 21:39:54.100913: step: 328/529, loss: 0.0035643107257783413 2023-01-22 21:39:55.171279: step: 332/529, loss: 0.00041610028711147606 2023-01-22 21:39:56.235746: step: 336/529, loss: 0.025767065584659576 2023-01-22 21:39:57.299013: step: 340/529, loss: 0.013144945725798607 2023-01-22 21:39:58.339764: step: 344/529, loss: 0.00955265574157238 2023-01-22 21:39:59.383633: step: 348/529, loss: 0.0015506912022829056 2023-01-22 21:40:00.442781: step: 352/529, loss: 0.0009194116573780775 2023-01-22 21:40:01.483703: step: 356/529, loss: 0.026821289211511612 2023-01-22 21:40:02.562326: step: 360/529, loss: 0.027703434228897095 2023-01-22 21:40:03.616293: step: 364/529, loss: 0.00823939312249422 2023-01-22 21:40:04.659507: step: 368/529, loss: 6.478669092757627e-05 2023-01-22 21:40:05.721182: step: 372/529, loss: 0.012762855738401413 2023-01-22 21:40:06.782237: step: 376/529, loss: 0.01189438160508871 2023-01-22 21:40:07.827048: step: 380/529, loss: 0.002343898406252265 2023-01-22 21:40:08.876788: step: 384/529, loss: 0.004080342594534159 2023-01-22 21:40:09.928585: step: 388/529, loss: 0.0017143720760941505 2023-01-22 21:40:10.976395: step: 392/529, loss: 0.0038161305710673332 2023-01-22 21:40:12.038916: step: 396/529, loss: 0.00879244226962328 2023-01-22 21:40:13.078604: step: 400/529, loss: 0.01280451100319624 2023-01-22 21:40:14.137578: step: 404/529, loss: 0.01117230299860239 2023-01-22 21:40:15.183899: step: 408/529, loss: 9.91434990282869e-06 2023-01-22 21:40:16.221670: step: 412/529, loss: 0.0004885715316049755 2023-01-22 21:40:17.276434: step: 416/529, loss: 0.0038478204514831305 2023-01-22 21:40:18.338892: step: 420/529, loss: 0.0007420756155624986 2023-01-22 21:40:19.386270: step: 424/529, loss: 0.0049125379882752895 2023-01-22 21:40:20.440660: step: 428/529, loss: 0.006404376123100519 2023-01-22 21:40:21.498383: step: 432/529, loss: 0.0026791549753397703 2023-01-22 21:40:22.557858: step: 436/529, loss: 0.0013678320683538914 2023-01-22 21:40:23.618381: step: 440/529, loss: 3.432455559959635e-05 2023-01-22 21:40:24.667227: step: 444/529, loss: 0.00014037317305337638 2023-01-22 21:40:25.718013: step: 448/529, loss: 0.006670426111668348 2023-01-22 21:40:26.774696: step: 452/529, loss: 0.006580918561667204 2023-01-22 21:40:27.831798: step: 456/529, loss: 0.0009833669755607843 2023-01-22 21:40:28.884167: step: 460/529, loss: 0.004689553752541542 2023-01-22 21:40:29.933485: step: 464/529, loss: 0.0023072869516909122 2023-01-22 21:40:30.987694: step: 468/529, loss: 0.00025368755450472236 2023-01-22 21:40:32.038933: step: 472/529, loss: 0.009030071087181568 2023-01-22 21:40:33.096364: step: 476/529, loss: 0.00792622659355402 2023-01-22 21:40:34.147129: step: 480/529, loss: 0.018542658537626266 2023-01-22 21:40:35.190578: step: 484/529, loss: 0.00496524665504694 2023-01-22 21:40:36.256121: step: 488/529, loss: 0.001907652709633112 2023-01-22 21:40:37.313792: step: 492/529, loss: 0.010789998807013035 2023-01-22 21:40:38.366324: step: 496/529, loss: 0.0005600329604931176 2023-01-22 21:40:39.419025: step: 500/529, loss: 0.024913666769862175 2023-01-22 21:40:40.466586: step: 504/529, loss: 0.0014568931655958295 2023-01-22 21:40:41.513900: step: 508/529, loss: 0.004080232698470354 2023-01-22 21:40:42.566230: step: 512/529, loss: 0.0031587211415171623 2023-01-22 21:40:43.613001: step: 516/529, loss: 0.001987533876672387 2023-01-22 21:40:44.661814: step: 520/529, loss: 0.004712260328233242 2023-01-22 21:40:45.714828: step: 524/529, loss: 0.0010895893210545182 2023-01-22 21:40:46.760072: step: 528/529, loss: 0.004248771816492081 2023-01-22 21:40:47.805031: step: 532/529, loss: 0.0009934406261891127 2023-01-22 21:40:48.859020: step: 536/529, loss: 0.014179419726133347 2023-01-22 21:40:49.938972: step: 540/529, loss: 0.0005689356476068497 2023-01-22 21:40:50.985722: step: 544/529, loss: 0.011986362747848034 2023-01-22 21:40:52.033828: step: 548/529, loss: 0.010499045252799988 2023-01-22 21:40:53.098263: step: 552/529, loss: 0.006522223353385925 2023-01-22 21:40:54.152419: step: 556/529, loss: 0.012940270826220512 2023-01-22 21:40:55.193684: step: 560/529, loss: 0.005161145236343145 2023-01-22 21:40:56.230019: step: 564/529, loss: 0.009197832085192204 2023-01-22 21:40:57.272270: step: 568/529, loss: 0.0036527689080685377 2023-01-22 21:40:58.329905: step: 572/529, loss: 0.0020357053726911545 2023-01-22 21:40:59.378125: step: 576/529, loss: 0.025222772732377052 2023-01-22 21:41:00.439396: step: 580/529, loss: 0.008358001708984375 2023-01-22 21:41:01.504702: step: 584/529, loss: 0.0016905934317037463 2023-01-22 21:41:02.558103: step: 588/529, loss: 0.005074611399322748 2023-01-22 21:41:03.601919: step: 592/529, loss: 0.009218421764671803 2023-01-22 21:41:04.642800: step: 596/529, loss: 0.0033109921496361494 2023-01-22 21:41:05.692483: step: 600/529, loss: 0.017501311376690865 2023-01-22 21:41:06.744726: step: 604/529, loss: 0.005896209739148617 2023-01-22 21:41:07.800935: step: 608/529, loss: 0.0027542482130229473 2023-01-22 21:41:08.838248: step: 612/529, loss: 0.00024768355069682 2023-01-22 21:41:09.885660: step: 616/529, loss: 0.01774778589606285 2023-01-22 21:41:10.939835: step: 620/529, loss: 0.0021654502488672733 2023-01-22 21:41:11.974904: step: 624/529, loss: 0.0026314547285437584 2023-01-22 21:41:13.009405: step: 628/529, loss: 0.009898966178297997 2023-01-22 21:41:14.050740: step: 632/529, loss: 0.011996070854365826 2023-01-22 21:41:15.093692: step: 636/529, loss: 0.0032017745543271303 2023-01-22 21:41:16.154496: step: 640/529, loss: 0.0027105931658297777 2023-01-22 21:41:17.213470: step: 644/529, loss: 0.001175255049020052 2023-01-22 21:41:18.259995: step: 648/529, loss: 0.0024072739761322737 2023-01-22 21:41:19.310729: step: 652/529, loss: 3.804731750278734e-05 2023-01-22 21:41:20.359151: step: 656/529, loss: 0.006953171920031309 2023-01-22 21:41:21.421980: step: 660/529, loss: 0.004596054553985596 2023-01-22 21:41:22.475793: step: 664/529, loss: 0.00041125252027995884 2023-01-22 21:41:23.522944: step: 668/529, loss: 0.0030098790302872658 2023-01-22 21:41:24.568357: step: 672/529, loss: 7.274878953467123e-06 2023-01-22 21:41:25.634379: step: 676/529, loss: 0.003445247421041131 2023-01-22 21:41:26.686624: step: 680/529, loss: 0.0002504125877749175 2023-01-22 21:41:27.726743: step: 684/529, loss: 0.010124568827450275 2023-01-22 21:41:28.777842: step: 688/529, loss: 0.0045191217213869095 2023-01-22 21:41:29.824472: step: 692/529, loss: 1.2206885912746657e-05 2023-01-22 21:41:30.863742: step: 696/529, loss: 0.0002887707087211311 2023-01-22 21:41:31.922459: step: 700/529, loss: 0.015842795372009277 2023-01-22 21:41:32.989070: step: 704/529, loss: 0.0023848742712289095 2023-01-22 21:41:34.035042: step: 708/529, loss: 0.006225676275789738 2023-01-22 21:41:35.079547: step: 712/529, loss: 0.01146108377724886 2023-01-22 21:41:36.135513: step: 716/529, loss: 0.01568037085235119 2023-01-22 21:41:37.175418: step: 720/529, loss: 0.003764503635466099 2023-01-22 21:41:38.233026: step: 724/529, loss: 0.004998202435672283 2023-01-22 21:41:39.272351: step: 728/529, loss: 0.0030842002015560865 2023-01-22 21:41:40.320395: step: 732/529, loss: 0.009460856206715107 2023-01-22 21:41:41.383035: step: 736/529, loss: 0.004166828002780676 2023-01-22 21:41:42.448824: step: 740/529, loss: 0.008676378056406975 2023-01-22 21:41:43.492018: step: 744/529, loss: 0.007000930607318878 2023-01-22 21:41:44.540677: step: 748/529, loss: 0.0025891580153256655 2023-01-22 21:41:45.588460: step: 752/529, loss: 0.0013015758013352752 2023-01-22 21:41:46.657572: step: 756/529, loss: 0.004177324939519167 2023-01-22 21:41:47.689329: step: 760/529, loss: 1.4848366845399141e-05 2023-01-22 21:41:48.730891: step: 764/529, loss: 4.147878280491568e-05 2023-01-22 21:41:49.773554: step: 768/529, loss: 0.00991077534854412 2023-01-22 21:41:50.827174: step: 772/529, loss: 0.0014547478640452027 2023-01-22 21:41:51.884714: step: 776/529, loss: 0.001693952246569097 2023-01-22 21:41:52.931223: step: 780/529, loss: 0.00014577664842363447 2023-01-22 21:41:53.966317: step: 784/529, loss: 0.00577347120270133 2023-01-22 21:41:55.026243: step: 788/529, loss: 0.013503513298928738 2023-01-22 21:41:56.091520: step: 792/529, loss: 0.004513662774115801 2023-01-22 21:41:57.134529: step: 796/529, loss: 4.7638040996389464e-05 2023-01-22 21:41:58.169355: step: 800/529, loss: 0.0004572199541144073 2023-01-22 21:41:59.226814: step: 804/529, loss: 0.007813621312379837 2023-01-22 21:42:00.272056: step: 808/529, loss: 0.05005307123064995 2023-01-22 21:42:01.322362: step: 812/529, loss: 0.002417173469439149 2023-01-22 21:42:02.367173: step: 816/529, loss: 0.0017533792415633798 2023-01-22 21:42:03.415352: step: 820/529, loss: 0.014769935049116611 2023-01-22 21:42:04.461173: step: 824/529, loss: 0.004178961273282766 2023-01-22 21:42:05.509947: step: 828/529, loss: 0.00013096911425236613 2023-01-22 21:42:06.559614: step: 832/529, loss: 0.0037084289360791445 2023-01-22 21:42:07.598092: step: 836/529, loss: 0.004279987886548042 2023-01-22 21:42:08.650637: step: 840/529, loss: 0.0011910549364984035 2023-01-22 21:42:09.688593: step: 844/529, loss: 0.0047609745524823666 2023-01-22 21:42:10.752649: step: 848/529, loss: 0.002467039041221142 2023-01-22 21:42:11.801879: step: 852/529, loss: 0.003256711643189192 2023-01-22 21:42:12.846457: step: 856/529, loss: 0.007680240087211132 2023-01-22 21:42:13.886925: step: 860/529, loss: 0.002249338896945119 2023-01-22 21:42:14.936565: step: 864/529, loss: 0.005415516905486584 2023-01-22 21:42:15.992065: step: 868/529, loss: 0.0019472382264211774 2023-01-22 21:42:17.029564: step: 872/529, loss: 0.002556249499320984 2023-01-22 21:42:18.098862: step: 876/529, loss: 0.006466090679168701 2023-01-22 21:42:19.139186: step: 880/529, loss: 0.002294387901201844 2023-01-22 21:42:20.203574: step: 884/529, loss: 0.01852002553641796 2023-01-22 21:42:21.237032: step: 888/529, loss: 0.0030034990049898624 2023-01-22 21:42:22.280215: step: 892/529, loss: 0.0011849309084936976 2023-01-22 21:42:23.334724: step: 896/529, loss: 0.0020137985702604055 2023-01-22 21:42:24.383407: step: 900/529, loss: 0.009167115204036236 2023-01-22 21:42:25.437285: step: 904/529, loss: 0.002979674143716693 2023-01-22 21:42:26.488637: step: 908/529, loss: 0.0035214924719184637 2023-01-22 21:42:27.541846: step: 912/529, loss: 0.0065216198563575745 2023-01-22 21:42:28.608673: step: 916/529, loss: 0.005801389925181866 2023-01-22 21:42:29.663017: step: 920/529, loss: 0.004252105951309204 2023-01-22 21:42:30.695583: step: 924/529, loss: 0.0007447049720212817 2023-01-22 21:42:31.737714: step: 928/529, loss: 0.0006038918509148061 2023-01-22 21:42:32.777893: step: 932/529, loss: 0.0005102146533317864 2023-01-22 21:42:33.832397: step: 936/529, loss: 0.015616899356245995 2023-01-22 21:42:34.878422: step: 940/529, loss: 0.0020776998717337847 2023-01-22 21:42:35.925601: step: 944/529, loss: 0.00170222541783005 2023-01-22 21:42:36.967521: step: 948/529, loss: 0.0004186028672847897 2023-01-22 21:42:38.008129: step: 952/529, loss: 0.0005757810431532562 2023-01-22 21:42:39.060917: step: 956/529, loss: 0.005286172963678837 2023-01-22 21:42:40.102742: step: 960/529, loss: 0.006571881007403135 2023-01-22 21:42:41.153189: step: 964/529, loss: 0.010248671285808086 2023-01-22 21:42:42.200254: step: 968/529, loss: 0.0007061854121275246 2023-01-22 21:42:43.238046: step: 972/529, loss: 0.0005732354475185275 2023-01-22 21:42:44.276280: step: 976/529, loss: 0.00342729315161705 2023-01-22 21:42:45.308875: step: 980/529, loss: 0.0004146279243286699 2023-01-22 21:42:46.365666: step: 984/529, loss: 0.0023045900743454695 2023-01-22 21:42:47.408618: step: 988/529, loss: 0.006229712627828121 2023-01-22 21:42:48.449032: step: 992/529, loss: 0.0031180190853774548 2023-01-22 21:42:49.519738: step: 996/529, loss: 0.0009226909605786204 2023-01-22 21:42:50.572132: step: 1000/529, loss: 0.003265135455876589 2023-01-22 21:42:51.622036: step: 1004/529, loss: 0.00539995776489377 2023-01-22 21:42:52.676869: step: 1008/529, loss: 0.0003146221279166639 2023-01-22 21:42:53.729571: step: 1012/529, loss: 0.0054960367269814014 2023-01-22 21:42:54.783848: step: 1016/529, loss: 0.008426538668572903 2023-01-22 21:42:55.815439: step: 1020/529, loss: 0.00016055663581937551 2023-01-22 21:42:56.869146: step: 1024/529, loss: 0.00010434867726871744 2023-01-22 21:42:57.917143: step: 1028/529, loss: 0.004037004429847002 2023-01-22 21:42:58.954870: step: 1032/529, loss: 0.010236815549433231 2023-01-22 21:42:59.996600: step: 1036/529, loss: 0.0004770483647007495 2023-01-22 21:43:01.041335: step: 1040/529, loss: 0.00807817094027996 2023-01-22 21:43:02.079331: step: 1044/529, loss: 0.0033139334991574287 2023-01-22 21:43:03.129855: step: 1048/529, loss: 0.003517554607242346 2023-01-22 21:43:04.166525: step: 1052/529, loss: 0.0037483545020222664 2023-01-22 21:43:05.230028: step: 1056/529, loss: 0.006665835622698069 2023-01-22 21:43:06.291783: step: 1060/529, loss: 0.01069339457899332 2023-01-22 21:43:07.343240: step: 1064/529, loss: 0.011766803450882435 2023-01-22 21:43:08.393894: step: 1068/529, loss: 0.01205790601670742 2023-01-22 21:43:09.448719: step: 1072/529, loss: 0.0007381334435194731 2023-01-22 21:43:10.491688: step: 1076/529, loss: 0.009628570638597012 2023-01-22 21:43:11.555054: step: 1080/529, loss: 0.0015007449546828866 2023-01-22 21:43:12.599872: step: 1084/529, loss: 0.005146688316017389 2023-01-22 21:43:13.656846: step: 1088/529, loss: 0.09127529710531235 2023-01-22 21:43:14.694459: step: 1092/529, loss: 0.00021335705241654068 2023-01-22 21:43:15.739559: step: 1096/529, loss: 0.003383737290278077 2023-01-22 21:43:16.778223: step: 1100/529, loss: 0.0036397406365722418 2023-01-22 21:43:17.831036: step: 1104/529, loss: 0.039512280374765396 2023-01-22 21:43:18.893226: step: 1108/529, loss: 0.013543150387704372 2023-01-22 21:43:19.942613: step: 1112/529, loss: 0.014971619471907616 2023-01-22 21:43:21.010366: step: 1116/529, loss: 0.0005985999014228582 2023-01-22 21:43:22.046219: step: 1120/529, loss: 0.0002031822077697143 2023-01-22 21:43:23.095898: step: 1124/529, loss: 0.004033518023788929 2023-01-22 21:43:24.149950: step: 1128/529, loss: 0.012922819703817368 2023-01-22 21:43:25.202976: step: 1132/529, loss: 0.005352956708520651 2023-01-22 21:43:26.245649: step: 1136/529, loss: 0.002476489171385765 2023-01-22 21:43:27.313267: step: 1140/529, loss: 0.0013561686500906944 2023-01-22 21:43:28.360701: step: 1144/529, loss: 0.05515532195568085 2023-01-22 21:43:29.442417: step: 1148/529, loss: 0.028133900836110115 2023-01-22 21:43:30.490880: step: 1152/529, loss: 0.002206395147368312 2023-01-22 21:43:31.535172: step: 1156/529, loss: 0.01955327019095421 2023-01-22 21:43:32.578380: step: 1160/529, loss: 0.004749919753521681 2023-01-22 21:43:33.617674: step: 1164/529, loss: 1.1817434142358252e-06 2023-01-22 21:43:34.660002: step: 1168/529, loss: 0.004447528161108494 2023-01-22 21:43:35.704662: step: 1172/529, loss: 0.001559864031150937 2023-01-22 21:43:36.748313: step: 1176/529, loss: 0.0051524150185287 2023-01-22 21:43:37.803405: step: 1180/529, loss: 7.988676225068048e-05 2023-01-22 21:43:38.857543: step: 1184/529, loss: 0.016568822786211967 2023-01-22 21:43:39.929236: step: 1188/529, loss: 0.006948177237063646 2023-01-22 21:43:40.984577: step: 1192/529, loss: 0.00016617057553958148 2023-01-22 21:43:42.036475: step: 1196/529, loss: 0.02696973644196987 2023-01-22 21:43:43.096566: step: 1200/529, loss: 0.002667576540261507 2023-01-22 21:43:44.134686: step: 1204/529, loss: 0.008679074235260487 2023-01-22 21:43:45.187207: step: 1208/529, loss: 0.0016724177403375506 2023-01-22 21:43:46.243795: step: 1212/529, loss: 0.002812038641422987 2023-01-22 21:43:47.288093: step: 1216/529, loss: 0.00047247158363461494 2023-01-22 21:43:48.346765: step: 1220/529, loss: 0.0018974867416545749 2023-01-22 21:43:49.398791: step: 1224/529, loss: 3.432413359405473e-05 2023-01-22 21:43:50.439865: step: 1228/529, loss: 0.003925752826035023 2023-01-22 21:43:51.495275: step: 1232/529, loss: 0.01531320158392191 2023-01-22 21:43:52.555059: step: 1236/529, loss: 0.000788183300755918 2023-01-22 21:43:53.598258: step: 1240/529, loss: 0.0027437377721071243 2023-01-22 21:43:54.656745: step: 1244/529, loss: 0.004910984542220831 2023-01-22 21:43:55.694205: step: 1248/529, loss: 0.008901037275791168 2023-01-22 21:43:56.744799: step: 1252/529, loss: 0.01265747845172882 2023-01-22 21:43:57.796061: step: 1256/529, loss: 0.002478359267115593 2023-01-22 21:43:58.846307: step: 1260/529, loss: 0.010530180297791958 2023-01-22 21:43:59.902510: step: 1264/529, loss: 0.002999641001224518 2023-01-22 21:44:00.966886: step: 1268/529, loss: 0.01876983419060707 2023-01-22 21:44:02.011733: step: 1272/529, loss: 0.041945409029722214 2023-01-22 21:44:03.081804: step: 1276/529, loss: 0.006962242536246777 2023-01-22 21:44:04.122326: step: 1280/529, loss: 0.0027996469289064407 2023-01-22 21:44:05.166758: step: 1284/529, loss: 0.02595708705484867 2023-01-22 21:44:06.214839: step: 1288/529, loss: 0.0034862279426306486 2023-01-22 21:44:07.249250: step: 1292/529, loss: 0.0 2023-01-22 21:44:08.279631: step: 1296/529, loss: 0.003192838979884982 2023-01-22 21:44:09.320439: step: 1300/529, loss: 0.004453867673873901 2023-01-22 21:44:10.361898: step: 1304/529, loss: 0.002583130495622754 2023-01-22 21:44:11.403429: step: 1308/529, loss: 0.002401629462838173 2023-01-22 21:44:12.473030: step: 1312/529, loss: 0.011069645173847675 2023-01-22 21:44:13.532004: step: 1316/529, loss: 0.009756780229508877 2023-01-22 21:44:14.583771: step: 1320/529, loss: 0.005467626266181469 2023-01-22 21:44:15.631144: step: 1324/529, loss: 0.005336384754627943 2023-01-22 21:44:16.675235: step: 1328/529, loss: 0.0009322090772911906 2023-01-22 21:44:17.718491: step: 1332/529, loss: 0.0006837813998572528 2023-01-22 21:44:18.768012: step: 1336/529, loss: 0.00910196453332901 2023-01-22 21:44:19.803662: step: 1340/529, loss: 0.0006432613590732217 2023-01-22 21:44:20.852300: step: 1344/529, loss: 0.007985581643879414 2023-01-22 21:44:21.902408: step: 1348/529, loss: 0.0021324334666132927 2023-01-22 21:44:22.943771: step: 1352/529, loss: 0.0001354393461951986 2023-01-22 21:44:23.980282: step: 1356/529, loss: 0.006900588050484657 2023-01-22 21:44:25.023179: step: 1360/529, loss: 0.0008141408325172961 2023-01-22 21:44:26.068889: step: 1364/529, loss: 0.00016899597540032119 2023-01-22 21:44:27.126647: step: 1368/529, loss: 0.0012238860363140702 2023-01-22 21:44:28.177663: step: 1372/529, loss: 0.005127795040607452 2023-01-22 21:44:29.224639: step: 1376/529, loss: 8.328542025992647e-05 2023-01-22 21:44:30.268003: step: 1380/529, loss: 0.0007493635639548302 2023-01-22 21:44:31.326617: step: 1384/529, loss: 0.0011074476642534137 2023-01-22 21:44:32.383489: step: 1388/529, loss: 0.0018748913425952196 2023-01-22 21:44:33.437263: step: 1392/529, loss: 0.003236520104110241 2023-01-22 21:44:34.489388: step: 1396/529, loss: 0.002921327482908964 2023-01-22 21:44:35.536273: step: 1400/529, loss: 0.03134811669588089 2023-01-22 21:44:36.588196: step: 1404/529, loss: 0.0015671526780351996 2023-01-22 21:44:37.629925: step: 1408/529, loss: 0.0011532946955412626 2023-01-22 21:44:38.687329: step: 1412/529, loss: 0.004763533361256123 2023-01-22 21:44:39.735241: step: 1416/529, loss: 2.480861803633161e-05 2023-01-22 21:44:40.774456: step: 1420/529, loss: 0.0009689464932307601 2023-01-22 21:44:41.825588: step: 1424/529, loss: 0.0019960098434239626 2023-01-22 21:44:42.880059: step: 1428/529, loss: 0.00026565336156636477 2023-01-22 21:44:43.929259: step: 1432/529, loss: 0.003497309982776642 2023-01-22 21:44:44.993331: step: 1436/529, loss: 0.0014974374789744616 2023-01-22 21:44:46.022890: step: 1440/529, loss: 0.0028620664961636066 2023-01-22 21:44:47.076431: step: 1444/529, loss: 0.0024662851355969906 2023-01-22 21:44:48.129587: step: 1448/529, loss: 0.020522547885775566 2023-01-22 21:44:49.178836: step: 1452/529, loss: 0.0006964959902688861 2023-01-22 21:44:50.222053: step: 1456/529, loss: 0.009692458435893059 2023-01-22 21:44:51.269829: step: 1460/529, loss: 0.006227830890566111 2023-01-22 21:44:52.328077: step: 1464/529, loss: 0.006993358489125967 2023-01-22 21:44:53.387316: step: 1468/529, loss: 0.0041832816787064075 2023-01-22 21:44:54.430310: step: 1472/529, loss: 0.0029888206627219915 2023-01-22 21:44:55.487677: step: 1476/529, loss: 0.005621057469397783 2023-01-22 21:44:56.521043: step: 1480/529, loss: 0.0009370629559271038 2023-01-22 21:44:57.566577: step: 1484/529, loss: 0.00045446419971995056 2023-01-22 21:44:58.614823: step: 1488/529, loss: 0.0029750578105449677 2023-01-22 21:44:59.688324: step: 1492/529, loss: 0.003995663020759821 2023-01-22 21:45:00.728017: step: 1496/529, loss: 0.0024975668638944626 2023-01-22 21:45:01.791878: step: 1500/529, loss: 0.011821907013654709 2023-01-22 21:45:02.850288: step: 1504/529, loss: 0.005453081801533699 2023-01-22 21:45:03.886007: step: 1508/529, loss: 0.002573229605332017 2023-01-22 21:45:04.928241: step: 1512/529, loss: 0.00237230840139091 2023-01-22 21:45:05.989193: step: 1516/529, loss: 0.0050940620712935925 2023-01-22 21:45:07.029205: step: 1520/529, loss: 0.004747120197862387 2023-01-22 21:45:08.104312: step: 1524/529, loss: 0.005388921592384577 2023-01-22 21:45:09.161135: step: 1528/529, loss: 0.004092082846909761 2023-01-22 21:45:10.192827: step: 1532/529, loss: 0.00019601719395723194 2023-01-22 21:45:11.222263: step: 1536/529, loss: 0.00010974735778290778 2023-01-22 21:45:12.274937: step: 1540/529, loss: 0.002975917886942625 2023-01-22 21:45:13.325242: step: 1544/529, loss: 0.0005187825881876051 2023-01-22 21:45:14.369250: step: 1548/529, loss: 0.002703945618122816 2023-01-22 21:45:15.425002: step: 1552/529, loss: 0.01375254150480032 2023-01-22 21:45:16.471048: step: 1556/529, loss: 0.0013686912134289742 2023-01-22 21:45:17.540049: step: 1560/529, loss: 0.002727731829509139 2023-01-22 21:45:18.621587: step: 1564/529, loss: 0.006979046855121851 2023-01-22 21:45:19.697485: step: 1568/529, loss: 0.005205978639423847 2023-01-22 21:45:20.752302: step: 1572/529, loss: 0.006595241837203503 2023-01-22 21:45:21.787276: step: 1576/529, loss: 0.009415585547685623 2023-01-22 21:45:22.827847: step: 1580/529, loss: 0.00011147946497658268 2023-01-22 21:45:23.889941: step: 1584/529, loss: 0.0024288652930408716 2023-01-22 21:45:24.932182: step: 1588/529, loss: 2.269660217280034e-05 2023-01-22 21:45:25.987252: step: 1592/529, loss: 0.004198869224637747 2023-01-22 21:45:27.027804: step: 1596/529, loss: 0.0013804089976474643 2023-01-22 21:45:28.079678: step: 1600/529, loss: 0.0008442662074230611 2023-01-22 21:45:29.125184: step: 1604/529, loss: 0.001134915160946548 2023-01-22 21:45:30.162839: step: 1608/529, loss: 0.009165355935692787 2023-01-22 21:45:31.211394: step: 1612/529, loss: 0.005637517664581537 2023-01-22 21:45:32.266080: step: 1616/529, loss: 0.007507156580686569 2023-01-22 21:45:33.322810: step: 1620/529, loss: 0.0029284728225320578 2023-01-22 21:45:34.376266: step: 1624/529, loss: 0.012668718583881855 2023-01-22 21:45:35.432720: step: 1628/529, loss: 0.0013841319596394897 2023-01-22 21:45:36.480230: step: 1632/529, loss: 0.0024372327607125044 2023-01-22 21:45:37.537644: step: 1636/529, loss: 0.0012555678840726614 2023-01-22 21:45:38.577768: step: 1640/529, loss: 0.002120430115610361 2023-01-22 21:45:39.613815: step: 1644/529, loss: 0.0008931196061894298 2023-01-22 21:45:40.665042: step: 1648/529, loss: 0.010051054880023003 2023-01-22 21:45:41.706146: step: 1652/529, loss: 0.002121733734384179 2023-01-22 21:45:42.756657: step: 1656/529, loss: 0.001377974753268063 2023-01-22 21:45:43.820144: step: 1660/529, loss: 0.0012314737541601062 2023-01-22 21:45:44.863800: step: 1664/529, loss: 0.0050186412408947945 2023-01-22 21:45:45.915418: step: 1668/529, loss: 0.0019156120251864195 2023-01-22 21:45:46.977204: step: 1672/529, loss: 0.0025133220478892326 2023-01-22 21:45:48.017605: step: 1676/529, loss: 1.6332645827787928e-05 2023-01-22 21:45:49.084963: step: 1680/529, loss: 0.012591800652444363 2023-01-22 21:45:50.137044: step: 1684/529, loss: 0.0008415223564952612 2023-01-22 21:45:51.187900: step: 1688/529, loss: 0.001133262412622571 2023-01-22 21:45:52.236700: step: 1692/529, loss: 0.016971612349152565 2023-01-22 21:45:53.283212: step: 1696/529, loss: 0.002832441357895732 2023-01-22 21:45:54.341094: step: 1700/529, loss: 0.006976114585995674 2023-01-22 21:45:55.406855: step: 1704/529, loss: 0.007624673191457987 2023-01-22 21:45:56.459474: step: 1708/529, loss: 0.00490959919989109 2023-01-22 21:45:57.528441: step: 1712/529, loss: 0.0020893074106425047 2023-01-22 21:45:58.585210: step: 1716/529, loss: 0.004579850006848574 2023-01-22 21:45:59.640230: step: 1720/529, loss: 0.0037009124644100666 2023-01-22 21:46:00.691501: step: 1724/529, loss: 0.005810212809592485 2023-01-22 21:46:01.739033: step: 1728/529, loss: 0.0008320951019413769 2023-01-22 21:46:02.791900: step: 1732/529, loss: 0.004227285273373127 2023-01-22 21:46:03.846170: step: 1736/529, loss: 0.004570974502712488 2023-01-22 21:46:04.894440: step: 1740/529, loss: 0.006186396349221468 2023-01-22 21:46:05.936042: step: 1744/529, loss: 0.0009346940205432475 2023-01-22 21:46:06.974716: step: 1748/529, loss: 0.00037364018498919904 2023-01-22 21:46:08.023480: step: 1752/529, loss: 0.007283602841198444 2023-01-22 21:46:09.081344: step: 1756/529, loss: 0.046040501445531845 2023-01-22 21:46:10.134489: step: 1760/529, loss: 0.005600692238658667 2023-01-22 21:46:11.180815: step: 1764/529, loss: 0.0 2023-01-22 21:46:12.221904: step: 1768/529, loss: 0.008639183826744556 2023-01-22 21:46:13.280541: step: 1772/529, loss: 0.02638068050146103 2023-01-22 21:46:14.325078: step: 1776/529, loss: 0.024783330038189888 2023-01-22 21:46:15.361503: step: 1780/529, loss: 0.0004539838992059231 2023-01-22 21:46:16.412325: step: 1784/529, loss: 0.025161137804389 2023-01-22 21:46:17.454027: step: 1788/529, loss: 0.0012859432026743889 2023-01-22 21:46:18.502580: step: 1792/529, loss: 0.014201657846570015 2023-01-22 21:46:19.546536: step: 1796/529, loss: 0.0021940567530691624 2023-01-22 21:46:20.583813: step: 1800/529, loss: 0.0005616530543193221 2023-01-22 21:46:21.620304: step: 1804/529, loss: 0.004170292988419533 2023-01-22 21:46:22.669552: step: 1808/529, loss: 0.008446595631539822 2023-01-22 21:46:23.711834: step: 1812/529, loss: 0.000450603140052408 2023-01-22 21:46:24.751649: step: 1816/529, loss: 0.0029140945989638567 2023-01-22 21:46:25.776528: step: 1820/529, loss: 0.0017332229763269424 2023-01-22 21:46:26.823596: step: 1824/529, loss: 0.004897206090390682 2023-01-22 21:46:27.865522: step: 1828/529, loss: 0.006870834622532129 2023-01-22 21:46:28.910001: step: 1832/529, loss: 0.014151008799672127 2023-01-22 21:46:29.972906: step: 1836/529, loss: 0.036038123071193695 2023-01-22 21:46:31.027119: step: 1840/529, loss: 0.011616545729339123 2023-01-22 21:46:32.079213: step: 1844/529, loss: 0.018927808851003647 2023-01-22 21:46:33.111593: step: 1848/529, loss: 0.0 2023-01-22 21:46:34.150842: step: 1852/529, loss: 0.004454266745597124 2023-01-22 21:46:35.202246: step: 1856/529, loss: 0.0035331835970282555 2023-01-22 21:46:36.239018: step: 1860/529, loss: 1.868805702542886e-05 2023-01-22 21:46:37.288836: step: 1864/529, loss: 0.0021432985085994005 2023-01-22 21:46:38.348425: step: 1868/529, loss: 0.003018022049218416 2023-01-22 21:46:39.413708: step: 1872/529, loss: 0.009025768376886845 2023-01-22 21:46:40.455962: step: 1876/529, loss: 0.0006289375596679747 2023-01-22 21:46:41.507485: step: 1880/529, loss: 0.0043432037346065044 2023-01-22 21:46:42.566750: step: 1884/529, loss: 0.0023552393540740013 2023-01-22 21:46:43.605910: step: 1888/529, loss: 0.0007116672350093722 2023-01-22 21:46:44.667156: step: 1892/529, loss: 0.023320242762565613 2023-01-22 21:46:45.717693: step: 1896/529, loss: 0.009287681430578232 2023-01-22 21:46:46.753157: step: 1900/529, loss: 0.0048317378386855125 2023-01-22 21:46:47.799732: step: 1904/529, loss: 0.0021411834750324488 2023-01-22 21:46:48.842902: step: 1908/529, loss: 0.0043587158434093 2023-01-22 21:46:49.867848: step: 1912/529, loss: 0.0 2023-01-22 21:46:50.917433: step: 1916/529, loss: 0.0004871864803135395 2023-01-22 21:46:51.973716: step: 1920/529, loss: 0.0004517881607171148 2023-01-22 21:46:53.021202: step: 1924/529, loss: 0.0035631738137453794 2023-01-22 21:46:54.072160: step: 1928/529, loss: 0.0037075397558510303 2023-01-22 21:46:55.116437: step: 1932/529, loss: 0.005595594644546509 2023-01-22 21:46:56.154530: step: 1936/529, loss: 0.002533398102968931 2023-01-22 21:46:57.204886: step: 1940/529, loss: 0.002870419528335333 2023-01-22 21:46:58.247955: step: 1944/529, loss: 0.003952626138925552 2023-01-22 21:46:59.281649: step: 1948/529, loss: 0.0018022614531219006 2023-01-22 21:47:00.310663: step: 1952/529, loss: 0.005801658611744642 2023-01-22 21:47:01.350535: step: 1956/529, loss: 0.0008817269117571414 2023-01-22 21:47:02.403465: step: 1960/529, loss: 0.0038247734773904085 2023-01-22 21:47:03.451554: step: 1964/529, loss: 0.004059887956827879 2023-01-22 21:47:04.491384: step: 1968/529, loss: 0.02748439647257328 2023-01-22 21:47:05.546972: step: 1972/529, loss: 0.011934623122215271 2023-01-22 21:47:06.592567: step: 1976/529, loss: 0.0038518719375133514 2023-01-22 21:47:07.661659: step: 1980/529, loss: 0.0031616054475307465 2023-01-22 21:47:08.706665: step: 1984/529, loss: 0.005377650260925293 2023-01-22 21:47:09.751292: step: 1988/529, loss: 0.0032389755360782146 2023-01-22 21:47:10.809518: step: 1992/529, loss: 0.0034783773589879274 2023-01-22 21:47:11.861358: step: 1996/529, loss: 0.001888565137051046 2023-01-22 21:47:12.907381: step: 2000/529, loss: 0.014660288579761982 2023-01-22 21:47:13.971023: step: 2004/529, loss: 0.002842474263161421 2023-01-22 21:47:15.024970: step: 2008/529, loss: 0.01490737684071064 2023-01-22 21:47:16.084155: step: 2012/529, loss: 0.0063547599129378796 2023-01-22 21:47:17.148969: step: 2016/529, loss: 0.016145624220371246 2023-01-22 21:47:18.201693: step: 2020/529, loss: 0.0008024487760849297 2023-01-22 21:47:19.252221: step: 2024/529, loss: 0.002280746353790164 2023-01-22 21:47:20.283863: step: 2028/529, loss: 0.0002587547351140529 2023-01-22 21:47:21.331516: step: 2032/529, loss: 0.008462951518595219 2023-01-22 21:47:22.374679: step: 2036/529, loss: 0.005481327418237925 2023-01-22 21:47:23.423745: step: 2040/529, loss: 0.003511536167934537 2023-01-22 21:47:24.473807: step: 2044/529, loss: 0.005110475234687328 2023-01-22 21:47:25.536331: step: 2048/529, loss: 0.0042467848397791386 2023-01-22 21:47:26.577259: step: 2052/529, loss: 0.000514945772010833 2023-01-22 21:47:27.629232: step: 2056/529, loss: 0.024979516863822937 2023-01-22 21:47:28.694198: step: 2060/529, loss: 0.007289297878742218 2023-01-22 21:47:29.751256: step: 2064/529, loss: 0.004846369381994009 2023-01-22 21:47:30.780769: step: 2068/529, loss: 0.001162551692686975 2023-01-22 21:47:31.833778: step: 2072/529, loss: 0.002346632769331336 2023-01-22 21:47:32.879266: step: 2076/529, loss: 0.00891745276749134 2023-01-22 21:47:33.921214: step: 2080/529, loss: 0.0025244178250432014 2023-01-22 21:47:34.974381: step: 2084/529, loss: 0.006365702021867037 2023-01-22 21:47:36.044955: step: 2088/529, loss: 0.005297030787914991 2023-01-22 21:47:37.084248: step: 2092/529, loss: 0.0007587825530208647 2023-01-22 21:47:38.140033: step: 2096/529, loss: 0.008229751139879227 2023-01-22 21:47:39.216697: step: 2100/529, loss: 0.0023025795817375183 2023-01-22 21:47:40.259651: step: 2104/529, loss: 0.0029667671769857407 2023-01-22 21:47:41.308994: step: 2108/529, loss: 0.0015449775382876396 2023-01-22 21:47:42.386304: step: 2112/529, loss: 0.005429327487945557 2023-01-22 21:47:43.438245: step: 2116/529, loss: 0.003750232746824622 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3379726890756303, 'r': 0.3161679994578477, 'f1': 0.32670693277310925}, 'combined': 0.2407314241486068, 'stategy': 1, 'epoch': 11} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3819645554775188, 'r': 0.3216368202755721, 'f1': 0.34921439700218443}, 'combined': 0.24567847025279307, 'stategy': 1, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31175590874435516, 'r': 0.3419258353970347, 'f1': 0.3261446429940946}, 'combined': 0.24031710536406972, 'stategy': 1, 'epoch': 11} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38148399918310033, 'r': 0.3315950716526164, 'f1': 0.3547943537178648}, 'combined': 0.251903991139684, 'stategy': 1, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3254464285714286, 'r': 0.3396499728923828, 'f1': 0.3323965380023876}, 'combined': 0.24492376484386455, 'stategy': 1, 'epoch': 11} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37343901165198945, 'r': 0.2981646559001486, 'f1': 0.33158340190351315}, 'combined': 0.23542421535149433, 'stategy': 1, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3508771929824561, 'r': 0.38095238095238093, 'f1': 0.365296803652968}, 'combined': 0.243531202435312, 'stategy': 1, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 12 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 21:50:14.664015: step: 4/529, loss: 0.0034538169857114553 2023-01-22 21:50:15.721752: step: 8/529, loss: 0.0016711006173864007 2023-01-22 21:50:16.762953: step: 12/529, loss: 0.01156221330165863 2023-01-22 21:50:17.819577: step: 16/529, loss: 0.00015871012874413282 2023-01-22 21:50:18.866318: step: 20/529, loss: 0.003133102087303996 2023-01-22 21:50:19.913996: step: 24/529, loss: 0.0010158048244193196 2023-01-22 21:50:20.948466: step: 28/529, loss: 0.003993500489741564 2023-01-22 21:50:21.991988: step: 32/529, loss: 0.005018666852265596 2023-01-22 21:50:23.057437: step: 36/529, loss: 0.0014677639119327068 2023-01-22 21:50:24.094889: step: 40/529, loss: 0.0026256253477185965 2023-01-22 21:50:25.139412: step: 44/529, loss: 0.0006456120172515512 2023-01-22 21:50:26.190067: step: 48/529, loss: 0.0033675660379230976 2023-01-22 21:50:27.226168: step: 52/529, loss: 0.009733915328979492 2023-01-22 21:50:28.264600: step: 56/529, loss: 0.0034726152662187815 2023-01-22 21:50:29.306319: step: 60/529, loss: 0.0018313925247639418 2023-01-22 21:50:30.342754: step: 64/529, loss: 0.00013744371244683862 2023-01-22 21:50:31.367464: step: 68/529, loss: 0.003951665014028549 2023-01-22 21:50:32.406358: step: 72/529, loss: 0.003070714185014367 2023-01-22 21:50:33.451451: step: 76/529, loss: 0.004276313818991184 2023-01-22 21:50:34.501731: step: 80/529, loss: 0.004900243598967791 2023-01-22 21:50:35.550543: step: 84/529, loss: 0.009483034722507 2023-01-22 21:50:36.582443: step: 88/529, loss: 0.0 2023-01-22 21:50:37.638003: step: 92/529, loss: 0.0053872899152338505 2023-01-22 21:50:38.705483: step: 96/529, loss: 0.00832262635231018 2023-01-22 21:50:39.748574: step: 100/529, loss: 0.0006826301105320454 2023-01-22 21:50:40.807953: step: 104/529, loss: 0.0007925773970782757 2023-01-22 21:50:41.847287: step: 108/529, loss: 0.01183923427015543 2023-01-22 21:50:42.891302: step: 112/529, loss: 0.0062284646555781364 2023-01-22 21:50:43.944321: step: 116/529, loss: 0.010330263525247574 2023-01-22 21:50:44.985689: step: 120/529, loss: 0.0029908437281847 2023-01-22 21:50:46.029111: step: 124/529, loss: 0.0037122832145541906 2023-01-22 21:50:47.063609: step: 128/529, loss: 0.0021550990641117096 2023-01-22 21:50:48.118339: step: 132/529, loss: 0.0009647613042034209 2023-01-22 21:50:49.168818: step: 136/529, loss: 0.0009003085433505476 2023-01-22 21:50:50.214451: step: 140/529, loss: 0.011518586426973343 2023-01-22 21:50:51.277093: step: 144/529, loss: 0.008874570950865746 2023-01-22 21:50:52.333152: step: 148/529, loss: 0.00550888990983367 2023-01-22 21:50:53.377854: step: 152/529, loss: 0.0016588961007073522 2023-01-22 21:50:54.430977: step: 156/529, loss: 0.0010452051647007465 2023-01-22 21:50:55.478658: step: 160/529, loss: 0.010655257850885391 2023-01-22 21:50:56.529824: step: 164/529, loss: 0.01128036342561245 2023-01-22 21:50:57.598789: step: 168/529, loss: 0.0031479373574256897 2023-01-22 21:50:58.654102: step: 172/529, loss: 0.0006826169556006789 2023-01-22 21:50:59.687819: step: 176/529, loss: 0.002177071524783969 2023-01-22 21:51:00.729656: step: 180/529, loss: 0.00041223509470000863 2023-01-22 21:51:01.786473: step: 184/529, loss: 0.0014141061110422015 2023-01-22 21:51:02.830483: step: 188/529, loss: 0.002947329543530941 2023-01-22 21:51:03.895540: step: 192/529, loss: 0.027088206261396408 2023-01-22 21:51:04.954234: step: 196/529, loss: 0.004891794640570879 2023-01-22 21:51:06.007124: step: 200/529, loss: 0.0029908197466284037 2023-01-22 21:51:07.059138: step: 204/529, loss: 0.0033854979556053877 2023-01-22 21:51:08.112445: step: 208/529, loss: 0.0030778059735894203 2023-01-22 21:51:09.166905: step: 212/529, loss: 9.326578947366215e-06 2023-01-22 21:51:10.224894: step: 216/529, loss: 0.009751619771122932 2023-01-22 21:51:11.280869: step: 220/529, loss: 0.00016446239897049963 2023-01-22 21:51:12.334465: step: 224/529, loss: 0.0047297230921685696 2023-01-22 21:51:13.373506: step: 228/529, loss: 0.003842433448880911 2023-01-22 21:51:14.450893: step: 232/529, loss: 0.020309636369347572 2023-01-22 21:51:15.505665: step: 236/529, loss: 0.0009144988725893199 2023-01-22 21:51:16.546910: step: 240/529, loss: 0.006281842943280935 2023-01-22 21:51:17.587417: step: 244/529, loss: 0.0013440930051729083 2023-01-22 21:51:18.627551: step: 248/529, loss: 0.0018926558550447226 2023-01-22 21:51:19.700056: step: 252/529, loss: 0.0012141893384978175 2023-01-22 21:51:20.758208: step: 256/529, loss: 0.0010878157336264849 2023-01-22 21:51:21.800422: step: 260/529, loss: 0.001576004084199667 2023-01-22 21:51:22.850511: step: 264/529, loss: 0.024168329313397408 2023-01-22 21:51:23.893290: step: 268/529, loss: 0.0028887009248137474 2023-01-22 21:51:24.958172: step: 272/529, loss: 0.0031978636980056763 2023-01-22 21:51:26.011683: step: 276/529, loss: 0.0031443950720131397 2023-01-22 21:51:27.060370: step: 280/529, loss: 0.00029196255491115153 2023-01-22 21:51:28.109023: step: 284/529, loss: 0.0007459737826138735 2023-01-22 21:51:29.153202: step: 288/529, loss: 0.011547936126589775 2023-01-22 21:51:30.216385: step: 292/529, loss: 0.01153575349599123 2023-01-22 21:51:31.266570: step: 296/529, loss: 0.00045906411833129823 2023-01-22 21:51:32.332595: step: 300/529, loss: 0.000437918643001467 2023-01-22 21:51:33.399033: step: 304/529, loss: 0.011378184892237186 2023-01-22 21:51:34.453631: step: 308/529, loss: 0.0032906539272516966 2023-01-22 21:51:35.505012: step: 312/529, loss: 0.0014775949530303478 2023-01-22 21:51:36.544421: step: 316/529, loss: 0.00013546424452215433 2023-01-22 21:51:37.582375: step: 320/529, loss: 0.0022926980163902044 2023-01-22 21:51:38.632482: step: 324/529, loss: 0.003716593375429511 2023-01-22 21:51:39.694223: step: 328/529, loss: 0.004395076539367437 2023-01-22 21:51:40.737100: step: 332/529, loss: 3.9748797462380026e-06 2023-01-22 21:51:41.787154: step: 336/529, loss: 0.0008925807778723538 2023-01-22 21:51:42.834676: step: 340/529, loss: 3.408240445423871e-05 2023-01-22 21:51:43.879136: step: 344/529, loss: 0.0003700566594488919 2023-01-22 21:51:44.943439: step: 348/529, loss: 0.0013339375145733356 2023-01-22 21:51:45.994070: step: 352/529, loss: 0.0017752345884218812 2023-01-22 21:51:47.043035: step: 356/529, loss: 0.00028631434543058276 2023-01-22 21:51:48.125140: step: 360/529, loss: 0.006901028100401163 2023-01-22 21:51:49.182996: step: 364/529, loss: 0.00553686311468482 2023-01-22 21:51:50.215714: step: 368/529, loss: 0.00450486596673727 2023-01-22 21:51:51.260168: step: 372/529, loss: 0.0002825112023856491 2023-01-22 21:51:52.308809: step: 376/529, loss: 0.004270290490239859 2023-01-22 21:51:53.349732: step: 380/529, loss: 0.0010503791272640228 2023-01-22 21:51:54.401950: step: 384/529, loss: 0.0004965506959706545 2023-01-22 21:51:55.446687: step: 388/529, loss: 0.015216793864965439 2023-01-22 21:51:56.521321: step: 392/529, loss: 0.004793694242835045 2023-01-22 21:51:57.556126: step: 396/529, loss: 0.00018608092796057463 2023-01-22 21:51:58.603961: step: 400/529, loss: 0.0012672869488596916 2023-01-22 21:51:59.662433: step: 404/529, loss: 0.007794622331857681 2023-01-22 21:52:00.700078: step: 408/529, loss: 0.001383294933475554 2023-01-22 21:52:01.755432: step: 412/529, loss: 0.0012107929214835167 2023-01-22 21:52:02.814284: step: 416/529, loss: 0.005087608005851507 2023-01-22 21:52:03.863535: step: 420/529, loss: 0.0041254255920648575 2023-01-22 21:52:04.908305: step: 424/529, loss: 2.905014980569831e-06 2023-01-22 21:52:05.981660: step: 428/529, loss: 0.00452798418700695 2023-01-22 21:52:07.028794: step: 432/529, loss: 0.00936139840632677 2023-01-22 21:52:08.084233: step: 436/529, loss: 0.0002534158411435783 2023-01-22 21:52:09.143960: step: 440/529, loss: 0.003384101903066039 2023-01-22 21:52:10.216640: step: 444/529, loss: 0.019990907981991768 2023-01-22 21:52:11.274155: step: 448/529, loss: 0.0031755047384649515 2023-01-22 21:52:12.315624: step: 452/529, loss: 0.0009907354833558202 2023-01-22 21:52:13.359390: step: 456/529, loss: 0.0047884550876915455 2023-01-22 21:52:14.417241: step: 460/529, loss: 0.0032467516139149666 2023-01-22 21:52:15.487322: step: 464/529, loss: 0.003332197666168213 2023-01-22 21:52:16.532355: step: 468/529, loss: 0.00868942029774189 2023-01-22 21:52:17.582437: step: 472/529, loss: 0.005520698148757219 2023-01-22 21:52:18.657133: step: 476/529, loss: 0.004008840769529343 2023-01-22 21:52:19.719380: step: 480/529, loss: 0.005649685859680176 2023-01-22 21:52:20.773622: step: 484/529, loss: 0.0016456665471196175 2023-01-22 21:52:21.826469: step: 488/529, loss: 7.642077980563045e-05 2023-01-22 21:52:22.875055: step: 492/529, loss: 0.019764306023716927 2023-01-22 21:52:23.917055: step: 496/529, loss: 0.0016616829670965672 2023-01-22 21:52:24.960642: step: 500/529, loss: 0.003973504062741995 2023-01-22 21:52:25.995651: step: 504/529, loss: 0.001964021008461714 2023-01-22 21:52:27.053108: step: 508/529, loss: 0.02109139785170555 2023-01-22 21:52:28.106282: step: 512/529, loss: 0.0010557562345638871 2023-01-22 21:52:29.151057: step: 516/529, loss: 0.005340509582310915 2023-01-22 21:52:30.206720: step: 520/529, loss: 0.008324533700942993 2023-01-22 21:52:31.246835: step: 524/529, loss: 6.679386569885537e-05 2023-01-22 21:52:32.292471: step: 528/529, loss: 0.00031830911757424474 2023-01-22 21:52:33.340710: step: 532/529, loss: 0.0005614500259980559 2023-01-22 21:52:34.387727: step: 536/529, loss: 0.0015028188936412334 2023-01-22 21:52:35.428540: step: 540/529, loss: 0.0034789899364113808 2023-01-22 21:52:36.483920: step: 544/529, loss: 0.003667730139568448 2023-01-22 21:52:37.547731: step: 548/529, loss: 0.0026404177770018578 2023-01-22 21:52:38.605836: step: 552/529, loss: 0.003268154803663492 2023-01-22 21:52:39.653110: step: 556/529, loss: 0.0051860809326171875 2023-01-22 21:52:40.704039: step: 560/529, loss: 0.002388609340414405 2023-01-22 21:52:41.742883: step: 564/529, loss: 0.003886688034981489 2023-01-22 21:52:42.814132: step: 568/529, loss: 0.003776333061978221 2023-01-22 21:52:43.870435: step: 572/529, loss: 0.0010710746282711625 2023-01-22 21:52:44.919150: step: 576/529, loss: 0.07810000330209732 2023-01-22 21:52:45.963970: step: 580/529, loss: 0.009665369987487793 2023-01-22 21:52:47.019329: step: 584/529, loss: 0.011006626300513744 2023-01-22 21:52:48.068513: step: 588/529, loss: 0.005104816053062677 2023-01-22 21:52:49.114875: step: 592/529, loss: 0.0031157396733760834 2023-01-22 21:52:50.184910: step: 596/529, loss: 0.013658968731760979 2023-01-22 21:52:51.244197: step: 600/529, loss: 0.008167355321347713 2023-01-22 21:52:52.286674: step: 604/529, loss: 0.007260228507220745 2023-01-22 21:52:53.331931: step: 608/529, loss: 0.0036794058978557587 2023-01-22 21:52:54.368885: step: 612/529, loss: 0.0023569369222968817 2023-01-22 21:52:55.421018: step: 616/529, loss: 0.0006588835385628045 2023-01-22 21:52:56.460221: step: 620/529, loss: 0.003614986315369606 2023-01-22 21:52:57.521005: step: 624/529, loss: 0.004600749351084232 2023-01-22 21:52:58.571240: step: 628/529, loss: 0.01361650601029396 2023-01-22 21:52:59.615388: step: 632/529, loss: 0.005067877471446991 2023-01-22 21:53:00.643683: step: 636/529, loss: 0.002755256835371256 2023-01-22 21:53:01.713046: step: 640/529, loss: 0.007259910460561514 2023-01-22 21:53:02.764690: step: 644/529, loss: 0.0013632168993353844 2023-01-22 21:53:03.788962: step: 648/529, loss: 0.0010076288599520922 2023-01-22 21:53:04.843381: step: 652/529, loss: 0.008541107177734375 2023-01-22 21:53:05.875444: step: 656/529, loss: 0.00029975667712278664 2023-01-22 21:53:06.933609: step: 660/529, loss: 0.0031777096446603537 2023-01-22 21:53:08.009475: step: 664/529, loss: 0.014218487776815891 2023-01-22 21:53:09.065583: step: 668/529, loss: 0.0036193605046719313 2023-01-22 21:53:10.119711: step: 672/529, loss: 0.0037756417877972126 2023-01-22 21:53:11.164523: step: 676/529, loss: 0.018947923555970192 2023-01-22 21:53:12.221464: step: 680/529, loss: 0.0020872927270829678 2023-01-22 21:53:13.264154: step: 684/529, loss: 0.008403408341109753 2023-01-22 21:53:14.313236: step: 688/529, loss: 0.001032195519655943 2023-01-22 21:53:15.359735: step: 692/529, loss: 0.0011361497454345226 2023-01-22 21:53:16.404413: step: 696/529, loss: 0.003513647010549903 2023-01-22 21:53:17.461754: step: 700/529, loss: 0.0001323939359281212 2023-01-22 21:53:18.529683: step: 704/529, loss: 0.00027199662872590125 2023-01-22 21:53:19.574255: step: 708/529, loss: 0.021403253078460693 2023-01-22 21:53:20.627690: step: 712/529, loss: 0.028027458116412163 2023-01-22 21:53:21.692630: step: 716/529, loss: 0.0006352125201374292 2023-01-22 21:53:22.743895: step: 720/529, loss: 3.136417217319831e-05 2023-01-22 21:53:23.791159: step: 724/529, loss: 0.0061670932918787 2023-01-22 21:53:24.843240: step: 728/529, loss: 0.005565732251852751 2023-01-22 21:53:25.899986: step: 732/529, loss: 0.0010282763978466392 2023-01-22 21:53:26.936840: step: 736/529, loss: 0.0016645464347675443 2023-01-22 21:53:28.002533: step: 740/529, loss: 0.0024394686333835125 2023-01-22 21:53:29.050831: step: 744/529, loss: 0.001042565330862999 2023-01-22 21:53:30.102603: step: 748/529, loss: 0.005726627539843321 2023-01-22 21:53:31.155149: step: 752/529, loss: 0.0016147037968039513 2023-01-22 21:53:32.201637: step: 756/529, loss: 0.0038548894226551056 2023-01-22 21:53:33.245524: step: 760/529, loss: 0.002423239406198263 2023-01-22 21:53:34.298232: step: 764/529, loss: 0.00017455000488553196 2023-01-22 21:53:35.350889: step: 768/529, loss: 0.017610331997275352 2023-01-22 21:53:36.395006: step: 772/529, loss: 0.0 2023-01-22 21:53:37.432469: step: 776/529, loss: 4.326991620473564e-05 2023-01-22 21:53:38.484318: step: 780/529, loss: 0.0027888838667422533 2023-01-22 21:53:39.534668: step: 784/529, loss: 0.022689491510391235 2023-01-22 21:53:40.581892: step: 788/529, loss: 0.0008440619567409158 2023-01-22 21:53:41.662053: step: 792/529, loss: 0.0021070868242532015 2023-01-22 21:53:42.709681: step: 796/529, loss: 0.0049596624448895454 2023-01-22 21:53:43.760981: step: 800/529, loss: 0.0001836716110119596 2023-01-22 21:53:44.817260: step: 804/529, loss: 0.006417789030820131 2023-01-22 21:53:45.873656: step: 808/529, loss: 0.0004340535379014909 2023-01-22 21:53:46.944620: step: 812/529, loss: 0.009803470224142075 2023-01-22 21:53:48.004920: step: 816/529, loss: 0.005087788216769695 2023-01-22 21:53:49.062491: step: 820/529, loss: 0.0021837654057890177 2023-01-22 21:53:50.126988: step: 824/529, loss: 0.015038087964057922 2023-01-22 21:53:51.174599: step: 828/529, loss: 0.008199043571949005 2023-01-22 21:53:52.208643: step: 832/529, loss: 0.00026388303376734257 2023-01-22 21:53:53.252038: step: 836/529, loss: 0.00013836135622113943 2023-01-22 21:53:54.300495: step: 840/529, loss: 0.00014398775238078088 2023-01-22 21:53:55.362273: step: 844/529, loss: 0.007086677476763725 2023-01-22 21:53:56.401874: step: 848/529, loss: 0.000679773569572717 2023-01-22 21:53:57.437369: step: 852/529, loss: 0.0003471091913525015 2023-01-22 21:53:58.488549: step: 856/529, loss: 0.00017987570026889443 2023-01-22 21:53:59.533347: step: 860/529, loss: 0.0060335020534694195 2023-01-22 21:54:00.596907: step: 864/529, loss: 0.0013328570639714599 2023-01-22 21:54:01.653586: step: 868/529, loss: 0.008790730498731136 2023-01-22 21:54:02.704607: step: 872/529, loss: 0.049121756106615067 2023-01-22 21:54:03.761746: step: 876/529, loss: 0.000212679457035847 2023-01-22 21:54:04.794787: step: 880/529, loss: 0.0013097607297822833 2023-01-22 21:54:05.843907: step: 884/529, loss: 0.0009541487670503557 2023-01-22 21:54:06.892057: step: 888/529, loss: 0.005695894360542297 2023-01-22 21:54:07.935650: step: 892/529, loss: 0.036895278841257095 2023-01-22 21:54:08.981013: step: 896/529, loss: 0.004692884627729654 2023-01-22 21:54:10.026198: step: 900/529, loss: 0.0002595408004708588 2023-01-22 21:54:11.061314: step: 904/529, loss: 0.0007599055534228683 2023-01-22 21:54:12.095140: step: 908/529, loss: 0.0011384158860892057 2023-01-22 21:54:13.146455: step: 912/529, loss: 0.0008297098102048039 2023-01-22 21:54:14.196938: step: 916/529, loss: 0.00234603532589972 2023-01-22 21:54:15.244814: step: 920/529, loss: 0.0019602435640990734 2023-01-22 21:54:16.309114: step: 924/529, loss: 0.00711282342672348 2023-01-22 21:54:17.356998: step: 928/529, loss: 0.005014392081648111 2023-01-22 21:54:18.400584: step: 932/529, loss: 0.001218703924678266 2023-01-22 21:54:19.472096: step: 936/529, loss: 0.005473948549479246 2023-01-22 21:54:20.518278: step: 940/529, loss: 0.001442932989448309 2023-01-22 21:54:21.557782: step: 944/529, loss: 0.002020786516368389 2023-01-22 21:54:22.597560: step: 948/529, loss: 0.004130592104047537 2023-01-22 21:54:23.632566: step: 952/529, loss: 0.0004926081164740026 2023-01-22 21:54:24.684513: step: 956/529, loss: 0.006414211355149746 2023-01-22 21:54:25.729876: step: 960/529, loss: 0.0008974411757662892 2023-01-22 21:54:26.770345: step: 964/529, loss: 0.0013735558604821563 2023-01-22 21:54:27.830709: step: 968/529, loss: 0.0026449968572705984 2023-01-22 21:54:28.870502: step: 972/529, loss: 0.016979465261101723 2023-01-22 21:54:29.912128: step: 976/529, loss: 0.006331773474812508 2023-01-22 21:54:30.970419: step: 980/529, loss: 0.0014255353016778827 2023-01-22 21:54:32.015216: step: 984/529, loss: 0.001388548407703638 2023-01-22 21:54:33.079652: step: 988/529, loss: 0.005819765385240316 2023-01-22 21:54:34.114261: step: 992/529, loss: 0.002782560186460614 2023-01-22 21:54:35.181398: step: 996/529, loss: 0.0005248901434242725 2023-01-22 21:54:36.239432: step: 1000/529, loss: 0.005262112710624933 2023-01-22 21:54:37.281671: step: 1004/529, loss: 0.0017985641025006771 2023-01-22 21:54:38.325744: step: 1008/529, loss: 0.0009738872176967561 2023-01-22 21:54:39.370532: step: 1012/529, loss: 0.004128714092075825 2023-01-22 21:54:40.423705: step: 1016/529, loss: 0.009687910787761211 2023-01-22 21:54:41.479077: step: 1020/529, loss: 0.0061416723765432835 2023-01-22 21:54:42.537888: step: 1024/529, loss: 0.0007446203380823135 2023-01-22 21:54:43.582211: step: 1028/529, loss: 0.00688126077875495 2023-01-22 21:54:44.645097: step: 1032/529, loss: 0.002748201135545969 2023-01-22 21:54:45.697500: step: 1036/529, loss: 0.009075099602341652 2023-01-22 21:54:46.753245: step: 1040/529, loss: 0.009910820052027702 2023-01-22 21:54:47.796146: step: 1044/529, loss: 0.016206787899136543 2023-01-22 21:54:48.830362: step: 1048/529, loss: 0.00026731740217655897 2023-01-22 21:54:49.875283: step: 1052/529, loss: 0.023184679448604584 2023-01-22 21:54:50.928951: step: 1056/529, loss: 0.0005515580996870995 2023-01-22 21:54:51.975613: step: 1060/529, loss: 0.006231253035366535 2023-01-22 21:54:53.025685: step: 1064/529, loss: 0.004519576672464609 2023-01-22 21:54:54.068222: step: 1068/529, loss: 0.006572348531335592 2023-01-22 21:54:55.135560: step: 1072/529, loss: 0.0019725102465599775 2023-01-22 21:54:56.194329: step: 1076/529, loss: 0.01795819029211998 2023-01-22 21:54:57.243693: step: 1080/529, loss: 0.0052817221730947495 2023-01-22 21:54:58.291185: step: 1084/529, loss: 0.004028265364468098 2023-01-22 21:54:59.329290: step: 1088/529, loss: 0.0028873439878225327 2023-01-22 21:55:00.367523: step: 1092/529, loss: 0.00017623452004045248 2023-01-22 21:55:01.447775: step: 1096/529, loss: 0.00811166875064373 2023-01-22 21:55:02.513006: step: 1100/529, loss: 0.0035460330545902252 2023-01-22 21:55:03.560033: step: 1104/529, loss: 0.0014432879397645593 2023-01-22 21:55:04.604998: step: 1108/529, loss: 9.490230877418071e-05 2023-01-22 21:55:05.661494: step: 1112/529, loss: 6.598322215722874e-06 2023-01-22 21:55:06.719190: step: 1116/529, loss: 0.003287338884547353 2023-01-22 21:55:07.763733: step: 1120/529, loss: 0.0043175108730793 2023-01-22 21:55:08.801763: step: 1124/529, loss: 0.0068112327717244625 2023-01-22 21:55:09.862270: step: 1128/529, loss: 0.002569206990301609 2023-01-22 21:55:10.915832: step: 1132/529, loss: 0.003962248098105192 2023-01-22 21:55:11.966930: step: 1136/529, loss: 3.942374314647168e-05 2023-01-22 21:55:13.012852: step: 1140/529, loss: 0.0015602442435920238 2023-01-22 21:55:14.052213: step: 1144/529, loss: 0.003851967863738537 2023-01-22 21:55:15.102195: step: 1148/529, loss: 0.005517622455954552 2023-01-22 21:55:16.150383: step: 1152/529, loss: 0.0045139179565012455 2023-01-22 21:55:17.209207: step: 1156/529, loss: 0.00961222406476736 2023-01-22 21:55:18.261777: step: 1160/529, loss: 0.00814170204102993 2023-01-22 21:55:19.300841: step: 1164/529, loss: 0.000687994179315865 2023-01-22 21:55:20.348838: step: 1168/529, loss: 0.0016381940804421902 2023-01-22 21:55:21.398133: step: 1172/529, loss: 0.0023828938137739897 2023-01-22 21:55:22.442484: step: 1176/529, loss: 0.00030106629128567874 2023-01-22 21:55:23.504986: step: 1180/529, loss: 0.0014321234775707126 2023-01-22 21:55:24.551651: step: 1184/529, loss: 0.006543700583279133 2023-01-22 21:55:25.591762: step: 1188/529, loss: 0.002816717606037855 2023-01-22 21:55:26.649849: step: 1192/529, loss: 0.0027113796677440405 2023-01-22 21:55:27.705520: step: 1196/529, loss: 0.0018475671531632543 2023-01-22 21:55:28.748641: step: 1200/529, loss: 0.00477629667147994 2023-01-22 21:55:29.810078: step: 1204/529, loss: 0.008887148462235928 2023-01-22 21:55:30.855596: step: 1208/529, loss: 0.007356569170951843 2023-01-22 21:55:31.919301: step: 1212/529, loss: 0.0033601808827370405 2023-01-22 21:55:32.983837: step: 1216/529, loss: 0.0026360158808529377 2023-01-22 21:55:34.043637: step: 1220/529, loss: 0.002555998507887125 2023-01-22 21:55:35.091424: step: 1224/529, loss: 0.00549728749319911 2023-01-22 21:55:36.157688: step: 1228/529, loss: 0.013226628303527832 2023-01-22 21:55:37.211630: step: 1232/529, loss: 0.0025701280683279037 2023-01-22 21:55:38.258935: step: 1236/529, loss: 0.006285619456321001 2023-01-22 21:55:39.315710: step: 1240/529, loss: 0.001428676419891417 2023-01-22 21:55:40.357007: step: 1244/529, loss: 0.004736202768981457 2023-01-22 21:55:41.415720: step: 1248/529, loss: 0.019103853031992912 2023-01-22 21:55:42.473459: step: 1252/529, loss: 0.0007028862019069493 2023-01-22 21:55:43.527095: step: 1256/529, loss: 0.001356936409138143 2023-01-22 21:55:44.575947: step: 1260/529, loss: 0.003363113384693861 2023-01-22 21:55:45.622992: step: 1264/529, loss: 0.006181258242577314 2023-01-22 21:55:46.679800: step: 1268/529, loss: 0.004516343120485544 2023-01-22 21:55:47.737504: step: 1272/529, loss: 0.002820379566401243 2023-01-22 21:55:48.779782: step: 1276/529, loss: 0.00543256476521492 2023-01-22 21:55:49.830856: step: 1280/529, loss: 0.0019710632041096687 2023-01-22 21:55:50.876165: step: 1284/529, loss: 0.0027457019314169884 2023-01-22 21:55:51.907246: step: 1288/529, loss: 0.001080790301784873 2023-01-22 21:55:52.940881: step: 1292/529, loss: 0.007547048386186361 2023-01-22 21:55:53.990212: step: 1296/529, loss: 0.02213042415678501 2023-01-22 21:55:55.009873: step: 1300/529, loss: 3.618982373154722e-05 2023-01-22 21:55:56.068295: step: 1304/529, loss: 0.008058971725404263 2023-01-22 21:55:57.126580: step: 1308/529, loss: 0.005725135561078787 2023-01-22 21:55:58.179650: step: 1312/529, loss: 0.003886567195877433 2023-01-22 21:55:59.222518: step: 1316/529, loss: 0.006567446980625391 2023-01-22 21:56:00.275417: step: 1320/529, loss: 0.007206076756119728 2023-01-22 21:56:01.332759: step: 1324/529, loss: 0.009695267304778099 2023-01-22 21:56:02.373249: step: 1328/529, loss: 0.005573967006057501 2023-01-22 21:56:03.419932: step: 1332/529, loss: 0.003601539647206664 2023-01-22 21:56:04.458099: step: 1336/529, loss: 0.005629237741231918 2023-01-22 21:56:05.512266: step: 1340/529, loss: 0.008871670812368393 2023-01-22 21:56:06.544701: step: 1344/529, loss: 0.0026468003634363413 2023-01-22 21:56:07.584926: step: 1348/529, loss: 0.005154135636985302 2023-01-22 21:56:08.644966: step: 1352/529, loss: 0.002335173077881336 2023-01-22 21:56:09.674141: step: 1356/529, loss: 0.008903377689421177 2023-01-22 21:56:10.718280: step: 1360/529, loss: 0.00041577909723855555 2023-01-22 21:56:11.753777: step: 1364/529, loss: 0.0027857183013111353 2023-01-22 21:56:12.794477: step: 1368/529, loss: 0.005076603032648563 2023-01-22 21:56:13.846063: step: 1372/529, loss: 0.0035085254348814487 2023-01-22 21:56:14.888114: step: 1376/529, loss: 0.006606556009501219 2023-01-22 21:56:15.941410: step: 1380/529, loss: 6.7213268266641535e-06 2023-01-22 21:56:17.007607: step: 1384/529, loss: 0.0004115298215765506 2023-01-22 21:56:18.051061: step: 1388/529, loss: 0.007112570106983185 2023-01-22 21:56:19.100133: step: 1392/529, loss: 0.002831387799233198 2023-01-22 21:56:20.137853: step: 1396/529, loss: 0.000764812168199569 2023-01-22 21:56:21.178927: step: 1400/529, loss: 0.0021896029356867075 2023-01-22 21:56:22.223886: step: 1404/529, loss: 0.002743021585047245 2023-01-22 21:56:23.276562: step: 1408/529, loss: 8.683648775331676e-05 2023-01-22 21:56:24.315874: step: 1412/529, loss: 0.00040909796371124685 2023-01-22 21:56:25.365988: step: 1416/529, loss: 0.006013940554112196 2023-01-22 21:56:26.409536: step: 1420/529, loss: 0.0031260058749467134 2023-01-22 21:56:27.446199: step: 1424/529, loss: 0.0031865935306996107 2023-01-22 21:56:28.511913: step: 1428/529, loss: 0.002149343490600586 2023-01-22 21:56:29.563630: step: 1432/529, loss: 0.001372204627841711 2023-01-22 21:56:30.587611: step: 1436/529, loss: 0.02134183794260025 2023-01-22 21:56:31.643699: step: 1440/529, loss: 0.01621353067457676 2023-01-22 21:56:32.678141: step: 1444/529, loss: 0.0009152347920462489 2023-01-22 21:56:33.721019: step: 1448/529, loss: 0.004257665015757084 2023-01-22 21:56:34.787812: step: 1452/529, loss: 0.007955099456012249 2023-01-22 21:56:35.835910: step: 1456/529, loss: 0.001764209708198905 2023-01-22 21:56:36.908808: step: 1460/529, loss: 0.022892527282238007 2023-01-22 21:56:37.962582: step: 1464/529, loss: 0.009340415708720684 2023-01-22 21:56:39.010430: step: 1468/529, loss: 0.0027028219774365425 2023-01-22 21:56:40.066375: step: 1472/529, loss: 0.0027752385940402746 2023-01-22 21:56:41.092779: step: 1476/529, loss: 0.00025280597037635744 2023-01-22 21:56:42.139801: step: 1480/529, loss: 0.0013899061596021056 2023-01-22 21:56:43.194369: step: 1484/529, loss: 0.002894199453294277 2023-01-22 21:56:44.235446: step: 1488/529, loss: 3.107917291345075e-05 2023-01-22 21:56:45.289143: step: 1492/529, loss: 0.004997130483388901 2023-01-22 21:56:46.340609: step: 1496/529, loss: 0.004742403980344534 2023-01-22 21:56:47.392282: step: 1500/529, loss: 0.002078400691971183 2023-01-22 21:56:48.440316: step: 1504/529, loss: 0.015194614417850971 2023-01-22 21:56:49.504722: step: 1508/529, loss: 0.0025632097385823727 2023-01-22 21:56:50.556086: step: 1512/529, loss: 0.0006677258061245084 2023-01-22 21:56:51.609686: step: 1516/529, loss: 0.004418171476572752 2023-01-22 21:56:52.644798: step: 1520/529, loss: 0.004550809971988201 2023-01-22 21:56:53.689164: step: 1524/529, loss: 9.262320236302912e-05 2023-01-22 21:56:54.724907: step: 1528/529, loss: 0.004586302675306797 2023-01-22 21:56:55.768342: step: 1532/529, loss: 0.02046208828687668 2023-01-22 21:56:56.806895: step: 1536/529, loss: 0.0026131935883313417 2023-01-22 21:56:57.851403: step: 1540/529, loss: 0.0024927135091274977 2023-01-22 21:56:58.883235: step: 1544/529, loss: 0.0028580259531736374 2023-01-22 21:56:59.928379: step: 1548/529, loss: 0.009128554724156857 2023-01-22 21:57:00.980507: step: 1552/529, loss: 0.002217313041910529 2023-01-22 21:57:02.027920: step: 1556/529, loss: 0.006017806939780712 2023-01-22 21:57:03.077150: step: 1560/529, loss: 0.006406652275472879 2023-01-22 21:57:04.117520: step: 1564/529, loss: 0.0005443730624392629 2023-01-22 21:57:05.152945: step: 1568/529, loss: 5.7744142395677045e-05 2023-01-22 21:57:06.204288: step: 1572/529, loss: 0.011759455315768719 2023-01-22 21:57:07.242200: step: 1576/529, loss: 0.0014660715823993087 2023-01-22 21:57:08.289509: step: 1580/529, loss: 0.005744372494518757 2023-01-22 21:57:09.333815: step: 1584/529, loss: 0.001768304966390133 2023-01-22 21:57:10.377931: step: 1588/529, loss: 0.0025157839991152287 2023-01-22 21:57:11.420543: step: 1592/529, loss: 0.0037688319571316242 2023-01-22 21:57:12.462793: step: 1596/529, loss: 0.0014135395176708698 2023-01-22 21:57:13.506829: step: 1600/529, loss: 0.0036624078638851643 2023-01-22 21:57:14.549995: step: 1604/529, loss: 0.0006104863132350147 2023-01-22 21:57:15.603982: step: 1608/529, loss: 4.5989978389116004e-05 2023-01-22 21:57:16.656552: step: 1612/529, loss: 0.0017223499016836286 2023-01-22 21:57:17.705702: step: 1616/529, loss: 0.007549617905169725 2023-01-22 21:57:18.745706: step: 1620/529, loss: 0.004971557296812534 2023-01-22 21:57:19.802494: step: 1624/529, loss: 0.012937960214912891 2023-01-22 21:57:20.836973: step: 1628/529, loss: 0.0021600075997412205 2023-01-22 21:57:21.863605: step: 1632/529, loss: 0.007813052274286747 2023-01-22 21:57:22.902715: step: 1636/529, loss: 0.01909341663122177 2023-01-22 21:57:23.940790: step: 1640/529, loss: 0.0017056944780051708 2023-01-22 21:57:24.981508: step: 1644/529, loss: 0.005108917597681284 2023-01-22 21:57:26.025944: step: 1648/529, loss: 0.0015330326277762651 2023-01-22 21:57:27.074379: step: 1652/529, loss: 0.005255911964923143 2023-01-22 21:57:28.117387: step: 1656/529, loss: 0.0001902960066217929 2023-01-22 21:57:29.157869: step: 1660/529, loss: 0.001039320370182395 2023-01-22 21:57:30.226105: step: 1664/529, loss: 0.004947258625179529 2023-01-22 21:57:31.271352: step: 1668/529, loss: 0.009913178160786629 2023-01-22 21:57:32.313891: step: 1672/529, loss: 0.0032350632827728987 2023-01-22 21:57:33.362289: step: 1676/529, loss: 0.0020946054719388485 2023-01-22 21:57:34.422127: step: 1680/529, loss: 0.010111463256180286 2023-01-22 21:57:35.459912: step: 1684/529, loss: 0.006983467377722263 2023-01-22 21:57:36.500268: step: 1688/529, loss: 0.009233947843313217 2023-01-22 21:57:37.544605: step: 1692/529, loss: 0.0008421643287874758 2023-01-22 21:57:38.594801: step: 1696/529, loss: 0.011338598094880581 2023-01-22 21:57:39.634753: step: 1700/529, loss: 0.008865226060152054 2023-01-22 21:57:40.680037: step: 1704/529, loss: 0.05970841646194458 2023-01-22 21:57:41.740634: step: 1708/529, loss: 0.0056977653875947 2023-01-22 21:57:42.809853: step: 1712/529, loss: 0.003153856610879302 2023-01-22 21:57:43.871058: step: 1716/529, loss: 0.0013077165931463242 2023-01-22 21:57:44.940910: step: 1720/529, loss: 0.004332357551902533 2023-01-22 21:57:45.993846: step: 1724/529, loss: 0.0056518083438277245 2023-01-22 21:57:47.045259: step: 1728/529, loss: 0.0014051418984308839 2023-01-22 21:57:48.109429: step: 1732/529, loss: 0.012752902694046497 2023-01-22 21:57:49.149326: step: 1736/529, loss: 0.001972480211406946 2023-01-22 21:57:50.187509: step: 1740/529, loss: 0.0025719499681144953 2023-01-22 21:57:51.232333: step: 1744/529, loss: 0.006823750212788582 2023-01-22 21:57:52.269024: step: 1748/529, loss: 0.005476097110658884 2023-01-22 21:57:53.327150: step: 1752/529, loss: 0.011366089805960655 2023-01-22 21:57:54.363777: step: 1756/529, loss: 0.00028451211983338 2023-01-22 21:57:55.414238: step: 1760/529, loss: 0.0023403684608638287 2023-01-22 21:57:56.455879: step: 1764/529, loss: 0.0043478477746248245 2023-01-22 21:57:57.503785: step: 1768/529, loss: 0.0056175971403717995 2023-01-22 21:57:58.552463: step: 1772/529, loss: 0.010001528076827526 2023-01-22 21:57:59.586108: step: 1776/529, loss: 0.011165730655193329 2023-01-22 21:58:00.627070: step: 1780/529, loss: 0.0014509441098198295 2023-01-22 21:58:01.670519: step: 1784/529, loss: 0.0008313686703331769 2023-01-22 21:58:02.723534: step: 1788/529, loss: 0.003808972192928195 2023-01-22 21:58:03.768372: step: 1792/529, loss: 0.0002662185288500041 2023-01-22 21:58:04.816855: step: 1796/529, loss: 0.008610406890511513 2023-01-22 21:58:05.862325: step: 1800/529, loss: 0.0011986854951828718 2023-01-22 21:58:06.902847: step: 1804/529, loss: 0.004532924387603998 2023-01-22 21:58:07.963010: step: 1808/529, loss: 0.0023454048205167055 2023-01-22 21:58:09.018084: step: 1812/529, loss: 0.00456051854416728 2023-01-22 21:58:10.065098: step: 1816/529, loss: 0.0001380283065373078 2023-01-22 21:58:11.097867: step: 1820/529, loss: 0.0028413415420800447 2023-01-22 21:58:12.149527: step: 1824/529, loss: 0.0010695684468373656 2023-01-22 21:58:13.205336: step: 1828/529, loss: 0.00026654437533579767 2023-01-22 21:58:14.252998: step: 1832/529, loss: 0.0013222841080278158 2023-01-22 21:58:15.291615: step: 1836/529, loss: 0.0005296416929922998 2023-01-22 21:58:16.331179: step: 1840/529, loss: 0.005853899754583836 2023-01-22 21:58:17.373365: step: 1844/529, loss: 0.0041617522947490215 2023-01-22 21:58:18.438420: step: 1848/529, loss: 0.0015156196895986795 2023-01-22 21:58:19.474716: step: 1852/529, loss: 0.0035164745058864355 2023-01-22 21:58:20.514570: step: 1856/529, loss: 0.00017092064081225544 2023-01-22 21:58:21.566111: step: 1860/529, loss: 0.00426424341276288 2023-01-22 21:58:22.622136: step: 1864/529, loss: 0.003121687099337578 2023-01-22 21:58:23.673201: step: 1868/529, loss: 0.00400412455201149 2023-01-22 21:58:24.715608: step: 1872/529, loss: 0.0123432707041502 2023-01-22 21:58:25.774257: step: 1876/529, loss: 0.0005002549150958657 2023-01-22 21:58:26.819595: step: 1880/529, loss: 0.001664901152253151 2023-01-22 21:58:27.880061: step: 1884/529, loss: 0.008837983012199402 2023-01-22 21:58:28.945131: step: 1888/529, loss: 0.00404225243255496 2023-01-22 21:58:29.996266: step: 1892/529, loss: 0.01162963267415762 2023-01-22 21:58:31.027499: step: 1896/529, loss: 0.003960322123020887 2023-01-22 21:58:32.073931: step: 1900/529, loss: 8.359771527466364e-06 2023-01-22 21:58:33.110913: step: 1904/529, loss: 0.0008350919233635068 2023-01-22 21:58:34.157634: step: 1908/529, loss: 0.007631985936313868 2023-01-22 21:58:35.217829: step: 1912/529, loss: 0.0056535713374614716 2023-01-22 21:58:36.277818: step: 1916/529, loss: 0.0006918919389136136 2023-01-22 21:58:37.318430: step: 1920/529, loss: 0.0027876945678144693 2023-01-22 21:58:38.355543: step: 1924/529, loss: 0.0018325918354094028 2023-01-22 21:58:39.412564: step: 1928/529, loss: 0.0023743915371596813 2023-01-22 21:58:40.448539: step: 1932/529, loss: 0.0025626919232308865 2023-01-22 21:58:41.490855: step: 1936/529, loss: 0.012492476031184196 2023-01-22 21:58:42.543518: step: 1940/529, loss: 0.004952891264110804 2023-01-22 21:58:43.590912: step: 1944/529, loss: 0.0029234527610242367 2023-01-22 21:58:44.628860: step: 1948/529, loss: 0.0027377381920814514 2023-01-22 21:58:45.680100: step: 1952/529, loss: 0.0008895958890207112 2023-01-22 21:58:46.712916: step: 1956/529, loss: 0.0013366240309551358 2023-01-22 21:58:47.753845: step: 1960/529, loss: 0.0002729465486481786 2023-01-22 21:58:48.797173: step: 1964/529, loss: 0.0007911220891401172 2023-01-22 21:58:49.849814: step: 1968/529, loss: 0.0022898789029568434 2023-01-22 21:58:50.900943: step: 1972/529, loss: 0.0005048089078627527 2023-01-22 21:58:51.967039: step: 1976/529, loss: 0.0016113612800836563 2023-01-22 21:58:53.011628: step: 1980/529, loss: 0.002150622196495533 2023-01-22 21:58:54.052196: step: 1984/529, loss: 0.00198745122179389 2023-01-22 21:58:55.102670: step: 1988/529, loss: 0.0005819597281515598 2023-01-22 21:58:56.150653: step: 1992/529, loss: 0.015266776084899902 2023-01-22 21:58:57.194860: step: 1996/529, loss: 0.010057618841528893 2023-01-22 21:58:58.250615: step: 2000/529, loss: 0.0023630731739103794 2023-01-22 21:58:59.304671: step: 2004/529, loss: 0.0004862755013164133 2023-01-22 21:59:00.364400: step: 2008/529, loss: 0.005735492333769798 2023-01-22 21:59:01.411910: step: 2012/529, loss: 0.0018624253571033478 2023-01-22 21:59:02.461214: step: 2016/529, loss: 0.005650147795677185 2023-01-22 21:59:03.516822: step: 2020/529, loss: 3.1878375011729077e-05 2023-01-22 21:59:04.546900: step: 2024/529, loss: 0.0011836141347885132 2023-01-22 21:59:05.592832: step: 2028/529, loss: 0.008907408453524113 2023-01-22 21:59:06.656972: step: 2032/529, loss: 0.0010813509579747915 2023-01-22 21:59:07.737215: step: 2036/529, loss: 0.022435814142227173 2023-01-22 21:59:08.793347: step: 2040/529, loss: 0.006156958173960447 2023-01-22 21:59:09.836159: step: 2044/529, loss: 0.004454421810805798 2023-01-22 21:59:10.892546: step: 2048/529, loss: 0.007299572229385376 2023-01-22 21:59:11.937654: step: 2052/529, loss: 0.00802726112306118 2023-01-22 21:59:12.983200: step: 2056/529, loss: 0.0005126087926328182 2023-01-22 21:59:14.021969: step: 2060/529, loss: 0.0022101940121501684 2023-01-22 21:59:15.075327: step: 2064/529, loss: 0.023532552644610405 2023-01-22 21:59:16.132200: step: 2068/529, loss: 0.01658235676586628 2023-01-22 21:59:17.190662: step: 2072/529, loss: 0.010429018177092075 2023-01-22 21:59:18.265347: step: 2076/529, loss: 0.0028988479170948267 2023-01-22 21:59:19.317689: step: 2080/529, loss: 0.005375751294195652 2023-01-22 21:59:20.380161: step: 2084/529, loss: 0.0018056014087051153 2023-01-22 21:59:21.424692: step: 2088/529, loss: 0.0120967086404562 2023-01-22 21:59:22.461759: step: 2092/529, loss: 0.003507033921778202 2023-01-22 21:59:23.502423: step: 2096/529, loss: 0.0016474586445838213 2023-01-22 21:59:24.535010: step: 2100/529, loss: 0.0030624258797615767 2023-01-22 21:59:25.590431: step: 2104/529, loss: 0.009287316352128983 2023-01-22 21:59:26.628158: step: 2108/529, loss: 0.0013327146880328655 2023-01-22 21:59:27.662609: step: 2112/529, loss: 0.00021456743706949055 2023-01-22 21:59:28.710114: step: 2116/529, loss: 0.02893240936100483 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3322278050896472, 'r': 0.31142416644076987, 'f1': 0.321489785924164}, 'combined': 0.23688721068096294, 'stategy': 1, 'epoch': 12} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3801510839749281, 'r': 0.32010976966475185, 'f1': 0.3475564150031318}, 'combined': 0.24451205075597213, 'stategy': 1, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3106682556497175, 'r': 0.3395539188884958, 'f1': 0.3244694746223704}, 'combined': 0.23908277077437817, 'stategy': 1, 'epoch': 12} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3793482436728968, 'r': 0.3300693523849617, 'f1': 0.35299724679305455}, 'combined': 0.2506280452230687, 'stategy': 1, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32723132671715854, 'r': 0.3396499728923828, 'f1': 0.33332501995211494}, 'combined': 0.24560790943840047, 'stategy': 1, 'epoch': 12} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37658628910259906, 'r': 0.2996917065109689, 'f1': 0.3337674399043444}, 'combined': 0.2369748823320845, 'stategy': 1, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 13 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 22:01:59.575128: step: 4/529, loss: 0.0020661125890910625 2023-01-22 22:02:00.611393: step: 8/529, loss: 0.004797362256795168 2023-01-22 22:02:01.658015: step: 12/529, loss: 0.0030104818288236856 2023-01-22 22:02:02.684772: step: 16/529, loss: 0.005209543276578188 2023-01-22 22:02:03.721592: step: 20/529, loss: 0.003855886636301875 2023-01-22 22:02:04.770063: step: 24/529, loss: 0.008314124308526516 2023-01-22 22:02:05.794907: step: 28/529, loss: 0.0010546843986958265 2023-01-22 22:02:06.835874: step: 32/529, loss: 0.002141502918675542 2023-01-22 22:02:07.871339: step: 36/529, loss: 0.0009814116638153791 2023-01-22 22:02:08.921048: step: 40/529, loss: 0.003405241295695305 2023-01-22 22:02:09.963005: step: 44/529, loss: 0.00189563340973109 2023-01-22 22:02:11.013150: step: 48/529, loss: 0.0020448495633900166 2023-01-22 22:02:12.047011: step: 52/529, loss: 0.002719337586313486 2023-01-22 22:02:13.089763: step: 56/529, loss: 0.03908460587263107 2023-01-22 22:02:14.120343: step: 60/529, loss: 0.0020464614499360323 2023-01-22 22:02:15.186445: step: 64/529, loss: 0.0036927012261003256 2023-01-22 22:02:16.224394: step: 68/529, loss: 0.0058067562058568 2023-01-22 22:02:17.259539: step: 72/529, loss: 0.024410398676991463 2023-01-22 22:02:18.294232: step: 76/529, loss: 0.0010673971846699715 2023-01-22 22:02:19.349685: step: 80/529, loss: 0.0007646349840797484 2023-01-22 22:02:20.385126: step: 84/529, loss: 0.00024966764613054693 2023-01-22 22:02:21.435109: step: 88/529, loss: 0.005194772034883499 2023-01-22 22:02:22.480210: step: 92/529, loss: 0.000579176819883287 2023-01-22 22:02:23.537708: step: 96/529, loss: 0.004788865800946951 2023-01-22 22:02:24.580034: step: 100/529, loss: 0.006167104467749596 2023-01-22 22:02:25.623541: step: 104/529, loss: 0.008440110832452774 2023-01-22 22:02:26.675492: step: 108/529, loss: 0.00958058051764965 2023-01-22 22:02:27.744010: step: 112/529, loss: 0.00473578367382288 2023-01-22 22:02:28.780001: step: 116/529, loss: 0.0003848426276817918 2023-01-22 22:02:29.796900: step: 120/529, loss: 0.013177048414945602 2023-01-22 22:02:30.847617: step: 124/529, loss: 0.029592398554086685 2023-01-22 22:02:31.889691: step: 128/529, loss: 0.00013439639587886631 2023-01-22 22:02:32.921642: step: 132/529, loss: 0.0008256007567979395 2023-01-22 22:02:33.988085: step: 136/529, loss: 0.0034323392901569605 2023-01-22 22:02:35.027496: step: 140/529, loss: 0.001702593988738954 2023-01-22 22:02:36.061580: step: 144/529, loss: 2.79263872471347e-06 2023-01-22 22:02:37.108867: step: 148/529, loss: 0.010335664264857769 2023-01-22 22:02:38.161019: step: 152/529, loss: 0.004042556043714285 2023-01-22 22:02:39.193503: step: 156/529, loss: 0.003973588813096285 2023-01-22 22:02:40.234546: step: 160/529, loss: 0.00038929111906327307 2023-01-22 22:02:41.282847: step: 164/529, loss: 0.003283280413597822 2023-01-22 22:02:42.330592: step: 168/529, loss: 0.019665243104100227 2023-01-22 22:02:43.377475: step: 172/529, loss: 0.0005752895958721638 2023-01-22 22:02:44.444460: step: 176/529, loss: 0.003360697301104665 2023-01-22 22:02:45.501085: step: 180/529, loss: 0.0008212961256504059 2023-01-22 22:02:46.548953: step: 184/529, loss: 0.0006881274748593569 2023-01-22 22:02:47.601117: step: 188/529, loss: 0.007221120875328779 2023-01-22 22:02:48.648791: step: 192/529, loss: 0.006299884989857674 2023-01-22 22:02:49.703979: step: 196/529, loss: 0.00047569183516316116 2023-01-22 22:02:50.745071: step: 200/529, loss: 0.009679806418716908 2023-01-22 22:02:51.815789: step: 204/529, loss: 0.001241132733412087 2023-01-22 22:02:52.900750: step: 208/529, loss: 0.0017919227248057723 2023-01-22 22:02:53.952539: step: 212/529, loss: 0.0006977166049182415 2023-01-22 22:02:54.990409: step: 216/529, loss: 0.0027206167578697205 2023-01-22 22:02:56.028772: step: 220/529, loss: 1.4901126554889288e-08 2023-01-22 22:02:57.088859: step: 224/529, loss: 0.004480557981878519 2023-01-22 22:02:58.131826: step: 228/529, loss: 6.839342677267268e-05 2023-01-22 22:02:59.177576: step: 232/529, loss: 0.018638627603650093 2023-01-22 22:03:00.248998: step: 236/529, loss: 0.0004414775758050382 2023-01-22 22:03:01.302737: step: 240/529, loss: 0.005069043952971697 2023-01-22 22:03:02.352444: step: 244/529, loss: 0.0065057347528636456 2023-01-22 22:03:03.403359: step: 248/529, loss: 0.014601541683077812 2023-01-22 22:03:04.445683: step: 252/529, loss: 0.017725970596075058 2023-01-22 22:03:05.490884: step: 256/529, loss: 9.545955435896758e-06 2023-01-22 22:03:06.548816: step: 260/529, loss: 0.005442390218377113 2023-01-22 22:03:07.590914: step: 264/529, loss: 0.006095238495618105 2023-01-22 22:03:08.643522: step: 268/529, loss: 0.0005663083284161985 2023-01-22 22:03:09.700140: step: 272/529, loss: 0.006307459902018309 2023-01-22 22:03:10.748365: step: 276/529, loss: 0.005606014281511307 2023-01-22 22:03:11.797161: step: 280/529, loss: 0.0009300084784626961 2023-01-22 22:03:12.839398: step: 284/529, loss: 0.024398071691393852 2023-01-22 22:03:13.888502: step: 288/529, loss: 0.0015872081276029348 2023-01-22 22:03:14.939749: step: 292/529, loss: 0.0037489307578653097 2023-01-22 22:03:16.009219: step: 296/529, loss: 0.0005181817687116563 2023-01-22 22:03:17.052382: step: 300/529, loss: 0.0006078698788769543 2023-01-22 22:03:18.088278: step: 304/529, loss: 0.00024561380269005895 2023-01-22 22:03:19.138752: step: 308/529, loss: 0.006673007272183895 2023-01-22 22:03:20.207711: step: 312/529, loss: 0.029074745252728462 2023-01-22 22:03:21.258246: step: 316/529, loss: 0.001385142095386982 2023-01-22 22:03:22.320134: step: 320/529, loss: 0.004524963907897472 2023-01-22 22:03:23.367689: step: 324/529, loss: 8.751742279855534e-05 2023-01-22 22:03:24.413495: step: 328/529, loss: 9.058280738827307e-06 2023-01-22 22:03:25.454211: step: 332/529, loss: 0.003962811082601547 2023-01-22 22:03:26.499158: step: 336/529, loss: 0.008100816048681736 2023-01-22 22:03:27.531844: step: 340/529, loss: 1.4225344102669624e-06 2023-01-22 22:03:28.567465: step: 344/529, loss: 0.0009997226297855377 2023-01-22 22:03:29.614159: step: 348/529, loss: 0.0045590875670313835 2023-01-22 22:03:30.662792: step: 352/529, loss: 0.004171362612396479 2023-01-22 22:03:31.712465: step: 356/529, loss: 0.003166272072121501 2023-01-22 22:03:32.761821: step: 360/529, loss: 0.00483390549197793 2023-01-22 22:03:33.824844: step: 364/529, loss: 0.00013336514530237764 2023-01-22 22:03:34.878789: step: 368/529, loss: 0.005746159702539444 2023-01-22 22:03:35.931976: step: 372/529, loss: 0.0021320979576557875 2023-01-22 22:03:36.972900: step: 376/529, loss: 0.0010759546421468258 2023-01-22 22:03:38.020218: step: 380/529, loss: 0.004866831935942173 2023-01-22 22:03:39.080830: step: 384/529, loss: 0.00578966923058033 2023-01-22 22:03:40.125836: step: 388/529, loss: 0.00445671146735549 2023-01-22 22:03:41.175686: step: 392/529, loss: 0.0017480573151260614 2023-01-22 22:03:42.204336: step: 396/529, loss: 4.209559278933739e-08 2023-01-22 22:03:43.252890: step: 400/529, loss: 0.002795041538774967 2023-01-22 22:03:44.297570: step: 404/529, loss: 0.0003665978729259223 2023-01-22 22:03:45.354902: step: 408/529, loss: 3.003964593517594e-06 2023-01-22 22:03:46.420574: step: 412/529, loss: 0.00517407339066267 2023-01-22 22:03:47.462038: step: 416/529, loss: 0.001969423610717058 2023-01-22 22:03:48.515414: step: 420/529, loss: 0.002456636866554618 2023-01-22 22:03:49.565298: step: 424/529, loss: 0.004229211248457432 2023-01-22 22:03:50.622489: step: 428/529, loss: 0.0047072554007172585 2023-01-22 22:03:51.674059: step: 432/529, loss: 0.001991803525015712 2023-01-22 22:03:52.730396: step: 436/529, loss: 0.0005003004334867001 2023-01-22 22:03:53.766626: step: 440/529, loss: 3.7962745409458876e-06 2023-01-22 22:03:54.802891: step: 444/529, loss: 0.00012111301475670189 2023-01-22 22:03:55.851042: step: 448/529, loss: 0.0017179299611598253 2023-01-22 22:03:56.898427: step: 452/529, loss: 0.0014325024094432592 2023-01-22 22:03:57.960639: step: 456/529, loss: 0.00023156014503911138 2023-01-22 22:03:58.999288: step: 460/529, loss: 0.00570771936327219 2023-01-22 22:04:00.045045: step: 464/529, loss: 0.0027715875767171383 2023-01-22 22:04:01.100138: step: 468/529, loss: 0.0049084508791565895 2023-01-22 22:04:02.157196: step: 472/529, loss: 0.0016448169481009245 2023-01-22 22:04:03.216449: step: 476/529, loss: 0.0030526258051395416 2023-01-22 22:04:04.260025: step: 480/529, loss: 0.003808526787906885 2023-01-22 22:04:05.307143: step: 484/529, loss: 0.0011900898534804583 2023-01-22 22:04:06.354621: step: 488/529, loss: 0.015231611207127571 2023-01-22 22:04:07.393060: step: 492/529, loss: 2.99135058412503e-06 2023-01-22 22:04:08.440493: step: 496/529, loss: 0.001119957072660327 2023-01-22 22:04:09.491961: step: 500/529, loss: 0.0035611132625490427 2023-01-22 22:04:10.538569: step: 504/529, loss: 0.002170960884541273 2023-01-22 22:04:11.588239: step: 508/529, loss: 0.0002338943595532328 2023-01-22 22:04:12.656071: step: 512/529, loss: 0.007079706992954016 2023-01-22 22:04:13.703193: step: 516/529, loss: 0.001179091283120215 2023-01-22 22:04:14.766927: step: 520/529, loss: 0.0005809864378534257 2023-01-22 22:04:15.814629: step: 524/529, loss: 0.00419318163767457 2023-01-22 22:04:16.880083: step: 528/529, loss: 0.001712079276330769 2023-01-22 22:04:17.935710: step: 532/529, loss: 8.249054371844977e-05 2023-01-22 22:04:18.996451: step: 536/529, loss: 0.0008708325331099331 2023-01-22 22:04:20.039962: step: 540/529, loss: 0.00023778510512784123 2023-01-22 22:04:21.081985: step: 544/529, loss: 0.00044428210821934044 2023-01-22 22:04:22.127626: step: 548/529, loss: 0.001483298372477293 2023-01-22 22:04:23.173824: step: 552/529, loss: 0.0007793279364705086 2023-01-22 22:04:24.214786: step: 556/529, loss: 8.888561751518864e-06 2023-01-22 22:04:25.280019: step: 560/529, loss: 0.00010682106949388981 2023-01-22 22:04:26.327884: step: 564/529, loss: 0.0035573970526456833 2023-01-22 22:04:27.396470: step: 568/529, loss: 0.0010148158762603998 2023-01-22 22:04:28.454554: step: 572/529, loss: 0.003898882307112217 2023-01-22 22:04:29.495359: step: 576/529, loss: 0.002758265007287264 2023-01-22 22:04:30.550557: step: 580/529, loss: 0.0031173850875347853 2023-01-22 22:04:31.602681: step: 584/529, loss: 0.005281922873109579 2023-01-22 22:04:32.657869: step: 588/529, loss: 0.0021905687171965837 2023-01-22 22:04:33.721309: step: 592/529, loss: 0.0040220702067017555 2023-01-22 22:04:34.760977: step: 596/529, loss: 0.00012050622171955183 2023-01-22 22:04:35.804236: step: 600/529, loss: 0.005438728258013725 2023-01-22 22:04:36.841631: step: 604/529, loss: 0.0038155545480549335 2023-01-22 22:04:37.886139: step: 608/529, loss: 0.0031832456588745117 2023-01-22 22:04:38.960347: step: 612/529, loss: 0.0066933236084878445 2023-01-22 22:04:40.011951: step: 616/529, loss: 0.0057805622927844524 2023-01-22 22:04:41.068230: step: 620/529, loss: 0.0010341937886551023 2023-01-22 22:04:42.107926: step: 624/529, loss: 0.0030660927295684814 2023-01-22 22:04:43.155484: step: 628/529, loss: 0.009330186992883682 2023-01-22 22:04:44.205930: step: 632/529, loss: 0.00010489900887478143 2023-01-22 22:04:45.233592: step: 636/529, loss: 0.034650009125471115 2023-01-22 22:04:46.286547: step: 640/529, loss: 0.004256380721926689 2023-01-22 22:04:47.331217: step: 644/529, loss: 0.006344602443277836 2023-01-22 22:04:48.371785: step: 648/529, loss: 0.0023871518205851316 2023-01-22 22:04:49.437876: step: 652/529, loss: 0.001650508027523756 2023-01-22 22:04:50.492945: step: 656/529, loss: 0.0018196991877630353 2023-01-22 22:04:51.562696: step: 660/529, loss: 0.00402287021279335 2023-01-22 22:04:52.615518: step: 664/529, loss: 0.004933268763124943 2023-01-22 22:04:53.670807: step: 668/529, loss: 8.87352871359326e-05 2023-01-22 22:04:54.725418: step: 672/529, loss: 0.0035212389193475246 2023-01-22 22:04:55.771867: step: 676/529, loss: 0.0037635432090610266 2023-01-22 22:04:56.841559: step: 680/529, loss: 0.005341978743672371 2023-01-22 22:04:57.902012: step: 684/529, loss: 0.005741693079471588 2023-01-22 22:04:58.949013: step: 688/529, loss: 0.009633461944758892 2023-01-22 22:05:00.004618: step: 692/529, loss: 0.0036536778789013624 2023-01-22 22:05:01.067934: step: 696/529, loss: 0.012270782142877579 2023-01-22 22:05:02.106919: step: 700/529, loss: 0.0007505777175538242 2023-01-22 22:05:03.175250: step: 704/529, loss: 0.008541149087250233 2023-01-22 22:05:04.232862: step: 708/529, loss: 0.0002813671890180558 2023-01-22 22:05:05.272194: step: 712/529, loss: 8.576991240261123e-05 2023-01-22 22:05:06.308741: step: 716/529, loss: 0.002426203340291977 2023-01-22 22:05:07.365220: step: 720/529, loss: 0.00396518362686038 2023-01-22 22:05:08.412290: step: 724/529, loss: 0.0037479812745004892 2023-01-22 22:05:09.456683: step: 728/529, loss: 0.0020349733531475067 2023-01-22 22:05:10.507977: step: 732/529, loss: 8.447006985079497e-05 2023-01-22 22:05:11.565802: step: 736/529, loss: 0.0046722679398953915 2023-01-22 22:05:12.616065: step: 740/529, loss: 0.012401421554386616 2023-01-22 22:05:13.669052: step: 744/529, loss: 3.371183538547484e-06 2023-01-22 22:05:14.725828: step: 748/529, loss: 0.0030706883408129215 2023-01-22 22:05:15.766300: step: 752/529, loss: 0.004088858142495155 2023-01-22 22:05:16.809875: step: 756/529, loss: 0.0031179229263216257 2023-01-22 22:05:17.871069: step: 760/529, loss: 0.001078569795936346 2023-01-22 22:05:18.912249: step: 764/529, loss: 0.0008053707424551249 2023-01-22 22:05:19.974435: step: 768/529, loss: 0.006676113232970238 2023-01-22 22:05:21.031688: step: 772/529, loss: 0.00822971947491169 2023-01-22 22:05:22.079171: step: 776/529, loss: 0.013130847364664078 2023-01-22 22:05:23.141303: step: 780/529, loss: 0.004000606946647167 2023-01-22 22:05:24.198283: step: 784/529, loss: 0.0074057066813111305 2023-01-22 22:05:25.236432: step: 788/529, loss: 0.00500484649091959 2023-01-22 22:05:26.300441: step: 792/529, loss: 0.005439819302409887 2023-01-22 22:05:27.348164: step: 796/529, loss: 0.0075200339779257774 2023-01-22 22:05:28.389435: step: 800/529, loss: 0.0011484931455925107 2023-01-22 22:05:29.453150: step: 804/529, loss: 0.001748825772665441 2023-01-22 22:05:30.496916: step: 808/529, loss: 0.01814914681017399 2023-01-22 22:05:31.539983: step: 812/529, loss: 0.0004535217594821006 2023-01-22 22:05:32.594394: step: 816/529, loss: 0.0007095172768458724 2023-01-22 22:05:33.626170: step: 820/529, loss: 0.004177503287792206 2023-01-22 22:05:34.671306: step: 824/529, loss: 0.0007283009472303092 2023-01-22 22:05:35.713326: step: 828/529, loss: 0.005750639364123344 2023-01-22 22:05:36.758051: step: 832/529, loss: 0.0017952565103769302 2023-01-22 22:05:37.799550: step: 836/529, loss: 0.00019902654457837343 2023-01-22 22:05:38.837434: step: 840/529, loss: 0.0028454596176743507 2023-01-22 22:05:39.885675: step: 844/529, loss: 0.007106723263859749 2023-01-22 22:05:40.946816: step: 848/529, loss: 0.003491441486403346 2023-01-22 22:05:41.991118: step: 852/529, loss: 0.005451558157801628 2023-01-22 22:05:43.040715: step: 856/529, loss: 0.006979256868362427 2023-01-22 22:05:44.093288: step: 860/529, loss: 0.010109237395226955 2023-01-22 22:05:45.151735: step: 864/529, loss: 0.0021151432301849127 2023-01-22 22:05:46.183523: step: 868/529, loss: 0.003990199416875839 2023-01-22 22:05:47.243326: step: 872/529, loss: 0.00414115097373724 2023-01-22 22:05:48.304221: step: 876/529, loss: 0.0018837007228285074 2023-01-22 22:05:49.364840: step: 880/529, loss: 0.0015135619323700666 2023-01-22 22:05:50.432415: step: 884/529, loss: 0.0002434489142615348 2023-01-22 22:05:51.469040: step: 888/529, loss: 7.682701834710315e-05 2023-01-22 22:05:52.522874: step: 892/529, loss: 0.009019049815833569 2023-01-22 22:05:53.565835: step: 896/529, loss: 0.0029005573596805334 2023-01-22 22:05:54.613306: step: 900/529, loss: 0.002610166324302554 2023-01-22 22:05:55.669609: step: 904/529, loss: 0.0032255917321890593 2023-01-22 22:05:56.717244: step: 908/529, loss: 0.005235786084085703 2023-01-22 22:05:57.758328: step: 912/529, loss: 0.0025006921496242285 2023-01-22 22:05:58.807665: step: 916/529, loss: 0.0007335762493312359 2023-01-22 22:05:59.835660: step: 920/529, loss: 0.002886684611439705 2023-01-22 22:06:00.879853: step: 924/529, loss: 0.00046012047096155584 2023-01-22 22:06:01.941324: step: 928/529, loss: 0.0027566186618059874 2023-01-22 22:06:02.983850: step: 932/529, loss: 0.0010049877455458045 2023-01-22 22:06:04.031197: step: 936/529, loss: 0.006620476022362709 2023-01-22 22:06:05.068646: step: 940/529, loss: 0.000793657498434186 2023-01-22 22:06:06.114714: step: 944/529, loss: 0.00758998142555356 2023-01-22 22:06:07.154126: step: 948/529, loss: 0.0013402948388829827 2023-01-22 22:06:08.193341: step: 952/529, loss: 0.007372173015028238 2023-01-22 22:06:09.239146: step: 956/529, loss: 0.0013702159048989415 2023-01-22 22:06:10.280602: step: 960/529, loss: 0.000671215180773288 2023-01-22 22:06:11.354550: step: 964/529, loss: 0.00010007419768953696 2023-01-22 22:06:12.436068: step: 968/529, loss: 0.00841107964515686 2023-01-22 22:06:13.474947: step: 972/529, loss: 0.0013482884969562292 2023-01-22 22:06:14.522012: step: 976/529, loss: 0.0025155905168503523 2023-01-22 22:06:15.558796: step: 980/529, loss: 0.00037983356742188334 2023-01-22 22:06:16.605764: step: 984/529, loss: 0.001696680672466755 2023-01-22 22:06:17.651002: step: 988/529, loss: 0.005443311296403408 2023-01-22 22:06:18.697504: step: 992/529, loss: 0.0049167596735060215 2023-01-22 22:06:19.759708: step: 996/529, loss: 0.001431543962098658 2023-01-22 22:06:20.817695: step: 1000/529, loss: 0.00031198316719383 2023-01-22 22:06:21.867573: step: 1004/529, loss: 0.0003814932715613395 2023-01-22 22:06:22.924916: step: 1008/529, loss: 0.0009857534896582365 2023-01-22 22:06:23.967004: step: 1012/529, loss: 0.00374615122564137 2023-01-22 22:06:25.020234: step: 1016/529, loss: 0.0025709676556289196 2023-01-22 22:06:26.078849: step: 1020/529, loss: 0.003676342312246561 2023-01-22 22:06:27.124585: step: 1024/529, loss: 0.005495840683579445 2023-01-22 22:06:28.171614: step: 1028/529, loss: 0.0005040357937105 2023-01-22 22:06:29.231963: step: 1032/529, loss: 0.010000457987189293 2023-01-22 22:06:30.286912: step: 1036/529, loss: 0.004912738688290119 2023-01-22 22:06:31.362073: step: 1040/529, loss: 0.01084085926413536 2023-01-22 22:06:32.407899: step: 1044/529, loss: 0.0009981781477108598 2023-01-22 22:06:33.452734: step: 1048/529, loss: 8.090591290965676e-05 2023-01-22 22:06:34.516712: step: 1052/529, loss: 0.016693925485014915 2023-01-22 22:06:35.569544: step: 1056/529, loss: 0.009830198250710964 2023-01-22 22:06:36.614466: step: 1060/529, loss: 0.0007593714981339872 2023-01-22 22:06:37.670746: step: 1064/529, loss: 0.002879825420677662 2023-01-22 22:06:38.742096: step: 1068/529, loss: 0.006912213750183582 2023-01-22 22:06:39.800280: step: 1072/529, loss: 0.0023335698060691357 2023-01-22 22:06:40.844895: step: 1076/529, loss: 0.0051352763548493385 2023-01-22 22:06:41.909965: step: 1080/529, loss: 0.006917743943631649 2023-01-22 22:06:42.961859: step: 1084/529, loss: 0.005088940262794495 2023-01-22 22:06:44.017168: step: 1088/529, loss: 0.013597643934190273 2023-01-22 22:06:45.093748: step: 1092/529, loss: 0.0037607047706842422 2023-01-22 22:06:46.144232: step: 1096/529, loss: 0.002548146527260542 2023-01-22 22:06:47.201428: step: 1100/529, loss: 0.004771307110786438 2023-01-22 22:06:48.269098: step: 1104/529, loss: 0.0005690946127288043 2023-01-22 22:06:49.309858: step: 1108/529, loss: 0.03359944745898247 2023-01-22 22:06:50.358797: step: 1112/529, loss: 0.0028464437928050756 2023-01-22 22:06:51.404599: step: 1116/529, loss: 0.0009826518362388015 2023-01-22 22:06:52.458371: step: 1120/529, loss: 0.004724137019366026 2023-01-22 22:06:53.513087: step: 1124/529, loss: 0.004929270129650831 2023-01-22 22:06:54.565457: step: 1128/529, loss: 0.013819349929690361 2023-01-22 22:06:55.620771: step: 1132/529, loss: 0.0013363063335418701 2023-01-22 22:06:56.663690: step: 1136/529, loss: 0.0019385061459615827 2023-01-22 22:06:57.716456: step: 1140/529, loss: 0.0021166556980460882 2023-01-22 22:06:58.788411: step: 1144/529, loss: 0.0015858920523896813 2023-01-22 22:06:59.840145: step: 1148/529, loss: 0.0013822591863572598 2023-01-22 22:07:00.891507: step: 1152/529, loss: 0.0007352886605076492 2023-01-22 22:07:01.930048: step: 1156/529, loss: 0.003908718936145306 2023-01-22 22:07:02.983809: step: 1160/529, loss: 0.005205837544053793 2023-01-22 22:07:04.023381: step: 1164/529, loss: 7.31611653463915e-05 2023-01-22 22:07:05.067339: step: 1168/529, loss: 0.0021883153822273016 2023-01-22 22:07:06.105903: step: 1172/529, loss: 0.003466276917606592 2023-01-22 22:07:07.155660: step: 1176/529, loss: 2.9402990548987873e-05 2023-01-22 22:07:08.204074: step: 1180/529, loss: 0.0007753438549116254 2023-01-22 22:07:09.246975: step: 1184/529, loss: 0.0012284221593290567 2023-01-22 22:07:10.282689: step: 1188/529, loss: 0.005005307495594025 2023-01-22 22:07:11.322826: step: 1192/529, loss: 0.00047058000927791 2023-01-22 22:07:12.377782: step: 1196/529, loss: 0.03527161106467247 2023-01-22 22:07:13.407321: step: 1200/529, loss: 0.0010925685055553913 2023-01-22 22:07:14.459228: step: 1204/529, loss: 0.006169338244944811 2023-01-22 22:07:15.514598: step: 1208/529, loss: 0.004179372917860746 2023-01-22 22:07:16.565112: step: 1212/529, loss: 0.002172439359128475 2023-01-22 22:07:17.620688: step: 1216/529, loss: 0.013159377500414848 2023-01-22 22:07:18.654750: step: 1220/529, loss: 0.0007594486232846975 2023-01-22 22:07:19.714298: step: 1224/529, loss: 0.010045397095382214 2023-01-22 22:07:20.763846: step: 1228/529, loss: 0.0003671941813081503 2023-01-22 22:07:21.806125: step: 1232/529, loss: 0.014888791367411613 2023-01-22 22:07:22.849441: step: 1236/529, loss: 0.0013621074613183737 2023-01-22 22:07:23.896485: step: 1240/529, loss: 0.000595246150624007 2023-01-22 22:07:24.946638: step: 1244/529, loss: 0.0011664701160043478 2023-01-22 22:07:25.990268: step: 1248/529, loss: 0.008231216110289097 2023-01-22 22:07:27.023799: step: 1252/529, loss: 1.5318893929361366e-05 2023-01-22 22:07:28.082879: step: 1256/529, loss: 0.005235225893557072 2023-01-22 22:07:29.116667: step: 1260/529, loss: 0.0008889954187907279 2023-01-22 22:07:30.180280: step: 1264/529, loss: 0.00544637069106102 2023-01-22 22:07:31.222975: step: 1268/529, loss: 0.0017070675967261195 2023-01-22 22:07:32.267146: step: 1272/529, loss: 0.0032997624948620796 2023-01-22 22:07:33.310438: step: 1276/529, loss: 0.0027532135136425495 2023-01-22 22:07:34.360614: step: 1280/529, loss: 0.004179778508841991 2023-01-22 22:07:35.402605: step: 1284/529, loss: 0.005422530695796013 2023-01-22 22:07:36.433380: step: 1288/529, loss: 0.00449686124920845 2023-01-22 22:07:37.479083: step: 1292/529, loss: 0.005088336765766144 2023-01-22 22:07:38.540461: step: 1296/529, loss: 0.002625285880640149 2023-01-22 22:07:39.595565: step: 1300/529, loss: 0.003848590888082981 2023-01-22 22:07:40.634917: step: 1304/529, loss: 0.0032895051408559084 2023-01-22 22:07:41.674697: step: 1308/529, loss: 0.022802336141467094 2023-01-22 22:07:42.722727: step: 1312/529, loss: 0.0009778965031728148 2023-01-22 22:07:43.759209: step: 1316/529, loss: 0.002754578134045005 2023-01-22 22:07:44.808165: step: 1320/529, loss: 0.007280981168150902 2023-01-22 22:07:45.851112: step: 1324/529, loss: 7.569111858174438e-06 2023-01-22 22:07:46.895353: step: 1328/529, loss: 0.0017688446678221226 2023-01-22 22:07:47.954289: step: 1332/529, loss: 0.006402065046131611 2023-01-22 22:07:49.007706: step: 1336/529, loss: 0.005658620968461037 2023-01-22 22:07:50.045881: step: 1340/529, loss: 0.000416864815633744 2023-01-22 22:07:51.086549: step: 1344/529, loss: 0.0076031978242099285 2023-01-22 22:07:52.135252: step: 1348/529, loss: 0.010647532530128956 2023-01-22 22:07:53.205747: step: 1352/529, loss: 0.0029094917699694633 2023-01-22 22:07:54.252727: step: 1356/529, loss: 0.00999512616544962 2023-01-22 22:07:55.308702: step: 1360/529, loss: 0.0018489662325009704 2023-01-22 22:07:56.344505: step: 1364/529, loss: 0.0031395924743264914 2023-01-22 22:07:57.392373: step: 1368/529, loss: 0.0034959500189870596 2023-01-22 22:07:58.427428: step: 1372/529, loss: 0.0017480578972026706 2023-01-22 22:07:59.476842: step: 1376/529, loss: 5.7238441513618454e-05 2023-01-22 22:08:00.535806: step: 1380/529, loss: 0.0035993969067931175 2023-01-22 22:08:01.580567: step: 1384/529, loss: 0.00016169989248737693 2023-01-22 22:08:02.628585: step: 1388/529, loss: 0.00029528679442591965 2023-01-22 22:08:03.681296: step: 1392/529, loss: 0.00122136774007231 2023-01-22 22:08:04.731587: step: 1396/529, loss: 0.01023776549845934 2023-01-22 22:08:05.776292: step: 1400/529, loss: 0.0012139384634792805 2023-01-22 22:08:06.832186: step: 1404/529, loss: 0.007344082463532686 2023-01-22 22:08:07.880524: step: 1408/529, loss: 0.0012629888951778412 2023-01-22 22:08:08.929269: step: 1412/529, loss: 0.007245719898492098 2023-01-22 22:08:09.977312: step: 1416/529, loss: 0.00011588398047024384 2023-01-22 22:08:11.026827: step: 1420/529, loss: 0.009380210191011429 2023-01-22 22:08:12.079417: step: 1424/529, loss: 0.0013724250020459294 2023-01-22 22:08:13.135001: step: 1428/529, loss: 0.008972019888460636 2023-01-22 22:08:14.181200: step: 1432/529, loss: 0.001762324245646596 2023-01-22 22:08:15.218660: step: 1436/529, loss: 0.024198533967137337 2023-01-22 22:08:16.267863: step: 1440/529, loss: 0.00026054983027279377 2023-01-22 22:08:17.344503: step: 1444/529, loss: 0.007760311011224985 2023-01-22 22:08:18.374718: step: 1448/529, loss: 0.0011082420824095607 2023-01-22 22:08:19.454986: step: 1452/529, loss: 1.0289173587807454e-05 2023-01-22 22:08:20.497121: step: 1456/529, loss: 0.0013445861404761672 2023-01-22 22:08:21.546567: step: 1460/529, loss: 0.0054323081858456135 2023-01-22 22:08:22.580415: step: 1464/529, loss: 0.004967245738953352 2023-01-22 22:08:23.640051: step: 1468/529, loss: 0.0005596865667030215 2023-01-22 22:08:24.700217: step: 1472/529, loss: 0.0036300430074334145 2023-01-22 22:08:25.747087: step: 1476/529, loss: 0.001081341877579689 2023-01-22 22:08:26.791345: step: 1480/529, loss: 0.0033402361441403627 2023-01-22 22:08:27.842220: step: 1484/529, loss: 8.448483276879415e-05 2023-01-22 22:08:28.882792: step: 1488/529, loss: 0.0019197298679500818 2023-01-22 22:08:29.938631: step: 1492/529, loss: 0.03283115103840828 2023-01-22 22:08:31.000887: step: 1496/529, loss: 0.030642159283161163 2023-01-22 22:08:32.058603: step: 1500/529, loss: 0.0003372599894646555 2023-01-22 22:08:33.102138: step: 1504/529, loss: 0.0056101055815815926 2023-01-22 22:08:34.181716: step: 1508/529, loss: 0.002772001549601555 2023-01-22 22:08:35.236366: step: 1512/529, loss: 0.005742242094129324 2023-01-22 22:08:36.293170: step: 1516/529, loss: 0.001894099055789411 2023-01-22 22:08:37.357500: step: 1520/529, loss: 0.0035289162769913673 2023-01-22 22:08:38.408357: step: 1524/529, loss: 0.0035893614403903484 2023-01-22 22:08:39.451265: step: 1528/529, loss: 0.0023253585677593946 2023-01-22 22:08:40.508904: step: 1532/529, loss: 0.007577994838356972 2023-01-22 22:08:41.573681: step: 1536/529, loss: 0.029570136219263077 2023-01-22 22:08:42.617669: step: 1540/529, loss: 0.000860285887029022 2023-01-22 22:08:43.668742: step: 1544/529, loss: 0.013597846031188965 2023-01-22 22:08:44.706630: step: 1548/529, loss: 0.003933881875127554 2023-01-22 22:08:45.755801: step: 1552/529, loss: 0.015945985913276672 2023-01-22 22:08:46.811093: step: 1556/529, loss: 0.027418581768870354 2023-01-22 22:08:47.876633: step: 1560/529, loss: 0.027375882491469383 2023-01-22 22:08:48.944871: step: 1564/529, loss: 0.0002518148685339838 2023-01-22 22:08:50.002269: step: 1568/529, loss: 0.004726926330476999 2023-01-22 22:08:51.067575: step: 1572/529, loss: 0.01280633918941021 2023-01-22 22:08:52.099707: step: 1576/529, loss: 0.001645186566747725 2023-01-22 22:08:53.154738: step: 1580/529, loss: 0.0077974833548069 2023-01-22 22:08:54.194950: step: 1584/529, loss: 0.009461759589612484 2023-01-22 22:08:55.254289: step: 1588/529, loss: 0.0042156437411904335 2023-01-22 22:08:56.308766: step: 1592/529, loss: 0.0005270384717732668 2023-01-22 22:08:57.359259: step: 1596/529, loss: 0.0077084568329155445 2023-01-22 22:08:58.415047: step: 1600/529, loss: 0.0017473968910053372 2023-01-22 22:08:59.476434: step: 1604/529, loss: 0.002532375743612647 2023-01-22 22:09:00.526572: step: 1608/529, loss: 0.005044633522629738 2023-01-22 22:09:01.572399: step: 1612/529, loss: 0.002297821454703808 2023-01-22 22:09:02.619437: step: 1616/529, loss: 0.003892823588103056 2023-01-22 22:09:03.684105: step: 1620/529, loss: 0.0002748209226410836 2023-01-22 22:09:04.721110: step: 1624/529, loss: 9.179872904496733e-06 2023-01-22 22:09:05.765679: step: 1628/529, loss: 0.001021646079607308 2023-01-22 22:09:06.809790: step: 1632/529, loss: 0.005978672299534082 2023-01-22 22:09:07.867973: step: 1636/529, loss: 0.006814329884946346 2023-01-22 22:09:08.904377: step: 1640/529, loss: 0.03574233502149582 2023-01-22 22:09:09.953596: step: 1644/529, loss: 0.02467593178153038 2023-01-22 22:09:10.988639: step: 1648/529, loss: 0.008382775820791721 2023-01-22 22:09:12.041085: step: 1652/529, loss: 0.013118617236614227 2023-01-22 22:09:13.084784: step: 1656/529, loss: 0.003820637706667185 2023-01-22 22:09:14.135035: step: 1660/529, loss: 0.0017149208579212427 2023-01-22 22:09:15.183124: step: 1664/529, loss: 0.00028046814259141684 2023-01-22 22:09:16.255644: step: 1668/529, loss: 0.0019609276205301285 2023-01-22 22:09:17.292347: step: 1672/529, loss: 0.0032593209762126207 2023-01-22 22:09:18.338544: step: 1676/529, loss: 0.006112730596214533 2023-01-22 22:09:19.375709: step: 1680/529, loss: 0.00010005044896388426 2023-01-22 22:09:20.426042: step: 1684/529, loss: 3.4220702218590304e-05 2023-01-22 22:09:21.474625: step: 1688/529, loss: 0.004033186472952366 2023-01-22 22:09:22.538535: step: 1692/529, loss: 0.008895925246179104 2023-01-22 22:09:23.585943: step: 1696/529, loss: 0.008494758978486061 2023-01-22 22:09:24.626491: step: 1700/529, loss: 0.002115307841449976 2023-01-22 22:09:25.679229: step: 1704/529, loss: 0.007378788664937019 2023-01-22 22:09:26.738772: step: 1708/529, loss: 0.002627026755362749 2023-01-22 22:09:27.766685: step: 1712/529, loss: 0.009284731931984425 2023-01-22 22:09:28.818621: step: 1716/529, loss: 0.006106279790401459 2023-01-22 22:09:29.867922: step: 1720/529, loss: 0.0019213747000321746 2023-01-22 22:09:30.925902: step: 1724/529, loss: 0.0005601948359981179 2023-01-22 22:09:31.994253: step: 1728/529, loss: 0.010213938541710377 2023-01-22 22:09:33.039734: step: 1732/529, loss: 0.0005565674509853125 2023-01-22 22:09:34.095841: step: 1736/529, loss: 0.000787131953984499 2023-01-22 22:09:35.160112: step: 1740/529, loss: 0.00015847556642256677 2023-01-22 22:09:36.204415: step: 1744/529, loss: 0.00042605522321537137 2023-01-22 22:09:37.256692: step: 1748/529, loss: 0.004209085367619991 2023-01-22 22:09:38.308549: step: 1752/529, loss: 0.000641661521513015 2023-01-22 22:09:39.345543: step: 1756/529, loss: 8.162459562299773e-05 2023-01-22 22:09:40.395526: step: 1760/529, loss: 0.024534158408641815 2023-01-22 22:09:41.444725: step: 1764/529, loss: 0.0014209687942638993 2023-01-22 22:09:42.510728: step: 1768/529, loss: 0.0010247068712487817 2023-01-22 22:09:43.541803: step: 1772/529, loss: 3.590714914025739e-05 2023-01-22 22:09:44.588754: step: 1776/529, loss: 0.02816914953291416 2023-01-22 22:09:45.645445: step: 1780/529, loss: 0.00558052584528923 2023-01-22 22:09:46.700745: step: 1784/529, loss: 0.009706541895866394 2023-01-22 22:09:47.751119: step: 1788/529, loss: 0.0005814678152091801 2023-01-22 22:09:48.817462: step: 1792/529, loss: 0.0017659314908087254 2023-01-22 22:09:49.856755: step: 1796/529, loss: 0.0008670383831486106 2023-01-22 22:09:50.914357: step: 1800/529, loss: 0.0028869614470750093 2023-01-22 22:09:51.956262: step: 1804/529, loss: 0.011576946824789047 2023-01-22 22:09:52.998462: step: 1808/529, loss: 0.00038343342021107674 2023-01-22 22:09:54.048454: step: 1812/529, loss: 0.00820641964673996 2023-01-22 22:09:55.086175: step: 1816/529, loss: 0.0005200691521167755 2023-01-22 22:09:56.140384: step: 1820/529, loss: 9.082742326427251e-05 2023-01-22 22:09:57.198189: step: 1824/529, loss: 0.007103316485881805 2023-01-22 22:09:58.258936: step: 1828/529, loss: 0.012334599159657955 2023-01-22 22:09:59.295114: step: 1832/529, loss: 0.0004765341291204095 2023-01-22 22:10:00.355445: step: 1836/529, loss: 0.0059090047143399715 2023-01-22 22:10:01.408084: step: 1840/529, loss: 0.007239827420562506 2023-01-22 22:10:02.448326: step: 1844/529, loss: 0.002510536927729845 2023-01-22 22:10:03.529419: step: 1848/529, loss: 0.017909036949276924 2023-01-22 22:10:04.589261: step: 1852/529, loss: 0.0015903770690783858 2023-01-22 22:10:05.655381: step: 1856/529, loss: 0.0007870272966101766 2023-01-22 22:10:06.694657: step: 1860/529, loss: 0.0011313335271552205 2023-01-22 22:10:07.732518: step: 1864/529, loss: 0.0006634037126787007 2023-01-22 22:10:08.780218: step: 1868/529, loss: 0.0016665610019117594 2023-01-22 22:10:09.820590: step: 1872/529, loss: 0.0005217918078415096 2023-01-22 22:10:10.900240: step: 1876/529, loss: 0.006858109962195158 2023-01-22 22:10:11.931799: step: 1880/529, loss: 0.0031124502420425415 2023-01-22 22:10:12.981685: step: 1884/529, loss: 0.00012151140253990889 2023-01-22 22:10:14.021996: step: 1888/529, loss: 0.0066160480491817 2023-01-22 22:10:15.060708: step: 1892/529, loss: 0.0004973806207999587 2023-01-22 22:10:16.109569: step: 1896/529, loss: 0.00022057184833101928 2023-01-22 22:10:17.154750: step: 1900/529, loss: 0.00285763549618423 2023-01-22 22:10:18.203058: step: 1904/529, loss: 0.002581467851996422 2023-01-22 22:10:19.247146: step: 1908/529, loss: 0.0006056476850062609 2023-01-22 22:10:20.291777: step: 1912/529, loss: 0.008550887927412987 2023-01-22 22:10:21.340446: step: 1916/529, loss: 7.217138045234606e-05 2023-01-22 22:10:22.381940: step: 1920/529, loss: 0.0030196711886674166 2023-01-22 22:10:23.440890: step: 1924/529, loss: 0.004655394237488508 2023-01-22 22:10:24.486472: step: 1928/529, loss: 0.0003646163095254451 2023-01-22 22:10:25.534237: step: 1932/529, loss: 0.00010691424540709704 2023-01-22 22:10:26.567727: step: 1936/529, loss: 9.057366696652025e-05 2023-01-22 22:10:27.616031: step: 1940/529, loss: 0.0007152705220505595 2023-01-22 22:10:28.672542: step: 1944/529, loss: 0.0013242709683254361 2023-01-22 22:10:29.729200: step: 1948/529, loss: 0.029559042304754257 2023-01-22 22:10:30.783506: step: 1952/529, loss: 0.0009429925703443587 2023-01-22 22:10:31.821061: step: 1956/529, loss: 0.0024571174290031195 2023-01-22 22:10:32.869946: step: 1960/529, loss: 0.004922664258629084 2023-01-22 22:10:33.928952: step: 1964/529, loss: 0.007519112899899483 2023-01-22 22:10:34.965083: step: 1968/529, loss: 0.008268882520496845 2023-01-22 22:10:36.019847: step: 1972/529, loss: 0.008260934613645077 2023-01-22 22:10:37.067444: step: 1976/529, loss: 0.0023932557087391615 2023-01-22 22:10:38.112521: step: 1980/529, loss: 0.0017931960755959153 2023-01-22 22:10:39.158463: step: 1984/529, loss: 0.0020225034095346928 2023-01-22 22:10:40.192218: step: 1988/529, loss: 0.0015393097419291735 2023-01-22 22:10:41.241095: step: 1992/529, loss: 0.002816372085362673 2023-01-22 22:10:42.288790: step: 1996/529, loss: 0.003302858443930745 2023-01-22 22:10:43.345167: step: 2000/529, loss: 0.001450967276468873 2023-01-22 22:10:44.408983: step: 2004/529, loss: 0.0015459076967090368 2023-01-22 22:10:45.472568: step: 2008/529, loss: 0.006638125516474247 2023-01-22 22:10:46.526657: step: 2012/529, loss: 0.0036761483643203974 2023-01-22 22:10:47.584460: step: 2016/529, loss: 0.0077044907957315445 2023-01-22 22:10:48.629546: step: 2020/529, loss: 0.0005872112233191729 2023-01-22 22:10:49.685623: step: 2024/529, loss: 0.002109806053340435 2023-01-22 22:10:50.732532: step: 2028/529, loss: 0.005489069037139416 2023-01-22 22:10:51.781919: step: 2032/529, loss: 0.003942413255572319 2023-01-22 22:10:52.827580: step: 2036/529, loss: 0.0008662652689963579 2023-01-22 22:10:53.872663: step: 2040/529, loss: 0.006898907478898764 2023-01-22 22:10:54.937282: step: 2044/529, loss: 9.478346328251064e-06 2023-01-22 22:10:55.984140: step: 2048/529, loss: 0.004567817784845829 2023-01-22 22:10:57.048686: step: 2052/529, loss: 0.004708696622401476 2023-01-22 22:10:58.090390: step: 2056/529, loss: 0.00026184471789747477 2023-01-22 22:10:59.150536: step: 2060/529, loss: 0.01017149817198515 2023-01-22 22:11:00.192652: step: 2064/529, loss: 0.006889031268656254 2023-01-22 22:11:01.240600: step: 2068/529, loss: 3.684464900288731e-05 2023-01-22 22:11:02.294209: step: 2072/529, loss: 0.004533926025032997 2023-01-22 22:11:03.359537: step: 2076/529, loss: 0.0059210010804235935 2023-01-22 22:11:04.388998: step: 2080/529, loss: 0.0006793277570977807 2023-01-22 22:11:05.450644: step: 2084/529, loss: 0.0015826687449589372 2023-01-22 22:11:06.487886: step: 2088/529, loss: 0.005795718170702457 2023-01-22 22:11:07.523240: step: 2092/529, loss: 0.0038641574792563915 2023-01-22 22:11:08.576871: step: 2096/529, loss: 0.004207300953567028 2023-01-22 22:11:09.615088: step: 2100/529, loss: 0.008558275178074837 2023-01-22 22:11:10.675477: step: 2104/529, loss: 0.0020565742161124945 2023-01-22 22:11:11.719735: step: 2108/529, loss: 0.006336023565381765 2023-01-22 22:11:12.772332: step: 2112/529, loss: 0.005426003597676754 2023-01-22 22:11:13.808478: step: 2116/529, loss: 0.006619339343160391 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33830067958357435, 'r': 0.3171167660612632, 'f1': 0.32736637750104935}, 'combined': 0.24121733079024688, 'stategy': 1, 'epoch': 13} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.38417613696660335, 'r': 0.3231638708863923, 'f1': 0.3510386692282518}, 'combined': 0.24696187784902138, 'stategy': 1, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31272948917545457, 'r': 0.3424002186987425, 'f1': 0.32689296241709653}, 'combined': 0.24086849862312376, 'stategy': 1, 'epoch': 13} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37943351259736946, 'r': 0.3331207909202712, 'f1': 0.35477209580831104}, 'combined': 0.25188818802390084, 'stategy': 1, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32667066372828624, 'r': 0.3415475060992139, 'f1': 0.3339434799893984}, 'combined': 0.24606361683429354, 'stategy': 1, 'epoch': 13} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3789034925263064, 'r': 0.30252765764534933, 'f1': 0.33643541548915123}, 'combined': 0.23886914499729736, 'stategy': 1, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 14 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 22:13:46.354402: step: 4/529, loss: 0.002155041554942727 2023-01-22 22:13:47.385780: step: 8/529, loss: 0.0024896918330341578 2023-01-22 22:13:48.426960: step: 12/529, loss: 0.0019456169102340937 2023-01-22 22:13:49.464565: step: 16/529, loss: 0.0026377339381724596 2023-01-22 22:13:50.520449: step: 20/529, loss: 0.0028321901336312294 2023-01-22 22:13:51.548282: step: 24/529, loss: 0.004259231500327587 2023-01-22 22:13:52.569749: step: 28/529, loss: 0.018199829384684563 2023-01-22 22:13:53.607711: step: 32/529, loss: 0.007439835928380489 2023-01-22 22:13:54.651677: step: 36/529, loss: 0.0032286536879837513 2023-01-22 22:13:55.698916: step: 40/529, loss: 0.005584105849266052 2023-01-22 22:13:56.720251: step: 44/529, loss: 0.000737053225748241 2023-01-22 22:13:57.751292: step: 48/529, loss: 8.867979340720922e-05 2023-01-22 22:13:58.788810: step: 52/529, loss: 0.0032440032809972763 2023-01-22 22:13:59.839391: step: 56/529, loss: 0.000398068135837093 2023-01-22 22:14:00.887419: step: 60/529, loss: 0.0005101492861285806 2023-01-22 22:14:01.923968: step: 64/529, loss: 0.00027752568712458014 2023-01-22 22:14:02.959168: step: 68/529, loss: 0.0002734805748332292 2023-01-22 22:14:04.009244: step: 72/529, loss: 0.0017120081465691328 2023-01-22 22:14:05.055595: step: 76/529, loss: 0.00775559339672327 2023-01-22 22:14:06.099489: step: 80/529, loss: 0.0 2023-01-22 22:14:07.149042: step: 84/529, loss: 0.0017491179751232266 2023-01-22 22:14:08.184338: step: 88/529, loss: 0.0003052113752346486 2023-01-22 22:14:09.220366: step: 92/529, loss: 0.004354408010840416 2023-01-22 22:14:10.281228: step: 96/529, loss: 0.00548102892935276 2023-01-22 22:14:11.344372: step: 100/529, loss: 0.00046892414684407413 2023-01-22 22:14:12.384975: step: 104/529, loss: 0.024891097098588943 2023-01-22 22:14:13.451872: step: 108/529, loss: 0.002255897969007492 2023-01-22 22:14:14.509003: step: 112/529, loss: 0.0025460803881287575 2023-01-22 22:14:15.547103: step: 116/529, loss: 0.000157733287778683 2023-01-22 22:14:16.585017: step: 120/529, loss: 0.0006947237998247147 2023-01-22 22:14:17.622664: step: 124/529, loss: 0.005598415154963732 2023-01-22 22:14:18.680948: step: 128/529, loss: 0.005664017051458359 2023-01-22 22:14:19.749643: step: 132/529, loss: 0.005263058934360743 2023-01-22 22:14:20.791818: step: 136/529, loss: 0.010701052844524384 2023-01-22 22:14:21.842810: step: 140/529, loss: 0.0008631572127342224 2023-01-22 22:14:22.879285: step: 144/529, loss: 0.0003937564615625888 2023-01-22 22:14:23.915236: step: 148/529, loss: 0.0011120919371023774 2023-01-22 22:14:24.962786: step: 152/529, loss: 0.01467109564691782 2023-01-22 22:14:26.002760: step: 156/529, loss: 0.005467843730002642 2023-01-22 22:14:27.050772: step: 160/529, loss: 0.0001164595887530595 2023-01-22 22:14:28.116750: step: 164/529, loss: 0.005824428051710129 2023-01-22 22:14:29.164072: step: 168/529, loss: 0.0009500669548287988 2023-01-22 22:14:30.220298: step: 172/529, loss: 0.00394128542393446 2023-01-22 22:14:31.261271: step: 176/529, loss: 0.0038720332086086273 2023-01-22 22:14:32.298340: step: 180/529, loss: 0.003293375950306654 2023-01-22 22:14:33.336376: step: 184/529, loss: 0.00012934478581883013 2023-01-22 22:14:34.385736: step: 188/529, loss: 0.003876871196553111 2023-01-22 22:14:35.427573: step: 192/529, loss: 0.010672000236809254 2023-01-22 22:14:36.481533: step: 196/529, loss: 0.0033258579205721617 2023-01-22 22:14:37.541788: step: 200/529, loss: 0.0007002443890087306 2023-01-22 22:14:38.609323: step: 204/529, loss: 0.0027310438454151154 2023-01-22 22:14:39.660588: step: 208/529, loss: 0.001031105755828321 2023-01-22 22:14:40.709329: step: 212/529, loss: 0.0027227492537349463 2023-01-22 22:14:41.767668: step: 216/529, loss: 0.0012246674159541726 2023-01-22 22:14:42.816933: step: 220/529, loss: 0.0028238133527338505 2023-01-22 22:14:43.878170: step: 224/529, loss: 0.0016362721798941493 2023-01-22 22:14:44.917234: step: 228/529, loss: 0.0014409139985218644 2023-01-22 22:14:45.959305: step: 232/529, loss: 0.0034382170997560024 2023-01-22 22:14:47.013140: step: 236/529, loss: 0.0010161075042560697 2023-01-22 22:14:48.061135: step: 240/529, loss: 0.005142989568412304 2023-01-22 22:14:49.122395: step: 244/529, loss: 0.0007435519946739078 2023-01-22 22:14:50.194192: step: 248/529, loss: 3.999934415332973e-05 2023-01-22 22:14:51.239052: step: 252/529, loss: 0.0007353387190960348 2023-01-22 22:14:52.300637: step: 256/529, loss: 0.01636943407356739 2023-01-22 22:14:53.358557: step: 260/529, loss: 0.0003766532754525542 2023-01-22 22:14:54.429224: step: 264/529, loss: 0.005757701583206654 2023-01-22 22:14:55.472187: step: 268/529, loss: 0.01307438500225544 2023-01-22 22:14:56.516968: step: 272/529, loss: 0.004971130285412073 2023-01-22 22:14:57.580967: step: 276/529, loss: 0.008131401613354683 2023-01-22 22:14:58.642093: step: 280/529, loss: 0.008967198431491852 2023-01-22 22:14:59.698695: step: 284/529, loss: 0.00253537530079484 2023-01-22 22:15:00.783195: step: 288/529, loss: 0.00853712297976017 2023-01-22 22:15:01.834493: step: 292/529, loss: 0.0031157343182712793 2023-01-22 22:15:02.895001: step: 296/529, loss: 0.000533953367266804 2023-01-22 22:15:03.938016: step: 300/529, loss: 0.0009093710104934871 2023-01-22 22:15:04.991420: step: 304/529, loss: 0.024085940793156624 2023-01-22 22:15:06.032609: step: 308/529, loss: 0.003846596460789442 2023-01-22 22:15:07.086784: step: 312/529, loss: 0.0037498008459806442 2023-01-22 22:15:08.130214: step: 316/529, loss: 0.001778452773578465 2023-01-22 22:15:09.183047: step: 320/529, loss: 0.0061292885802686214 2023-01-22 22:15:10.228721: step: 324/529, loss: 0.0021300611551851034 2023-01-22 22:15:11.289920: step: 328/529, loss: 0.0011772337602451444 2023-01-22 22:15:12.359512: step: 332/529, loss: 8.890203753253445e-05 2023-01-22 22:15:13.413898: step: 336/529, loss: 0.000312322867102921 2023-01-22 22:15:14.459050: step: 340/529, loss: 0.00734830554574728 2023-01-22 22:15:15.496851: step: 344/529, loss: 0.004455361049622297 2023-01-22 22:15:16.549738: step: 348/529, loss: 0.00104904780164361 2023-01-22 22:15:17.600863: step: 352/529, loss: 0.0019082671497017145 2023-01-22 22:15:18.664326: step: 356/529, loss: 0.0028017640579491854 2023-01-22 22:15:19.735814: step: 360/529, loss: 0.00459581334143877 2023-01-22 22:15:20.780629: step: 364/529, loss: 0.002507390920072794 2023-01-22 22:15:21.824126: step: 368/529, loss: 0.005696725565940142 2023-01-22 22:15:22.872813: step: 372/529, loss: 0.0025276239030063152 2023-01-22 22:15:23.921865: step: 376/529, loss: 0.0014634763356298208 2023-01-22 22:15:24.993277: step: 380/529, loss: 0.0009265472763217986 2023-01-22 22:15:26.037669: step: 384/529, loss: 0.001057068700902164 2023-01-22 22:15:27.089356: step: 388/529, loss: 0.00324159674346447 2023-01-22 22:15:28.134272: step: 392/529, loss: 0.0004419496690388769 2023-01-22 22:15:29.188934: step: 396/529, loss: 0.033807914704084396 2023-01-22 22:15:30.243844: step: 400/529, loss: 0.0019463013159111142 2023-01-22 22:15:31.292534: step: 404/529, loss: 0.001065503223799169 2023-01-22 22:15:32.328987: step: 408/529, loss: 0.0009013141389004886 2023-01-22 22:15:33.379333: step: 412/529, loss: 0.0005079795373603702 2023-01-22 22:15:34.437323: step: 416/529, loss: 0.008460178039968014 2023-01-22 22:15:35.499940: step: 420/529, loss: 0.00020978682732675225 2023-01-22 22:15:36.542559: step: 424/529, loss: 0.007690533995628357 2023-01-22 22:15:37.578735: step: 428/529, loss: 0.002265217714011669 2023-01-22 22:15:38.625172: step: 432/529, loss: 0.0010795852867886424 2023-01-22 22:15:39.677424: step: 436/529, loss: 8.389084541704506e-05 2023-01-22 22:15:40.728441: step: 440/529, loss: 0.00022258538228925318 2023-01-22 22:15:41.783368: step: 444/529, loss: 0.0032947936560958624 2023-01-22 22:15:42.839334: step: 448/529, loss: 0.002077118493616581 2023-01-22 22:15:43.894585: step: 452/529, loss: 0.0038745917845517397 2023-01-22 22:15:44.942186: step: 456/529, loss: 0.006758896633982658 2023-01-22 22:15:46.009177: step: 460/529, loss: 0.002824159571900964 2023-01-22 22:15:47.077174: step: 464/529, loss: 0.0017004429828375578 2023-01-22 22:15:48.118639: step: 468/529, loss: 0.004164464771747589 2023-01-22 22:15:49.169045: step: 472/529, loss: 0.0004152019682805985 2023-01-22 22:15:50.218178: step: 476/529, loss: 0.03245808929204941 2023-01-22 22:15:51.267511: step: 480/529, loss: 0.002974143484607339 2023-01-22 22:15:52.313628: step: 484/529, loss: 0.000262746267253533 2023-01-22 22:15:53.355926: step: 488/529, loss: 0.002748754806816578 2023-01-22 22:15:54.426438: step: 492/529, loss: 0.005684906151145697 2023-01-22 22:15:55.487192: step: 496/529, loss: 0.0008600415894761682 2023-01-22 22:15:56.553223: step: 500/529, loss: 0.002134669106453657 2023-01-22 22:15:57.601402: step: 504/529, loss: 0.0003652505110949278 2023-01-22 22:15:58.648987: step: 508/529, loss: 9.394869266543537e-05 2023-01-22 22:15:59.698115: step: 512/529, loss: 0.003705232171341777 2023-01-22 22:16:00.749983: step: 516/529, loss: 4.8185422201640904e-05 2023-01-22 22:16:01.803110: step: 520/529, loss: 0.0011554473312571645 2023-01-22 22:16:02.854127: step: 524/529, loss: 0.004970197100192308 2023-01-22 22:16:03.919923: step: 528/529, loss: 0.003078704932704568 2023-01-22 22:16:04.988965: step: 532/529, loss: 0.005329811945557594 2023-01-22 22:16:06.035391: step: 536/529, loss: 0.005917105358093977 2023-01-22 22:16:07.088135: step: 540/529, loss: 0.003959241788834333 2023-01-22 22:16:08.126959: step: 544/529, loss: 7.423215720336884e-05 2023-01-22 22:16:09.176763: step: 548/529, loss: 0.002601574407890439 2023-01-22 22:16:10.250589: step: 552/529, loss: 0.0009090172243304551 2023-01-22 22:16:11.298818: step: 556/529, loss: 0.011935210786759853 2023-01-22 22:16:12.338747: step: 560/529, loss: 0.0008761092321947217 2023-01-22 22:16:13.381158: step: 564/529, loss: 0.007983102463185787 2023-01-22 22:16:14.423402: step: 568/529, loss: 0.0009231130825355649 2023-01-22 22:16:15.480452: step: 572/529, loss: 0.0145405363291502 2023-01-22 22:16:16.550802: step: 576/529, loss: 0.004041179083287716 2023-01-22 22:16:17.599983: step: 580/529, loss: 0.00153260154183954 2023-01-22 22:16:18.654170: step: 584/529, loss: 0.010185637511312962 2023-01-22 22:16:19.708691: step: 588/529, loss: 0.00577272055670619 2023-01-22 22:16:20.761924: step: 592/529, loss: 0.0002528772165533155 2023-01-22 22:16:21.801227: step: 596/529, loss: 2.7549012884264812e-05 2023-01-22 22:16:22.858574: step: 600/529, loss: 0.0031712548807263374 2023-01-22 22:16:23.924163: step: 604/529, loss: 0.0013344237813726068 2023-01-22 22:16:24.974981: step: 608/529, loss: 0.00024164778005797416 2023-01-22 22:16:26.034770: step: 612/529, loss: 0.020136168226599693 2023-01-22 22:16:27.082572: step: 616/529, loss: 0.006162128411233425 2023-01-22 22:16:28.154351: step: 620/529, loss: 0.00643115071579814 2023-01-22 22:16:29.197508: step: 624/529, loss: 0.0015247485134750605 2023-01-22 22:16:30.255482: step: 628/529, loss: 0.00420212559401989 2023-01-22 22:16:31.306590: step: 632/529, loss: 0.005724126938730478 2023-01-22 22:16:32.366899: step: 636/529, loss: 0.004357540979981422 2023-01-22 22:16:33.403971: step: 640/529, loss: 0.001910732826218009 2023-01-22 22:16:34.464485: step: 644/529, loss: 0.00031357069383375347 2023-01-22 22:16:35.514045: step: 648/529, loss: 0.004843851551413536 2023-01-22 22:16:36.560793: step: 652/529, loss: 0.00043408438796177506 2023-01-22 22:16:37.614971: step: 656/529, loss: 0.006753296125680208 2023-01-22 22:16:38.684477: step: 660/529, loss: 0.0032933454494923353 2023-01-22 22:16:39.746438: step: 664/529, loss: 0.0027763620018959045 2023-01-22 22:16:40.794171: step: 668/529, loss: 0.0038538069929927588 2023-01-22 22:16:41.845436: step: 672/529, loss: 0.0014959522522985935 2023-01-22 22:16:42.891552: step: 676/529, loss: 0.0011482766130939126 2023-01-22 22:16:43.938683: step: 680/529, loss: 0.006304308772087097 2023-01-22 22:16:44.982058: step: 684/529, loss: 0.028128989040851593 2023-01-22 22:16:46.025819: step: 688/529, loss: 7.844861102057621e-06 2023-01-22 22:16:47.072432: step: 692/529, loss: 0.004067400004714727 2023-01-22 22:16:48.119118: step: 696/529, loss: 0.0036468200851231813 2023-01-22 22:16:49.171669: step: 700/529, loss: 0.007611549459397793 2023-01-22 22:16:50.232268: step: 704/529, loss: 0.0027929057832807302 2023-01-22 22:16:51.278034: step: 708/529, loss: 7.540291699115187e-05 2023-01-22 22:16:52.334836: step: 712/529, loss: 0.0030069584026932716 2023-01-22 22:16:53.407598: step: 716/529, loss: 2.465686065988848e-06 2023-01-22 22:16:54.450693: step: 720/529, loss: 0.0004384104977361858 2023-01-22 22:16:55.496194: step: 724/529, loss: 0.005286080297082663 2023-01-22 22:16:56.538373: step: 728/529, loss: 2.288771611347329e-06 2023-01-22 22:16:57.604133: step: 732/529, loss: 0.0015304546104744077 2023-01-22 22:16:58.653729: step: 736/529, loss: 0.002393218455836177 2023-01-22 22:16:59.697345: step: 740/529, loss: 0.002610682975500822 2023-01-22 22:17:00.743891: step: 744/529, loss: 0.0013310756767168641 2023-01-22 22:17:01.785120: step: 748/529, loss: 6.078930709918495e-06 2023-01-22 22:17:02.831787: step: 752/529, loss: 0.009128289297223091 2023-01-22 22:17:03.869042: step: 756/529, loss: 0.00015589853865094483 2023-01-22 22:17:04.917600: step: 760/529, loss: 0.00020308865350671113 2023-01-22 22:17:05.972856: step: 764/529, loss: 0.0062463413923978806 2023-01-22 22:17:07.015742: step: 768/529, loss: 0.005044538527727127 2023-01-22 22:17:08.075949: step: 772/529, loss: 0.0030988561920821667 2023-01-22 22:17:09.131532: step: 776/529, loss: 0.00045495491940528154 2023-01-22 22:17:10.197398: step: 780/529, loss: 0.006148281041532755 2023-01-22 22:17:11.253591: step: 784/529, loss: 0.0037484855856746435 2023-01-22 22:17:12.304612: step: 788/529, loss: 0.0024214996956288815 2023-01-22 22:17:13.352650: step: 792/529, loss: 0.001267142128199339 2023-01-22 22:17:14.403843: step: 796/529, loss: 0.010626288130879402 2023-01-22 22:17:15.446246: step: 800/529, loss: 0.00012084989430150017 2023-01-22 22:17:16.503251: step: 804/529, loss: 0.004150815773755312 2023-01-22 22:17:17.557364: step: 808/529, loss: 0.0003200530481990427 2023-01-22 22:17:18.606044: step: 812/529, loss: 0.07948880642652512 2023-01-22 22:17:19.647072: step: 816/529, loss: 0.00025089934933930635 2023-01-22 22:17:20.694332: step: 820/529, loss: 0.006020818371325731 2023-01-22 22:17:21.734753: step: 824/529, loss: 0.004475736059248447 2023-01-22 22:17:22.780285: step: 828/529, loss: 0.0023098408710211515 2023-01-22 22:17:23.827840: step: 832/529, loss: 0.004625964909791946 2023-01-22 22:17:24.886223: step: 836/529, loss: 0.0036058761179447174 2023-01-22 22:17:25.920645: step: 840/529, loss: 0.00279402662999928 2023-01-22 22:17:26.959486: step: 844/529, loss: 0.016049500554800034 2023-01-22 22:17:28.002902: step: 848/529, loss: 0.001846490427851677 2023-01-22 22:17:29.072267: step: 852/529, loss: 0.02167278155684471 2023-01-22 22:17:30.113750: step: 856/529, loss: 0.00016633969789836556 2023-01-22 22:17:31.166147: step: 860/529, loss: 0.005792663432657719 2023-01-22 22:17:32.215678: step: 864/529, loss: 0.0022735409438610077 2023-01-22 22:17:33.260009: step: 868/529, loss: 0.02926735393702984 2023-01-22 22:17:34.310708: step: 872/529, loss: 0.001265300321392715 2023-01-22 22:17:35.355391: step: 876/529, loss: 0.0004687622422352433 2023-01-22 22:17:36.394193: step: 880/529, loss: 0.0007134170737117529 2023-01-22 22:17:37.449322: step: 884/529, loss: 0.0013752337545156479 2023-01-22 22:17:38.496921: step: 888/529, loss: 0.0016911706188693643 2023-01-22 22:17:39.540950: step: 892/529, loss: 0.0035386206582188606 2023-01-22 22:17:40.589895: step: 896/529, loss: 0.004194940906018019 2023-01-22 22:17:41.652164: step: 900/529, loss: 0.0007625749567523599 2023-01-22 22:17:42.697888: step: 904/529, loss: 0.001970997080206871 2023-01-22 22:17:43.734124: step: 908/529, loss: 0.00860562827438116 2023-01-22 22:17:44.783335: step: 912/529, loss: 9.350409158059847e-08 2023-01-22 22:17:45.830020: step: 916/529, loss: 4.811794497072697e-05 2023-01-22 22:17:46.880175: step: 920/529, loss: 0.0011264131171628833 2023-01-22 22:17:47.945658: step: 924/529, loss: 0.00645265681669116 2023-01-22 22:17:49.042125: step: 928/529, loss: 0.017367225140333176 2023-01-22 22:17:50.102280: step: 932/529, loss: 2.5015289793373086e-05 2023-01-22 22:17:51.160825: step: 936/529, loss: 0.009600379504263401 2023-01-22 22:17:52.196849: step: 940/529, loss: 0.0007128501893021166 2023-01-22 22:17:53.256222: step: 944/529, loss: 0.0011572977527976036 2023-01-22 22:17:54.294546: step: 948/529, loss: 0.0 2023-01-22 22:17:55.343469: step: 952/529, loss: 0.005213496740907431 2023-01-22 22:17:56.402969: step: 956/529, loss: 0.009229489602148533 2023-01-22 22:17:57.448138: step: 960/529, loss: 0.00018803254351951182 2023-01-22 22:17:58.509640: step: 964/529, loss: 1.0339845175622031e-06 2023-01-22 22:17:59.548489: step: 968/529, loss: 0.0019180604722350836 2023-01-22 22:18:00.586431: step: 972/529, loss: 0.0005541059072129428 2023-01-22 22:18:01.637387: step: 976/529, loss: 0.00013990083243697882 2023-01-22 22:18:02.689722: step: 980/529, loss: 0.003979128319770098 2023-01-22 22:18:03.734420: step: 984/529, loss: 0.00013347630738280714 2023-01-22 22:18:04.786952: step: 988/529, loss: 0.010431413538753986 2023-01-22 22:18:05.832836: step: 992/529, loss: 0.013405528850853443 2023-01-22 22:18:06.870252: step: 996/529, loss: 0.005670431535691023 2023-01-22 22:18:07.916345: step: 1000/529, loss: 4.412421549204737e-05 2023-01-22 22:18:08.981798: step: 1004/529, loss: 0.005037806462496519 2023-01-22 22:18:10.045219: step: 1008/529, loss: 0.013764195144176483 2023-01-22 22:18:11.092497: step: 1012/529, loss: 0.01303254347294569 2023-01-22 22:18:12.138705: step: 1016/529, loss: 0.0015644734958186746 2023-01-22 22:18:13.207235: step: 1020/529, loss: 0.015473760664463043 2023-01-22 22:18:14.283193: step: 1024/529, loss: 0.010308913886547089 2023-01-22 22:18:15.339986: step: 1028/529, loss: 0.00089548050891608 2023-01-22 22:18:16.391807: step: 1032/529, loss: 0.00725750345736742 2023-01-22 22:18:17.459655: step: 1036/529, loss: 9.78075186139904e-05 2023-01-22 22:18:18.499566: step: 1040/529, loss: 8.911739132599905e-05 2023-01-22 22:18:19.544885: step: 1044/529, loss: 0.0020880233496427536 2023-01-22 22:18:20.595136: step: 1048/529, loss: 0.0030502784065902233 2023-01-22 22:18:21.651548: step: 1052/529, loss: 2.607702498380604e-09 2023-01-22 22:18:22.695890: step: 1056/529, loss: 0.007089340128004551 2023-01-22 22:18:23.736054: step: 1060/529, loss: 0.0038894531317055225 2023-01-22 22:18:24.786057: step: 1064/529, loss: 0.0003203263331670314 2023-01-22 22:18:25.844870: step: 1068/529, loss: 0.00047433737199753523 2023-01-22 22:18:26.916845: step: 1072/529, loss: 0.0030498255509883165 2023-01-22 22:18:27.955325: step: 1076/529, loss: 0.00015224394155666232 2023-01-22 22:18:28.989799: step: 1080/529, loss: 0.0026026112027466297 2023-01-22 22:18:30.035809: step: 1084/529, loss: 0.006098547484725714 2023-01-22 22:18:31.083324: step: 1088/529, loss: 0.003093560691922903 2023-01-22 22:18:32.125046: step: 1092/529, loss: 0.006555825471878052 2023-01-22 22:18:33.165307: step: 1096/529, loss: 0.002913222648203373 2023-01-22 22:18:34.209315: step: 1100/529, loss: 0.0013297703117132187 2023-01-22 22:18:35.273245: step: 1104/529, loss: 0.0017203304450958967 2023-01-22 22:18:36.328414: step: 1108/529, loss: 0.007219015620648861 2023-01-22 22:18:37.372147: step: 1112/529, loss: 0.005297096446156502 2023-01-22 22:18:38.407471: step: 1116/529, loss: 0.005970312282443047 2023-01-22 22:18:39.454499: step: 1120/529, loss: 0.0008048623567447066 2023-01-22 22:18:40.510153: step: 1124/529, loss: 0.005152496043592691 2023-01-22 22:18:41.555745: step: 1128/529, loss: 0.006924059242010117 2023-01-22 22:18:42.594624: step: 1132/529, loss: 8.650627933093347e-06 2023-01-22 22:18:43.654018: step: 1136/529, loss: 0.003863108344376087 2023-01-22 22:18:44.701667: step: 1140/529, loss: 0.0020686748903244734 2023-01-22 22:18:45.753347: step: 1144/529, loss: 0.002790396334603429 2023-01-22 22:18:46.819815: step: 1148/529, loss: 0.0071144853718578815 2023-01-22 22:18:47.854033: step: 1152/529, loss: 0.00039947134791873395 2023-01-22 22:18:48.908616: step: 1156/529, loss: 0.003844982013106346 2023-01-22 22:18:49.944453: step: 1160/529, loss: 0.0016410565003752708 2023-01-22 22:18:50.991298: step: 1164/529, loss: 0.009676797315478325 2023-01-22 22:18:52.065646: step: 1168/529, loss: 0.001850215601734817 2023-01-22 22:18:53.119653: step: 1172/529, loss: 0.005094414576888084 2023-01-22 22:18:54.156470: step: 1176/529, loss: 0.00012213742593303323 2023-01-22 22:18:55.224617: step: 1180/529, loss: 0.0033602348994463682 2023-01-22 22:18:56.278878: step: 1184/529, loss: 0.0028793150559067726 2023-01-22 22:18:57.319953: step: 1188/529, loss: 4.4469630665844306e-05 2023-01-22 22:18:58.365791: step: 1192/529, loss: 0.01524511817842722 2023-01-22 22:18:59.397060: step: 1196/529, loss: 0.003424513852223754 2023-01-22 22:19:00.457184: step: 1200/529, loss: 0.018090898171067238 2023-01-22 22:19:01.505945: step: 1204/529, loss: 0.006048219744116068 2023-01-22 22:19:02.566373: step: 1208/529, loss: 0.010499890893697739 2023-01-22 22:19:03.615266: step: 1212/529, loss: 0.0007027279934845865 2023-01-22 22:19:04.673530: step: 1216/529, loss: 0.009670097380876541 2023-01-22 22:19:05.714643: step: 1220/529, loss: 0.0006043684552423656 2023-01-22 22:19:06.765763: step: 1224/529, loss: 0.008252520114183426 2023-01-22 22:19:07.812851: step: 1228/529, loss: 0.0016587678110226989 2023-01-22 22:19:08.876536: step: 1232/529, loss: 0.0037884386256337166 2023-01-22 22:19:09.918836: step: 1236/529, loss: 0.00356994173489511 2023-01-22 22:19:10.966399: step: 1240/529, loss: 0.004430616274476051 2023-01-22 22:19:12.019397: step: 1244/529, loss: 0.003589344909414649 2023-01-22 22:19:13.079972: step: 1248/529, loss: 0.0034834539983421564 2023-01-22 22:19:14.123381: step: 1252/529, loss: 0.0011686928337439895 2023-01-22 22:19:15.159524: step: 1256/529, loss: 0.00046972971176728606 2023-01-22 22:19:16.206156: step: 1260/529, loss: 0.0004676127282436937 2023-01-22 22:19:17.258337: step: 1264/529, loss: 0.002452847780659795 2023-01-22 22:19:18.310150: step: 1268/529, loss: 0.00738517427816987 2023-01-22 22:19:19.360159: step: 1272/529, loss: 0.0006423183949664235 2023-01-22 22:19:20.414928: step: 1276/529, loss: 0.0014510777546092868 2023-01-22 22:19:21.461947: step: 1280/529, loss: 0.002393566071987152 2023-01-22 22:19:22.517696: step: 1284/529, loss: 3.10423674818594e-05 2023-01-22 22:19:23.574687: step: 1288/529, loss: 7.145856943679973e-05 2023-01-22 22:19:24.629854: step: 1292/529, loss: 0.0017516142688691616 2023-01-22 22:19:25.696399: step: 1296/529, loss: 0.0008705674554221332 2023-01-22 22:19:26.743688: step: 1300/529, loss: 8.241000614361838e-05 2023-01-22 22:19:27.792820: step: 1304/529, loss: 0.006379532627761364 2023-01-22 22:19:28.845836: step: 1308/529, loss: 0.001797855831682682 2023-01-22 22:19:29.895985: step: 1312/529, loss: 0.0020840391516685486 2023-01-22 22:19:30.938956: step: 1316/529, loss: 0.005940550472587347 2023-01-22 22:19:31.981922: step: 1320/529, loss: 0.000661805912386626 2023-01-22 22:19:33.031435: step: 1324/529, loss: 0.0022622672840952873 2023-01-22 22:19:34.084115: step: 1328/529, loss: 0.012145286425948143 2023-01-22 22:19:35.131969: step: 1332/529, loss: 0.0002955519303213805 2023-01-22 22:19:36.183412: step: 1336/529, loss: 0.0020370502024888992 2023-01-22 22:19:37.228900: step: 1340/529, loss: 0.0025677757803350687 2023-01-22 22:19:38.289216: step: 1344/529, loss: 0.000634418916888535 2023-01-22 22:19:39.341969: step: 1348/529, loss: 0.004189720377326012 2023-01-22 22:19:40.400198: step: 1352/529, loss: 0.005921152886003256 2023-01-22 22:19:41.439506: step: 1356/529, loss: 0.0022508951369673014 2023-01-22 22:19:42.484139: step: 1360/529, loss: 0.00119803287088871 2023-01-22 22:19:43.529125: step: 1364/529, loss: 0.0036022388376295567 2023-01-22 22:19:44.568701: step: 1368/529, loss: 0.0006720342789776623 2023-01-22 22:19:45.607559: step: 1372/529, loss: 0.0022841556929051876 2023-01-22 22:19:46.655827: step: 1376/529, loss: 0.00012633096775971353 2023-01-22 22:19:47.711471: step: 1380/529, loss: 0.0010301998117938638 2023-01-22 22:19:48.757190: step: 1384/529, loss: 0.0014144123997539282 2023-01-22 22:19:49.800087: step: 1388/529, loss: 0.00037574220914393663 2023-01-22 22:19:50.860447: step: 1392/529, loss: 0.0001125840499298647 2023-01-22 22:19:51.910976: step: 1396/529, loss: 0.0015256714541465044 2023-01-22 22:19:52.984228: step: 1400/529, loss: 0.0010932701407000422 2023-01-22 22:19:54.033957: step: 1404/529, loss: 0.030305279418826103 2023-01-22 22:19:55.072985: step: 1408/529, loss: 0.00935385748744011 2023-01-22 22:19:56.132233: step: 1412/529, loss: 0.006787709891796112 2023-01-22 22:19:57.182164: step: 1416/529, loss: 6.2260933191282675e-06 2023-01-22 22:19:58.210824: step: 1420/529, loss: 0.00024706675321795046 2023-01-22 22:19:59.265955: step: 1424/529, loss: 0.0016209837049245834 2023-01-22 22:20:00.306321: step: 1428/529, loss: 0.00030261397478170693 2023-01-22 22:20:01.355035: step: 1432/529, loss: 0.000810148601885885 2023-01-22 22:20:02.403100: step: 1436/529, loss: 0.002890772419050336 2023-01-22 22:20:03.446713: step: 1440/529, loss: 0.0015610548434779048 2023-01-22 22:20:04.495176: step: 1444/529, loss: 0.003267600666731596 2023-01-22 22:20:05.527680: step: 1448/529, loss: 0.0009290831512771547 2023-01-22 22:20:06.582435: step: 1452/529, loss: 0.004930882249027491 2023-01-22 22:20:07.638404: step: 1456/529, loss: 0.0014959500404074788 2023-01-22 22:20:08.683097: step: 1460/529, loss: 0.001852201297879219 2023-01-22 22:20:09.718136: step: 1464/529, loss: 0.0013484645169228315 2023-01-22 22:20:10.758984: step: 1468/529, loss: 8.979537960840389e-05 2023-01-22 22:20:11.799639: step: 1472/529, loss: 0.0047757914289832115 2023-01-22 22:20:12.845735: step: 1476/529, loss: 0.0012093611294403672 2023-01-22 22:20:13.904881: step: 1480/529, loss: 0.006836912594735622 2023-01-22 22:20:14.952223: step: 1484/529, loss: 0.03023025020956993 2023-01-22 22:20:15.990886: step: 1488/529, loss: 0.0027947898488491774 2023-01-22 22:20:17.039763: step: 1492/529, loss: 0.0011901309480890632 2023-01-22 22:20:18.082359: step: 1496/529, loss: 0.00440404424443841 2023-01-22 22:20:19.152469: step: 1500/529, loss: 0.0022089453414082527 2023-01-22 22:20:20.197819: step: 1504/529, loss: 0.0015562280314043164 2023-01-22 22:20:21.241191: step: 1508/529, loss: 0.004183995071798563 2023-01-22 22:20:22.307022: step: 1512/529, loss: 0.0017055704956874251 2023-01-22 22:20:23.358026: step: 1516/529, loss: 0.0004223677678965032 2023-01-22 22:20:24.422033: step: 1520/529, loss: 0.00618738355115056 2023-01-22 22:20:25.464305: step: 1524/529, loss: 0.0017013464821502566 2023-01-22 22:20:26.513725: step: 1528/529, loss: 0.008293135091662407 2023-01-22 22:20:27.581758: step: 1532/529, loss: 0.0015210562851279974 2023-01-22 22:20:28.618176: step: 1536/529, loss: 0.00017991337517742068 2023-01-22 22:20:29.658538: step: 1540/529, loss: 0.0006232120795175433 2023-01-22 22:20:30.720271: step: 1544/529, loss: 0.0008592517697252333 2023-01-22 22:20:31.775705: step: 1548/529, loss: 0.00017508870223537087 2023-01-22 22:20:32.829355: step: 1552/529, loss: 0.0019115714821964502 2023-01-22 22:20:33.872447: step: 1556/529, loss: 0.007235506549477577 2023-01-22 22:20:34.908586: step: 1560/529, loss: 0.00026587743195705116 2023-01-22 22:20:35.963518: step: 1564/529, loss: 0.02108778990805149 2023-01-22 22:20:37.021162: step: 1568/529, loss: 0.0002542046713642776 2023-01-22 22:20:38.079892: step: 1572/529, loss: 0.043281376361846924 2023-01-22 22:20:39.143544: step: 1576/529, loss: 0.0018378469394519925 2023-01-22 22:20:40.176159: step: 1580/529, loss: 0.0005417782231234014 2023-01-22 22:20:41.222581: step: 1584/529, loss: 0.0037707893643528223 2023-01-22 22:20:42.267365: step: 1588/529, loss: 0.0007958101341500878 2023-01-22 22:20:43.327616: step: 1592/529, loss: 0.0039056213572621346 2023-01-22 22:20:44.385506: step: 1596/529, loss: 0.005149473436176777 2023-01-22 22:20:45.437634: step: 1600/529, loss: 0.01222204603254795 2023-01-22 22:20:46.479450: step: 1604/529, loss: 0.01026830356568098 2023-01-22 22:20:47.529866: step: 1608/529, loss: 0.0020108758471906185 2023-01-22 22:20:48.577017: step: 1612/529, loss: 0.0037760769482702017 2023-01-22 22:20:49.628454: step: 1616/529, loss: 0.0014027329161763191 2023-01-22 22:20:50.684550: step: 1620/529, loss: 0.0037235114723443985 2023-01-22 22:20:51.735028: step: 1624/529, loss: 0.003399776527658105 2023-01-22 22:20:52.780888: step: 1628/529, loss: 0.0009062264580279589 2023-01-22 22:20:53.832895: step: 1632/529, loss: 0.006494338158518076 2023-01-22 22:20:54.887884: step: 1636/529, loss: 0.0031147990375757217 2023-01-22 22:20:55.925672: step: 1640/529, loss: 0.0019699931144714355 2023-01-22 22:20:56.987233: step: 1644/529, loss: 0.00017522247799206525 2023-01-22 22:20:58.053394: step: 1648/529, loss: 0.009914258494973183 2023-01-22 22:20:59.091546: step: 1652/529, loss: 0.0004724213504232466 2023-01-22 22:21:00.144757: step: 1656/529, loss: 0.029571304097771645 2023-01-22 22:21:01.201987: step: 1660/529, loss: 0.003799635451287031 2023-01-22 22:21:02.253145: step: 1664/529, loss: 0.0020503555424511433 2023-01-22 22:21:03.314422: step: 1668/529, loss: 0.0030156716238707304 2023-01-22 22:21:04.381094: step: 1672/529, loss: 0.0007942665251903236 2023-01-22 22:21:05.418406: step: 1676/529, loss: 0.00023483762925025076 2023-01-22 22:21:06.481277: step: 1680/529, loss: 0.00456179678440094 2023-01-22 22:21:07.529517: step: 1684/529, loss: 0.00029944704147055745 2023-01-22 22:21:08.560942: step: 1688/529, loss: 0.004664743784815073 2023-01-22 22:21:09.617579: step: 1692/529, loss: 0.008343634195625782 2023-01-22 22:21:10.660155: step: 1696/529, loss: 0.006985554937273264 2023-01-22 22:21:11.716648: step: 1700/529, loss: 0.0020220025908201933 2023-01-22 22:21:12.768934: step: 1704/529, loss: 0.00547309685498476 2023-01-22 22:21:13.813780: step: 1708/529, loss: 0.001600018353201449 2023-01-22 22:21:14.885443: step: 1712/529, loss: 0.0012805818114429712 2023-01-22 22:21:15.941302: step: 1716/529, loss: 0.003580654039978981 2023-01-22 22:21:16.979778: step: 1720/529, loss: 0.0027319910004734993 2023-01-22 22:21:18.031255: step: 1724/529, loss: 0.006305277347564697 2023-01-22 22:21:19.091091: step: 1728/529, loss: 0.0007833088166080415 2023-01-22 22:21:20.158358: step: 1732/529, loss: 0.0032503888942301273 2023-01-22 22:21:21.200485: step: 1736/529, loss: 0.007268775720149279 2023-01-22 22:21:22.248347: step: 1740/529, loss: 0.003392603248357773 2023-01-22 22:21:23.292604: step: 1744/529, loss: 0.00544536579400301 2023-01-22 22:21:24.360639: step: 1748/529, loss: 0.0060477484948933125 2023-01-22 22:21:25.408806: step: 1752/529, loss: 0.002798511879518628 2023-01-22 22:21:26.450981: step: 1756/529, loss: 0.006484963931143284 2023-01-22 22:21:27.493594: step: 1760/529, loss: 0.004555354826152325 2023-01-22 22:21:28.536557: step: 1764/529, loss: 0.0005081974086351693 2023-01-22 22:21:29.594892: step: 1768/529, loss: 0.00535816652700305 2023-01-22 22:21:30.649125: step: 1772/529, loss: 0.00024211459094658494 2023-01-22 22:21:31.730519: step: 1776/529, loss: 0.004681541118770838 2023-01-22 22:21:32.777020: step: 1780/529, loss: 0.0004348951333668083 2023-01-22 22:21:33.830686: step: 1784/529, loss: 0.012286700308322906 2023-01-22 22:21:34.883712: step: 1788/529, loss: 0.010655753314495087 2023-01-22 22:21:35.925227: step: 1792/529, loss: 0.00028969047707505524 2023-01-22 22:21:36.977767: step: 1796/529, loss: 0.002627830719575286 2023-01-22 22:21:38.037656: step: 1800/529, loss: 0.001515217125415802 2023-01-22 22:21:39.088951: step: 1804/529, loss: 0.010236173868179321 2023-01-22 22:21:40.142049: step: 1808/529, loss: 0.001353904022835195 2023-01-22 22:21:41.203798: step: 1812/529, loss: 0.0016460255719721317 2023-01-22 22:21:42.278345: step: 1816/529, loss: 0.010405554436147213 2023-01-22 22:21:43.328044: step: 1820/529, loss: 0.0007682672585360706 2023-01-22 22:21:44.375499: step: 1824/529, loss: 0.011460606940090656 2023-01-22 22:21:45.424501: step: 1828/529, loss: 0.0007372607942670584 2023-01-22 22:21:46.483826: step: 1832/529, loss: 0.0008323192014358938 2023-01-22 22:21:47.511206: step: 1836/529, loss: 0.018875084817409515 2023-01-22 22:21:48.560319: step: 1840/529, loss: 0.0017271727556362748 2023-01-22 22:21:49.605322: step: 1844/529, loss: 0.017299069091677666 2023-01-22 22:21:50.635844: step: 1848/529, loss: 0.00012825964950025082 2023-01-22 22:21:51.677763: step: 1852/529, loss: 0.0035302641335874796 2023-01-22 22:21:52.736968: step: 1856/529, loss: 0.00363427447155118 2023-01-22 22:21:53.780613: step: 1860/529, loss: 0.004317414481192827 2023-01-22 22:21:54.833893: step: 1864/529, loss: 2.819651854224503e-05 2023-01-22 22:21:55.903851: step: 1868/529, loss: 0.00021076505072414875 2023-01-22 22:21:56.958630: step: 1872/529, loss: 0.0018988648662343621 2023-01-22 22:21:58.015214: step: 1876/529, loss: 0.02652699127793312 2023-01-22 22:21:59.085073: step: 1880/529, loss: 0.004440431483089924 2023-01-22 22:22:00.155403: step: 1884/529, loss: 0.006358923856168985 2023-01-22 22:22:01.204864: step: 1888/529, loss: 0.003089465433731675 2023-01-22 22:22:02.240775: step: 1892/529, loss: 0.0008446264546364546 2023-01-22 22:22:03.287398: step: 1896/529, loss: 0.0005855276831425726 2023-01-22 22:22:04.352356: step: 1900/529, loss: 0.004542219452559948 2023-01-22 22:22:05.406916: step: 1904/529, loss: 0.005437630694359541 2023-01-22 22:22:06.459139: step: 1908/529, loss: 0.0001455077581340447 2023-01-22 22:22:07.503527: step: 1912/529, loss: 0.0003511338436510414 2023-01-22 22:22:08.540235: step: 1916/529, loss: 0.004878541920334101 2023-01-22 22:22:09.601148: step: 1920/529, loss: 0.007897789590060711 2023-01-22 22:22:10.645277: step: 1924/529, loss: 0.00013669008330907673 2023-01-22 22:22:11.705788: step: 1928/529, loss: 3.771206320379861e-05 2023-01-22 22:22:12.750698: step: 1932/529, loss: 0.0010777936549857259 2023-01-22 22:22:13.778129: step: 1936/529, loss: 0.0012563636992126703 2023-01-22 22:22:14.816595: step: 1940/529, loss: 0.0006482333992607892 2023-01-22 22:22:15.864531: step: 1944/529, loss: 0.002654564566910267 2023-01-22 22:22:16.907543: step: 1948/529, loss: 0.0015561624895781279 2023-01-22 22:22:17.965535: step: 1952/529, loss: 0.002170481253415346 2023-01-22 22:22:19.016350: step: 1956/529, loss: 0.00042557946289889514 2023-01-22 22:22:20.070330: step: 1960/529, loss: 0.0014467902947217226 2023-01-22 22:22:21.110172: step: 1964/529, loss: 3.72021459043026e-05 2023-01-22 22:22:22.156915: step: 1968/529, loss: 0.0004619108804035932 2023-01-22 22:22:23.207850: step: 1972/529, loss: 0.0078103202395141125 2023-01-22 22:22:24.249265: step: 1976/529, loss: 2.5208486476913095e-05 2023-01-22 22:22:25.303086: step: 1980/529, loss: 0.0024055971298366785 2023-01-22 22:22:26.349755: step: 1984/529, loss: 0.0017308022361248732 2023-01-22 22:22:27.416725: step: 1988/529, loss: 0.0021961783058941364 2023-01-22 22:22:28.477786: step: 1992/529, loss: 0.004103126935660839 2023-01-22 22:22:29.521789: step: 1996/529, loss: 0.0026436406187713146 2023-01-22 22:22:30.565870: step: 2000/529, loss: 0.0019824618939310312 2023-01-22 22:22:31.622372: step: 2004/529, loss: 0.005595298949629068 2023-01-22 22:22:32.667683: step: 2008/529, loss: 0.001071313163265586 2023-01-22 22:22:33.718315: step: 2012/529, loss: 0.009003261104226112 2023-01-22 22:22:34.765352: step: 2016/529, loss: 0.05005432292819023 2023-01-22 22:22:35.818639: step: 2020/529, loss: 0.0026370352134108543 2023-01-22 22:22:36.851465: step: 2024/529, loss: 0.003142518224194646 2023-01-22 22:22:37.919404: step: 2028/529, loss: 0.005490990821272135 2023-01-22 22:22:38.993795: step: 2032/529, loss: 0.004497552756220102 2023-01-22 22:22:40.043917: step: 2036/529, loss: 0.009655139409005642 2023-01-22 22:22:41.088280: step: 2040/529, loss: 6.18443446001038e-05 2023-01-22 22:22:42.119145: step: 2044/529, loss: 0.0007054428569972515 2023-01-22 22:22:43.184464: step: 2048/529, loss: 0.004290977958589792 2023-01-22 22:22:44.215132: step: 2052/529, loss: 0.01048473734408617 2023-01-22 22:22:45.263247: step: 2056/529, loss: 0.00985910277813673 2023-01-22 22:22:46.321238: step: 2060/529, loss: 0.00368954217992723 2023-01-22 22:22:47.357877: step: 2064/529, loss: 2.9466292517099646e-07 2023-01-22 22:22:48.398545: step: 2068/529, loss: 0.01730327121913433 2023-01-22 22:22:49.456708: step: 2072/529, loss: 0.0021001403219997883 2023-01-22 22:22:50.512395: step: 2076/529, loss: 0.0007918146438896656 2023-01-22 22:22:51.556953: step: 2080/529, loss: 0.004450857173651457 2023-01-22 22:22:52.603724: step: 2084/529, loss: 0.010045414790511131 2023-01-22 22:22:53.646716: step: 2088/529, loss: 0.000921680883038789 2023-01-22 22:22:54.723219: step: 2092/529, loss: 0.0015958340372890234 2023-01-22 22:22:55.780228: step: 2096/529, loss: 0.0005397596978582442 2023-01-22 22:22:56.821994: step: 2100/529, loss: 0.006287833675742149 2023-01-22 22:22:57.860265: step: 2104/529, loss: 0.00022083580552134663 2023-01-22 22:22:58.905574: step: 2108/529, loss: 0.0034712550695985556 2023-01-22 22:22:59.957902: step: 2112/529, loss: 0.00458060996606946 2023-01-22 22:23:00.997416: step: 2116/529, loss: 0.01014195941388607 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3349300927267459, 'r': 0.313321699647601, 'f1': 0.32376575630252097}, 'combined': 0.23856424148606806, 'stategy': 1, 'epoch': 14} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.378204757801041, 'r': 0.31880086914119166, 'f1': 0.3459713977611796}, 'combined': 0.24339696324907106, 'stategy': 1, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316221492590292, 'r': 0.3438233686038658, 'f1': 0.32944530046224957}, 'combined': 0.24274916876165756, 'stategy': 1, 'epoch': 14} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3823395471855511, 'r': 0.33333875081565045, 'f1': 0.3561616648211933}, 'combined': 0.25287478202304725, 'stategy': 1, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3229275884665793, 'r': 0.3339573732718894, 'f1': 0.3283498800639659}, 'combined': 0.24194201688923803, 'stategy': 1, 'epoch': 14} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.377548513349693, 'r': 0.30012800668548895, 'f1': 0.3344158441045895}, 'combined': 0.23743524931425855, 'stategy': 1, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 15 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 22:25:31.669498: step: 4/529, loss: 0.02547350898385048 2023-01-22 22:25:32.701583: step: 8/529, loss: 0.0024359237868338823 2023-01-22 22:25:33.743257: step: 12/529, loss: 0.010536756366491318 2023-01-22 22:25:34.794674: step: 16/529, loss: 0.004394357558339834 2023-01-22 22:25:35.858883: step: 20/529, loss: 0.005176179111003876 2023-01-22 22:25:36.893330: step: 24/529, loss: 0.006142718717455864 2023-01-22 22:25:37.930839: step: 28/529, loss: 0.0014051987091079354 2023-01-22 22:25:38.977221: step: 32/529, loss: 0.002256381558254361 2023-01-22 22:25:39.994708: step: 36/529, loss: 0.00350232794880867 2023-01-22 22:25:41.045254: step: 40/529, loss: 5.2118579333182424e-05 2023-01-22 22:25:42.091146: step: 44/529, loss: 0.005257933400571346 2023-01-22 22:25:43.137756: step: 48/529, loss: 0.0021917736157774925 2023-01-22 22:25:44.169918: step: 52/529, loss: 0.0002937588142231107 2023-01-22 22:25:45.211043: step: 56/529, loss: 0.0036064530722796917 2023-01-22 22:25:46.251409: step: 60/529, loss: 0.008360263891518116 2023-01-22 22:25:47.287302: step: 64/529, loss: 0.0011056349612772465 2023-01-22 22:25:48.340533: step: 68/529, loss: 0.0011078242678195238 2023-01-22 22:25:49.375896: step: 72/529, loss: 0.003867167979478836 2023-01-22 22:25:50.416076: step: 76/529, loss: 0.0028319107368588448 2023-01-22 22:25:51.451285: step: 80/529, loss: 0.0055654821917414665 2023-01-22 22:25:52.487928: step: 84/529, loss: 0.0013121470110490918 2023-01-22 22:25:53.524394: step: 88/529, loss: 3.651808583526872e-05 2023-01-22 22:25:54.562910: step: 92/529, loss: 0.0029635934624820948 2023-01-22 22:25:55.589861: step: 96/529, loss: 0.0007521317456848919 2023-01-22 22:25:56.651850: step: 100/529, loss: 4.6694611228303984e-05 2023-01-22 22:25:57.707357: step: 104/529, loss: 0.006930282339453697 2023-01-22 22:25:58.752633: step: 108/529, loss: 0.01244620606303215 2023-01-22 22:25:59.775967: step: 112/529, loss: 0.00013211714394856244 2023-01-22 22:26:00.807062: step: 116/529, loss: 0.0009811338968575 2023-01-22 22:26:01.855053: step: 120/529, loss: 0.006453531328588724 2023-01-22 22:26:02.910594: step: 124/529, loss: 0.00013782500172965229 2023-01-22 22:26:03.957452: step: 128/529, loss: 0.0031705941073596478 2023-01-22 22:26:05.022429: step: 132/529, loss: 0.000844110909383744 2023-01-22 22:26:06.065241: step: 136/529, loss: 0.00041022332152351737 2023-01-22 22:26:07.101547: step: 140/529, loss: 0.003398698288947344 2023-01-22 22:26:08.148642: step: 144/529, loss: 0.0006233734311535954 2023-01-22 22:26:09.199525: step: 148/529, loss: 0.0002525327727198601 2023-01-22 22:26:10.236150: step: 152/529, loss: 0.001142202178016305 2023-01-22 22:26:11.280742: step: 156/529, loss: 0.00030284057720564306 2023-01-22 22:26:12.341908: step: 160/529, loss: 0.0012181728379800916 2023-01-22 22:26:13.380374: step: 164/529, loss: 0.01125998143106699 2023-01-22 22:26:14.436537: step: 168/529, loss: 0.0018623822834342718 2023-01-22 22:26:15.490740: step: 172/529, loss: 0.001834856579080224 2023-01-22 22:26:16.540698: step: 176/529, loss: 0.0 2023-01-22 22:26:17.596857: step: 180/529, loss: 0.017671937122941017 2023-01-22 22:26:18.628903: step: 184/529, loss: 0.0015418316470459104 2023-01-22 22:26:19.701177: step: 188/529, loss: 0.0037392976228147745 2023-01-22 22:26:20.758016: step: 192/529, loss: 0.010129069909453392 2023-01-22 22:26:21.810676: step: 196/529, loss: 9.772488556336612e-05 2023-01-22 22:26:22.852676: step: 200/529, loss: 0.0009238485945388675 2023-01-22 22:26:23.925963: step: 204/529, loss: 0.002464145887643099 2023-01-22 22:26:24.973674: step: 208/529, loss: 0.030568746849894524 2023-01-22 22:26:26.040049: step: 212/529, loss: 0.0010398138547316194 2023-01-22 22:26:27.111375: step: 216/529, loss: 0.002943401224911213 2023-01-22 22:26:28.164844: step: 220/529, loss: 0.0008004967239685357 2023-01-22 22:26:29.214421: step: 224/529, loss: 0.012237193062901497 2023-01-22 22:26:30.255361: step: 228/529, loss: 0.0064261299557983875 2023-01-22 22:26:31.316782: step: 232/529, loss: 0.001803913852199912 2023-01-22 22:26:32.363043: step: 236/529, loss: 0.0001677940017543733 2023-01-22 22:26:33.409270: step: 240/529, loss: 0.005809134803712368 2023-01-22 22:26:34.452317: step: 244/529, loss: 0.0031476204749196768 2023-01-22 22:26:35.511868: step: 248/529, loss: 0.0023658541031181812 2023-01-22 22:26:36.570501: step: 252/529, loss: 0.003516340861096978 2023-01-22 22:26:37.631185: step: 256/529, loss: 6.895070328027941e-06 2023-01-22 22:26:38.674584: step: 260/529, loss: 2.7178200980415568e-05 2023-01-22 22:26:39.732485: step: 264/529, loss: 0.002515393542125821 2023-01-22 22:26:40.816464: step: 268/529, loss: 0.0016364234033972025 2023-01-22 22:26:41.863619: step: 272/529, loss: 0.00437150988727808 2023-01-22 22:26:42.908765: step: 276/529, loss: 0.006446543149650097 2023-01-22 22:26:43.960525: step: 280/529, loss: 0.004683638922870159 2023-01-22 22:26:45.015237: step: 284/529, loss: 0.024891860783100128 2023-01-22 22:26:46.075663: step: 288/529, loss: 0.0013586105778813362 2023-01-22 22:26:47.127956: step: 292/529, loss: 0.00580563023686409 2023-01-22 22:26:48.178741: step: 296/529, loss: 0.0007761814049445093 2023-01-22 22:26:49.241818: step: 300/529, loss: 0.0007714795065112412 2023-01-22 22:26:50.293279: step: 304/529, loss: 0.00043657227070070803 2023-01-22 22:26:51.355932: step: 308/529, loss: 0.010554426349699497 2023-01-22 22:26:52.396191: step: 312/529, loss: 0.003605673788115382 2023-01-22 22:26:53.463958: step: 316/529, loss: 0.0010989387519657612 2023-01-22 22:26:54.508342: step: 320/529, loss: 0.0009404823067598045 2023-01-22 22:26:55.574027: step: 324/529, loss: 0.007967980578541756 2023-01-22 22:26:56.631847: step: 328/529, loss: 0.004060606472194195 2023-01-22 22:26:57.696765: step: 332/529, loss: 0.02769039385020733 2023-01-22 22:26:58.766134: step: 336/529, loss: 0.008088546805083752 2023-01-22 22:26:59.814419: step: 340/529, loss: 0.0007039580959826708 2023-01-22 22:27:00.887550: step: 344/529, loss: 0.0022395076230168343 2023-01-22 22:27:01.921918: step: 348/529, loss: 0.01384727843105793 2023-01-22 22:27:02.976142: step: 352/529, loss: 0.00398551020771265 2023-01-22 22:27:04.041499: step: 356/529, loss: 0.005353952292352915 2023-01-22 22:27:05.081379: step: 360/529, loss: 0.0175437293946743 2023-01-22 22:27:06.136609: step: 364/529, loss: 0.0021287009585648775 2023-01-22 22:27:07.184162: step: 368/529, loss: 0.007279746234416962 2023-01-22 22:27:08.251577: step: 372/529, loss: 0.005400799680501223 2023-01-22 22:27:09.313932: step: 376/529, loss: 0.0007781374733895063 2023-01-22 22:27:10.365497: step: 380/529, loss: 0.0069593945518136024 2023-01-22 22:27:11.423186: step: 384/529, loss: 0.008567679673433304 2023-01-22 22:27:12.491502: step: 388/529, loss: 0.0024873402435332537 2023-01-22 22:27:13.536342: step: 392/529, loss: 7.720361463725567e-05 2023-01-22 22:27:14.591191: step: 396/529, loss: 0.0021369722671806812 2023-01-22 22:27:15.638932: step: 400/529, loss: 0.002031546551734209 2023-01-22 22:27:16.680752: step: 404/529, loss: 0.017181765288114548 2023-01-22 22:27:17.737529: step: 408/529, loss: 0.008010189048945904 2023-01-22 22:27:18.785502: step: 412/529, loss: 0.005657844245433807 2023-01-22 22:27:19.811760: step: 416/529, loss: 0.002242745365947485 2023-01-22 22:27:20.903463: step: 420/529, loss: 0.00472149346023798 2023-01-22 22:27:21.956433: step: 424/529, loss: 0.002846792573109269 2023-01-22 22:27:23.004788: step: 428/529, loss: 0.0038022934459149837 2023-01-22 22:27:24.056676: step: 432/529, loss: 0.007306874729692936 2023-01-22 22:27:25.093098: step: 436/529, loss: 0.0017006334383040667 2023-01-22 22:27:26.143753: step: 440/529, loss: 0.00595502695068717 2023-01-22 22:27:27.180529: step: 444/529, loss: 0.0022472855634987354 2023-01-22 22:27:28.236584: step: 448/529, loss: 0.0021269740536808968 2023-01-22 22:27:29.306891: step: 452/529, loss: 0.006769747007638216 2023-01-22 22:27:30.360910: step: 456/529, loss: 0.002602675464004278 2023-01-22 22:27:31.410689: step: 460/529, loss: 0.007864139974117279 2023-01-22 22:27:32.479599: step: 464/529, loss: 0.0011239133309572935 2023-01-22 22:27:33.522508: step: 468/529, loss: 0.002571893623098731 2023-01-22 22:27:34.575520: step: 472/529, loss: 0.0008138378616422415 2023-01-22 22:27:35.631951: step: 476/529, loss: 0.0007387528312392533 2023-01-22 22:27:36.684634: step: 480/529, loss: 5.3386305808089674e-05 2023-01-22 22:27:37.726184: step: 484/529, loss: 0.0008478633244521916 2023-01-22 22:27:38.779778: step: 488/529, loss: 0.00877456646412611 2023-01-22 22:27:39.840531: step: 492/529, loss: 0.000730357482098043 2023-01-22 22:27:40.891141: step: 496/529, loss: 0.006803086493164301 2023-01-22 22:27:41.936758: step: 500/529, loss: 0.009922508150339127 2023-01-22 22:27:42.988625: step: 504/529, loss: 0.0013279798440635204 2023-01-22 22:27:44.034804: step: 508/529, loss: 0.004157001152634621 2023-01-22 22:27:45.082007: step: 512/529, loss: 0.0011677667498588562 2023-01-22 22:27:46.132927: step: 516/529, loss: 0.006929452531039715 2023-01-22 22:27:47.184863: step: 520/529, loss: 0.00869422871619463 2023-01-22 22:27:48.248298: step: 524/529, loss: 0.007221246603876352 2023-01-22 22:27:49.302525: step: 528/529, loss: 0.0027418616227805614 2023-01-22 22:27:50.356973: step: 532/529, loss: 0.00040890014497563243 2023-01-22 22:27:51.414606: step: 536/529, loss: 0.046654339879751205 2023-01-22 22:27:52.490494: step: 540/529, loss: 0.010836038738489151 2023-01-22 22:27:53.557312: step: 544/529, loss: 0.0014794718008488417 2023-01-22 22:27:54.616470: step: 548/529, loss: 0.00042480259435251355 2023-01-22 22:27:55.661972: step: 552/529, loss: 0.009522294625639915 2023-01-22 22:27:56.727059: step: 556/529, loss: 0.062240198254585266 2023-01-22 22:27:57.770075: step: 560/529, loss: 0.011479411274194717 2023-01-22 22:27:58.837387: step: 564/529, loss: 0.01005205325782299 2023-01-22 22:27:59.877528: step: 568/529, loss: 0.008391836658120155 2023-01-22 22:28:00.919997: step: 572/529, loss: 2.6581063139019534e-05 2023-01-22 22:28:01.971177: step: 576/529, loss: 0.00020754177239723504 2023-01-22 22:28:03.035523: step: 580/529, loss: 0.0021191900596022606 2023-01-22 22:28:04.094244: step: 584/529, loss: 0.0059251622296869755 2023-01-22 22:28:05.163424: step: 588/529, loss: 0.015873238444328308 2023-01-22 22:28:06.213729: step: 592/529, loss: 0.0022850006353110075 2023-01-22 22:28:07.256874: step: 596/529, loss: 0.009629203006625175 2023-01-22 22:28:08.312162: step: 600/529, loss: 0.0008775306050665677 2023-01-22 22:28:09.360922: step: 604/529, loss: 0.0015412142965942621 2023-01-22 22:28:10.413595: step: 608/529, loss: 0.002682688180357218 2023-01-22 22:28:11.456386: step: 612/529, loss: 0.00675795366987586 2023-01-22 22:28:12.509637: step: 616/529, loss: 0.0038627793546766043 2023-01-22 22:28:13.552173: step: 620/529, loss: 0.007737928070127964 2023-01-22 22:28:14.607353: step: 624/529, loss: 0.005158205982297659 2023-01-22 22:28:15.658648: step: 628/529, loss: 0.002522035501897335 2023-01-22 22:28:16.721569: step: 632/529, loss: 0.0022010558750480413 2023-01-22 22:28:17.764261: step: 636/529, loss: 0.007542647421360016 2023-01-22 22:28:18.814592: step: 640/529, loss: 0.006155920680612326 2023-01-22 22:28:19.870169: step: 644/529, loss: 0.0018787117442116141 2023-01-22 22:28:20.922522: step: 648/529, loss: 0.003504598280414939 2023-01-22 22:28:21.974663: step: 652/529, loss: 0.002860158681869507 2023-01-22 22:28:23.033844: step: 656/529, loss: 0.005974387284368277 2023-01-22 22:28:24.084803: step: 660/529, loss: 0.004573486279696226 2023-01-22 22:28:25.145586: step: 664/529, loss: 0.0013934510061517358 2023-01-22 22:28:26.213601: step: 668/529, loss: 0.002816671971231699 2023-01-22 22:28:27.261315: step: 672/529, loss: 0.00842900387942791 2023-01-22 22:28:28.305008: step: 676/529, loss: 3.678074062918313e-05 2023-01-22 22:28:29.359031: step: 680/529, loss: 0.007004658225923777 2023-01-22 22:28:30.405400: step: 684/529, loss: 0.0030656338203698397 2023-01-22 22:28:31.441505: step: 688/529, loss: 0.009362931363284588 2023-01-22 22:28:32.500400: step: 692/529, loss: 0.00020349063561297953 2023-01-22 22:28:33.541167: step: 696/529, loss: 0.0032213758677244186 2023-01-22 22:28:34.593195: step: 700/529, loss: 0.005186780821532011 2023-01-22 22:28:35.646114: step: 704/529, loss: 0.011361275799572468 2023-01-22 22:28:36.699572: step: 708/529, loss: 0.0035757229197770357 2023-01-22 22:28:37.751454: step: 712/529, loss: 0.04425759240984917 2023-01-22 22:28:38.819865: step: 716/529, loss: 0.0067059057764709 2023-01-22 22:28:39.862677: step: 720/529, loss: 0.006438928656280041 2023-01-22 22:28:40.894296: step: 724/529, loss: 0.00032496300991624594 2023-01-22 22:28:41.946953: step: 728/529, loss: 0.0012740223901346326 2023-01-22 22:28:42.992503: step: 732/529, loss: 0.007967410609126091 2023-01-22 22:28:44.049321: step: 736/529, loss: 0.005423595663160086 2023-01-22 22:28:45.107060: step: 740/529, loss: 0.007249068934470415 2023-01-22 22:28:46.140880: step: 744/529, loss: 0.007369640748947859 2023-01-22 22:28:47.199358: step: 748/529, loss: 0.0006447425112128258 2023-01-22 22:28:48.256734: step: 752/529, loss: 0.00011417798668844625 2023-01-22 22:28:49.314414: step: 756/529, loss: 0.00752163166180253 2023-01-22 22:28:50.378082: step: 760/529, loss: 0.0035515357740223408 2023-01-22 22:28:51.425968: step: 764/529, loss: 0.03446972742676735 2023-01-22 22:28:52.494748: step: 768/529, loss: 0.000921080878470093 2023-01-22 22:28:53.545113: step: 772/529, loss: 0.0012987533118575811 2023-01-22 22:28:54.598732: step: 776/529, loss: 0.004751041065901518 2023-01-22 22:28:55.633625: step: 780/529, loss: 0.007162058260291815 2023-01-22 22:28:56.667722: step: 784/529, loss: 0.0030342331156134605 2023-01-22 22:28:57.712475: step: 788/529, loss: 0.001183570479042828 2023-01-22 22:28:58.760540: step: 792/529, loss: 4.184794670436531e-05 2023-01-22 22:28:59.802420: step: 796/529, loss: 0.0 2023-01-22 22:29:00.838362: step: 800/529, loss: 0.007229954935610294 2023-01-22 22:29:01.905710: step: 804/529, loss: 0.0014380726497620344 2023-01-22 22:29:02.958104: step: 808/529, loss: 0.0007997100474312901 2023-01-22 22:29:04.041364: step: 812/529, loss: 0.02172614075243473 2023-01-22 22:29:05.118085: step: 816/529, loss: 0.0026660545263439417 2023-01-22 22:29:06.170698: step: 820/529, loss: 0.0014644160401076078 2023-01-22 22:29:07.214125: step: 824/529, loss: 0.00634665647521615 2023-01-22 22:29:08.246699: step: 828/529, loss: 0.0008031089673750103 2023-01-22 22:29:09.290666: step: 832/529, loss: 0.0011976719833910465 2023-01-22 22:29:10.342633: step: 836/529, loss: 6.576683517778292e-05 2023-01-22 22:29:11.390183: step: 840/529, loss: 0.015353788621723652 2023-01-22 22:29:12.431636: step: 844/529, loss: 0.0001090245132218115 2023-01-22 22:29:13.500925: step: 848/529, loss: 0.001465656328946352 2023-01-22 22:29:14.543418: step: 852/529, loss: 0.00026879823417402804 2023-01-22 22:29:15.589235: step: 856/529, loss: 0.0032660504803061485 2023-01-22 22:29:16.638524: step: 860/529, loss: 2.0073419364052825e-05 2023-01-22 22:29:17.690930: step: 864/529, loss: 0.004537275992333889 2023-01-22 22:29:18.764799: step: 868/529, loss: 0.0021819269750267267 2023-01-22 22:29:19.795521: step: 872/529, loss: 0.0001611022889846936 2023-01-22 22:29:20.829976: step: 876/529, loss: 0.00015540956519544125 2023-01-22 22:29:21.884385: step: 880/529, loss: 0.003966639284044504 2023-01-22 22:29:22.939892: step: 884/529, loss: 0.009977524168789387 2023-01-22 22:29:23.996568: step: 888/529, loss: 0.0008841767557896674 2023-01-22 22:29:25.039205: step: 892/529, loss: 0.004638585262000561 2023-01-22 22:29:26.084964: step: 896/529, loss: 0.002827602904289961 2023-01-22 22:29:27.134428: step: 900/529, loss: 0.007450871169567108 2023-01-22 22:29:28.188307: step: 904/529, loss: 0.007704949472099543 2023-01-22 22:29:29.253062: step: 908/529, loss: 0.0012313313782215118 2023-01-22 22:29:30.306685: step: 912/529, loss: 0.004235626198351383 2023-01-22 22:29:31.361187: step: 916/529, loss: 0.001037833048030734 2023-01-22 22:29:32.409697: step: 920/529, loss: 0.060191232711076736 2023-01-22 22:29:33.460288: step: 924/529, loss: 0.001633047591894865 2023-01-22 22:29:34.535560: step: 928/529, loss: 0.0011934576323255897 2023-01-22 22:29:35.587611: step: 932/529, loss: 0.0010923752561211586 2023-01-22 22:29:36.628034: step: 936/529, loss: 0.007551896385848522 2023-01-22 22:29:37.685700: step: 940/529, loss: 0.001908862846903503 2023-01-22 22:29:38.732687: step: 944/529, loss: 0.008333663456141949 2023-01-22 22:29:39.776788: step: 948/529, loss: 0.013990223407745361 2023-01-22 22:29:40.825818: step: 952/529, loss: 0.003981521353125572 2023-01-22 22:29:41.876775: step: 956/529, loss: 0.01204980444163084 2023-01-22 22:29:42.940397: step: 960/529, loss: 0.0024978260044008493 2023-01-22 22:29:43.987982: step: 964/529, loss: 0.0012541920877993107 2023-01-22 22:29:45.044560: step: 968/529, loss: 0.011120478622615337 2023-01-22 22:29:46.093585: step: 972/529, loss: 0.01642058975994587 2023-01-22 22:29:47.149076: step: 976/529, loss: 0.0008371184812858701 2023-01-22 22:29:48.192938: step: 980/529, loss: 0.0033924595918506384 2023-01-22 22:29:49.254474: step: 984/529, loss: 0.0605584979057312 2023-01-22 22:29:50.306751: step: 988/529, loss: 0.029489995911717415 2023-01-22 22:29:51.354045: step: 992/529, loss: 0.0028151371516287327 2023-01-22 22:29:52.411208: step: 996/529, loss: 0.0014346233801916242 2023-01-22 22:29:53.466272: step: 1000/529, loss: 0.005104742478579283 2023-01-22 22:29:54.527860: step: 1004/529, loss: 0.003330428386107087 2023-01-22 22:29:55.588510: step: 1008/529, loss: 0.00872800499200821 2023-01-22 22:29:56.641906: step: 1012/529, loss: 0.009540783241391182 2023-01-22 22:29:57.684510: step: 1016/529, loss: 0.012608298100531101 2023-01-22 22:29:58.743362: step: 1020/529, loss: 0.0002557553816586733 2023-01-22 22:29:59.781310: step: 1024/529, loss: 0.01341619249433279 2023-01-22 22:30:00.822752: step: 1028/529, loss: 0.0011868203291669488 2023-01-22 22:30:01.854212: step: 1032/529, loss: 0.00023331133706960827 2023-01-22 22:30:02.912590: step: 1036/529, loss: 0.021170977503061295 2023-01-22 22:30:03.965053: step: 1040/529, loss: 0.009980987757444382 2023-01-22 22:30:05.002648: step: 1044/529, loss: 0.00884020421653986 2023-01-22 22:30:06.044629: step: 1048/529, loss: 0.00028001322061754763 2023-01-22 22:30:07.092612: step: 1052/529, loss: 0.0014264644123613834 2023-01-22 22:30:08.144081: step: 1056/529, loss: 0.013133921660482883 2023-01-22 22:30:09.186755: step: 1060/529, loss: 0.00042832340113818645 2023-01-22 22:30:10.238729: step: 1064/529, loss: 0.001327728503383696 2023-01-22 22:30:11.291544: step: 1068/529, loss: 0.01047524530440569 2023-01-22 22:30:12.325616: step: 1072/529, loss: 0.00042269338155165315 2023-01-22 22:30:13.369819: step: 1076/529, loss: 0.0021684295497834682 2023-01-22 22:30:14.410846: step: 1080/529, loss: 0.020686794072389603 2023-01-22 22:30:15.469310: step: 1084/529, loss: 0.021807998418807983 2023-01-22 22:30:16.527510: step: 1088/529, loss: 0.014890799298882484 2023-01-22 22:30:17.571579: step: 1092/529, loss: 0.009047629311680794 2023-01-22 22:30:18.621653: step: 1096/529, loss: 0.009094939567148685 2023-01-22 22:30:19.688914: step: 1100/529, loss: 0.0036889223847538233 2023-01-22 22:30:20.765678: step: 1104/529, loss: 0.002958907512947917 2023-01-22 22:30:21.814546: step: 1108/529, loss: 0.00041549192974343896 2023-01-22 22:30:22.864304: step: 1112/529, loss: 0.000194278807612136 2023-01-22 22:30:23.907877: step: 1116/529, loss: 7.379856106126681e-05 2023-01-22 22:30:24.966929: step: 1120/529, loss: 4.395669748191722e-07 2023-01-22 22:30:26.021350: step: 1124/529, loss: 0.013221163302659988 2023-01-22 22:30:27.082961: step: 1128/529, loss: 0.02606922574341297 2023-01-22 22:30:28.131817: step: 1132/529, loss: 0.0003559020406100899 2023-01-22 22:30:29.175823: step: 1136/529, loss: 0.0002540110726840794 2023-01-22 22:30:30.220576: step: 1140/529, loss: 0.0008303250651806593 2023-01-22 22:30:31.276950: step: 1144/529, loss: 0.04103177785873413 2023-01-22 22:30:32.313652: step: 1148/529, loss: 0.00151734403334558 2023-01-22 22:30:33.382319: step: 1152/529, loss: 0.018841566517949104 2023-01-22 22:30:34.413253: step: 1156/529, loss: 0.0016617031069472432 2023-01-22 22:30:35.471610: step: 1160/529, loss: 0.0002569114731159061 2023-01-22 22:30:36.521118: step: 1164/529, loss: 0.00046839378774166107 2023-01-22 22:30:37.577285: step: 1168/529, loss: 0.0011505342554301023 2023-01-22 22:30:38.620221: step: 1172/529, loss: 0.006648893002420664 2023-01-22 22:30:39.671464: step: 1176/529, loss: 6.648615089943632e-05 2023-01-22 22:30:40.709988: step: 1180/529, loss: 0.008183448575437069 2023-01-22 22:30:41.757865: step: 1184/529, loss: 0.005855028983205557 2023-01-22 22:30:42.812472: step: 1188/529, loss: 0.0055140843614935875 2023-01-22 22:30:43.856110: step: 1192/529, loss: 0.0005517246900126338 2023-01-22 22:30:44.893019: step: 1196/529, loss: 0.0009113152627833188 2023-01-22 22:30:45.929332: step: 1200/529, loss: 2.0724279238493182e-05 2023-01-22 22:30:46.987164: step: 1204/529, loss: 0.04111278057098389 2023-01-22 22:30:48.042037: step: 1208/529, loss: 0.003848353633657098 2023-01-22 22:30:49.082501: step: 1212/529, loss: 0.005723903886973858 2023-01-22 22:30:50.123022: step: 1216/529, loss: 0.0015278341015800834 2023-01-22 22:30:51.180504: step: 1220/529, loss: 0.003078390145674348 2023-01-22 22:30:52.217408: step: 1224/529, loss: 0.00029825459932908416 2023-01-22 22:30:53.278086: step: 1228/529, loss: 0.006305004935711622 2023-01-22 22:30:54.315294: step: 1232/529, loss: 0.003038804279640317 2023-01-22 22:30:55.378710: step: 1236/529, loss: 0.019613297656178474 2023-01-22 22:30:56.419858: step: 1240/529, loss: 0.006205080542713404 2023-01-22 22:30:57.458763: step: 1244/529, loss: 0.002174819353967905 2023-01-22 22:30:58.513688: step: 1248/529, loss: 0.007180603686720133 2023-01-22 22:30:59.577111: step: 1252/529, loss: 0.011505067348480225 2023-01-22 22:31:00.621229: step: 1256/529, loss: 0.018739808350801468 2023-01-22 22:31:01.678382: step: 1260/529, loss: 0.007175636012107134 2023-01-22 22:31:02.714997: step: 1264/529, loss: 0.0007976465858519077 2023-01-22 22:31:03.768824: step: 1268/529, loss: 0.0011825715191662312 2023-01-22 22:31:04.813735: step: 1272/529, loss: 0.02307371236383915 2023-01-22 22:31:05.862788: step: 1276/529, loss: 0.0007031054701656103 2023-01-22 22:31:06.920685: step: 1280/529, loss: 0.004278052132576704 2023-01-22 22:31:07.957405: step: 1284/529, loss: 0.00021557587024290115 2023-01-22 22:31:09.015536: step: 1288/529, loss: 0.0046916864812374115 2023-01-22 22:31:10.076314: step: 1292/529, loss: 0.009935064241290092 2023-01-22 22:31:11.136627: step: 1296/529, loss: 3.562248093658127e-05 2023-01-22 22:31:12.195563: step: 1300/529, loss: 0.0010613743215799332 2023-01-22 22:31:13.253081: step: 1304/529, loss: 0.014720913954079151 2023-01-22 22:31:14.309038: step: 1308/529, loss: 0.0009182118810713291 2023-01-22 22:31:15.352318: step: 1312/529, loss: 0.0007193423807621002 2023-01-22 22:31:16.408312: step: 1316/529, loss: 0.0038255939725786448 2023-01-22 22:31:17.453192: step: 1320/529, loss: 0.0019312261138111353 2023-01-22 22:31:18.500490: step: 1324/529, loss: 0.0019026235677301884 2023-01-22 22:31:19.555463: step: 1328/529, loss: 0.03608163446187973 2023-01-22 22:31:20.595561: step: 1332/529, loss: 0.005005764774978161 2023-01-22 22:31:21.629243: step: 1336/529, loss: 0.003935342654585838 2023-01-22 22:31:22.672464: step: 1340/529, loss: 0.0024578962475061417 2023-01-22 22:31:23.729275: step: 1344/529, loss: 0.0019849329255521297 2023-01-22 22:31:24.778752: step: 1348/529, loss: 5.3696952818427235e-05 2023-01-22 22:31:25.824180: step: 1352/529, loss: 0.003461863612756133 2023-01-22 22:31:26.863923: step: 1356/529, loss: 0.00017433488392271101 2023-01-22 22:31:27.918278: step: 1360/529, loss: 0.005787468980997801 2023-01-22 22:31:28.982613: step: 1364/529, loss: 0.003369669895619154 2023-01-22 22:31:30.021767: step: 1368/529, loss: 0.00422493414953351 2023-01-22 22:31:31.081640: step: 1372/529, loss: 0.006990745663642883 2023-01-22 22:31:32.143515: step: 1376/529, loss: 0.008735550567507744 2023-01-22 22:31:33.204207: step: 1380/529, loss: 0.008439648896455765 2023-01-22 22:31:34.256160: step: 1384/529, loss: 0.005575504619628191 2023-01-22 22:31:35.303677: step: 1388/529, loss: 0.007669614627957344 2023-01-22 22:31:36.355653: step: 1392/529, loss: 0.002193581545725465 2023-01-22 22:31:37.413240: step: 1396/529, loss: 0.00018089528020936996 2023-01-22 22:31:38.465324: step: 1400/529, loss: 0.0005955706001259387 2023-01-22 22:31:39.501912: step: 1404/529, loss: 0.0010929397540166974 2023-01-22 22:31:40.552058: step: 1408/529, loss: 0.01696513593196869 2023-01-22 22:31:41.612571: step: 1412/529, loss: 0.014978902414441109 2023-01-22 22:31:42.673607: step: 1416/529, loss: 0.0025212853215634823 2023-01-22 22:31:43.720973: step: 1420/529, loss: 0.0011030887253582478 2023-01-22 22:31:44.766391: step: 1424/529, loss: 0.005700564943253994 2023-01-22 22:31:45.809177: step: 1428/529, loss: 0.003724184585735202 2023-01-22 22:31:46.862901: step: 1432/529, loss: 0.003426947630941868 2023-01-22 22:31:47.906777: step: 1436/529, loss: 0.008364698849618435 2023-01-22 22:31:48.959334: step: 1440/529, loss: 0.0014146262547001243 2023-01-22 22:31:50.023375: step: 1444/529, loss: 0.0006436764961108565 2023-01-22 22:31:51.076515: step: 1448/529, loss: 0.004170652944594622 2023-01-22 22:31:52.127145: step: 1452/529, loss: 0.0038733934052288532 2023-01-22 22:31:53.191094: step: 1456/529, loss: 0.009054483845829964 2023-01-22 22:31:54.249481: step: 1460/529, loss: 0.0011655604466795921 2023-01-22 22:31:55.293877: step: 1464/529, loss: 0.0017788042314350605 2023-01-22 22:31:56.329251: step: 1468/529, loss: 0.0019456666195765138 2023-01-22 22:31:57.370365: step: 1472/529, loss: 0.0035811972338706255 2023-01-22 22:31:58.411495: step: 1476/529, loss: 0.001796094817109406 2023-01-22 22:31:59.464134: step: 1480/529, loss: 0.0002110555360559374 2023-01-22 22:32:00.520893: step: 1484/529, loss: 3.618112896219827e-05 2023-01-22 22:32:01.573480: step: 1488/529, loss: 2.2881127733853646e-05 2023-01-22 22:32:02.636399: step: 1492/529, loss: 4.8986486945068464e-05 2023-01-22 22:32:03.708934: step: 1496/529, loss: 0.008576088584959507 2023-01-22 22:32:04.784710: step: 1500/529, loss: 0.0032493879552930593 2023-01-22 22:32:05.830202: step: 1504/529, loss: 0.004876949358731508 2023-01-22 22:32:06.875320: step: 1508/529, loss: 0.0013323853490874171 2023-01-22 22:32:07.934342: step: 1512/529, loss: 0.002135796006768942 2023-01-22 22:32:08.968646: step: 1516/529, loss: 1.8612987332744524e-05 2023-01-22 22:32:10.012576: step: 1520/529, loss: 0.009267893619835377 2023-01-22 22:32:11.054325: step: 1524/529, loss: 0.0032342039048671722 2023-01-22 22:32:12.120275: step: 1528/529, loss: 0.005769701674580574 2023-01-22 22:32:13.163071: step: 1532/529, loss: 0.008464229293167591 2023-01-22 22:32:14.223871: step: 1536/529, loss: 0.07072443515062332 2023-01-22 22:32:15.267862: step: 1540/529, loss: 0.005040166899561882 2023-01-22 22:32:16.308964: step: 1544/529, loss: 0.0010388904483988881 2023-01-22 22:32:17.364684: step: 1548/529, loss: 0.0021455856040120125 2023-01-22 22:32:18.427902: step: 1552/529, loss: 2.0178003978799097e-06 2023-01-22 22:32:19.488845: step: 1556/529, loss: 0.00013423204654827714 2023-01-22 22:32:20.545394: step: 1560/529, loss: 0.0010413069976493716 2023-01-22 22:32:21.601380: step: 1564/529, loss: 0.002832639729604125 2023-01-22 22:32:22.633006: step: 1568/529, loss: 0.0003754736972041428 2023-01-22 22:32:23.681478: step: 1572/529, loss: 0.007404036819934845 2023-01-22 22:32:24.712146: step: 1576/529, loss: 0.0008828699355944991 2023-01-22 22:32:25.752319: step: 1580/529, loss: 0.0003870359214488417 2023-01-22 22:32:26.795957: step: 1584/529, loss: 0.0023760509211570024 2023-01-22 22:32:27.838787: step: 1588/529, loss: 9.630201384425163e-05 2023-01-22 22:32:28.888351: step: 1592/529, loss: 0.005458700470626354 2023-01-22 22:32:29.946436: step: 1596/529, loss: 0.010021292604506016 2023-01-22 22:32:30.992631: step: 1600/529, loss: 0.002486381446942687 2023-01-22 22:32:32.031303: step: 1604/529, loss: 0.0083415936678648 2023-01-22 22:32:33.082849: step: 1608/529, loss: 0.008551139384508133 2023-01-22 22:32:34.142807: step: 1612/529, loss: 0.006925257854163647 2023-01-22 22:32:35.200807: step: 1616/529, loss: 0.005146630574017763 2023-01-22 22:32:36.251053: step: 1620/529, loss: 0.003973962739109993 2023-01-22 22:32:37.297160: step: 1624/529, loss: 0.0007142430986277759 2023-01-22 22:32:38.339977: step: 1628/529, loss: 0.0016972122248262167 2023-01-22 22:32:39.389068: step: 1632/529, loss: 0.004277615807950497 2023-01-22 22:32:40.443325: step: 1636/529, loss: 0.00030127615900710225 2023-01-22 22:32:41.482519: step: 1640/529, loss: 0.0039160898886621 2023-01-22 22:32:42.534916: step: 1644/529, loss: 0.0014413015451282263 2023-01-22 22:32:43.582809: step: 1648/529, loss: 0.00010024020593846217 2023-01-22 22:32:44.609275: step: 1652/529, loss: 0.004258297383785248 2023-01-22 22:32:45.655068: step: 1656/529, loss: 0.005418865941464901 2023-01-22 22:32:46.708672: step: 1660/529, loss: 0.007196826860308647 2023-01-22 22:32:47.773497: step: 1664/529, loss: 0.003028488950803876 2023-01-22 22:32:48.822997: step: 1668/529, loss: 0.00010984179971273988 2023-01-22 22:32:49.871616: step: 1672/529, loss: 0.0008404529071412981 2023-01-22 22:32:50.913746: step: 1676/529, loss: 0.00011003081453964114 2023-01-22 22:32:51.965315: step: 1680/529, loss: 0.0001030673083732836 2023-01-22 22:32:53.010612: step: 1684/529, loss: 0.000246745184995234 2023-01-22 22:32:54.079841: step: 1688/529, loss: 0.00010838175512617454 2023-01-22 22:32:55.111702: step: 1692/529, loss: 0.0009822645224630833 2023-01-22 22:32:56.159437: step: 1696/529, loss: 0.002012751530855894 2023-01-22 22:32:57.201464: step: 1700/529, loss: 6.477873102994636e-05 2023-01-22 22:32:58.258713: step: 1704/529, loss: 0.0005273003480397165 2023-01-22 22:32:59.292696: step: 1708/529, loss: 0.0054536545649170876 2023-01-22 22:33:00.334170: step: 1712/529, loss: 0.003672548569738865 2023-01-22 22:33:01.383737: step: 1716/529, loss: 3.28023852489423e-05 2023-01-22 22:33:02.428508: step: 1720/529, loss: 0.0010711478535085917 2023-01-22 22:33:03.472609: step: 1724/529, loss: 0.01713605411350727 2023-01-22 22:33:04.534517: step: 1728/529, loss: 0.0016306425677612424 2023-01-22 22:33:05.586867: step: 1732/529, loss: 0.005326485726982355 2023-01-22 22:33:06.654075: step: 1736/529, loss: 0.01241031289100647 2023-01-22 22:33:07.706180: step: 1740/529, loss: 0.000909050926566124 2023-01-22 22:33:08.765779: step: 1744/529, loss: 0.003233623458072543 2023-01-22 22:33:09.812654: step: 1748/529, loss: 0.006340848281979561 2023-01-22 22:33:10.856861: step: 1752/529, loss: 0.002494446001946926 2023-01-22 22:33:11.911386: step: 1756/529, loss: 6.975528231123462e-05 2023-01-22 22:33:12.954119: step: 1760/529, loss: 0.0044804723002016544 2023-01-22 22:33:14.017812: step: 1764/529, loss: 0.002811324317008257 2023-01-22 22:33:15.073724: step: 1768/529, loss: 0.014119782485067844 2023-01-22 22:33:16.097322: step: 1772/529, loss: 0.0007044809171929955 2023-01-22 22:33:17.151035: step: 1776/529, loss: 0.00011078844545409083 2023-01-22 22:33:18.181019: step: 1780/529, loss: 0.0009673495660535991 2023-01-22 22:33:19.246896: step: 1784/529, loss: 0.001845009857788682 2023-01-22 22:33:20.287051: step: 1788/529, loss: 0.00029794545844197273 2023-01-22 22:33:21.324116: step: 1792/529, loss: 5.079575566924177e-05 2023-01-22 22:33:22.359803: step: 1796/529, loss: 0.014804074540734291 2023-01-22 22:33:23.409135: step: 1800/529, loss: 0.0012412871001288295 2023-01-22 22:33:24.478807: step: 1804/529, loss: 0.018694493919610977 2023-01-22 22:33:25.538321: step: 1808/529, loss: 0.002348124049603939 2023-01-22 22:33:26.585182: step: 1812/529, loss: 0.00012574980792123824 2023-01-22 22:33:27.639738: step: 1816/529, loss: 0.002805741736665368 2023-01-22 22:33:28.706670: step: 1820/529, loss: 0.0022428843658417463 2023-01-22 22:33:29.750585: step: 1824/529, loss: 0.000614923716057092 2023-01-22 22:33:30.802910: step: 1828/529, loss: 0.001742077525705099 2023-01-22 22:33:31.856133: step: 1832/529, loss: 0.008281680755317211 2023-01-22 22:33:32.892707: step: 1836/529, loss: 0.005693594925105572 2023-01-22 22:33:33.936805: step: 1840/529, loss: 0.0005140280118212104 2023-01-22 22:33:34.984010: step: 1844/529, loss: 0.005766930989921093 2023-01-22 22:33:36.031975: step: 1848/529, loss: 0.000772569328546524 2023-01-22 22:33:37.080147: step: 1852/529, loss: 0.0020715140271931887 2023-01-22 22:33:38.132638: step: 1856/529, loss: 0.0030506481416523457 2023-01-22 22:33:39.185408: step: 1860/529, loss: 0.0015574651770293713 2023-01-22 22:33:40.232476: step: 1864/529, loss: 0.004971726797521114 2023-01-22 22:33:41.262586: step: 1868/529, loss: 0.0052582258358597755 2023-01-22 22:33:42.307829: step: 1872/529, loss: 0.013295542448759079 2023-01-22 22:33:43.364383: step: 1876/529, loss: 0.004197181202471256 2023-01-22 22:33:44.436368: step: 1880/529, loss: 0.0027576754800975323 2023-01-22 22:33:45.495978: step: 1884/529, loss: 0.003946739714592695 2023-01-22 22:33:46.540179: step: 1888/529, loss: 0.00249081919901073 2023-01-22 22:33:47.596195: step: 1892/529, loss: 0.0010628255549818277 2023-01-22 22:33:48.646928: step: 1896/529, loss: 0.000503671180922538 2023-01-22 22:33:49.698212: step: 1900/529, loss: 0.0081910640001297 2023-01-22 22:33:50.730493: step: 1904/529, loss: 0.00023846494150348008 2023-01-22 22:33:51.770456: step: 1908/529, loss: 0.0032240136060863733 2023-01-22 22:33:52.819411: step: 1912/529, loss: 0.012257558293640614 2023-01-22 22:33:53.862480: step: 1916/529, loss: 0.000457115878816694 2023-01-22 22:33:54.912563: step: 1920/529, loss: 0.0074079311452806 2023-01-22 22:33:55.948964: step: 1924/529, loss: 0.003005512058734894 2023-01-22 22:33:56.998019: step: 1928/529, loss: 0.004610523581504822 2023-01-22 22:33:58.046305: step: 1932/529, loss: 0.00012290927406866103 2023-01-22 22:33:59.087024: step: 1936/529, loss: 0.003614837070927024 2023-01-22 22:34:00.123190: step: 1940/529, loss: 0.004162222612649202 2023-01-22 22:34:01.162072: step: 1944/529, loss: 0.0025061958003789186 2023-01-22 22:34:02.216921: step: 1948/529, loss: 0.0016118192579597235 2023-01-22 22:34:03.263898: step: 1952/529, loss: 0.0035871940199285746 2023-01-22 22:34:04.304655: step: 1956/529, loss: 0.004990218207240105 2023-01-22 22:34:05.341811: step: 1960/529, loss: 0.007121474482119083 2023-01-22 22:34:06.377540: step: 1964/529, loss: 0.0031373086385428905 2023-01-22 22:34:07.415044: step: 1968/529, loss: 0.0021349869202822447 2023-01-22 22:34:08.477397: step: 1972/529, loss: 0.007196491584181786 2023-01-22 22:34:09.545574: step: 1976/529, loss: 0.008731399662792683 2023-01-22 22:34:10.583624: step: 1980/529, loss: 0.0022996803745627403 2023-01-22 22:34:11.614541: step: 1984/529, loss: 0.009347790852189064 2023-01-22 22:34:12.661788: step: 1988/529, loss: 0.002855120226740837 2023-01-22 22:34:13.698039: step: 1992/529, loss: 0.0009848474292084575 2023-01-22 22:34:14.762986: step: 1996/529, loss: 0.002582747722044587 2023-01-22 22:34:15.793854: step: 2000/529, loss: 3.786386514548212e-05 2023-01-22 22:34:16.849094: step: 2004/529, loss: 0.0004998651565983891 2023-01-22 22:34:17.894953: step: 2008/529, loss: 0.0010684020817279816 2023-01-22 22:34:18.933485: step: 2012/529, loss: 0.00010652600758476183 2023-01-22 22:34:19.977108: step: 2016/529, loss: 0.018018808215856552 2023-01-22 22:34:21.028215: step: 2020/529, loss: 0.0013210283359512687 2023-01-22 22:34:22.073719: step: 2024/529, loss: 0.0001975179329747334 2023-01-22 22:34:23.110817: step: 2028/529, loss: 0.004234502092003822 2023-01-22 22:34:24.158566: step: 2032/529, loss: 0.0004407464584801346 2023-01-22 22:34:25.203757: step: 2036/529, loss: 0.001212246366776526 2023-01-22 22:34:26.238731: step: 2040/529, loss: 0.000988429645076394 2023-01-22 22:34:27.306597: step: 2044/529, loss: 0.015469436533749104 2023-01-22 22:34:28.368759: step: 2048/529, loss: 0.002529177349060774 2023-01-22 22:34:29.419043: step: 2052/529, loss: 0.0005427937721833587 2023-01-22 22:34:30.464513: step: 2056/529, loss: 1.3922626749263145e-05 2023-01-22 22:34:31.502385: step: 2060/529, loss: 0.002548729069530964 2023-01-22 22:34:32.554803: step: 2064/529, loss: 0.001122251502238214 2023-01-22 22:34:33.607839: step: 2068/529, loss: 0.001024989876896143 2023-01-22 22:34:34.663753: step: 2072/529, loss: 0.00022314635862130672 2023-01-22 22:34:35.700763: step: 2076/529, loss: 8.953847282100469e-05 2023-01-22 22:34:36.727152: step: 2080/529, loss: 0.000566621427424252 2023-01-22 22:34:37.772342: step: 2084/529, loss: 0.0018103665206581354 2023-01-22 22:34:38.817951: step: 2088/529, loss: 0.0006287604919634759 2023-01-22 22:34:39.860437: step: 2092/529, loss: 0.0002173274988308549 2023-01-22 22:34:40.913667: step: 2096/529, loss: 0.0313841849565506 2023-01-22 22:34:41.971344: step: 2100/529, loss: 0.0014324260409921408 2023-01-22 22:34:43.018902: step: 2104/529, loss: 0.0011754590086638927 2023-01-22 22:34:44.075699: step: 2108/529, loss: 0.000206009965040721 2023-01-22 22:34:45.128990: step: 2112/529, loss: 0.006074794102460146 2023-01-22 22:34:46.163215: step: 2116/529, loss: 0.004383652005344629 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3379446284562212, 'r': 0.3180655326646788, 'f1': 0.327703882139366}, 'combined': 0.2414660184184802, 'stategy': 1, 'epoch': 15} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3774233430121959, 'r': 0.31880086914119166, 'f1': 0.34564408328836865}, 'combined': 0.24316669176066139, 'stategy': 1, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30905857556863087, 'r': 0.3395539188884958, 'f1': 0.3235893585067582}, 'combined': 0.23843426416287444, 'stategy': 1, 'epoch': 15} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38058954718555105, 'r': 0.3318130315479957, 'f1': 0.35453148317238103}, 'combined': 0.2517173530523905, 'stategy': 1, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3266341892596455, 'r': 0.3396499728923828, 'f1': 0.33301495016611293}, 'combined': 0.24537943696450426, 'stategy': 1, 'epoch': 15} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37275349634745114, 'r': 0.2972920555511085, 'f1': 0.33077349093356345}, 'combined': 0.23484917856283005, 'stategy': 1, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.4891304347826087, 'f1': 0.4245283018867925}, 'combined': 0.21226415094339626, 'stategy': 1, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393128253325622, 'r': 0.3180655326646788, 'f1': 0.3283458094305303}, 'combined': 0.24193901747512758, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3816102042039642, 'r': 0.3196734694902318, 'f1': 0.34790673887540896}, 'combined': 0.24475850976159427, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36036036036036034, 'r': 0.38095238095238093, 'f1': 0.37037037037037035}, 'combined': 0.24691358024691357, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170622448080369, 'r': 0.3471440517158203, 'f1': 0.33142194792434293}, 'combined': 0.24420564583898952, 'stategy': 1, 'epoch': 6} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38320634084470634, 'r': 0.3344285502925467, 'f1': 0.3571597273608483}, 'combined': 0.25358340642620225, 'stategy': 1, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3879310344827586, 'r': 0.4891304347826087, 'f1': 0.4326923076923077}, 'combined': 0.21634615384615385, 'stategy': 1, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3315037080405933, 'r': 0.34534257251287614, 'f1': 0.3382816648964419}, 'combined': 0.24926017413422033, 'stategy': 1, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787875776500773, 'r': 0.3007824569472691, 'f1': 0.3353080697096988}, 'combined': 0.23806872949388613, 'stategy': 1, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.3017241379310345, 'f1': 0.39772727272727276}, 'combined': 0.26515151515151514, 'stategy': 1, 'epoch': 6} ****************************** Epoch: 16 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 22:37:17.659312: step: 4/529, loss: 0.0008352497825399041 2023-01-22 22:37:18.684995: step: 8/529, loss: 0.004078231751918793 2023-01-22 22:37:19.725998: step: 12/529, loss: 0.0013906897511333227 2023-01-22 22:37:20.765228: step: 16/529, loss: 0.00027968670474365354 2023-01-22 22:37:21.798936: step: 20/529, loss: 6.573928840225562e-05 2023-01-22 22:37:22.854422: step: 24/529, loss: 0.000341216626111418 2023-01-22 22:37:23.893011: step: 28/529, loss: 0.0004673922376241535 2023-01-22 22:37:24.937917: step: 32/529, loss: 0.00481852050870657 2023-01-22 22:37:25.982170: step: 36/529, loss: 0.0002737178292591125 2023-01-22 22:37:27.029027: step: 40/529, loss: 0.0001533913309685886 2023-01-22 22:37:28.058334: step: 44/529, loss: 0.0006647821865044534 2023-01-22 22:37:29.112932: step: 48/529, loss: 9.900423174258322e-05 2023-01-22 22:37:30.166772: step: 52/529, loss: 0.05244966223835945 2023-01-22 22:37:31.208734: step: 56/529, loss: 0.0017704941565170884 2023-01-22 22:37:32.235804: step: 60/529, loss: 0.02957840822637081 2023-01-22 22:37:33.269296: step: 64/529, loss: 0.00543832266703248 2023-01-22 22:37:34.309706: step: 68/529, loss: 0.005368039943277836 2023-01-22 22:37:35.343005: step: 72/529, loss: 0.008126365952193737 2023-01-22 22:37:36.373277: step: 76/529, loss: 0.0016023352509364486 2023-01-22 22:37:37.419787: step: 80/529, loss: 0.00793336983770132 2023-01-22 22:37:38.482273: step: 84/529, loss: 0.0010627913288772106 2023-01-22 22:37:39.538221: step: 88/529, loss: 0.0004878414620179683 2023-01-22 22:37:40.584862: step: 92/529, loss: 0.000876517326105386 2023-01-22 22:37:41.622573: step: 96/529, loss: 1.0525673133088276e-06 2023-01-22 22:37:42.648491: step: 100/529, loss: 0.00041814500582404435 2023-01-22 22:37:43.712026: step: 104/529, loss: 0.0010216075461357832 2023-01-22 22:37:44.760568: step: 108/529, loss: 0.024219796061515808 2023-01-22 22:37:45.820073: step: 112/529, loss: 0.002600511536002159 2023-01-22 22:37:46.875533: step: 116/529, loss: 0.00044134625932201743 2023-01-22 22:37:47.939025: step: 120/529, loss: 0.0047204033471643925 2023-01-22 22:37:48.994350: step: 124/529, loss: 0.014981193467974663 2023-01-22 22:37:50.039405: step: 128/529, loss: 8.036367944441736e-05 2023-01-22 22:37:51.090473: step: 132/529, loss: 0.002071731723845005 2023-01-22 22:37:52.133696: step: 136/529, loss: 0.0003172782016918063 2023-01-22 22:37:53.180014: step: 140/529, loss: 0.003152410266920924 2023-01-22 22:37:54.228180: step: 144/529, loss: 0.048385705798864365 2023-01-22 22:37:55.283923: step: 148/529, loss: 0.005180898122489452 2023-01-22 22:37:56.318227: step: 152/529, loss: 0.00030260326457209885 2023-01-22 22:37:57.368182: step: 156/529, loss: 0.00290667824447155 2023-01-22 22:37:58.417035: step: 160/529, loss: 0.004487393423914909 2023-01-22 22:37:59.459889: step: 164/529, loss: 0.0004990944289602339 2023-01-22 22:38:00.501003: step: 168/529, loss: 0.0006227202829904854 2023-01-22 22:38:01.539062: step: 172/529, loss: 9.849414345808327e-05 2023-01-22 22:38:02.593828: step: 176/529, loss: 0.004696741234511137 2023-01-22 22:38:03.641912: step: 180/529, loss: 0.0026739477179944515 2023-01-22 22:38:04.693238: step: 184/529, loss: 0.009527433663606644 2023-01-22 22:38:05.751828: step: 188/529, loss: 0.002856643171980977 2023-01-22 22:38:06.801456: step: 192/529, loss: 0.00040713392081670463 2023-01-22 22:38:07.855282: step: 196/529, loss: 0.0025559449568390846 2023-01-22 22:38:08.899095: step: 200/529, loss: 0.0034359805285930634 2023-01-22 22:38:09.940970: step: 204/529, loss: 0.0123750614002347 2023-01-22 22:38:10.992707: step: 208/529, loss: 0.01791662536561489 2023-01-22 22:38:12.031462: step: 212/529, loss: 4.506243203650229e-05 2023-01-22 22:38:13.062832: step: 216/529, loss: 0.0007484178058803082 2023-01-22 22:38:14.107330: step: 220/529, loss: 0.0033656363375484943 2023-01-22 22:38:15.158980: step: 224/529, loss: 0.00407741405069828 2023-01-22 22:38:16.207385: step: 228/529, loss: 0.0006752608460374177 2023-01-22 22:38:17.260870: step: 232/529, loss: 0.0016098516061902046 2023-01-22 22:38:18.320545: step: 236/529, loss: 0.016018150374293327 2023-01-22 22:38:19.377448: step: 240/529, loss: 6.399289850378409e-05 2023-01-22 22:38:20.442358: step: 244/529, loss: 0.00240446999669075 2023-01-22 22:38:21.502525: step: 248/529, loss: 0.0006661063525825739 2023-01-22 22:38:22.548960: step: 252/529, loss: 9.479092841502279e-05 2023-01-22 22:38:23.593820: step: 256/529, loss: 0.0013702742289751768 2023-01-22 22:38:24.640283: step: 260/529, loss: 5.1778410124825314e-05 2023-01-22 22:38:25.680839: step: 264/529, loss: 0.0004261040885467082 2023-01-22 22:38:26.718413: step: 268/529, loss: 0.0008805907564237714 2023-01-22 22:38:27.763144: step: 272/529, loss: 0.00448616174980998 2023-01-22 22:38:28.815644: step: 276/529, loss: 0.00040727463783696294 2023-01-22 22:38:29.856172: step: 280/529, loss: 0.005305705592036247 2023-01-22 22:38:30.903152: step: 284/529, loss: 0.016064930707216263 2023-01-22 22:38:31.958154: step: 288/529, loss: 0.008089309558272362 2023-01-22 22:38:33.012977: step: 292/529, loss: 0.0012017115950584412 2023-01-22 22:38:34.068188: step: 296/529, loss: 0.00027334154583513737 2023-01-22 22:38:35.110085: step: 300/529, loss: 0.00013047060929238796 2023-01-22 22:38:36.157295: step: 304/529, loss: 0.00014351372374221683 2023-01-22 22:38:37.221454: step: 308/529, loss: 0.0010556101333349943 2023-01-22 22:38:38.273843: step: 312/529, loss: 0.0018022399162873626 2023-01-22 22:38:39.335947: step: 316/529, loss: 8.848635479807854e-05 2023-01-22 22:38:40.371218: step: 320/529, loss: 0.0022214234340935946 2023-01-22 22:38:41.426116: step: 324/529, loss: 0.0008178128045983613 2023-01-22 22:38:42.478571: step: 328/529, loss: 0.0081281503662467 2023-01-22 22:38:43.545023: step: 332/529, loss: 0.004785628989338875 2023-01-22 22:38:44.587388: step: 336/529, loss: 0.0004804731288459152 2023-01-22 22:38:45.635058: step: 340/529, loss: 2.2609745428781025e-05 2023-01-22 22:38:46.688626: step: 344/529, loss: 0.0034750434570014477 2023-01-22 22:38:47.730608: step: 348/529, loss: 0.0018449969356879592 2023-01-22 22:38:48.783849: step: 352/529, loss: 0.002631227020174265 2023-01-22 22:38:49.820588: step: 356/529, loss: 0.00014063646085560322 2023-01-22 22:38:50.866202: step: 360/529, loss: 0.0004171330656390637 2023-01-22 22:38:51.896793: step: 364/529, loss: 0.002189196180552244 2023-01-22 22:38:52.949111: step: 368/529, loss: 0.004228558856993914 2023-01-22 22:38:54.005197: step: 372/529, loss: 0.019063761457800865 2023-01-22 22:38:55.060116: step: 376/529, loss: 0.0009880102006718516 2023-01-22 22:38:56.132135: step: 380/529, loss: 0.010479042306542397 2023-01-22 22:38:57.180016: step: 384/529, loss: 0.0027219983749091625 2023-01-22 22:38:58.222042: step: 388/529, loss: 0.0005921715637668967 2023-01-22 22:38:59.270969: step: 392/529, loss: 0.004992567002773285 2023-01-22 22:39:00.319320: step: 396/529, loss: 0.008218024857342243 2023-01-22 22:39:01.368210: step: 400/529, loss: 5.4673455451847985e-05 2023-01-22 22:39:02.421639: step: 404/529, loss: 0.0011151168728247285 2023-01-22 22:39:03.475369: step: 408/529, loss: 0.006082640960812569 2023-01-22 22:39:04.548583: step: 412/529, loss: 0.0007668251055292785 2023-01-22 22:39:05.599807: step: 416/529, loss: 0.0020557192619889975 2023-01-22 22:39:06.659799: step: 420/529, loss: 0.004256993066519499 2023-01-22 22:39:07.724340: step: 424/529, loss: 0.005285164806991816 2023-01-22 22:39:08.786992: step: 428/529, loss: 0.0007947867270559072 2023-01-22 22:39:09.845802: step: 432/529, loss: 0.0017105155857279897 2023-01-22 22:39:10.889347: step: 436/529, loss: 0.002169121755287051 2023-01-22 22:39:11.955318: step: 440/529, loss: 0.003915147855877876 2023-01-22 22:39:13.001659: step: 444/529, loss: 5.4450207244371995e-05 2023-01-22 22:39:14.048911: step: 448/529, loss: 0.0067147123627364635 2023-01-22 22:39:15.116597: step: 452/529, loss: 0.0001681046123849228 2023-01-22 22:39:16.166346: step: 456/529, loss: 0.002292748773470521 2023-01-22 22:39:17.216456: step: 460/529, loss: 0.005784038919955492 2023-01-22 22:39:18.264739: step: 464/529, loss: 0.012059425003826618 2023-01-22 22:39:19.309373: step: 468/529, loss: 0.004067446570843458 2023-01-22 22:39:20.363657: step: 472/529, loss: 0.002616090001538396 2023-01-22 22:39:21.424292: step: 476/529, loss: 0.0029812497086822987 2023-01-22 22:39:22.480962: step: 480/529, loss: 0.0012229267740622163 2023-01-22 22:39:23.535033: step: 484/529, loss: 0.000983092817477882 2023-01-22 22:39:24.585981: step: 488/529, loss: 0.0017438458744436502 2023-01-22 22:39:25.625183: step: 492/529, loss: 0.0007921059732325375 2023-01-22 22:39:26.667860: step: 496/529, loss: 0.007515037432312965 2023-01-22 22:39:27.712617: step: 500/529, loss: 0.002744954079389572 2023-01-22 22:39:28.775872: step: 504/529, loss: 0.004854784347116947 2023-01-22 22:39:29.836396: step: 508/529, loss: 0.0002995973627548665 2023-01-22 22:39:30.885329: step: 512/529, loss: 0.0030475344974547625 2023-01-22 22:39:31.924037: step: 516/529, loss: 3.5621828828880098e-06 2023-01-22 22:39:32.971009: step: 520/529, loss: 0.000995233771391213 2023-01-22 22:39:34.019050: step: 524/529, loss: 0.007095505017787218 2023-01-22 22:39:35.075504: step: 528/529, loss: 0.0015335733769461513 2023-01-22 22:39:36.124740: step: 532/529, loss: 0.0002862678375095129 2023-01-22 22:39:37.164682: step: 536/529, loss: 0.0022908649407327175 2023-01-22 22:39:38.211695: step: 540/529, loss: 0.0026858216151595116 2023-01-22 22:39:39.262557: step: 544/529, loss: 0.0034945036750286818 2023-01-22 22:39:40.307195: step: 548/529, loss: 0.00019250012701377273 2023-01-22 22:39:41.377019: step: 552/529, loss: 0.008986166678369045 2023-01-22 22:39:42.427805: step: 556/529, loss: 0.004181261174380779 2023-01-22 22:39:43.472727: step: 560/529, loss: 0.005867184139788151 2023-01-22 22:39:44.519424: step: 564/529, loss: 0.0009576360462233424 2023-01-22 22:39:45.561564: step: 568/529, loss: 0.00015088755753822625 2023-01-22 22:39:46.628440: step: 572/529, loss: 0.008697553537786007 2023-01-22 22:39:47.673647: step: 576/529, loss: 0.0022136373445391655 2023-01-22 22:39:48.734720: step: 580/529, loss: 0.012643320485949516 2023-01-22 22:39:49.783371: step: 584/529, loss: 0.008251936174929142 2023-01-22 22:39:50.836772: step: 588/529, loss: 0.00020720501197502017 2023-01-22 22:39:51.901416: step: 592/529, loss: 0.00682100560516119 2023-01-22 22:39:52.946981: step: 596/529, loss: 9.279289952246472e-05 2023-01-22 22:39:54.008068: step: 600/529, loss: 0.018781989812850952 2023-01-22 22:39:55.068320: step: 604/529, loss: 0.0048301187343895435 2023-01-22 22:39:56.120292: step: 608/529, loss: 0.007250762544572353 2023-01-22 22:39:57.175533: step: 612/529, loss: 0.0010262010619044304 2023-01-22 22:39:58.219696: step: 616/529, loss: 0.0006206289981491864 2023-01-22 22:39:59.266534: step: 620/529, loss: 0.00022213127522263676 2023-01-22 22:40:00.314519: step: 624/529, loss: 0.0050963144749403 2023-01-22 22:40:01.350783: step: 628/529, loss: 0.0033172564581036568 2023-01-22 22:40:02.396253: step: 632/529, loss: 0.0030756439082324505 2023-01-22 22:40:03.434769: step: 636/529, loss: 0.005096208769828081 2023-01-22 22:40:04.487538: step: 640/529, loss: 0.002249770564958453 2023-01-22 22:40:05.546530: step: 644/529, loss: 0.008157442323863506 2023-01-22 22:40:06.599306: step: 648/529, loss: 0.0012797346571460366 2023-01-22 22:40:07.648541: step: 652/529, loss: 0.0005326832761056721 2023-01-22 22:40:08.702320: step: 656/529, loss: 0.00044118391815572977 2023-01-22 22:40:09.739344: step: 660/529, loss: 0.0005492839845828712 2023-01-22 22:40:10.784648: step: 664/529, loss: 0.0018050550715997815 2023-01-22 22:40:11.838102: step: 668/529, loss: 0.004495986737310886 2023-01-22 22:40:12.881752: step: 672/529, loss: 0.004443437326699495 2023-01-22 22:40:13.926105: step: 676/529, loss: 0.00021187537640798837 2023-01-22 22:40:14.972433: step: 680/529, loss: 4.35755355283618e-05 2023-01-22 22:40:16.010259: step: 684/529, loss: 3.831455251201987e-05 2023-01-22 22:40:17.061064: step: 688/529, loss: 4.212204657960683e-05 2023-01-22 22:40:18.099200: step: 692/529, loss: 0.007300470490008593 2023-01-22 22:40:19.169791: step: 696/529, loss: 0.0007477918989025056 2023-01-22 22:40:20.224963: step: 700/529, loss: 0.0037029797676950693 2023-01-22 22:40:21.268945: step: 704/529, loss: 0.005753783974796534 2023-01-22 22:40:22.338879: step: 708/529, loss: 0.0029113520868122578 2023-01-22 22:40:23.378058: step: 712/529, loss: 0.002503064926713705 2023-01-22 22:40:24.440909: step: 716/529, loss: 0.004338518250733614 2023-01-22 22:40:25.500640: step: 720/529, loss: 0.010502398014068604 2023-01-22 22:40:26.529173: step: 724/529, loss: 1.895812965813093e-05 2023-01-22 22:40:27.577494: step: 728/529, loss: 0.009539663791656494 2023-01-22 22:40:28.623442: step: 732/529, loss: 0.000875416211783886 2023-01-22 22:40:29.668247: step: 736/529, loss: 0.00022103587980382144 2023-01-22 22:40:30.724857: step: 740/529, loss: 0.0003958956222049892 2023-01-22 22:40:31.775609: step: 744/529, loss: 0.0006257392815314233 2023-01-22 22:40:32.839616: step: 748/529, loss: 0.007873651571571827 2023-01-22 22:40:33.890505: step: 752/529, loss: 0.0070301988162100315 2023-01-22 22:40:34.952958: step: 756/529, loss: 7.34411587473005e-05 2023-01-22 22:40:35.985923: step: 760/529, loss: 5.6964319810504094e-05 2023-01-22 22:40:37.030413: step: 764/529, loss: 0.0012240585638210177 2023-01-22 22:40:38.068020: step: 768/529, loss: 0.0002077206881949678 2023-01-22 22:40:39.114261: step: 772/529, loss: 0.0017296469304710627 2023-01-22 22:40:40.166120: step: 776/529, loss: 0.002311060903593898 2023-01-22 22:40:41.215712: step: 780/529, loss: 0.0025948307011276484 2023-01-22 22:40:42.277022: step: 784/529, loss: 0.004161428660154343 2023-01-22 22:40:43.320543: step: 788/529, loss: 0.005635375622659922 2023-01-22 22:40:44.357463: step: 792/529, loss: 0.00011246439680689946 2023-01-22 22:40:45.399724: step: 796/529, loss: 0.01886409893631935 2023-01-22 22:40:46.441114: step: 800/529, loss: 0.009209426119923592 2023-01-22 22:40:47.521726: step: 804/529, loss: 0.004663406405597925 2023-01-22 22:40:48.567125: step: 808/529, loss: 0.0011369960848242044 2023-01-22 22:40:49.613884: step: 812/529, loss: 0.0016129232244566083 2023-01-22 22:40:50.680002: step: 816/529, loss: 1.2793821952072904e-05 2023-01-22 22:40:51.735775: step: 820/529, loss: 0.0019323163433000445 2023-01-22 22:40:52.791784: step: 824/529, loss: 0.001820737379603088 2023-01-22 22:40:53.849783: step: 828/529, loss: 0.0008540013805031776 2023-01-22 22:40:54.897025: step: 832/529, loss: 0.0005600686999969184 2023-01-22 22:40:55.965651: step: 836/529, loss: 0.0010771576780825853 2023-01-22 22:40:57.015835: step: 840/529, loss: 0.00017267640214413404 2023-01-22 22:40:58.074715: step: 844/529, loss: 0.0029180734418332577 2023-01-22 22:40:59.120279: step: 848/529, loss: 0.0013812831602990627 2023-01-22 22:41:00.174451: step: 852/529, loss: 4.473709395824699e-06 2023-01-22 22:41:01.220432: step: 856/529, loss: 0.008165249601006508 2023-01-22 22:41:02.267681: step: 860/529, loss: 0.01582416333258152 2023-01-22 22:41:03.319910: step: 864/529, loss: 0.007594390772283077 2023-01-22 22:41:04.362411: step: 868/529, loss: 0.012652688659727573 2023-01-22 22:41:05.397803: step: 872/529, loss: 0.003081762697547674 2023-01-22 22:41:06.443481: step: 876/529, loss: 0.0009592437418177724 2023-01-22 22:41:07.495844: step: 880/529, loss: 0.0037696349900215864 2023-01-22 22:41:08.559212: step: 884/529, loss: 0.001623764750547707 2023-01-22 22:41:09.597135: step: 888/529, loss: 0.0001962757669389248 2023-01-22 22:41:10.629484: step: 892/529, loss: 0.0008244204218499362 2023-01-22 22:41:11.675894: step: 896/529, loss: 0.009198804385960102 2023-01-22 22:41:12.721501: step: 900/529, loss: 0.0005498820100910962 2023-01-22 22:41:13.770165: step: 904/529, loss: 0.006923763547092676 2023-01-22 22:41:14.823069: step: 908/529, loss: 2.6771653210744262e-05 2023-01-22 22:41:15.887050: step: 912/529, loss: 0.004113171715289354 2023-01-22 22:41:16.923400: step: 916/529, loss: 0.0010723625309765339 2023-01-22 22:41:17.985122: step: 920/529, loss: 0.006901412270963192 2023-01-22 22:41:19.047275: step: 924/529, loss: 0.008161801844835281 2023-01-22 22:41:20.095124: step: 928/529, loss: 2.9370366974035278e-05 2023-01-22 22:41:21.139111: step: 932/529, loss: 0.0035291474778205156 2023-01-22 22:41:22.185061: step: 936/529, loss: 0.002092214999720454 2023-01-22 22:41:23.240203: step: 940/529, loss: 0.0022200271487236023 2023-01-22 22:41:24.291376: step: 944/529, loss: 0.0048616607673466206 2023-01-22 22:41:25.341454: step: 948/529, loss: 0.007801448460668325 2023-01-22 22:41:26.373599: step: 952/529, loss: 0.0025164796970784664 2023-01-22 22:41:27.409042: step: 956/529, loss: 0.0007975324988365173 2023-01-22 22:41:28.453633: step: 960/529, loss: 0.0019139735959470272 2023-01-22 22:41:29.507290: step: 964/529, loss: 0.0012918042484670877 2023-01-22 22:41:30.558597: step: 968/529, loss: 0.002581177279353142 2023-01-22 22:41:31.608566: step: 972/529, loss: 4.3875129307480165e-08 2023-01-22 22:41:32.658742: step: 976/529, loss: 0.0031610054429620504 2023-01-22 22:41:33.687688: step: 980/529, loss: 0.0027935670223087072 2023-01-22 22:41:34.736870: step: 984/529, loss: 0.0070877536199986935 2023-01-22 22:41:35.787324: step: 988/529, loss: 0.008069803938269615 2023-01-22 22:41:36.850022: step: 992/529, loss: 0.014591886661946774 2023-01-22 22:41:37.897260: step: 996/529, loss: 0.0009630898712202907 2023-01-22 22:41:38.942986: step: 1000/529, loss: 0.0002405676495982334 2023-01-22 22:41:39.992862: step: 1004/529, loss: 0.009807177819311619 2023-01-22 22:41:41.041682: step: 1008/529, loss: 0.0001440555352019146 2023-01-22 22:41:42.097644: step: 1012/529, loss: 0.0021732975728809834 2023-01-22 22:41:43.161873: step: 1016/529, loss: 0.004840366542339325 2023-01-22 22:41:44.223125: step: 1020/529, loss: 0.0037037923466414213 2023-01-22 22:41:45.267096: step: 1024/529, loss: 0.0032236503902822733 2023-01-22 22:41:46.332126: step: 1028/529, loss: 0.0033569352235645056 2023-01-22 22:41:47.379255: step: 1032/529, loss: 0.0007555480115115643 2023-01-22 22:41:48.436198: step: 1036/529, loss: 0.0208508912473917 2023-01-22 22:41:49.481201: step: 1040/529, loss: 0.003467023139819503 2023-01-22 22:41:50.538568: step: 1044/529, loss: 0.011952086351811886