Command that produces this log: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> type_embedding.weight: torch.Size([123, 100]) >>> trans_rep.weight: torch.Size([1024, 1124]) >>> trans_rep.bias: torch.Size([1024]) >>> coref_type_ffn.weight: torch.Size([3, 4096]) >>> coref_type_ffn.bias: torch.Size([3])
n_trainable_params: 561067023, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
******************************
Epoch: 0
command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 15:24:14.513846: step: 4/526, loss: 0.011542052030563354 2023-01-22 15:24:15.583198: step: 8/526, loss: 0.02023157849907875 2023-01-22 15:24:16.631113: step: 12/526, loss: 0.06262841820716858 2023-01-22 15:24:17.683260: step: 16/526, loss: 0.05780504643917084 2023-01-22 15:24:18.736109: step: 20/526, loss: 0.020506612956523895 2023-01-22 15:24:19.795369: step: 24/526, loss: 0.01437404379248619 2023-01-22 15:24:20.868107: step: 28/526, loss: 0.020581595599651337 2023-01-22 15:24:21.921821: step: 32/526, loss: 0.027495669201016426 2023-01-22 15:24:22.974344: step: 36/526, loss: 0.020445631816983223 2023-01-22 15:24:24.038331: step: 40/526, loss: 0.017695395275950432 2023-01-22 15:24:25.093167: step: 44/526, loss: 0.031221332028508186 2023-01-22 15:24:26.164539: step: 48/526, loss: 0.02319531887769699 2023-01-22 15:24:27.228800: step: 52/526, loss: 0.01580868847668171 2023-01-22 15:24:28.289458: step: 56/526, loss: 0.012067344039678574 2023-01-22 15:24:29.349079: step: 60/526, loss: 0.01946335658431053 2023-01-22 15:24:30.414234: step: 64/526, loss: 0.025493476539850235 2023-01-22 15:24:31.470900: step: 68/526, loss: 0.01326130423694849 2023-01-22 15:24:32.535903: step: 72/526, loss: 0.028966855257749557 2023-01-22 15:24:33.602072: step: 76/526, loss: 0.015298008918762207 2023-01-22 15:24:34.661153: step: 80/526, loss: 0.008636180311441422 2023-01-22 15:24:35.728109: step: 84/526, loss: 0.061954278498888016 2023-01-22 15:24:36.774116: step: 88/526, loss: 0.03162294626235962 2023-01-22 15:24:37.815851: step: 92/526, loss: 0.006181944161653519 2023-01-22 15:24:38.887325: step: 96/526, loss: 0.014761177822947502 2023-01-22 15:24:39.946372: step: 100/526, loss: 0.009932223707437515 2023-01-22 15:24:41.004768: step: 104/526, loss: 0.014439082704484463 2023-01-22 15:24:42.069304: step: 108/526, loss: 0.012355302460491657 2023-01-22 15:24:43.150848: step: 112/526, loss: 0.014503220096230507 2023-01-22 15:24:44.218409: step: 116/526, loss: 0.011955962516367435 2023-01-22 15:24:45.267154: step: 120/526, loss: 
0.009412375278770924 2023-01-22 15:24:46.347699: step: 124/526, loss: 0.010395512916147709 2023-01-22 15:24:47.419582: step: 128/526, loss: 0.019374214112758636 2023-01-22 15:24:48.470624: step: 132/526, loss: 0.014935505576431751 2023-01-22 15:24:49.523750: step: 136/526, loss: 0.009510563686490059 2023-01-22 15:24:50.576536: step: 140/526, loss: 0.01242282334715128 2023-01-22 15:24:51.628329: step: 144/526, loss: 0.02149650827050209 2023-01-22 15:24:52.703046: step: 148/526, loss: 0.014446397311985493 2023-01-22 15:24:53.769966: step: 152/526, loss: 0.020998205989599228 2023-01-22 15:24:54.842208: step: 156/526, loss: 0.017171582207083702 2023-01-22 15:24:55.911913: step: 160/526, loss: 0.05841045081615448 2023-01-22 15:24:56.972578: step: 164/526, loss: 0.04393700510263443 2023-01-22 15:24:58.048951: step: 168/526, loss: 0.0171918086707592 2023-01-22 15:24:59.110742: step: 172/526, loss: 0.012977737002074718 2023-01-22 15:25:00.169313: step: 176/526, loss: 0.019465535879135132 2023-01-22 15:25:01.216951: step: 180/526, loss: 0.04735732451081276 2023-01-22 15:25:02.277274: step: 184/526, loss: 0.011476998217403889 2023-01-22 15:25:03.333175: step: 188/526, loss: 0.02026982232928276 2023-01-22 15:25:04.391590: step: 192/526, loss: 0.02624250203371048 2023-01-22 15:25:05.448532: step: 196/526, loss: 0.03472770005464554 2023-01-22 15:25:06.517780: step: 200/526, loss: 0.015562203712761402 2023-01-22 15:25:07.571820: step: 204/526, loss: 0.024400711059570312 2023-01-22 15:25:08.628983: step: 208/526, loss: 0.018951723352074623 2023-01-22 15:25:09.680993: step: 212/526, loss: 0.01060152892023325 2023-01-22 15:25:10.739624: step: 216/526, loss: 0.006733638234436512 2023-01-22 15:25:11.793091: step: 220/526, loss: 0.015451989136636257 2023-01-22 15:25:12.860232: step: 224/526, loss: 0.023213978856801987 2023-01-22 15:25:13.917112: step: 228/526, loss: 0.010583912953734398 2023-01-22 15:25:14.977891: step: 232/526, loss: 0.031098660081624985 2023-01-22 15:25:16.050593: step: 236/526, loss: 0.01834714412689209 2023-01-22 15:25:17.130945: step: 240/526, loss: 0.07209409773349762 2023-01-22 15:25:18.199179: step: 244/526, loss: 0.011912657879292965 2023-01-22 15:25:19.273828: step: 248/526, loss: 0.06079801544547081 2023-01-22 15:25:20.366326: step: 252/526, loss: 0.02100806124508381 2023-01-22 15:25:21.449887: step: 256/526, loss: 0.048354655504226685 2023-01-22 15:25:22.518357: step: 260/526, loss: 0.003983271773904562 2023-01-22 15:25:23.585061: step: 264/526, loss: 0.028111929073929787 2023-01-22 15:25:24.647762: step: 268/526, loss: 0.012731669470667839 2023-01-22 15:25:25.750639: step: 272/526, loss: 0.013230368494987488 2023-01-22 15:25:26.812040: step: 276/526, loss: 0.04263673722743988 2023-01-22 15:25:27.876257: step: 280/526, loss: 0.014705047011375427 2023-01-22 15:25:28.943380: step: 284/526, loss: 0.005776818376034498 2023-01-22 15:25:30.012348: step: 288/526, loss: 0.041990526020526886 2023-01-22 15:25:31.081463: step: 292/526, loss: 0.009674874134361744 2023-01-22 15:25:32.141210: step: 296/526, loss: 0.06541553139686584 2023-01-22 15:25:33.201062: step: 300/526, loss: 0.011652745306491852 2023-01-22 15:25:34.268408: step: 304/526, loss: 0.021124111488461494 2023-01-22 15:25:35.329113: step: 308/526, loss: 0.032830338925123215 2023-01-22 15:25:36.425508: step: 312/526, loss: 0.055567238479852676 2023-01-22 15:25:37.494612: step: 316/526, loss: 0.008490262553095818 2023-01-22 15:25:38.559039: step: 320/526, loss: 0.03531695902347565 2023-01-22 15:25:39.623886: step: 324/526, loss: 
0.039421889930963516 2023-01-22 15:25:40.696168: step: 328/526, loss: 0.038977764546871185 2023-01-22 15:25:41.772209: step: 332/526, loss: 0.07475990056991577 2023-01-22 15:25:42.853761: step: 336/526, loss: 0.012722814455628395 2023-01-22 15:25:43.901096: step: 340/526, loss: 0.006297828629612923 2023-01-22 15:25:44.965630: step: 344/526, loss: 0.014703561551868916 2023-01-22 15:25:46.041328: step: 348/526, loss: 0.04576403275132179 2023-01-22 15:25:47.108296: step: 352/526, loss: 0.030041227117180824 2023-01-22 15:25:48.174788: step: 356/526, loss: 0.013157275505363941 2023-01-22 15:25:49.236070: step: 360/526, loss: 0.01257038302719593 2023-01-22 15:25:50.319361: step: 364/526, loss: 0.011035998351871967 2023-01-22 15:25:51.407047: step: 368/526, loss: 0.06662046909332275 2023-01-22 15:25:52.474475: step: 372/526, loss: 0.00764904310926795 2023-01-22 15:25:53.547670: step: 376/526, loss: 0.012579564936459064 2023-01-22 15:25:54.638932: step: 380/526, loss: 0.0183241069316864 2023-01-22 15:25:55.691101: step: 384/526, loss: 0.011891600675880909 2023-01-22 15:25:56.784480: step: 388/526, loss: 0.009133614599704742 2023-01-22 15:25:57.867919: step: 392/526, loss: 0.00827095378190279 2023-01-22 15:25:58.939568: step: 396/526, loss: 0.07623982429504395 2023-01-22 15:26:00.012750: step: 400/526, loss: 0.07408555597066879 2023-01-22 15:26:01.085205: step: 404/526, loss: 0.01287064142525196 2023-01-22 15:26:02.139496: step: 408/526, loss: 0.04000311717391014 2023-01-22 15:26:03.201139: step: 412/526, loss: 0.014216827228665352 2023-01-22 15:26:04.262350: step: 416/526, loss: 0.028156902641057968 2023-01-22 15:26:05.326582: step: 420/526, loss: 0.012215754948556423 2023-01-22 15:26:06.402064: step: 424/526, loss: 0.010989603586494923 2023-01-22 15:26:07.470437: step: 428/526, loss: 0.039528124034404755 2023-01-22 15:26:08.527110: step: 432/526, loss: 0.010094551369547844 2023-01-22 15:26:09.608627: step: 436/526, loss: 0.01602562516927719 2023-01-22 15:26:10.669334: step: 440/526, loss: 0.02160622365772724 2023-01-22 15:26:11.725715: step: 444/526, loss: 0.012479305267333984 2023-01-22 15:26:12.789076: step: 448/526, loss: 0.009602426551282406 2023-01-22 15:26:13.861441: step: 452/526, loss: 0.014257905073463917 2023-01-22 15:26:14.913998: step: 456/526, loss: 0.016463544219732285 2023-01-22 15:26:15.977143: step: 460/526, loss: 0.040781211107969284 2023-01-22 15:26:17.048506: step: 464/526, loss: 0.05287405103445053 2023-01-22 15:26:18.121340: step: 468/526, loss: 0.04636642709374428 2023-01-22 15:26:19.204952: step: 472/526, loss: 0.012500831857323647 2023-01-22 15:26:20.281397: step: 476/526, loss: 0.022106660529971123 2023-01-22 15:26:21.342236: step: 480/526, loss: 0.011811993084847927 2023-01-22 15:26:22.389268: step: 484/526, loss: 0.012277712114155293 2023-01-22 15:26:23.446360: step: 488/526, loss: 0.017902759835124016 2023-01-22 15:26:24.516524: step: 492/526, loss: 0.007290765643119812 2023-01-22 15:26:25.582365: step: 496/526, loss: 0.022858478128910065 2023-01-22 15:26:26.660604: step: 500/526, loss: 0.08065126836299896 2023-01-22 15:26:27.730527: step: 504/526, loss: 0.03802528232336044 2023-01-22 15:26:28.779540: step: 508/526, loss: 0.005456075072288513 2023-01-22 15:26:29.850856: step: 512/526, loss: 0.005667749792337418 2023-01-22 15:26:30.930180: step: 516/526, loss: 0.017475707456469536 2023-01-22 15:26:32.029458: step: 520/526, loss: 0.01729508861899376 2023-01-22 15:26:33.105782: step: 524/526, loss: 0.011713293381035328 2023-01-22 15:26:34.159399: step: 528/526, loss: 
0.00553947314620018 2023-01-22 15:26:35.217644: step: 532/526, loss: 0.0069784484803676605 2023-01-22 15:26:36.283064: step: 536/526, loss: 0.03892875835299492 2023-01-22 15:26:37.361936: step: 540/526, loss: 0.04607051610946655 2023-01-22 15:26:38.420955: step: 544/526, loss: 0.01426483504474163 2023-01-22 15:26:39.480410: step: 548/526, loss: 0.017066525295376778 2023-01-22 15:26:40.546820: step: 552/526, loss: 0.007925783284008503 2023-01-22 15:26:41.621376: step: 556/526, loss: 0.02163388766348362 2023-01-22 15:26:42.676465: step: 560/526, loss: 0.04421288147568703 2023-01-22 15:26:43.729905: step: 564/526, loss: 0.012516318820416927 2023-01-22 15:26:44.797906: step: 568/526, loss: 0.049236468970775604 2023-01-22 15:26:45.875600: step: 572/526, loss: 0.023150749504566193 2023-01-22 15:26:46.957918: step: 576/526, loss: 0.009522246196866035 2023-01-22 15:26:48.042034: step: 580/526, loss: 0.016963254660367966 2023-01-22 15:26:49.130073: step: 584/526, loss: 0.01820969767868519 2023-01-22 15:26:50.213073: step: 588/526, loss: 0.07111315429210663 2023-01-22 15:26:51.271054: step: 592/526, loss: 0.007916552014648914 2023-01-22 15:26:52.325944: step: 596/526, loss: 0.007605835795402527 2023-01-22 15:26:53.382120: step: 600/526, loss: 0.012078801169991493 2023-01-22 15:26:54.454054: step: 604/526, loss: 0.019069718196988106 2023-01-22 15:26:55.514313: step: 608/526, loss: 0.010963761247694492 2023-01-22 15:26:56.583242: step: 612/526, loss: 0.013945738784968853 2023-01-22 15:26:57.642502: step: 616/526, loss: 0.011986438184976578 2023-01-22 15:26:58.707083: step: 620/526, loss: 0.013496545143425465 2023-01-22 15:26:59.785408: step: 624/526, loss: 0.0414331816136837 2023-01-22 15:27:00.846451: step: 628/526, loss: 0.02690134197473526 2023-01-22 15:27:01.911194: step: 632/526, loss: 0.015718039125204086 2023-01-22 15:27:02.986023: step: 636/526, loss: 0.010147477500140667 2023-01-22 15:27:04.061950: step: 640/526, loss: 0.03716598451137543 2023-01-22 15:27:05.138211: step: 644/526, loss: 0.015070073306560516 2023-01-22 15:27:06.198338: step: 648/526, loss: 0.04377167299389839 2023-01-22 15:27:07.280332: step: 652/526, loss: 0.02360081672668457 2023-01-22 15:27:08.354242: step: 656/526, loss: 0.0687611922621727 2023-01-22 15:27:09.419315: step: 660/526, loss: 0.01847033202648163 2023-01-22 15:27:10.476483: step: 664/526, loss: 0.006497836206108332 2023-01-22 15:27:11.535676: step: 668/526, loss: 0.025193439796566963 2023-01-22 15:27:12.602170: step: 672/526, loss: 0.042160212993621826 2023-01-22 15:27:13.681157: step: 676/526, loss: 0.018554389476776123 2023-01-22 15:27:14.753056: step: 680/526, loss: 0.01312668714672327 2023-01-22 15:27:15.827808: step: 684/526, loss: 0.010146260261535645 2023-01-22 15:27:16.895138: step: 688/526, loss: 0.034670326858758926 2023-01-22 15:27:17.976539: step: 692/526, loss: 0.03556656092405319 2023-01-22 15:27:19.051571: step: 696/526, loss: 0.01568073406815529 2023-01-22 15:27:20.124831: step: 700/526, loss: 0.0177853275090456 2023-01-22 15:27:21.201190: step: 704/526, loss: 0.00870177149772644 2023-01-22 15:27:22.259120: step: 708/526, loss: 0.014387092553079128 2023-01-22 15:27:23.338901: step: 712/526, loss: 0.005565970204770565 2023-01-22 15:27:24.407811: step: 716/526, loss: 0.0475313663482666 2023-01-22 15:27:25.479999: step: 720/526, loss: 0.012597961351275444 2023-01-22 15:27:26.551846: step: 724/526, loss: 0.005450345575809479 2023-01-22 15:27:27.618594: step: 728/526, loss: 0.007392000872641802 2023-01-22 15:27:28.682434: step: 732/526, loss: 
0.019318392500281334 2023-01-22 15:27:29.754246: step: 736/526, loss: 0.013643491081893444 2023-01-22 15:27:30.825692: step: 740/526, loss: 0.010704146698117256 2023-01-22 15:27:31.901571: step: 744/526, loss: 0.015467265620827675 2023-01-22 15:27:32.966793: step: 748/526, loss: 0.0532383993268013 2023-01-22 15:27:34.045753: step: 752/526, loss: 0.03352460265159607 2023-01-22 15:27:35.119100: step: 756/526, loss: 0.010013245046138763 2023-01-22 15:27:36.192405: step: 760/526, loss: 0.029759852215647697 2023-01-22 15:27:37.255238: step: 764/526, loss: 0.0007408804376609623 2023-01-22 15:27:38.322035: step: 768/526, loss: 0.010924393311142921 2023-01-22 15:27:39.407040: step: 772/526, loss: 0.04788918420672417 2023-01-22 15:27:40.474572: step: 776/526, loss: 0.011204993352293968 2023-01-22 15:27:41.552144: step: 780/526, loss: 0.011356550268828869 2023-01-22 15:27:42.632837: step: 784/526, loss: 0.02712724357843399 2023-01-22 15:27:43.710181: step: 788/526, loss: 0.010598118416965008 2023-01-22 15:27:44.771850: step: 792/526, loss: 0.014653448946774006 2023-01-22 15:27:45.834179: step: 796/526, loss: 0.011020460166037083 2023-01-22 15:27:46.924317: step: 800/526, loss: 0.011566013097763062 2023-01-22 15:27:48.012573: step: 804/526, loss: 0.013965641148388386 2023-01-22 15:27:49.075392: step: 808/526, loss: 0.011844201944768429 2023-01-22 15:27:50.137091: step: 812/526, loss: 0.030684705823659897 2023-01-22 15:27:51.226096: step: 816/526, loss: 0.010975209064781666 2023-01-22 15:27:52.301848: step: 820/526, loss: 0.004901180975139141 2023-01-22 15:27:53.379304: step: 824/526, loss: 0.00804068986326456 2023-01-22 15:27:54.430374: step: 828/526, loss: 0.0274370014667511 2023-01-22 15:27:55.496400: step: 832/526, loss: 0.028265872970223427 2023-01-22 15:27:56.569802: step: 836/526, loss: 0.007112360559403896 2023-01-22 15:27:57.640154: step: 840/526, loss: 0.030012423172593117 2023-01-22 15:27:58.709436: step: 844/526, loss: 0.062013957649469376 2023-01-22 15:27:59.769723: step: 848/526, loss: 0.0032820170745253563 2023-01-22 15:28:00.833109: step: 852/526, loss: 0.014663055539131165 2023-01-22 15:28:01.895063: step: 856/526, loss: 0.013649990782141685 2023-01-22 15:28:02.997633: step: 860/526, loss: 0.01548759825527668 2023-01-22 15:28:04.062045: step: 864/526, loss: 0.015864748507738113 2023-01-22 15:28:05.125440: step: 868/526, loss: 0.016371339559555054 2023-01-22 15:28:06.189564: step: 872/526, loss: 0.014859255403280258 2023-01-22 15:28:07.269491: step: 876/526, loss: 0.011524135246872902 2023-01-22 15:28:08.327234: step: 880/526, loss: 0.032405126839876175 2023-01-22 15:28:09.412350: step: 884/526, loss: 0.0398188941180706 2023-01-22 15:28:10.486234: step: 888/526, loss: 0.00956823118031025 2023-01-22 15:28:11.544007: step: 892/526, loss: 0.012285396456718445 2023-01-22 15:28:12.610219: step: 896/526, loss: 0.017864594236016273 2023-01-22 15:28:13.681259: step: 900/526, loss: 0.026145169511437416 2023-01-22 15:28:14.749257: step: 904/526, loss: 0.008185205049812794 2023-01-22 15:28:15.823882: step: 908/526, loss: 0.03489832207560539 2023-01-22 15:28:16.888426: step: 912/526, loss: 0.012369459494948387 2023-01-22 15:28:17.955794: step: 916/526, loss: 0.012742091901600361 2023-01-22 15:28:19.024311: step: 920/526, loss: 0.0014365314273163676 2023-01-22 15:28:20.115027: step: 924/526, loss: 0.011076916940510273 2023-01-22 15:28:21.160481: step: 928/526, loss: 0.02785377763211727 2023-01-22 15:28:22.228497: step: 932/526, loss: 0.01068216934800148 2023-01-22 15:28:23.300140: step: 936/526, 
loss: 0.011536781676113605 2023-01-22 15:28:24.383476: step: 940/526, loss: 0.012976781465113163 2023-01-22 15:28:25.449831: step: 944/526, loss: 0.008641269989311695 2023-01-22 15:28:26.528864: step: 948/526, loss: 0.008370699360966682 2023-01-22 15:28:27.600062: step: 952/526, loss: 0.009716982953250408 2023-01-22 15:28:28.693104: step: 956/526, loss: 0.015492056496441364 2023-01-22 15:28:29.748889: step: 960/526, loss: 0.004958414006978273 2023-01-22 15:28:30.810479: step: 964/526, loss: 0.03704093396663666 2023-01-22 15:28:31.886827: step: 968/526, loss: 0.008817754685878754 2023-01-22 15:28:32.976639: step: 972/526, loss: 0.00804503820836544 2023-01-22 15:28:34.053636: step: 976/526, loss: 0.007515077944844961 2023-01-22 15:28:35.107048: step: 980/526, loss: 0.02187001146376133 2023-01-22 15:28:36.168372: step: 984/526, loss: 0.013517889194190502 2023-01-22 15:28:37.238358: step: 988/526, loss: 0.0042789471335709095 2023-01-22 15:28:38.296719: step: 992/526, loss: 0.013299938291311264 2023-01-22 15:28:39.356277: step: 996/526, loss: 0.011831467039883137 2023-01-22 15:28:40.425428: step: 1000/526, loss: 0.02314213663339615 2023-01-22 15:28:41.479692: step: 1004/526, loss: 0.011190442368388176 2023-01-22 15:28:42.537653: step: 1008/526, loss: 0.00815039873123169 2023-01-22 15:28:43.610987: step: 1012/526, loss: 0.008704792708158493 2023-01-22 15:28:44.677145: step: 1016/526, loss: 0.017575861886143684 2023-01-22 15:28:45.739928: step: 1020/526, loss: 0.010505554266273975 2023-01-22 15:28:46.808596: step: 1024/526, loss: 0.07492277026176453 2023-01-22 15:28:47.889552: step: 1028/526, loss: 0.02986185811460018 2023-01-22 15:28:48.960474: step: 1032/526, loss: 0.0039607989601790905 2023-01-22 15:28:50.013384: step: 1036/526, loss: 0.008863895200192928 2023-01-22 15:28:51.097740: step: 1040/526, loss: 0.009433879517018795 2023-01-22 15:28:52.160669: step: 1044/526, loss: 0.01492956280708313 2023-01-22 15:28:53.242396: step: 1048/526, loss: 0.0010975669138133526 2023-01-22 15:28:54.310143: step: 1052/526, loss: 0.006231072824448347 2023-01-22 15:28:55.375030: step: 1056/526, loss: 0.011758730746805668 2023-01-22 15:28:56.441450: step: 1060/526, loss: 0.00797701720148325 2023-01-22 15:28:57.541480: step: 1064/526, loss: 0.010580445639789104 2023-01-22 15:28:58.605766: step: 1068/526, loss: 0.014520341530442238 2023-01-22 15:28:59.686688: step: 1072/526, loss: 0.009447069838643074 2023-01-22 15:29:00.758469: step: 1076/526, loss: 0.04533913731575012 2023-01-22 15:29:01.832511: step: 1080/526, loss: 0.05336616933345795 2023-01-22 15:29:02.890857: step: 1084/526, loss: 0.017616352066397667 2023-01-22 15:29:03.949181: step: 1088/526, loss: 0.009178748354315758 2023-01-22 15:29:05.025924: step: 1092/526, loss: 0.03466084599494934 2023-01-22 15:29:06.122810: step: 1096/526, loss: 0.011660982854664326 2023-01-22 15:29:07.183695: step: 1100/526, loss: 0.009456636384129524 2023-01-22 15:29:08.249074: step: 1104/526, loss: 0.030066153034567833 2023-01-22 15:29:09.316188: step: 1108/526, loss: 0.004824388772249222 2023-01-22 15:29:10.373609: step: 1112/526, loss: 0.036501746624708176 2023-01-22 15:29:11.448224: step: 1116/526, loss: 0.01415973249822855 2023-01-22 15:29:12.517581: step: 1120/526, loss: 0.00685915956273675 2023-01-22 15:29:13.593118: step: 1124/526, loss: 0.008669904433190823 2023-01-22 15:29:14.677113: step: 1128/526, loss: 0.006808173377066851 2023-01-22 15:29:15.749583: step: 1132/526, loss: 0.009730562567710876 2023-01-22 15:29:16.823216: step: 1136/526, loss: 0.015168559737503529 
2023-01-22 15:29:17.877763: step: 1140/526, loss: 0.006401766091585159 2023-01-22 15:29:18.947801: step: 1144/526, loss: 0.010507703758776188 2023-01-22 15:29:20.003959: step: 1148/526, loss: 0.016983985900878906 2023-01-22 15:29:21.081915: step: 1152/526, loss: 0.04353087767958641 2023-01-22 15:29:22.183436: step: 1156/526, loss: 0.012048405595123768 2023-01-22 15:29:23.241109: step: 1160/526, loss: 0.005415227264165878 2023-01-22 15:29:24.302139: step: 1164/526, loss: 0.010012845508754253 2023-01-22 15:29:25.355386: step: 1168/526, loss: 0.014785283245146275 2023-01-22 15:29:26.407241: step: 1172/526, loss: 0.01904195547103882 2023-01-22 15:29:27.471689: step: 1176/526, loss: 0.006536061409860849 2023-01-22 15:29:28.529915: step: 1180/526, loss: 0.05519216135144234 2023-01-22 15:29:29.602841: step: 1184/526, loss: 0.011332720518112183 2023-01-22 15:29:30.673467: step: 1188/526, loss: 0.00711508933454752 2023-01-22 15:29:31.750425: step: 1192/526, loss: 0.0708845853805542 2023-01-22 15:29:32.847662: step: 1196/526, loss: 0.004140944220125675 2023-01-22 15:29:33.911019: step: 1200/526, loss: 0.03137581795454025 2023-01-22 15:29:34.980396: step: 1204/526, loss: 0.04280191287398338 2023-01-22 15:29:36.034697: step: 1208/526, loss: 0.019610702991485596 2023-01-22 15:29:37.100776: step: 1212/526, loss: 0.019446009770035744 2023-01-22 15:29:38.149953: step: 1216/526, loss: 0.01401528250426054 2023-01-22 15:29:39.214212: step: 1220/526, loss: 0.005409129895269871 2023-01-22 15:29:40.293211: step: 1224/526, loss: 0.007175501901656389 2023-01-22 15:29:41.366892: step: 1228/526, loss: 0.008640944957733154 2023-01-22 15:29:42.425124: step: 1232/526, loss: 0.005673054605722427 2023-01-22 15:29:43.495916: step: 1236/526, loss: 0.02942793257534504 2023-01-22 15:29:44.554534: step: 1240/526, loss: 0.005041190888732672 2023-01-22 15:29:45.623296: step: 1244/526, loss: 0.041995421051979065 2023-01-22 15:29:46.689676: step: 1248/526, loss: 0.01346071157604456 2023-01-22 15:29:47.768591: step: 1252/526, loss: 0.01392744854092598 2023-01-22 15:29:48.852013: step: 1256/526, loss: 0.009032701142132282 2023-01-22 15:29:49.923244: step: 1260/526, loss: 0.026062192395329475 2023-01-22 15:29:50.975040: step: 1264/526, loss: 0.04490725323557854 2023-01-22 15:29:52.040569: step: 1268/526, loss: 0.00986095517873764 2023-01-22 15:29:53.123354: step: 1272/526, loss: 0.0012970336247235537 2023-01-22 15:29:54.200283: step: 1276/526, loss: 0.02037988230586052 2023-01-22 15:29:55.261241: step: 1280/526, loss: 0.02683100476861 2023-01-22 15:29:56.328528: step: 1284/526, loss: 0.007773365825414658 2023-01-22 15:29:57.389670: step: 1288/526, loss: 0.05968731641769409 2023-01-22 15:29:58.460526: step: 1292/526, loss: 0.03613681346178055 2023-01-22 15:29:59.524822: step: 1296/526, loss: 0.014471679925918579 2023-01-22 15:30:00.590418: step: 1300/526, loss: 0.05630014091730118 2023-01-22 15:30:01.671568: step: 1304/526, loss: 0.01242032554000616 2023-01-22 15:30:02.739170: step: 1308/526, loss: 0.014923516660928726 2023-01-22 15:30:03.809597: step: 1312/526, loss: 0.005437185056507587 2023-01-22 15:30:04.874759: step: 1316/526, loss: 0.026831425726413727 2023-01-22 15:30:05.944162: step: 1320/526, loss: 0.006485107820481062 2023-01-22 15:30:07.001788: step: 1324/526, loss: 0.0076478831470012665 2023-01-22 15:30:08.054624: step: 1328/526, loss: 0.012937773950397968 2023-01-22 15:30:09.120700: step: 1332/526, loss: 0.03799450397491455 2023-01-22 15:30:10.196703: step: 1336/526, loss: 0.04945323243737221 2023-01-22 15:30:11.273938: 
step: 1340/526, loss: 0.008653589524328709 2023-01-22 15:30:12.362165: step: 1344/526, loss: 0.011765801347792149 2023-01-22 15:30:13.424939: step: 1348/526, loss: 0.003939558286219835 2023-01-22 15:30:14.481905: step: 1352/526, loss: 0.010396410711109638 2023-01-22 15:30:15.553605: step: 1356/526, loss: 0.007071498781442642 2023-01-22 15:30:16.643549: step: 1360/526, loss: 0.02911684848368168 2023-01-22 15:30:17.703509: step: 1364/526, loss: 0.009999081492424011 2023-01-22 15:30:18.765913: step: 1368/526, loss: 0.01261632889509201 2023-01-22 15:30:19.825344: step: 1372/526, loss: 0.018445320427417755 2023-01-22 15:30:20.896749: step: 1376/526, loss: 0.029369182884693146 2023-01-22 15:30:21.965599: step: 1380/526, loss: 0.03203567489981651 2023-01-22 15:30:23.026448: step: 1384/526, loss: 0.01643977500498295 2023-01-22 15:30:24.091700: step: 1388/526, loss: 0.006959362421184778 2023-01-22 15:30:25.157701: step: 1392/526, loss: 0.009082360193133354 2023-01-22 15:30:26.216884: step: 1396/526, loss: 0.015994016081094742 2023-01-22 15:30:27.287229: step: 1400/526, loss: 0.007509440649300814 2023-01-22 15:30:28.379771: step: 1404/526, loss: 0.009382094256579876 2023-01-22 15:30:29.442536: step: 1408/526, loss: 0.06619424372911453 2023-01-22 15:30:30.503721: step: 1412/526, loss: 0.01072310097515583 2023-01-22 15:30:31.583682: step: 1416/526, loss: 0.011856546625494957 2023-01-22 15:30:32.645966: step: 1420/526, loss: 0.008005455136299133 2023-01-22 15:30:33.708174: step: 1424/526, loss: 0.029859593138098717 2023-01-22 15:30:34.781885: step: 1428/526, loss: 0.008419408462941647 2023-01-22 15:30:35.854518: step: 1432/526, loss: 0.00817930232733488 2023-01-22 15:30:36.919971: step: 1436/526, loss: 0.003652143059298396 2023-01-22 15:30:37.987079: step: 1440/526, loss: 0.02835831791162491 2023-01-22 15:30:39.050452: step: 1444/526, loss: 0.009940870106220245 2023-01-22 15:30:40.133319: step: 1448/526, loss: 0.009122544899582863 2023-01-22 15:30:41.190419: step: 1452/526, loss: 0.008882798254489899 2023-01-22 15:30:42.261942: step: 1456/526, loss: 0.002926712157204747 2023-01-22 15:30:43.330124: step: 1460/526, loss: 0.009402105584740639 2023-01-22 15:30:44.393550: step: 1464/526, loss: 0.008531627245247364 2023-01-22 15:30:45.456432: step: 1468/526, loss: 0.017533885315060616 2023-01-22 15:30:46.514047: step: 1472/526, loss: 0.01003988366574049 2023-01-22 15:30:47.581960: step: 1476/526, loss: 0.003773764008656144 2023-01-22 15:30:48.637566: step: 1480/526, loss: 0.03491215780377388 2023-01-22 15:30:49.718218: step: 1484/526, loss: 0.0 2023-01-22 15:30:50.786492: step: 1488/526, loss: 0.007994186133146286 2023-01-22 15:30:51.851948: step: 1492/526, loss: 0.005993510130792856 2023-01-22 15:30:52.905500: step: 1496/526, loss: 0.007079313043504953 2023-01-22 15:30:53.963930: step: 1500/526, loss: 0.0017002951353788376 2023-01-22 15:30:55.029074: step: 1504/526, loss: 0.015731265768408775 2023-01-22 15:30:56.100448: step: 1508/526, loss: 0.037618815898895264 2023-01-22 15:30:57.175971: step: 1512/526, loss: 0.012623411603271961 2023-01-22 15:30:58.249354: step: 1516/526, loss: 0.009160598739981651 2023-01-22 15:30:59.305694: step: 1520/526, loss: 0.006584780290722847 2023-01-22 15:31:00.356849: step: 1524/526, loss: 0.012651040218770504 2023-01-22 15:31:01.412107: step: 1528/526, loss: 0.008514382876455784 2023-01-22 15:31:02.482035: step: 1532/526, loss: 0.011343814432621002 2023-01-22 15:31:03.560102: step: 1536/526, loss: 0.0 2023-01-22 15:31:04.615295: step: 1540/526, loss: 0.026241444051265717 
2023-01-22 15:31:05.676196: step: 1544/526, loss: 0.01048093568533659 2023-01-22 15:31:06.741168: step: 1548/526, loss: 0.019991302862763405 2023-01-22 15:31:07.828712: step: 1552/526, loss: 0.006727890577167273 2023-01-22 15:31:08.894090: step: 1556/526, loss: 0.008984006941318512 2023-01-22 15:31:09.975258: step: 1560/526, loss: 0.006317827384918928 2023-01-22 15:31:11.030734: step: 1564/526, loss: 0.028456710278987885 2023-01-22 15:31:12.096958: step: 1568/526, loss: 0.03435984626412392 2023-01-22 15:31:13.172995: step: 1572/526, loss: 0.008374986238777637 2023-01-22 15:31:14.237895: step: 1576/526, loss: 0.010556735098361969 2023-01-22 15:31:15.295928: step: 1580/526, loss: 0.007845778949558735 2023-01-22 15:31:16.344224: step: 1584/526, loss: 0.01241462491452694 2023-01-22 15:31:17.394810: step: 1588/526, loss: 0.008453438989818096 2023-01-22 15:31:18.449681: step: 1592/526, loss: 0.0023859762586653233 2023-01-22 15:31:19.501295: step: 1596/526, loss: 0.002180887386202812 2023-01-22 15:31:20.570361: step: 1600/526, loss: 0.055828213691711426 2023-01-22 15:31:21.639937: step: 1604/526, loss: 0.014245115220546722 2023-01-22 15:31:22.703274: step: 1608/526, loss: 0.0005981465801596642 2023-01-22 15:31:23.766374: step: 1612/526, loss: 0.01824469491839409 2023-01-22 15:31:24.852262: step: 1616/526, loss: 0.03601479157805443 2023-01-22 15:31:25.940853: step: 1620/526, loss: 0.010745086707174778 2023-01-22 15:31:27.017899: step: 1624/526, loss: 0.012959487736225128 2023-01-22 15:31:28.087719: step: 1628/526, loss: 0.031919367611408234 2023-01-22 15:31:29.161124: step: 1632/526, loss: 0.03662387281656265 2023-01-22 15:31:30.225673: step: 1636/526, loss: 0.014079691842198372 2023-01-22 15:31:31.286426: step: 1640/526, loss: 0.013199028559029102 2023-01-22 15:31:32.353531: step: 1644/526, loss: 0.005749897100031376 2023-01-22 15:31:33.437614: step: 1648/526, loss: 0.058484792709350586 2023-01-22 15:31:34.506830: step: 1652/526, loss: 0.009160355664789677 2023-01-22 15:31:35.577594: step: 1656/526, loss: 0.01271575503051281 2023-01-22 15:31:36.619219: step: 1660/526, loss: 0.005082838237285614 2023-01-22 15:31:37.693656: step: 1664/526, loss: 0.0063095358200371265 2023-01-22 15:31:38.775639: step: 1668/526, loss: 0.016301430761814117 2023-01-22 15:31:39.845083: step: 1672/526, loss: 0.01858590543270111 2023-01-22 15:31:40.935495: step: 1676/526, loss: 0.0562649667263031 2023-01-22 15:31:42.011378: step: 1680/526, loss: 0.03633774444460869 2023-01-22 15:31:43.071837: step: 1684/526, loss: 0.02037951350212097 2023-01-22 15:31:44.127163: step: 1688/526, loss: 0.009017981588840485 2023-01-22 15:31:45.182903: step: 1692/526, loss: 0.014851553365588188 2023-01-22 15:31:46.255903: step: 1696/526, loss: 0.03930414095520973 2023-01-22 15:31:47.309346: step: 1700/526, loss: 0.009732124395668507 2023-01-22 15:31:48.386144: step: 1704/526, loss: 0.03580033779144287 2023-01-22 15:31:49.446781: step: 1708/526, loss: 0.10349379479885101 2023-01-22 15:31:50.505178: step: 1712/526, loss: 0.02116229385137558 2023-01-22 15:31:51.549293: step: 1716/526, loss: 0.006866572890430689 2023-01-22 15:31:52.610687: step: 1720/526, loss: 0.004173388238996267 2023-01-22 15:31:53.679006: step: 1724/526, loss: 0.020609896630048752 2023-01-22 15:31:54.716427: step: 1728/526, loss: 0.008703921921551228 2023-01-22 15:31:55.791430: step: 1732/526, loss: 0.06990665942430496 2023-01-22 15:31:56.859429: step: 1736/526, loss: 0.05288764461874962 2023-01-22 15:31:57.953176: step: 1740/526, loss: 0.007890609093010426 2023-01-22 
15:31:59.027448: step: 1744/526, loss: 0.01431087777018547 2023-01-22 15:32:00.084706: step: 1748/526, loss: 0.008707761764526367 2023-01-22 15:32:01.179277: step: 1752/526, loss: 0.024928338825702667 2023-01-22 15:32:02.256155: step: 1756/526, loss: 0.009167775511741638 2023-01-22 15:32:03.316552: step: 1760/526, loss: 0.012607909739017487 2023-01-22 15:32:04.378775: step: 1764/526, loss: 0.009761009365320206 2023-01-22 15:32:05.442528: step: 1768/526, loss: 0.040605463087558746 2023-01-22 15:32:06.516589: step: 1772/526, loss: 0.010327261872589588 2023-01-22 15:32:07.588147: step: 1776/526, loss: 0.00893464032560587 2023-01-22 15:32:08.681670: step: 1780/526, loss: 0.012258530594408512 2023-01-22 15:32:09.735768: step: 1784/526, loss: 0.011431992053985596 2023-01-22 15:32:10.831496: step: 1788/526, loss: 0.033019233494997025 2023-01-22 15:32:11.901929: step: 1792/526, loss: 0.011460560373961926 2023-01-22 15:32:12.974603: step: 1796/526, loss: 0.036431025713682175 2023-01-22 15:32:14.027373: step: 1800/526, loss: 0.01994819939136505 2023-01-22 15:32:15.094854: step: 1804/526, loss: 0.028628690168261528 2023-01-22 15:32:16.150681: step: 1808/526, loss: 0.03119327500462532 2023-01-22 15:32:17.220502: step: 1812/526, loss: 0.011507650837302208 2023-01-22 15:32:18.288545: step: 1816/526, loss: 0.020260179415345192 2023-01-22 15:32:19.348860: step: 1820/526, loss: 0.01271382998675108 2023-01-22 15:32:20.422648: step: 1824/526, loss: 0.007489434909075499 2023-01-22 15:32:21.492389: step: 1828/526, loss: 0.04710334166884422 2023-01-22 15:32:22.570298: step: 1832/526, loss: 0.00463204737752676 2023-01-22 15:32:23.636074: step: 1836/526, loss: 0.005719366483390331 2023-01-22 15:32:24.702930: step: 1840/526, loss: 0.0285815242677927 2023-01-22 15:32:25.779197: step: 1844/526, loss: 0.006801443640142679 2023-01-22 15:32:26.859635: step: 1848/526, loss: 0.03405589237809181 2023-01-22 15:32:27.940672: step: 1852/526, loss: 0.007955166511237621 2023-01-22 15:32:29.000351: step: 1856/526, loss: 0.03807443380355835 2023-01-22 15:32:30.070866: step: 1860/526, loss: 0.013254741206765175 2023-01-22 15:32:31.131654: step: 1864/526, loss: 0.03340696170926094 2023-01-22 15:32:32.212975: step: 1868/526, loss: 0.011111082509160042 2023-01-22 15:32:33.256868: step: 1872/526, loss: 0.028069892898201942 2023-01-22 15:32:34.329009: step: 1876/526, loss: 0.009006863459944725 2023-01-22 15:32:35.380289: step: 1880/526, loss: 0.0012652931036427617 2023-01-22 15:32:36.442471: step: 1884/526, loss: 0.011551257222890854 2023-01-22 15:32:37.510574: step: 1888/526, loss: 0.007737389300018549 2023-01-22 15:32:38.576244: step: 1892/526, loss: 0.013382695615291595 2023-01-22 15:32:39.642991: step: 1896/526, loss: 0.04424820840358734 2023-01-22 15:32:40.709590: step: 1900/526, loss: 0.009375272318720818 2023-01-22 15:32:41.777491: step: 1904/526, loss: 0.008670293726027012 2023-01-22 15:32:42.862891: step: 1908/526, loss: 0.02267920970916748 2023-01-22 15:32:43.923187: step: 1912/526, loss: 0.01814381405711174 2023-01-22 15:32:45.015841: step: 1916/526, loss: 0.016704678535461426 2023-01-22 15:32:46.080023: step: 1920/526, loss: 0.027080735191702843 2023-01-22 15:32:47.147572: step: 1924/526, loss: 0.008847378194332123 2023-01-22 15:32:48.234889: step: 1928/526, loss: 0.01489635743200779 2023-01-22 15:32:49.302964: step: 1932/526, loss: 0.00615769624710083 2023-01-22 15:32:50.370810: step: 1936/526, loss: 0.021490972489118576 2023-01-22 15:32:51.419629: step: 1940/526, loss: 0.0002509737969376147 2023-01-22 15:32:52.497902: 
step: 1944/526, loss: 0.008738047443330288 2023-01-22 15:32:53.571630: step: 1948/526, loss: 0.050118062645196915 2023-01-22 15:32:54.626090: step: 1952/526, loss: 0.06966641545295715 2023-01-22 15:32:55.705048: step: 1956/526, loss: 0.010268663987517357 2023-01-22 15:32:56.779124: step: 1960/526, loss: 0.04485553503036499 2023-01-22 15:32:57.861853: step: 1964/526, loss: 0.002371970796957612 2023-01-22 15:32:58.915125: step: 1968/526, loss: 0.008896823041141033 2023-01-22 15:32:59.993165: step: 1972/526, loss: 0.00835936889052391 2023-01-22 15:33:01.067643: step: 1976/526, loss: 0.015106378123164177 2023-01-22 15:33:02.152598: step: 1980/526, loss: 0.00909444224089384 2023-01-22 15:33:03.226341: step: 1984/526, loss: 0.010535995475947857 2023-01-22 15:33:04.291041: step: 1988/526, loss: 0.006307290866971016 2023-01-22 15:33:05.340366: step: 1992/526, loss: 0.01878141425549984 2023-01-22 15:33:06.396045: step: 1996/526, loss: 0.038284849375486374 2023-01-22 15:33:07.475520: step: 2000/526, loss: 0.00795657467097044 2023-01-22 15:33:08.537744: step: 2004/526, loss: 0.011157316155731678 2023-01-22 15:33:09.619604: step: 2008/526, loss: 0.04704447463154793 2023-01-22 15:33:10.673649: step: 2012/526, loss: 0.0013298210687935352 2023-01-22 15:33:11.729024: step: 2016/526, loss: 0.01780594140291214 2023-01-22 15:33:12.805983: step: 2020/526, loss: 0.012109140865504742 2023-01-22 15:33:13.876903: step: 2024/526, loss: 0.004032840020954609 2023-01-22 15:33:14.937147: step: 2028/526, loss: 0.0509190671145916 2023-01-22 15:33:15.999901: step: 2032/526, loss: 0.008496547117829323 2023-01-22 15:33:17.064211: step: 2036/526, loss: 0.009023867547512054 2023-01-22 15:33:18.139956: step: 2040/526, loss: 0.01766207255423069 2023-01-22 15:33:19.223783: step: 2044/526, loss: 0.010823347605764866 2023-01-22 15:33:20.289978: step: 2048/526, loss: 0.009245182387530804 2023-01-22 15:33:21.345869: step: 2052/526, loss: 0.006597570609301329 2023-01-22 15:33:22.421723: step: 2056/526, loss: 0.0073024514131248 2023-01-22 15:33:23.485716: step: 2060/526, loss: 0.014326004311442375 2023-01-22 15:33:24.542469: step: 2064/526, loss: 0.002964144805446267 2023-01-22 15:33:25.611679: step: 2068/526, loss: 0.03649190813302994 2023-01-22 15:33:26.678033: step: 2072/526, loss: 0.009398775175213814 2023-01-22 15:33:27.736289: step: 2076/526, loss: 0.004718531854450703 2023-01-22 15:33:28.804348: step: 2080/526, loss: 0.010285005904734135 2023-01-22 15:33:29.870062: step: 2084/526, loss: 0.03717676177620888 2023-01-22 15:33:30.952022: step: 2088/526, loss: 0.008828303776681423 2023-01-22 15:33:32.010957: step: 2092/526, loss: 0.0037476096767932177 2023-01-22 15:33:33.079461: step: 2096/526, loss: 0.01692165620625019 2023-01-22 15:33:34.159491: step: 2100/526, loss: 0.006278595887124538 2023-01-22 15:33:35.226657: step: 2104/526, loss: 0.018769849091768265 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3504063901345291, 'r': 0.2965488614800759, 'f1': 0.32123586844809865}, 'combined': 0.23670011359333584, 'stategy': 1, 'epoch': 0} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32759375573079036, 'r': 0.23369554552590266, 'f1': 0.2727905245476063}, 'combined': 0.1487948315714216, 'stategy': 1, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 
0.3089535056446821, 'r': 0.32888598987982287, 'f1': 0.31860830269607837}, 'combined': 0.23476401251289983, 'stategy': 1, 'epoch': 0} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32988317226522773, 'r': 0.26106948216781517, 'f1': 0.29146981001983857}, 'combined': 0.15898353273809376, 'stategy': 1, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32233244559559066, 'r': 0.32906044730631456, 'f1': 0.32566170090221175}, 'combined': 0.23996125329636653, 'stategy': 1, 'epoch': 0} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3231644366763761, 'r': 0.26827075154230673, 'f1': 0.29317013267347897}, 'combined': 0.15991098145826124, 'stategy': 1, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'stategy': 1, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.484375, 'r': 0.2672413793103448, 'f1': 0.34444444444444444}, 'combined': 0.22962962962962963, 'stategy': 1, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3504063901345291, 'r': 0.2965488614800759, 'f1': 0.32123586844809865}, 'combined': 0.23670011359333584, 'stategy': 1, 'epoch': 0} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32759375573079036, 'r': 0.23369554552590266, 'f1': 0.2727905245476063}, 'combined': 0.1487948315714216, 'stategy': 1, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3089535056446821, 'r': 0.32888598987982287, 'f1': 0.31860830269607837}, 'combined': 0.23476401251289983, 'stategy': 1, 'epoch': 0} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32988317226522773, 'r': 0.26106948216781517, 'f1': 0.29146981001983857}, 'combined': 0.15898353273809376, 'stategy': 1, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'stategy': 1, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32233244559559066, 'r': 0.32906044730631456, 'f1': 0.32566170090221175}, 'combined': 0.23996125329636653, 'stategy': 1, 'epoch': 0} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3231644366763761, 'r': 0.26827075154230673, 'f1': 0.29317013267347897}, 
'combined': 0.15991098145826124, 'stategy': 1, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.484375, 'r': 0.2672413793103448, 'f1': 0.34444444444444444}, 'combined': 0.22962962962962963, 'stategy': 1, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 15:36:42.896155: step: 4/526, loss: 0.03211181238293648 2023-01-22 15:36:43.956638: step: 8/526, loss: 0.015572511591017246 2023-01-22 15:36:45.009095: step: 12/526, loss: 0.008042541332542896 2023-01-22 15:36:46.114035: step: 16/526, loss: 0.004841309506446123 2023-01-22 15:36:47.198626: step: 20/526, loss: 0.007955636829137802 2023-01-22 15:36:48.270478: step: 24/526, loss: 0.00797360297292471 2023-01-22 15:36:49.350351: step: 28/526, loss: 0.033014778047800064 2023-01-22 15:36:50.420873: step: 32/526, loss: 0.05221322178840637 2023-01-22 15:36:51.500017: step: 36/526, loss: 0.030476940795779228 2023-01-22 15:36:52.570387: step: 40/526, loss: 0.0014396319165825844 2023-01-22 15:36:53.632482: step: 44/526, loss: 0.01805701106786728 2023-01-22 15:36:54.694957: step: 48/526, loss: 0.007719062734395266 2023-01-22 15:36:55.756665: step: 52/526, loss: 0.007864895276725292 2023-01-22 15:36:56.816192: step: 56/526, loss: 0.010750040411949158 2023-01-22 15:36:57.882823: step: 60/526, loss: 0.014993387274444103 2023-01-22 15:36:58.964984: step: 64/526, loss: 0.015512635000050068 2023-01-22 15:37:00.026906: step: 68/526, loss: 0.003549936693161726 2023-01-22 15:37:01.097170: step: 72/526, loss: 0.007248771842569113 2023-01-22 15:37:02.151973: step: 76/526, loss: 0.016969798132777214 2023-01-22 15:37:03.208796: step: 80/526, loss: 0.008019802160561085 2023-01-22 15:37:04.265294: step: 84/526, loss: 0.009370728395879269 2023-01-22 15:37:05.336377: step: 88/526, loss: 0.010859012603759766 2023-01-22 15:37:06.397826: step: 92/526, loss: 0.0011244139168411493 2023-01-22 15:37:07.451719: step: 96/526, loss: 0.0029502692632377148 2023-01-22 15:37:08.528816: step: 100/526, loss: 0.006351650692522526 2023-01-22 15:37:09.590034: step: 104/526, loss: 0.023949936032295227 2023-01-22 15:37:10.650778: step: 108/526, loss: 0.007888903841376305 2023-01-22 15:37:11.709273: step: 112/526, loss: 0.008125972002744675 2023-01-22 15:37:12.784882: step: 116/526, loss: 0.009863548912107944 2023-01-22 15:37:13.860836: step: 120/526, loss: 0.03601766377687454 2023-01-22 15:37:14.918553: step: 124/526, loss: 0.015775542706251144 2023-01-22 15:37:15.981734: step: 128/526, loss: 0.011705438606441021 2023-01-22 15:37:17.042202: step: 132/526, loss: 0.011139459908008575 2023-01-22 15:37:18.096622: step: 136/526, loss: 0.021827878430485725 2023-01-22 15:37:19.166744: step: 140/526, loss: 0.007847513072192669 2023-01-22 15:37:20.233947: step: 144/526, loss: 0.04689466953277588 2023-01-22 15:37:21.305962: step: 148/526, loss: 0.009505599737167358 2023-01-22 15:37:22.373300: step: 152/526, loss: 0.009732306934893131 2023-01-22 15:37:23.457884: step: 156/526, loss: 0.007876728661358356 2023-01-22 15:37:24.518430: step: 160/526, loss: 0.015592919662594795 2023-01-22 15:37:25.562184: step: 164/526, loss: 0.009664716199040413 2023-01-22 15:37:26.620795: step: 168/526, loss: 0.01496057491749525 2023-01-22 15:37:27.678829: step: 172/526, loss: 0.013952945359051228 2023-01-22 15:37:28.758548: step: 176/526, 
loss: 0.01393144205212593 2023-01-22 15:37:29.827300: step: 180/526, loss: 0.03488390892744064 2023-01-22 15:37:30.904337: step: 184/526, loss: 0.0152826476842165 2023-01-22 15:37:31.976388: step: 188/526, loss: 0.012214967980980873 2023-01-22 15:37:33.059749: step: 192/526, loss: 0.011353610083460808 2023-01-22 15:37:34.137272: step: 196/526, loss: 0.004587080795317888 2023-01-22 15:37:35.201253: step: 200/526, loss: 0.012474067509174347 2023-01-22 15:37:36.273236: step: 204/526, loss: 0.017332954332232475 2023-01-22 15:37:37.339820: step: 208/526, loss: 0.02854897268116474 2023-01-22 15:37:38.407368: step: 212/526, loss: 0.026937387883663177 2023-01-22 15:37:39.473626: step: 216/526, loss: 0.05181463807821274 2023-01-22 15:37:40.547736: step: 220/526, loss: 0.012530874460935593 2023-01-22 15:37:41.630657: step: 224/526, loss: 0.007335149683058262 2023-01-22 15:37:42.709243: step: 228/526, loss: 0.0031346820760518312 2023-01-22 15:37:43.815221: step: 232/526, loss: 0.015300611965358257 2023-01-22 15:37:44.891532: step: 236/526, loss: 0.011877749115228653 2023-01-22 15:37:45.968966: step: 240/526, loss: 0.00818359013646841 2023-01-22 15:37:47.034139: step: 244/526, loss: 0.005689065903425217 2023-01-22 15:37:48.090274: step: 248/526, loss: 0.014335880987346172 2023-01-22 15:37:49.173017: step: 252/526, loss: 0.030663516372442245 2023-01-22 15:37:50.247318: step: 256/526, loss: 0.029263811185956 2023-01-22 15:37:51.326634: step: 260/526, loss: 0.026315417140722275 2023-01-22 15:37:52.420972: step: 264/526, loss: 0.015756351873278618 2023-01-22 15:37:53.494415: step: 268/526, loss: 0.0010223337449133396 2023-01-22 15:37:54.560901: step: 272/526, loss: 0.02698124386370182 2023-01-22 15:37:55.645893: step: 276/526, loss: 0.007111274637281895 2023-01-22 15:37:56.725002: step: 280/526, loss: 0.011177325621247292 2023-01-22 15:37:57.796759: step: 284/526, loss: 0.030898889526724815 2023-01-22 15:37:58.869990: step: 288/526, loss: 0.009922806173563004 2023-01-22 15:37:59.930471: step: 292/526, loss: 0.01405404694378376 2023-01-22 15:38:01.014639: step: 296/526, loss: 0.038369640707969666 2023-01-22 15:38:02.091753: step: 300/526, loss: 0.02093619480729103 2023-01-22 15:38:03.166026: step: 304/526, loss: 0.007910237647593021 2023-01-22 15:38:04.231002: step: 308/526, loss: 0.005331913474947214 2023-01-22 15:38:05.314140: step: 312/526, loss: 0.02157352678477764 2023-01-22 15:38:06.388585: step: 316/526, loss: 0.009920264594256878 2023-01-22 15:38:07.467352: step: 320/526, loss: 0.06362996250391006 2023-01-22 15:38:08.542401: step: 324/526, loss: 0.006740234326571226 2023-01-22 15:38:09.601651: step: 328/526, loss: 0.01942634768784046 2023-01-22 15:38:10.692847: step: 332/526, loss: 0.01024128869175911 2023-01-22 15:38:11.762211: step: 336/526, loss: 0.018934722989797592 2023-01-22 15:38:12.849680: step: 340/526, loss: 0.025675050914287567 2023-01-22 15:38:13.907461: step: 344/526, loss: 0.002226931508630514 2023-01-22 15:38:14.980296: step: 348/526, loss: 0.025554940104484558 2023-01-22 15:38:16.048118: step: 352/526, loss: 0.04201820120215416 2023-01-22 15:38:17.139061: step: 356/526, loss: 0.006230425555258989 2023-01-22 15:38:18.200218: step: 360/526, loss: 0.007900257594883442 2023-01-22 15:38:19.274394: step: 364/526, loss: 0.0032039673533290625 2023-01-22 15:38:20.340051: step: 368/526, loss: 0.01583411544561386 2023-01-22 15:38:21.411795: step: 372/526, loss: 0.005878274794667959 2023-01-22 15:38:22.471930: step: 376/526, loss: 0.007089337334036827 2023-01-22 15:38:23.547132: step: 380/526, 
loss: 0.03770569711923599 2023-01-22 15:38:24.620006: step: 384/526, loss: 0.03550266474485397 2023-01-22 15:38:25.719894: step: 388/526, loss: 0.009625154547393322 2023-01-22 15:38:26.785211: step: 392/526, loss: 0.005954100284725428 2023-01-22 15:38:27.861005: step: 396/526, loss: 0.01914738304913044 2023-01-22 15:38:28.933074: step: 400/526, loss: 0.010117245838046074 2023-01-22 15:38:29.996164: step: 404/526, loss: 0.038790617138147354 2023-01-22 15:38:31.066853: step: 408/526, loss: 0.009354247711598873 2023-01-22 15:38:32.145646: step: 412/526, loss: 0.009561301209032536 2023-01-22 15:38:33.218677: step: 416/526, loss: 0.022464746609330177 2023-01-22 15:38:34.286651: step: 420/526, loss: 0.015408172272145748 2023-01-22 15:38:35.377023: step: 424/526, loss: 0.02869766391813755 2023-01-22 15:38:36.465319: step: 428/526, loss: 0.02420332096517086 2023-01-22 15:38:37.555299: step: 432/526, loss: 0.011934206821024418 2023-01-22 15:38:38.633914: step: 436/526, loss: 0.06720546633005142 2023-01-22 15:38:39.708226: step: 440/526, loss: 0.013508724048733711 2023-01-22 15:38:40.770655: step: 444/526, loss: 0.003344995202496648 2023-01-22 15:38:41.835955: step: 448/526, loss: 0.01771223545074463 2023-01-22 15:38:42.916930: step: 452/526, loss: 0.00806915108114481 2023-01-22 15:38:44.002275: step: 456/526, loss: 0.032121557742357254 2023-01-22 15:38:45.069092: step: 460/526, loss: 0.007599648088216782 2023-01-22 15:38:46.144504: step: 464/526, loss: 0.012127426452934742 2023-01-22 15:38:47.231777: step: 468/526, loss: 0.0073621622286736965 2023-01-22 15:38:48.303285: step: 472/526, loss: 0.024229129776358604 2023-01-22 15:38:49.389157: step: 476/526, loss: 0.008662429638206959 2023-01-22 15:38:50.482864: step: 480/526, loss: 0.015572981908917427 2023-01-22 15:38:51.567207: step: 484/526, loss: 0.03178900107741356 2023-01-22 15:38:52.648986: step: 488/526, loss: 0.04805722087621689 2023-01-22 15:38:53.742262: step: 492/526, loss: 0.00870773196220398 2023-01-22 15:38:54.822158: step: 496/526, loss: 0.021471833810210228 2023-01-22 15:38:55.899536: step: 500/526, loss: 0.03204527869820595 2023-01-22 15:38:56.979906: step: 504/526, loss: 0.012247931212186813 2023-01-22 15:38:58.054231: step: 508/526, loss: 0.00462923850864172 2023-01-22 15:38:59.172273: step: 512/526, loss: 0.0216665118932724 2023-01-22 15:39:00.250599: step: 516/526, loss: 0.0545852854847908 2023-01-22 15:39:01.321151: step: 520/526, loss: 0.009089004248380661 2023-01-22 15:39:02.402697: step: 524/526, loss: 0.04380949214100838 2023-01-22 15:39:03.479049: step: 528/526, loss: 0.010037143714725971 2023-01-22 15:39:04.569591: step: 532/526, loss: 0.011802319437265396 2023-01-22 15:39:05.648112: step: 536/526, loss: 0.007577423006296158 2023-01-22 15:39:06.718181: step: 540/526, loss: 0.012781643308699131 2023-01-22 15:39:07.794263: step: 544/526, loss: 0.005990928970277309 2023-01-22 15:39:08.863021: step: 548/526, loss: 0.020134612917900085 2023-01-22 15:39:09.937747: step: 552/526, loss: 0.01833156682550907 2023-01-22 15:39:11.001093: step: 556/526, loss: 0.012743410654366016 2023-01-22 15:39:12.086253: step: 560/526, loss: 0.0056757088750600815 2023-01-22 15:39:13.160789: step: 564/526, loss: 0.005099541507661343 2023-01-22 15:39:14.228744: step: 568/526, loss: 0.03215373679995537 2023-01-22 15:39:15.289153: step: 572/526, loss: 0.030809998512268066 2023-01-22 15:39:16.357474: step: 576/526, loss: 0.018042458221316338 2023-01-22 15:39:17.407026: step: 580/526, loss: 0.015842700377106667 2023-01-22 15:39:18.468388: step: 584/526, 
loss: 0.007151165511459112 2023-01-22 15:39:19.538120: step: 588/526, loss: 0.00645815534517169 2023-01-22 15:39:20.622298: step: 592/526, loss: 0.006514847278594971 2023-01-22 15:39:21.683973: step: 596/526, loss: 0.006465676706284285 2023-01-22 15:39:22.783291: step: 600/526, loss: 0.021039044484496117 2023-01-22 15:39:23.859818: step: 604/526, loss: 0.03174136206507683 2023-01-22 15:39:24.932041: step: 608/526, loss: 0.005157373379915953 2023-01-22 15:39:26.012245: step: 612/526, loss: 0.014290643855929375 2023-01-22 15:39:27.075976: step: 616/526, loss: 0.0008734349976293743 2023-01-22 15:39:28.158598: step: 620/526, loss: 0.006645748857408762 2023-01-22 15:39:29.216011: step: 624/526, loss: 0.00627474719658494 2023-01-22 15:39:30.281004: step: 628/526, loss: 0.039031971246004105 2023-01-22 15:39:31.340146: step: 632/526, loss: 0.0062651680782437325 2023-01-22 15:39:32.431057: step: 636/526, loss: 0.010329682379961014 2023-01-22 15:39:33.502013: step: 640/526, loss: 0.03935340419411659 2023-01-22 15:39:34.577073: step: 644/526, loss: 0.013947218656539917 2023-01-22 15:39:35.661629: step: 648/526, loss: 0.02123948559165001 2023-01-22 15:39:36.733992: step: 652/526, loss: 0.011712118983268738 2023-01-22 15:39:37.813795: step: 656/526, loss: 0.0040458738803863525 2023-01-22 15:39:38.876868: step: 660/526, loss: 0.00468495674431324 2023-01-22 15:39:39.940040: step: 664/526, loss: 0.02137726917862892 2023-01-22 15:39:41.003581: step: 668/526, loss: 0.006868351716548204 2023-01-22 15:39:42.066603: step: 672/526, loss: 0.054988425225019455 2023-01-22 15:39:43.168865: step: 676/526, loss: 0.022215574979782104 2023-01-22 15:39:44.242887: step: 680/526, loss: 0.007315394002944231 2023-01-22 15:39:45.328137: step: 684/526, loss: 0.009959024377167225 2023-01-22 15:39:46.378824: step: 688/526, loss: 0.003706212854012847 2023-01-22 15:39:47.452699: step: 692/526, loss: 0.0057921684347093105 2023-01-22 15:39:48.520939: step: 696/526, loss: 0.01116146519780159 2023-01-22 15:39:49.597545: step: 700/526, loss: 0.0040785176679492 2023-01-22 15:39:50.685680: step: 704/526, loss: 0.017844034358859062 2023-01-22 15:39:51.764068: step: 708/526, loss: 0.015944886952638626 2023-01-22 15:39:52.850477: step: 712/526, loss: 0.0108705535531044 2023-01-22 15:39:53.909245: step: 716/526, loss: 0.009291144087910652 2023-01-22 15:39:54.978247: step: 720/526, loss: 0.007601814344525337 2023-01-22 15:39:56.049382: step: 724/526, loss: 0.03549468517303467 2023-01-22 15:39:57.105086: step: 728/526, loss: 0.003963234834372997 2023-01-22 15:39:58.164707: step: 732/526, loss: 0.007039588876068592 2023-01-22 15:39:59.236410: step: 736/526, loss: 0.010890254750847816 2023-01-22 15:40:00.310202: step: 740/526, loss: 0.00566583639010787 2023-01-22 15:40:01.375477: step: 744/526, loss: 0.0019247246673330665 2023-01-22 15:40:02.437484: step: 748/526, loss: 0.008732607588171959 2023-01-22 15:40:03.500243: step: 752/526, loss: 0.008327975869178772 2023-01-22 15:40:04.554107: step: 756/526, loss: 0.007925166748464108 2023-01-22 15:40:05.610779: step: 760/526, loss: 0.013754901476204395 2023-01-22 15:40:06.689455: step: 764/526, loss: 0.01145532913506031 2023-01-22 15:40:07.754594: step: 768/526, loss: 0.004006172064691782 2023-01-22 15:40:08.803835: step: 772/526, loss: 0.012019657529890537 2023-01-22 15:40:09.871513: step: 776/526, loss: 0.012505311518907547 2023-01-22 15:40:10.930438: step: 780/526, loss: 0.0036865200381726027 2023-01-22 15:40:11.994770: step: 784/526, loss: 0.0048268320970237255 2023-01-22 15:40:13.065204: step: 
788/526, loss: 0.000239708082517609 2023-01-22 15:40:14.127949: step: 792/526, loss: 0.0015891763614490628 2023-01-22 15:40:15.196140: step: 796/526, loss: 0.010582678951323032 2023-01-22 15:40:16.255149: step: 800/526, loss: 0.05896751210093498 2023-01-22 15:40:17.333747: step: 804/526, loss: 0.011327753774821758 2023-01-22 15:40:18.408283: step: 808/526, loss: 0.046806029975414276 2023-01-22 15:40:19.465373: step: 812/526, loss: 0.048858143389225006 2023-01-22 15:40:20.517288: step: 816/526, loss: 0.01034831814467907 2023-01-22 15:40:21.571501: step: 820/526, loss: 0.02128666639328003 2023-01-22 15:40:22.641108: step: 824/526, loss: 0.013094465248286724 2023-01-22 15:40:23.706363: step: 828/526, loss: 0.0029561948031187057 2023-01-22 15:40:24.775387: step: 832/526, loss: 0.019411412999033928 2023-01-22 15:40:25.836353: step: 836/526, loss: 0.032024700194597244 2023-01-22 15:40:26.904807: step: 840/526, loss: 0.005607653874903917 2023-01-22 15:40:27.979164: step: 844/526, loss: 0.01002599485218525 2023-01-22 15:40:29.033852: step: 848/526, loss: 0.029176589101552963 2023-01-22 15:40:30.079718: step: 852/526, loss: 0.02625167742371559 2023-01-22 15:40:31.143195: step: 856/526, loss: 0.006641499698162079 2023-01-22 15:40:32.200798: step: 860/526, loss: 0.038459356874227524 2023-01-22 15:40:33.255695: step: 864/526, loss: 0.00858103483915329 2023-01-22 15:40:34.319171: step: 868/526, loss: 0.025573449209332466 2023-01-22 15:40:35.398878: step: 872/526, loss: 0.011654009111225605 2023-01-22 15:40:36.482421: step: 876/526, loss: 0.02767193876206875 2023-01-22 15:40:37.557546: step: 880/526, loss: 0.008378477767109871 2023-01-22 15:40:38.615597: step: 884/526, loss: 0.024453088641166687 2023-01-22 15:40:39.681807: step: 888/526, loss: 0.02721349149942398 2023-01-22 15:40:40.735574: step: 892/526, loss: 0.013855252414941788 2023-01-22 15:40:41.804767: step: 896/526, loss: 0.004693435505032539 2023-01-22 15:40:42.904062: step: 900/526, loss: 0.015292895957827568 2023-01-22 15:40:43.971669: step: 904/526, loss: 0.020867938175797462 2023-01-22 15:40:45.042335: step: 908/526, loss: 0.039456333965063095 2023-01-22 15:40:46.131154: step: 912/526, loss: 0.007400562521070242 2023-01-22 15:40:47.217299: step: 916/526, loss: 0.04387707635760307 2023-01-22 15:40:48.278689: step: 920/526, loss: 0.009956590831279755 2023-01-22 15:40:49.337238: step: 924/526, loss: 0.005391793791204691 2023-01-22 15:40:50.395067: step: 928/526, loss: 0.0059174406342208385 2023-01-22 15:40:51.447784: step: 932/526, loss: 0.008824181742966175 2023-01-22 15:40:52.513931: step: 936/526, loss: 0.013592577539384365 2023-01-22 15:40:53.580990: step: 940/526, loss: 0.011900778859853745 2023-01-22 15:40:54.642822: step: 944/526, loss: 0.009875464253127575 2023-01-22 15:40:55.724338: step: 948/526, loss: 0.0007198238163255155 2023-01-22 15:40:56.781987: step: 952/526, loss: 0.002723206765949726 2023-01-22 15:40:57.854954: step: 956/526, loss: 0.04314401000738144 2023-01-22 15:40:58.925958: step: 960/526, loss: 0.01172436773777008 2023-01-22 15:40:59.982655: step: 964/526, loss: 0.01210050005465746 2023-01-22 15:41:01.025602: step: 968/526, loss: 0.009707611054182053 2023-01-22 15:41:02.077646: step: 972/526, loss: 0.00920557975769043 2023-01-22 15:41:03.148055: step: 976/526, loss: 0.0039517320692539215 2023-01-22 15:41:04.220037: step: 980/526, loss: 0.035349588841199875 2023-01-22 15:41:05.296702: step: 984/526, loss: 0.05844910442829132 2023-01-22 15:41:06.374542: step: 988/526, loss: 0.008539840579032898 2023-01-22 15:41:07.441011: 
step: 992/526, loss: 0.0036421450786292553 2023-01-22 15:41:08.502354: step: 996/526, loss: 0.006346558686345816 2023-01-22 15:41:09.571853: step: 1000/526, loss: 0.020023275166749954 2023-01-22 15:41:10.636565: step: 1004/526, loss: 0.023386115208268166 2023-01-22 15:41:11.697267: step: 1008/526, loss: 0.012384321540594101 2023-01-22 15:41:12.776684: step: 1012/526, loss: 0.07021833956241608 2023-01-22 15:41:13.848080: step: 1016/526, loss: 0.020438725128769875 2023-01-22 15:41:14.920835: step: 1020/526, loss: 0.04313286393880844 2023-01-22 15:41:15.974606: step: 1024/526, loss: 0.011168815195560455 2023-01-22 15:41:17.034771: step: 1028/526, loss: 0.005922115407884121 2023-01-22 15:41:18.094614: step: 1032/526, loss: 0.010224668309092522 2023-01-22 15:41:19.174941: step: 1036/526, loss: 0.06196172907948494 2023-01-22 15:41:20.248077: step: 1040/526, loss: 0.021300524473190308 2023-01-22 15:41:21.328148: step: 1044/526, loss: 0.009970474056899548 2023-01-22 15:41:22.378743: step: 1048/526, loss: 0.014697756618261337 2023-01-22 15:41:23.441858: step: 1052/526, loss: 0.008854944258928299 2023-01-22 15:41:24.519833: step: 1056/526, loss: 0.014498366974294186 2023-01-22 15:41:25.593271: step: 1060/526, loss: 0.006762553472071886 2023-01-22 15:41:26.672459: step: 1064/526, loss: 0.04316618666052818 2023-01-22 15:41:27.732327: step: 1068/526, loss: 0.00620792293921113 2023-01-22 15:41:28.785527: step: 1072/526, loss: 0.005949460435658693 2023-01-22 15:41:29.860130: step: 1076/526, loss: 0.02700575813651085 2023-01-22 15:41:30.924709: step: 1080/526, loss: 0.02261035330593586 2023-01-22 15:41:31.995828: step: 1084/526, loss: 0.012013763189315796 2023-01-22 15:41:33.061382: step: 1088/526, loss: 0.015156416222453117 2023-01-22 15:41:34.137747: step: 1092/526, loss: 0.007228700909763575 2023-01-22 15:41:35.185180: step: 1096/526, loss: 0.011648271232843399 2023-01-22 15:41:36.260593: step: 1100/526, loss: 0.008078822866082191 2023-01-22 15:41:37.338326: step: 1104/526, loss: 0.005949000362306833 2023-01-22 15:41:38.409471: step: 1108/526, loss: 0.010286852717399597 2023-01-22 15:41:39.497826: step: 1112/526, loss: 0.031086308881640434 2023-01-22 15:41:40.573263: step: 1116/526, loss: 0.011280796490609646 2023-01-22 15:41:41.657428: step: 1120/526, loss: 0.004612576682120562 2023-01-22 15:41:42.725164: step: 1124/526, loss: 0.05066349729895592 2023-01-22 15:41:43.812596: step: 1128/526, loss: 0.004325521644204855 2023-01-22 15:41:44.865906: step: 1132/526, loss: 0.022121351212263107 2023-01-22 15:41:45.926897: step: 1136/526, loss: 0.0010887769749388099 2023-01-22 15:41:46.987953: step: 1140/526, loss: 0.018265364691615105 2023-01-22 15:41:48.049134: step: 1144/526, loss: 0.02968595176935196 2023-01-22 15:41:49.113776: step: 1148/526, loss: 0.03147466853260994 2023-01-22 15:41:50.178273: step: 1152/526, loss: 0.02486201375722885 2023-01-22 15:41:51.255412: step: 1156/526, loss: 0.01647029258310795 2023-01-22 15:41:52.311143: step: 1160/526, loss: 0.029502950608730316 2023-01-22 15:41:53.363053: step: 1164/526, loss: 0.02906990610063076 2023-01-22 15:41:54.434068: step: 1168/526, loss: 0.04194151982665062 2023-01-22 15:41:55.503642: step: 1172/526, loss: 0.00660712132230401 2023-01-22 15:41:56.576034: step: 1176/526, loss: 0.011205222457647324 2023-01-22 15:41:57.645280: step: 1180/526, loss: 0.011131997220218182 2023-01-22 15:41:58.694766: step: 1184/526, loss: 0.01965154893696308 2023-01-22 15:41:59.765887: step: 1188/526, loss: 0.01701144129037857 2023-01-22 15:42:00.823569: step: 1192/526, loss: 
0.008872097358107567 2023-01-22 15:42:01.889399: step: 1196/526, loss: 0.021640565246343613 2023-01-22 15:42:02.948099: step: 1200/526, loss: 0.005938517861068249 2023-01-22 15:42:04.000192: step: 1204/526, loss: 0.007166590075939894 2023-01-22 15:42:05.067838: step: 1208/526, loss: 0.07776742428541183 2023-01-22 15:42:06.134515: step: 1212/526, loss: 0.005381354130804539 2023-01-22 15:42:07.192872: step: 1216/526, loss: 0.029926860705018044 2023-01-22 15:42:08.261815: step: 1220/526, loss: 0.00538078136742115 2023-01-22 15:42:09.348757: step: 1224/526, loss: 0.005435564089566469 2023-01-22 15:42:10.411957: step: 1228/526, loss: 0.04002247005701065 2023-01-22 15:42:11.470408: step: 1232/526, loss: 0.0023748199455440044 2023-01-22 15:42:12.534850: step: 1236/526, loss: 0.02900881879031658 2023-01-22 15:42:13.606972: step: 1240/526, loss: 0.01200336217880249 2023-01-22 15:42:14.665837: step: 1244/526, loss: 0.03268972411751747 2023-01-22 15:42:15.749679: step: 1248/526, loss: 0.00803168024867773 2023-01-22 15:42:16.800595: step: 1252/526, loss: 0.01995820552110672 2023-01-22 15:42:17.858661: step: 1256/526, loss: 0.048296958208084106 2023-01-22 15:42:18.940280: step: 1260/526, loss: 0.0035812933929264545 2023-01-22 15:42:20.010760: step: 1264/526, loss: 0.005360682960599661 2023-01-22 15:42:21.070090: step: 1268/526, loss: 0.0019773298408836126 2023-01-22 15:42:22.134234: step: 1272/526, loss: 0.008220542222261429 2023-01-22 15:42:23.195652: step: 1276/526, loss: 0.007411687169224024 2023-01-22 15:42:24.270778: step: 1280/526, loss: 0.009043864905834198 2023-01-22 15:42:25.336866: step: 1284/526, loss: 0.013929463922977448 2023-01-22 15:42:26.408737: step: 1288/526, loss: 0.0 2023-01-22 15:42:27.479921: step: 1292/526, loss: 0.005940048024058342 2023-01-22 15:42:28.546377: step: 1296/526, loss: 0.09276102483272552 2023-01-22 15:42:29.617160: step: 1300/526, loss: 0.008096638135612011 2023-01-22 15:42:30.692118: step: 1304/526, loss: 0.012664354406297207 2023-01-22 15:42:31.742145: step: 1308/526, loss: 0.006375948898494244 2023-01-22 15:42:32.809222: step: 1312/526, loss: 0.06691757589578629 2023-01-22 15:42:33.887415: step: 1316/526, loss: 0.04357099160552025 2023-01-22 15:42:34.973295: step: 1320/526, loss: 0.01275489293038845 2023-01-22 15:42:36.035702: step: 1324/526, loss: 0.007187874987721443 2023-01-22 15:42:37.095694: step: 1328/526, loss: 0.0026793908327817917 2023-01-22 15:42:38.149422: step: 1332/526, loss: 0.013523890636861324 2023-01-22 15:42:39.208508: step: 1336/526, loss: 0.005205713678151369 2023-01-22 15:42:40.274447: step: 1340/526, loss: 0.05279330536723137 2023-01-22 15:42:41.334537: step: 1344/526, loss: 0.006751976907253265 2023-01-22 15:42:42.396096: step: 1348/526, loss: 0.00653742766007781 2023-01-22 15:42:43.469579: step: 1352/526, loss: 0.009395278058946133 2023-01-22 15:42:44.544319: step: 1356/526, loss: 0.008364694193005562 2023-01-22 15:42:45.598605: step: 1360/526, loss: 0.009754997678101063 2023-01-22 15:42:46.678473: step: 1364/526, loss: 0.017432140186429024 2023-01-22 15:42:47.724623: step: 1368/526, loss: 0.010237367823719978 2023-01-22 15:42:48.798624: step: 1372/526, loss: 0.006789594888687134 2023-01-22 15:42:49.870344: step: 1376/526, loss: 0.007290634326636791 2023-01-22 15:42:50.946693: step: 1380/526, loss: 0.02280835248529911 2023-01-22 15:42:52.029167: step: 1384/526, loss: 0.02002943493425846 2023-01-22 15:42:53.090561: step: 1388/526, loss: 0.007367415819317102 2023-01-22 15:42:54.156695: step: 1392/526, loss: 0.02557818405330181 2023-01-22 
15:42:55.223967: step: 1396/526, loss: 0.00872527901083231 2023-01-22 15:42:56.288613: step: 1400/526, loss: 0.009468142874538898 2023-01-22 15:42:57.348913: step: 1404/526, loss: 0.006287826225161552 2023-01-22 15:42:58.394868: step: 1408/526, loss: 0.0003901036689057946 2023-01-22 15:42:59.447086: step: 1412/526, loss: 0.022159673273563385 2023-01-22 15:43:00.524032: step: 1416/526, loss: 0.010757877491414547 2023-01-22 15:43:01.587796: step: 1420/526, loss: 0.004200148396193981 2023-01-22 15:43:02.651291: step: 1424/526, loss: 0.003942762967199087 2023-01-22 15:43:03.695468: step: 1428/526, loss: 0.002182086231186986 2023-01-22 15:43:04.763423: step: 1432/526, loss: 0.005723259411752224 2023-01-22 15:43:05.834709: step: 1436/526, loss: 0.004808911122381687 2023-01-22 15:43:06.907028: step: 1440/526, loss: 0.022685237228870392 2023-01-22 15:43:07.971402: step: 1444/526, loss: 0.027270250022411346 2023-01-22 15:43:09.046388: step: 1448/526, loss: 0.008733347989618778 2023-01-22 15:43:10.099338: step: 1452/526, loss: 0.008996464312076569 2023-01-22 15:43:11.173942: step: 1456/526, loss: 0.010059132240712643 2023-01-22 15:43:12.240749: step: 1460/526, loss: 0.04958635941147804 2023-01-22 15:43:13.327595: step: 1464/526, loss: 0.007838988676667213 2023-01-22 15:43:14.385911: step: 1468/526, loss: 0.014982925727963448 2023-01-22 15:43:15.465885: step: 1472/526, loss: 0.02857663668692112 2023-01-22 15:43:16.551412: step: 1476/526, loss: 0.03424534201622009 2023-01-22 15:43:17.619500: step: 1480/526, loss: 0.005225681234151125 2023-01-22 15:43:18.688217: step: 1484/526, loss: 0.006253308150917292 2023-01-22 15:43:19.752525: step: 1488/526, loss: 0.006416618824005127 2023-01-22 15:43:20.821099: step: 1492/526, loss: 0.005713945254683495 2023-01-22 15:43:21.878312: step: 1496/526, loss: 0.007323291152715683 2023-01-22 15:43:22.945286: step: 1500/526, loss: 0.0054649063386023045 2023-01-22 15:43:24.008561: step: 1504/526, loss: 0.0751451775431633 2023-01-22 15:43:25.067540: step: 1508/526, loss: 0.007875404320657253 2023-01-22 15:43:26.125772: step: 1512/526, loss: 0.016096655279397964 2023-01-22 15:43:27.202713: step: 1516/526, loss: 0.013204229064285755 2023-01-22 15:43:28.267191: step: 1520/526, loss: 0.0037331937346607447 2023-01-22 15:43:29.336982: step: 1524/526, loss: 0.047402508556842804 2023-01-22 15:43:30.412691: step: 1528/526, loss: 0.008372505195438862 2023-01-22 15:43:31.479257: step: 1532/526, loss: 0.0040089040994644165 2023-01-22 15:43:32.554189: step: 1536/526, loss: 0.003782615065574646 2023-01-22 15:43:33.613823: step: 1540/526, loss: 0.0023831261787563562 2023-01-22 15:43:34.680769: step: 1544/526, loss: 0.004488678649067879 2023-01-22 15:43:35.749623: step: 1548/526, loss: 0.0026706543285399675 2023-01-22 15:43:36.841612: step: 1552/526, loss: 0.016022304072976112 2023-01-22 15:43:37.902193: step: 1556/526, loss: 0.016195174306631088 2023-01-22 15:43:38.972680: step: 1560/526, loss: 0.015752162784337997 2023-01-22 15:43:40.044882: step: 1564/526, loss: 0.00617462582886219 2023-01-22 15:43:41.103092: step: 1568/526, loss: 0.00824673194438219 2023-01-22 15:43:42.174076: step: 1572/526, loss: 0.04886094108223915 2023-01-22 15:43:43.237459: step: 1576/526, loss: 0.016282636672258377 2023-01-22 15:43:44.302145: step: 1580/526, loss: 0.005639585200697184 2023-01-22 15:43:45.375286: step: 1584/526, loss: 0.009086117148399353 2023-01-22 15:43:46.453329: step: 1588/526, loss: 0.007528063375502825 2023-01-22 15:43:47.521621: step: 1592/526, loss: 0.0 2023-01-22 15:43:48.576061: step: 
1596/526, loss: 0.0008297221502289176 2023-01-22 15:43:49.640819: step: 1600/526, loss: 0.015622070990502834 2023-01-22 15:43:50.709687: step: 1604/526, loss: 0.0505061000585556 2023-01-22 15:43:51.767013: step: 1608/526, loss: 0.029565712437033653 2023-01-22 15:43:52.837487: step: 1612/526, loss: 0.00864845234900713 2023-01-22 15:43:53.919048: step: 1616/526, loss: 0.002517703687772155 2023-01-22 15:43:54.977269: step: 1620/526, loss: 0.009909183718264103 2023-01-22 15:43:56.049071: step: 1624/526, loss: 0.011386432684957981 2023-01-22 15:43:57.114653: step: 1628/526, loss: 0.030048370361328125 2023-01-22 15:43:58.185214: step: 1632/526, loss: 0.011784379370510578 2023-01-22 15:43:59.235364: step: 1636/526, loss: 0.005533888470381498 2023-01-22 15:44:00.299366: step: 1640/526, loss: 0.009105381555855274 2023-01-22 15:44:01.349886: step: 1644/526, loss: 0.005557889584451914 2023-01-22 15:44:02.419423: step: 1648/526, loss: 0.011499984189867973 2023-01-22 15:44:03.491941: step: 1652/526, loss: 0.013947533443570137 2023-01-22 15:44:04.554570: step: 1656/526, loss: 0.0017300623003393412 2023-01-22 15:44:05.633811: step: 1660/526, loss: 0.0029353441204875708 2023-01-22 15:44:06.705043: step: 1664/526, loss: 0.005586166866123676 2023-01-22 15:44:07.799435: step: 1668/526, loss: 0.06570611894130707 2023-01-22 15:44:08.864119: step: 1672/526, loss: 0.0074541871435940266 2023-01-22 15:44:09.947491: step: 1676/526, loss: 0.003746382426470518 2023-01-22 15:44:11.013789: step: 1680/526, loss: 0.004787694662809372 2023-01-22 15:44:12.084713: step: 1684/526, loss: 0.007458502892404795 2023-01-22 15:44:13.165124: step: 1688/526, loss: 0.029496192932128906 2023-01-22 15:44:14.225505: step: 1692/526, loss: 0.012440165504813194 2023-01-22 15:44:15.297436: step: 1696/526, loss: 0.0144155602902174 2023-01-22 15:44:16.374355: step: 1700/526, loss: 0.008919878862798214 2023-01-22 15:44:17.448934: step: 1704/526, loss: 0.0047838035970926285 2023-01-22 15:44:18.526240: step: 1708/526, loss: 0.015375535003840923 2023-01-22 15:44:19.609217: step: 1712/526, loss: 0.052577659487724304 2023-01-22 15:44:20.670684: step: 1716/526, loss: 0.013651976361870766 2023-01-22 15:44:21.727848: step: 1720/526, loss: 0.018864955753087997 2023-01-22 15:44:22.801587: step: 1724/526, loss: 0.00418910151347518 2023-01-22 15:44:23.859999: step: 1728/526, loss: 0.017243286594748497 2023-01-22 15:44:24.928891: step: 1732/526, loss: 0.010998336598277092 2023-01-22 15:44:25.986856: step: 1736/526, loss: 0.007787647657096386 2023-01-22 15:44:27.063770: step: 1740/526, loss: 0.012227986939251423 2023-01-22 15:44:28.114509: step: 1744/526, loss: 0.031399860978126526 2023-01-22 15:44:29.225427: step: 1748/526, loss: 0.0052993446588516235 2023-01-22 15:44:30.299521: step: 1752/526, loss: 0.022675102576613426 2023-01-22 15:44:31.370758: step: 1756/526, loss: 0.008326425217092037 2023-01-22 15:44:32.431524: step: 1760/526, loss: 0.0073272292502224445 2023-01-22 15:44:33.509370: step: 1764/526, loss: 0.01987486705183983 2023-01-22 15:44:34.563875: step: 1768/526, loss: 0.04475490748882294 2023-01-22 15:44:35.634474: step: 1772/526, loss: 0.004304112400859594 2023-01-22 15:44:36.716876: step: 1776/526, loss: 0.02367010898888111 2023-01-22 15:44:37.780524: step: 1780/526, loss: 0.006102908868342638 2023-01-22 15:44:38.842992: step: 1784/526, loss: 0.0031300713308155537 2023-01-22 15:44:39.915543: step: 1788/526, loss: 0.030115678906440735 2023-01-22 15:44:40.995459: step: 1792/526, loss: 0.05035819113254547 2023-01-22 15:44:42.064221: step: 
1796/526, loss: 0.016981353983283043 2023-01-22 15:44:43.155469: step: 1800/526, loss: 0.007648933213204145 2023-01-22 15:44:44.213704: step: 1804/526, loss: 0.007050811313092709 2023-01-22 15:44:45.287454: step: 1808/526, loss: 0.007111803628504276 2023-01-22 15:44:46.359247: step: 1812/526, loss: 0.049223240464925766 2023-01-22 15:44:47.424470: step: 1816/526, loss: 0.011343998834490776 2023-01-22 15:44:48.497532: step: 1820/526, loss: 0.011715702712535858 2023-01-22 15:44:49.577952: step: 1824/526, loss: 0.0065694102086126804 2023-01-22 15:44:50.638282: step: 1828/526, loss: 0.004104414954781532 2023-01-22 15:44:51.715514: step: 1832/526, loss: 0.05072854459285736 2023-01-22 15:44:52.786896: step: 1836/526, loss: 0.0064126052893698215 2023-01-22 15:44:53.855554: step: 1840/526, loss: 0.004947653040289879 2023-01-22 15:44:54.919844: step: 1844/526, loss: 0.005876810755580664 2023-01-22 15:44:55.984731: step: 1848/526, loss: 0.007638882379978895 2023-01-22 15:44:57.039861: step: 1852/526, loss: 0.026459213346242905 2023-01-22 15:44:58.112203: step: 1856/526, loss: 0.01469198614358902 2023-01-22 15:44:59.187453: step: 1860/526, loss: 0.01027812622487545 2023-01-22 15:45:00.277667: step: 1864/526, loss: 0.01965285837650299 2023-01-22 15:45:01.336808: step: 1868/526, loss: 0.02159073017537594 2023-01-22 15:45:02.414836: step: 1872/526, loss: 0.005820313468575478 2023-01-22 15:45:03.484744: step: 1876/526, loss: 0.0221233032643795 2023-01-22 15:45:04.562541: step: 1880/526, loss: 0.0033949408680200577 2023-01-22 15:45:05.637114: step: 1884/526, loss: 0.050316862761974335 2023-01-22 15:45:06.703329: step: 1888/526, loss: 0.05392443761229515 2023-01-22 15:45:07.784607: step: 1892/526, loss: 0.010367084294557571 2023-01-22 15:45:08.845467: step: 1896/526, loss: 0.012242653407156467 2023-01-22 15:45:09.904197: step: 1900/526, loss: 0.02093501016497612 2023-01-22 15:45:10.981393: step: 1904/526, loss: 0.008596562780439854 2023-01-22 15:45:12.048665: step: 1908/526, loss: 0.00320019805803895 2023-01-22 15:45:13.115089: step: 1912/526, loss: 0.005016467534005642 2023-01-22 15:45:14.183733: step: 1916/526, loss: 0.06825742870569229 2023-01-22 15:45:15.248368: step: 1920/526, loss: 0.004963371902704239 2023-01-22 15:45:16.338670: step: 1924/526, loss: 0.01704205945134163 2023-01-22 15:45:17.401186: step: 1928/526, loss: 0.007099714130163193 2023-01-22 15:45:18.464654: step: 1932/526, loss: 0.012352654710412025 2023-01-22 15:45:19.530453: step: 1936/526, loss: 0.03212769702076912 2023-01-22 15:45:20.600393: step: 1940/526, loss: 0.017122428864240646 2023-01-22 15:45:21.668632: step: 1944/526, loss: 0.009441671893000603 2023-01-22 15:45:22.756647: step: 1948/526, loss: 0.011189322918653488 2023-01-22 15:45:23.835390: step: 1952/526, loss: 0.008775146678090096 2023-01-22 15:45:24.894627: step: 1956/526, loss: 0.004551365040242672 2023-01-22 15:45:25.976843: step: 1960/526, loss: 0.01498501654714346 2023-01-22 15:45:27.051936: step: 1964/526, loss: 0.026638157665729523 2023-01-22 15:45:28.112153: step: 1968/526, loss: 0.01696990244090557 2023-01-22 15:45:29.188099: step: 1972/526, loss: 0.005672011990100145 2023-01-22 15:45:30.253738: step: 1976/526, loss: 0.011845101602375507 2023-01-22 15:45:31.328889: step: 1980/526, loss: 0.006821760442107916 2023-01-22 15:45:32.410479: step: 1984/526, loss: 0.02255084179341793 2023-01-22 15:45:33.477103: step: 1988/526, loss: 6.998753815423697e-05 2023-01-22 15:45:34.558716: step: 1992/526, loss: 0.0037554940208792686 2023-01-22 15:45:35.628780: step: 1996/526, loss: 
0.0038460008800029755 2023-01-22 15:45:36.703678: step: 2000/526, loss: 0.004586793016642332 2023-01-22 15:45:37.796828: step: 2004/526, loss: 0.009389428421854973 2023-01-22 15:45:38.864159: step: 2008/526, loss: 0.009711971506476402 2023-01-22 15:45:39.924634: step: 2012/526, loss: 0.005631749983876944 2023-01-22 15:45:40.995675: step: 2016/526, loss: 0.00222161877900362 2023-01-22 15:45:42.057495: step: 2020/526, loss: 0.00375740067102015 2023-01-22 15:45:43.115659: step: 2024/526, loss: 0.0015059334691613913 2023-01-22 15:45:44.183554: step: 2028/526, loss: 0.0029861547518521547 2023-01-22 15:45:45.275199: step: 2032/526, loss: 0.021876122802495956 2023-01-22 15:45:46.318834: step: 2036/526, loss: 0.004218714311718941 2023-01-22 15:45:47.386345: step: 2040/526, loss: 0.07000627368688583 2023-01-22 15:45:48.448935: step: 2044/526, loss: 0.004202850162982941 2023-01-22 15:45:49.520958: step: 2048/526, loss: 0.0029538043309003115 2023-01-22 15:45:50.622174: step: 2052/526, loss: 0.040568865835666656 2023-01-22 15:45:51.689732: step: 2056/526, loss: 0.010062962770462036 2023-01-22 15:45:52.742780: step: 2060/526, loss: 0.007632994093000889 2023-01-22 15:45:53.804826: step: 2064/526, loss: 0.01092681847512722 2023-01-22 15:45:54.858360: step: 2068/526, loss: 0.023032061755657196 2023-01-22 15:45:55.925681: step: 2072/526, loss: 0.03470155596733093 2023-01-22 15:45:57.031948: step: 2076/526, loss: 0.05821975693106651 2023-01-22 15:45:58.100047: step: 2080/526, loss: 0.04016618803143501 2023-01-22 15:45:59.165176: step: 2084/526, loss: 0.03075486421585083 2023-01-22 15:46:00.216638: step: 2088/526, loss: 0.03850513696670532 2023-01-22 15:46:01.281503: step: 2092/526, loss: 0.00623524934053421 2023-01-22 15:46:02.370210: step: 2096/526, loss: 0.003364350413903594 2023-01-22 15:46:03.439684: step: 2100/526, loss: 0.01366267167031765 2023-01-22 15:46:04.513237: step: 2104/526, loss: 0.01837616041302681 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34777565502183405, 'r': 0.30224146110056926, 'f1': 0.3234137055837564}, 'combined': 0.23830483569329416, 'stategy': 1, 'epoch': 1} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33984919066440805, 'r': 0.2362138605442177, 'f1': 0.27870938488847724}, 'combined': 0.1520233008482603, 'stategy': 1, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31418668446026093, 'r': 0.335052972802024, 'f1': 0.3242845117845118}, 'combined': 0.238946482367535, 'stategy': 1, 'epoch': 1} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34272064379813255, 'r': 0.2649578627716578, 'f1': 0.298863719307969}, 'combined': 0.1630165741679831, 'stategy': 1, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34777565502183405, 'r': 0.30224146110056926, 'f1': 0.3234137055837564}, 'combined': 0.23830483569329416, 'stategy': 1, 'epoch': 1} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33984919066440805, 'r': 0.2362138605442177, 'f1': 0.27870938488847724}, 'combined': 0.1520233008482603, 'stategy': 1, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31418668446026093, 'r': 0.335052972802024, 'f1': 0.3242845117845118}, 'combined': 0.238946482367535, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34272064379813255, 'r': 0.2649578627716578, 'f1': 0.298863719307969}, 'combined': 0.1630165741679831, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 15:49:11.006496: step: 4/526, loss: 0.009650016203522682 2023-01-22 15:49:12.059758: step: 8/526, loss: 0.010013791732490063 2023-01-22 15:49:13.135125: step: 12/526, loss: 0.03347144275903702 2023-01-22 15:49:14.181345: step: 16/526, loss: 0.013068323954939842 2023-01-22 15:49:15.247598: step: 20/526, loss: 0.01097600907087326 2023-01-22 15:49:16.298111: step: 24/526, loss: 
0.019244518131017685 2023-01-22 15:49:17.351245: step: 28/526, loss: 0.012354213744401932 2023-01-22 15:49:18.411592: step: 32/526, loss: 0.014527775347232819 2023-01-22 15:49:19.481174: step: 36/526, loss: 0.0045250277034938335 2023-01-22 15:49:20.566863: step: 40/526, loss: 0.008542371913790703 2023-01-22 15:49:21.612016: step: 44/526, loss: 0.006913589779287577 2023-01-22 15:49:22.672559: step: 48/526, loss: 0.007144640665501356 2023-01-22 15:49:23.703532: step: 52/526, loss: 0.011766207404434681 2023-01-22 15:49:24.784975: step: 56/526, loss: 0.0032600248232483864 2023-01-22 15:49:25.849701: step: 60/526, loss: 0.03873160853981972 2023-01-22 15:49:26.922213: step: 64/526, loss: 0.020658886060118675 2023-01-22 15:49:27.978142: step: 68/526, loss: 0.0038150351028889418 2023-01-22 15:49:29.032104: step: 72/526, loss: 0.008012867532670498 2023-01-22 15:49:30.081653: step: 76/526, loss: 0.008683625608682632 2023-01-22 15:49:31.150846: step: 80/526, loss: 0.007013949099928141 2023-01-22 15:49:32.235709: step: 84/526, loss: 0.004606140777468681 2023-01-22 15:49:33.300021: step: 88/526, loss: 0.01227221917361021 2023-01-22 15:49:34.372991: step: 92/526, loss: 0.007199561223387718 2023-01-22 15:49:35.442204: step: 96/526, loss: 0.057429712265729904 2023-01-22 15:49:36.524049: step: 100/526, loss: 0.00389100331813097 2023-01-22 15:49:37.586916: step: 104/526, loss: 0.00589369423687458 2023-01-22 15:49:38.637378: step: 108/526, loss: 0.012208799831569195 2023-01-22 15:49:39.695832: step: 112/526, loss: 0.034113410860300064 2023-01-22 15:49:40.774643: step: 116/526, loss: 0.005686573684215546 2023-01-22 15:49:41.831184: step: 120/526, loss: 0.00732325529679656 2023-01-22 15:49:42.910848: step: 124/526, loss: 0.06855107843875885 2023-01-22 15:49:43.996621: step: 128/526, loss: 0.008350858464837074 2023-01-22 15:49:45.055795: step: 132/526, loss: 0.0071120294742286205 2023-01-22 15:49:46.127994: step: 136/526, loss: 0.010930932126939297 2023-01-22 15:49:47.202593: step: 140/526, loss: 0.01904200203716755 2023-01-22 15:49:48.271055: step: 144/526, loss: 0.030828766524791718 2023-01-22 15:49:49.351082: step: 148/526, loss: 0.004793122410774231 2023-01-22 15:49:50.424011: step: 152/526, loss: 0.010236724279820919 2023-01-22 15:49:51.497745: step: 156/526, loss: 0.030238352715969086 2023-01-22 15:49:52.592536: step: 160/526, loss: 0.006312841083854437 2023-01-22 15:49:53.652953: step: 164/526, loss: 0.01839604042470455 2023-01-22 15:49:54.733407: step: 168/526, loss: 0.0034752930514514446 2023-01-22 15:49:55.796446: step: 172/526, loss: 0.006147360894829035 2023-01-22 15:49:56.860808: step: 176/526, loss: 0.01481261570006609 2023-01-22 15:49:57.937443: step: 180/526, loss: 0.011082170531153679 2023-01-22 15:49:59.001431: step: 184/526, loss: 0.010606542229652405 2023-01-22 15:50:00.077542: step: 188/526, loss: 0.026972047984600067 2023-01-22 15:50:01.142185: step: 192/526, loss: 0.009704423137009144 2023-01-22 15:50:02.221040: step: 196/526, loss: 0.04423944279551506 2023-01-22 15:50:03.295845: step: 200/526, loss: 0.001866355654783547 2023-01-22 15:50:04.381930: step: 204/526, loss: 0.025922570377588272 2023-01-22 15:50:05.458436: step: 208/526, loss: 0.011625106446444988 2023-01-22 15:50:06.529003: step: 212/526, loss: 0.012148432433605194 2023-01-22 15:50:07.608228: step: 216/526, loss: 0.03693895414471626 2023-01-22 15:50:08.687689: step: 220/526, loss: 0.027484169229865074 2023-01-22 15:50:09.775795: step: 224/526, loss: 0.007740234490483999 2023-01-22 15:50:10.836955: step: 228/526, loss: 
0.004102836828678846 2023-01-22 15:50:11.918242: step: 232/526, loss: 0.004029305651783943 2023-01-22 15:50:12.988964: step: 236/526, loss: 0.005516399163752794 2023-01-22 15:50:14.084852: step: 240/526, loss: 0.013329179026186466 2023-01-22 15:50:15.151355: step: 244/526, loss: 0.021181335672736168 2023-01-22 15:50:16.214820: step: 248/526, loss: 0.02165023237466812 2023-01-22 15:50:17.292267: step: 252/526, loss: 0.004780726041644812 2023-01-22 15:50:18.371069: step: 256/526, loss: 0.003291594795882702 2023-01-22 15:50:19.445635: step: 260/526, loss: 0.02164202742278576 2023-01-22 15:50:20.524229: step: 264/526, loss: 0.007455023936927319 2023-01-22 15:50:21.581184: step: 268/526, loss: 0.012338540516793728 2023-01-22 15:50:22.660163: step: 272/526, loss: 0.022265000268816948 2023-01-22 15:50:23.730293: step: 276/526, loss: 0.0051247356459498405 2023-01-22 15:50:24.803948: step: 280/526, loss: 0.004940933547914028 2023-01-22 15:50:25.879950: step: 284/526, loss: 0.010958746075630188 2023-01-22 15:50:26.971418: step: 288/526, loss: 0.01980961114168167 2023-01-22 15:50:28.040975: step: 292/526, loss: 0.010513312183320522 2023-01-22 15:50:29.125570: step: 296/526, loss: 0.029352642595767975 2023-01-22 15:50:30.186949: step: 300/526, loss: 0.005348069127649069 2023-01-22 15:50:31.250171: step: 304/526, loss: 0.006212959531694651 2023-01-22 15:50:32.331335: step: 308/526, loss: 0.005067212041467428 2023-01-22 15:50:33.421123: step: 312/526, loss: 0.006340665742754936 2023-01-22 15:50:34.501875: step: 316/526, loss: 0.006352726370096207 2023-01-22 15:50:35.584981: step: 320/526, loss: 0.004037776030600071 2023-01-22 15:50:36.650682: step: 324/526, loss: 0.004941687453538179 2023-01-22 15:50:37.744334: step: 328/526, loss: 0.005619700998067856 2023-01-22 15:50:38.824472: step: 332/526, loss: 0.026720736175775528 2023-01-22 15:50:39.898454: step: 336/526, loss: 0.018055040389299393 2023-01-22 15:50:40.960448: step: 340/526, loss: 0.00468561053276062 2023-01-22 15:50:42.017752: step: 344/526, loss: 0.005450468976050615 2023-01-22 15:50:43.093509: step: 348/526, loss: 0.006480196490883827 2023-01-22 15:50:44.163954: step: 352/526, loss: 0.0015610882546752691 2023-01-22 15:50:45.231461: step: 356/526, loss: 0.0013867387315258384 2023-01-22 15:50:46.287698: step: 360/526, loss: 0.025500910356640816 2023-01-22 15:50:47.356495: step: 364/526, loss: 0.0005921070696786046 2023-01-22 15:50:48.436332: step: 368/526, loss: 0.004796150140464306 2023-01-22 15:50:49.514166: step: 372/526, loss: 0.013667273335158825 2023-01-22 15:50:50.593228: step: 376/526, loss: 0.008390936069190502 2023-01-22 15:50:51.677721: step: 380/526, loss: 0.007209073752164841 2023-01-22 15:50:52.746400: step: 384/526, loss: 0.004153812769800425 2023-01-22 15:50:53.837965: step: 388/526, loss: 0.004443436861038208 2023-01-22 15:50:54.916222: step: 392/526, loss: 0.008662360720336437 2023-01-22 15:50:55.997762: step: 396/526, loss: 0.010088438168168068 2023-01-22 15:50:57.089781: step: 400/526, loss: 0.02405945584177971 2023-01-22 15:50:58.161364: step: 404/526, loss: 0.012908042408525944 2023-01-22 15:50:59.229730: step: 408/526, loss: 0.004260818473994732 2023-01-22 15:51:00.295899: step: 412/526, loss: 0.03877304866909981 2023-01-22 15:51:01.381924: step: 416/526, loss: 0.0057965912856161594 2023-01-22 15:51:02.459311: step: 420/526, loss: 0.00363843678496778 2023-01-22 15:51:03.527356: step: 424/526, loss: 0.0026531137991696596 2023-01-22 15:51:04.603804: step: 428/526, loss: 0.023729026317596436 2023-01-22 15:51:05.679958: step: 
432/526, loss: 0.0037401465233415365 2023-01-22 15:51:06.749057: step: 436/526, loss: 0.001971272751688957 2023-01-22 15:51:07.811866: step: 440/526, loss: 0.014780450612306595 2023-01-22 15:51:08.880927: step: 444/526, loss: 0.008082711137831211 2023-01-22 15:51:09.967556: step: 448/526, loss: 0.013978217728435993 2023-01-22 15:51:11.040353: step: 452/526, loss: 0.0030507894698530436 2023-01-22 15:51:12.107183: step: 456/526, loss: 0.005185616668313742 2023-01-22 15:51:13.180209: step: 460/526, loss: 0.012244999408721924 2023-01-22 15:51:14.242978: step: 464/526, loss: 0.018588056787848473 2023-01-22 15:51:15.313929: step: 468/526, loss: 0.010285770520567894 2023-01-22 15:51:16.403293: step: 472/526, loss: 0.008559764362871647 2023-01-22 15:51:17.492950: step: 476/526, loss: 0.05094970762729645 2023-01-22 15:51:18.555850: step: 480/526, loss: 0.015446318313479424 2023-01-22 15:51:19.610731: step: 484/526, loss: 0.008355128578841686 2023-01-22 15:51:20.684667: step: 488/526, loss: 0.007788226939737797 2023-01-22 15:51:21.762766: step: 492/526, loss: 0.04156165570020676 2023-01-22 15:51:22.844749: step: 496/526, loss: 0.02522088587284088 2023-01-22 15:51:23.921585: step: 500/526, loss: 0.011526042595505714 2023-01-22 15:51:24.995053: step: 504/526, loss: 0.002771098632365465 2023-01-22 15:51:26.070888: step: 508/526, loss: 0.007668028119951487 2023-01-22 15:51:27.153814: step: 512/526, loss: 0.00046785661834292114 2023-01-22 15:51:28.221970: step: 516/526, loss: 0.008059462532401085 2023-01-22 15:51:29.299363: step: 520/526, loss: 0.006946189794689417 2023-01-22 15:51:30.374973: step: 524/526, loss: 0.007862059399485588 2023-01-22 15:51:31.446207: step: 528/526, loss: 0.013094890862703323 2023-01-22 15:51:32.530080: step: 532/526, loss: 0.0 2023-01-22 15:51:33.601842: step: 536/526, loss: 0.0189999807626009 2023-01-22 15:51:34.675950: step: 540/526, loss: 0.006072042975574732 2023-01-22 15:51:35.751259: step: 544/526, loss: 0.004083544015884399 2023-01-22 15:51:36.829308: step: 548/526, loss: 0.027086535468697548 2023-01-22 15:51:37.904803: step: 552/526, loss: 0.05943413823843002 2023-01-22 15:51:38.969395: step: 556/526, loss: 0.006296733859926462 2023-01-22 15:51:40.025978: step: 560/526, loss: 0.004672641400247812 2023-01-22 15:51:41.107777: step: 564/526, loss: 0.0034309227485209703 2023-01-22 15:51:42.185959: step: 568/526, loss: 0.01013965904712677 2023-01-22 15:51:43.244282: step: 572/526, loss: 0.004993991460651159 2023-01-22 15:51:44.305506: step: 576/526, loss: 0.003762580454349518 2023-01-22 15:51:45.359707: step: 580/526, loss: 0.006700827274471521 2023-01-22 15:51:46.457257: step: 584/526, loss: 0.0065978216007351875 2023-01-22 15:51:47.528645: step: 588/526, loss: 0.0027285825926810503 2023-01-22 15:51:48.584215: step: 592/526, loss: 0.0026484897825866938 2023-01-22 15:51:49.646970: step: 596/526, loss: 0.008796604350209236 2023-01-22 15:51:50.723355: step: 600/526, loss: 0.02042219042778015 2023-01-22 15:51:51.785571: step: 604/526, loss: 0.008733711205422878 2023-01-22 15:51:52.860875: step: 608/526, loss: 0.0034685188438743353 2023-01-22 15:51:53.937922: step: 612/526, loss: 0.058398570865392685 2023-01-22 15:51:55.002573: step: 616/526, loss: 0.007216060534119606 2023-01-22 15:51:56.067024: step: 620/526, loss: 0.0075833601877093315 2023-01-22 15:51:57.131651: step: 624/526, loss: 0.01524543296545744 2023-01-22 15:51:58.189372: step: 628/526, loss: 0.008459759876132011 2023-01-22 15:51:59.253487: step: 632/526, loss: 0.019246211275458336 2023-01-22 15:52:00.331067: step: 
636/526, loss: 0.01280898880213499 2023-01-22 15:52:01.406356: step: 640/526, loss: 0.010272116400301456 2023-01-22 15:52:02.496916: step: 644/526, loss: 0.004532577469944954 2023-01-22 15:52:03.575354: step: 648/526, loss: 0.04452493414282799 2023-01-22 15:52:04.645895: step: 652/526, loss: 0.025648871436715126 2023-01-22 15:52:05.719450: step: 656/526, loss: 0.004953925032168627 2023-01-22 15:52:06.791944: step: 660/526, loss: 0.007000575307756662 2023-01-22 15:52:07.863463: step: 664/526, loss: 0.0065264273434877396 2023-01-22 15:52:08.938326: step: 668/526, loss: 0.006868980824947357 2023-01-22 15:52:09.997244: step: 672/526, loss: 0.0071555995382368565 2023-01-22 15:52:11.060261: step: 676/526, loss: 0.00946854893118143 2023-01-22 15:52:12.149268: step: 680/526, loss: 0.01051437109708786 2023-01-22 15:52:13.219273: step: 684/526, loss: 0.004051868803799152 2023-01-22 15:52:14.301739: step: 688/526, loss: 0.010793021880090237 2023-01-22 15:52:15.371829: step: 692/526, loss: 0.0088266646489501 2023-01-22 15:52:16.429931: step: 696/526, loss: 0.005774748045951128 2023-01-22 15:52:17.495013: step: 700/526, loss: 0.018759895116090775 2023-01-22 15:52:18.561530: step: 704/526, loss: 0.010701971128582954 2023-01-22 15:52:19.631891: step: 708/526, loss: 0.005979751702398062 2023-01-22 15:52:20.723290: step: 712/526, loss: 0.007589823566377163 2023-01-22 15:52:21.799660: step: 716/526, loss: 0.08392458409070969 2023-01-22 15:52:22.867427: step: 720/526, loss: 0.0021874525118619204 2023-01-22 15:52:23.912851: step: 724/526, loss: 0.016090866178274155 2023-01-22 15:52:24.991245: step: 728/526, loss: 0.018652459606528282 2023-01-22 15:52:26.048000: step: 732/526, loss: 0.007374058477580547 2023-01-22 15:52:27.107494: step: 736/526, loss: 0.004847398493438959 2023-01-22 15:52:28.168412: step: 740/526, loss: 0.004190844018012285 2023-01-22 15:52:29.242596: step: 744/526, loss: 0.004264814779162407 2023-01-22 15:52:30.316031: step: 748/526, loss: 0.023278141394257545 2023-01-22 15:52:31.378261: step: 752/526, loss: 0.005648747552186251 2023-01-22 15:52:32.455333: step: 756/526, loss: 0.007402417249977589 2023-01-22 15:52:33.525099: step: 760/526, loss: 0.002384051214903593 2023-01-22 15:52:34.594422: step: 764/526, loss: 0.013636879622936249 2023-01-22 15:52:35.654481: step: 768/526, loss: 0.0011240590829402208 2023-01-22 15:52:36.759998: step: 772/526, loss: 0.007019934244453907 2023-01-22 15:52:37.837161: step: 776/526, loss: 0.014455851167440414 2023-01-22 15:52:38.898345: step: 780/526, loss: 0.014101763255894184 2023-01-22 15:52:39.960004: step: 784/526, loss: 0.005727006122469902 2023-01-22 15:52:41.023804: step: 788/526, loss: 0.011075682938098907 2023-01-22 15:52:42.091996: step: 792/526, loss: 0.0020008953288197517 2023-01-22 15:52:43.161216: step: 796/526, loss: 0.020557090640068054 2023-01-22 15:52:44.221060: step: 800/526, loss: 0.02410268783569336 2023-01-22 15:52:45.284249: step: 804/526, loss: 0.0063035693019628525 2023-01-22 15:52:46.333877: step: 808/526, loss: 0.004689145367592573 2023-01-22 15:52:47.402151: step: 812/526, loss: 0.01358272135257721 2023-01-22 15:52:48.477299: step: 816/526, loss: 0.003380807815119624 2023-01-22 15:52:49.560078: step: 820/526, loss: 0.011991090141236782 2023-01-22 15:52:50.631577: step: 824/526, loss: 0.0027609998360276222 2023-01-22 15:52:51.703905: step: 828/526, loss: 0.009906318038702011 2023-01-22 15:52:52.787852: step: 832/526, loss: 0.004464716650545597 2023-01-22 15:52:53.871714: step: 836/526, loss: 0.030358489602804184 2023-01-22 
15:52:54.956324: step: 840/526, loss: 0.0034298240207135677 2023-01-22 15:52:56.035800: step: 844/526, loss: 0.0026594619266688824 2023-01-22 15:52:57.107215: step: 848/526, loss: 0.008309874683618546 2023-01-22 15:52:58.176924: step: 852/526, loss: 0.010560836642980576 2023-01-22 15:52:59.250885: step: 856/526, loss: 0.012410067953169346 2023-01-22 15:53:00.317291: step: 860/526, loss: 0.0041047558188438416 2023-01-22 15:53:01.380322: step: 864/526, loss: 0.0036047259345650673 2023-01-22 15:53:02.460062: step: 868/526, loss: 0.08083193004131317 2023-01-22 15:53:03.524732: step: 872/526, loss: 0.004556929226964712 2023-01-22 15:53:04.580467: step: 876/526, loss: 0.007955954410135746 2023-01-22 15:53:05.666603: step: 880/526, loss: 0.04868384078145027 2023-01-22 15:53:06.738352: step: 884/526, loss: 0.003371535800397396 2023-01-22 15:53:07.791954: step: 888/526, loss: 0.006219801492989063 2023-01-22 15:53:08.852623: step: 892/526, loss: 0.007122638635337353 2023-01-22 15:53:09.927089: step: 896/526, loss: 0.0032529502641409636 2023-01-22 15:53:10.991641: step: 900/526, loss: 0.02301887236535549 2023-01-22 15:53:12.056801: step: 904/526, loss: 0.0051975552923977375 2023-01-22 15:53:13.142861: step: 908/526, loss: 0.043318770825862885 2023-01-22 15:53:14.203524: step: 912/526, loss: 0.003209874499589205 2023-01-22 15:53:15.271563: step: 916/526, loss: 0.02868613414466381 2023-01-22 15:53:16.359609: step: 920/526, loss: 0.004495481960475445 2023-01-22 15:53:17.419944: step: 924/526, loss: 0.03996625542640686 2023-01-22 15:53:18.495474: step: 928/526, loss: 0.027455996721982956 2023-01-22 15:53:19.560664: step: 932/526, loss: 0.004166061989963055 2023-01-22 15:53:20.635196: step: 936/526, loss: 0.005743321031332016 2023-01-22 15:53:21.700140: step: 940/526, loss: 0.010885016992688179 2023-01-22 15:53:22.756261: step: 944/526, loss: 0.010835173539817333 2023-01-22 15:53:23.823319: step: 948/526, loss: 0.011836973018944263 2023-01-22 15:53:24.903148: step: 952/526, loss: 0.016941901296377182 2023-01-22 15:53:25.962011: step: 956/526, loss: 0.0031142006628215313 2023-01-22 15:53:27.032875: step: 960/526, loss: 0.010438790544867516 2023-01-22 15:53:28.102869: step: 964/526, loss: 0.01830361969769001 2023-01-22 15:53:29.184523: step: 968/526, loss: 0.008257209323346615 2023-01-22 15:53:30.256321: step: 972/526, loss: 0.005571051966398954 2023-01-22 15:53:31.342375: step: 976/526, loss: 0.005304281134158373 2023-01-22 15:53:32.428291: step: 980/526, loss: 0.01096657570451498 2023-01-22 15:53:33.526315: step: 984/526, loss: 0.027911212295293808 2023-01-22 15:53:34.593608: step: 988/526, loss: 0.011240304447710514 2023-01-22 15:53:35.666601: step: 992/526, loss: 0.06038908660411835 2023-01-22 15:53:36.724135: step: 996/526, loss: 0.01339729130268097 2023-01-22 15:53:37.800263: step: 1000/526, loss: 0.00668367138132453 2023-01-22 15:53:38.874633: step: 1004/526, loss: 0.018606390804052353 2023-01-22 15:53:39.928554: step: 1008/526, loss: 0.0033643865026533604 2023-01-22 15:53:40.987186: step: 1012/526, loss: 0.01883353292942047 2023-01-22 15:53:42.074717: step: 1016/526, loss: 0.006229817867279053 2023-01-22 15:53:43.155529: step: 1020/526, loss: 0.029786109924316406 2023-01-22 15:53:44.212798: step: 1024/526, loss: 0.0017382865771651268 2023-01-22 15:53:45.283305: step: 1028/526, loss: 0.013280978426337242 2023-01-22 15:53:46.356176: step: 1032/526, loss: 0.011007885448634624 2023-01-22 15:53:47.430090: step: 1036/526, loss: 0.0034824381582438946 2023-01-22 15:53:48.503052: step: 1040/526, loss: 
0.005445824004709721 2023-01-22 15:53:49.582092: step: 1044/526, loss: 0.007069554645568132 2023-01-22 15:53:50.649250: step: 1048/526, loss: 0.0007817599689587951 2023-01-22 15:53:51.704209: step: 1052/526, loss: 0.08426336944103241 2023-01-22 15:53:52.774239: step: 1056/526, loss: 0.012588013894855976 2023-01-22 15:53:53.850951: step: 1060/526, loss: 0.00295455870218575 2023-01-22 15:53:54.912261: step: 1064/526, loss: 0.007516203913837671 2023-01-22 15:53:55.982576: step: 1068/526, loss: 0.02468854747712612 2023-01-22 15:53:57.047112: step: 1072/526, loss: 0.0390239879488945 2023-01-22 15:53:58.121675: step: 1076/526, loss: 0.0055543649941682816 2023-01-22 15:53:59.189951: step: 1080/526, loss: 0.030798103660345078 2023-01-22 15:54:00.251391: step: 1084/526, loss: 0.01350131630897522 2023-01-22 15:54:01.321956: step: 1088/526, loss: 0.009989018552005291 2023-01-22 15:54:02.393187: step: 1092/526, loss: 0.006031517405062914 2023-01-22 15:54:03.471578: step: 1096/526, loss: 0.034877825528383255 2023-01-22 15:54:04.540507: step: 1100/526, loss: 0.01338210143148899 2023-01-22 15:54:05.607143: step: 1104/526, loss: 0.011179731227457523 2023-01-22 15:54:06.678627: step: 1108/526, loss: 0.0026474054902791977 2023-01-22 15:54:07.746809: step: 1112/526, loss: 0.00873781368136406 2023-01-22 15:54:08.823495: step: 1116/526, loss: 0.0117159029468894 2023-01-22 15:54:09.891881: step: 1120/526, loss: 0.0076748887076973915 2023-01-22 15:54:10.964103: step: 1124/526, loss: 0.007969887927174568 2023-01-22 15:54:12.032029: step: 1128/526, loss: 0.004495459608733654 2023-01-22 15:54:13.095302: step: 1132/526, loss: 0.019307535141706467 2023-01-22 15:54:14.167142: step: 1136/526, loss: 0.0219793189316988 2023-01-22 15:54:15.247117: step: 1140/526, loss: 0.008263111114501953 2023-01-22 15:54:16.347644: step: 1144/526, loss: 0.015542508102953434 2023-01-22 15:54:17.427064: step: 1148/526, loss: 0.03168462589383125 2023-01-22 15:54:18.488494: step: 1152/526, loss: 0.0023571166675537825 2023-01-22 15:54:19.560850: step: 1156/526, loss: 0.05652833729982376 2023-01-22 15:54:20.621561: step: 1160/526, loss: 0.01070844754576683 2023-01-22 15:54:21.699691: step: 1164/526, loss: 0.0044338093139231205 2023-01-22 15:54:22.779524: step: 1168/526, loss: 0.003867821302264929 2023-01-22 15:54:23.847136: step: 1172/526, loss: 0.024858953431248665 2023-01-22 15:54:24.902771: step: 1176/526, loss: 0.01870400458574295 2023-01-22 15:54:25.985521: step: 1180/526, loss: 0.007217070087790489 2023-01-22 15:54:27.066450: step: 1184/526, loss: 0.00820702500641346 2023-01-22 15:54:28.153688: step: 1188/526, loss: 0.009867927059531212 2023-01-22 15:54:29.214243: step: 1192/526, loss: 0.04338536411523819 2023-01-22 15:54:30.282861: step: 1196/526, loss: 0.01646721176803112 2023-01-22 15:54:31.349164: step: 1200/526, loss: 0.030806325376033783 2023-01-22 15:54:32.418191: step: 1204/526, loss: 0.009647082537412643 2023-01-22 15:54:33.500255: step: 1208/526, loss: 0.025725239887833595 2023-01-22 15:54:34.562820: step: 1212/526, loss: 0.025477230548858643 2023-01-22 15:54:35.643063: step: 1216/526, loss: 0.011215124279260635 2023-01-22 15:54:36.712147: step: 1220/526, loss: 0.02604593336582184 2023-01-22 15:54:37.782702: step: 1224/526, loss: 0.047443535178899765 2023-01-22 15:54:38.848397: step: 1228/526, loss: 0.022466301918029785 2023-01-22 15:54:39.925898: step: 1232/526, loss: 0.005243474151939154 2023-01-22 15:54:40.999125: step: 1236/526, loss: 0.017764508724212646 2023-01-22 15:54:42.061625: step: 1240/526, loss: 
0.0067218998447060585 2023-01-22 15:54:43.151419: step: 1244/526, loss: 0.03833760693669319 2023-01-22 15:54:44.237308: step: 1248/526, loss: 0.00447751535102725 2023-01-22 15:54:45.313723: step: 1252/526, loss: 0.006492044311016798 2023-01-22 15:54:46.389946: step: 1256/526, loss: 0.021420910954475403 2023-01-22 15:54:47.462788: step: 1260/526, loss: 0.011233736760914326 2023-01-22 15:54:48.522637: step: 1264/526, loss: 0.010537364520132542 2023-01-22 15:54:49.601207: step: 1268/526, loss: 0.01400777418166399 2023-01-22 15:54:50.680045: step: 1272/526, loss: 0.0026878828648477793 2023-01-22 15:54:51.747560: step: 1276/526, loss: 0.03409126028418541 2023-01-22 15:54:52.831574: step: 1280/526, loss: 0.012045320123434067 2023-01-22 15:54:53.904433: step: 1284/526, loss: 0.00646931491792202 2023-01-22 15:54:54.978544: step: 1288/526, loss: 0.0 2023-01-22 15:54:56.058927: step: 1292/526, loss: 0.035632483661174774 2023-01-22 15:54:57.125506: step: 1296/526, loss: 0.009531461633741856 2023-01-22 15:54:58.202503: step: 1300/526, loss: 0.008342907764017582 2023-01-22 15:54:59.267718: step: 1304/526, loss: 0.016757989302277565 2023-01-22 15:55:00.333079: step: 1308/526, loss: 0.003976346459239721 2023-01-22 15:55:01.400408: step: 1312/526, loss: 0.010344144888222218 2023-01-22 15:55:02.497283: step: 1316/526, loss: 0.005565538536757231 2023-01-22 15:55:03.566342: step: 1320/526, loss: 0.0057592978700995445 2023-01-22 15:55:04.640703: step: 1324/526, loss: 0.010162352584302425 2023-01-22 15:55:05.723288: step: 1328/526, loss: 0.0221356600522995 2023-01-22 15:55:06.805313: step: 1332/526, loss: 0.017170565202832222 2023-01-22 15:55:07.868445: step: 1336/526, loss: 0.00286900089122355 2023-01-22 15:55:08.952186: step: 1340/526, loss: 0.00820174254477024 2023-01-22 15:55:10.019603: step: 1344/526, loss: 0.014971645548939705 2023-01-22 15:55:11.132269: step: 1348/526, loss: 0.0065859099850058556 2023-01-22 15:55:12.225655: step: 1352/526, loss: 0.0292165819555521 2023-01-22 15:55:13.314649: step: 1356/526, loss: 0.022404586896300316 2023-01-22 15:55:14.396136: step: 1360/526, loss: 0.0029354114085435867 2023-01-22 15:55:15.472892: step: 1364/526, loss: 0.012884167023003101 2023-01-22 15:55:16.544822: step: 1368/526, loss: 0.0014245564816519618 2023-01-22 15:55:17.628169: step: 1372/526, loss: 0.026715008541941643 2023-01-22 15:55:18.699305: step: 1376/526, loss: 0.04357811436057091 2023-01-22 15:55:19.772576: step: 1380/526, loss: 0.004910048563033342 2023-01-22 15:55:20.843124: step: 1384/526, loss: 0.011042262427508831 2023-01-22 15:55:21.920562: step: 1388/526, loss: 0.0014828643761575222 2023-01-22 15:55:22.989506: step: 1392/526, loss: 0.018773356452584267 2023-01-22 15:55:24.062627: step: 1396/526, loss: 0.011864673346281052 2023-01-22 15:55:25.129082: step: 1400/526, loss: 0.00803462602198124 2023-01-22 15:55:26.193900: step: 1404/526, loss: 0.002856550505384803 2023-01-22 15:55:27.272839: step: 1408/526, loss: 0.0033457232639193535 2023-01-22 15:55:28.352636: step: 1412/526, loss: 0.01171930506825447 2023-01-22 15:55:29.422395: step: 1416/526, loss: 0.020162900909781456 2023-01-22 15:55:30.504312: step: 1420/526, loss: 0.01573345437645912 2023-01-22 15:55:31.595887: step: 1424/526, loss: 0.008854788728058338 2023-01-22 15:55:32.668344: step: 1428/526, loss: 0.006366438698023558 2023-01-22 15:55:33.739056: step: 1432/526, loss: 0.04270756617188454 2023-01-22 15:55:34.809545: step: 1436/526, loss: 0.02801160328090191 2023-01-22 15:55:35.880960: step: 1440/526, loss: 0.00948960892856121 2023-01-22 
15:55:36.961380: step: 1444/526, loss: 0.009206300601363182 2023-01-22 15:55:38.049152: step: 1448/526, loss: 0.009319613687694073 2023-01-22 15:55:39.140203: step: 1452/526, loss: 0.008261908777058125 2023-01-22 15:55:40.205273: step: 1456/526, loss: 0.004989591892808676 2023-01-22 15:55:41.278623: step: 1460/526, loss: 0.03424499183893204 2023-01-22 15:55:42.349511: step: 1464/526, loss: 0.006096519995480776 2023-01-22 15:55:43.431457: step: 1468/526, loss: 0.010850897990167141 2023-01-22 15:55:44.503539: step: 1472/526, loss: 0.010049943812191486 2023-01-22 15:55:45.582518: step: 1476/526, loss: 0.004420015029609203 2023-01-22 15:55:46.669488: step: 1480/526, loss: 0.007550597190856934 2023-01-22 15:55:47.745155: step: 1484/526, loss: 0.0037027643993496895 2023-01-22 15:55:48.839056: step: 1488/526, loss: 0.02450309507548809 2023-01-22 15:55:49.908568: step: 1492/526, loss: 0.004156967159360647 2023-01-22 15:55:50.988954: step: 1496/526, loss: 0.0030682943761348724 2023-01-22 15:55:52.081880: step: 1500/526, loss: 0.006102921906858683 2023-01-22 15:55:53.158589: step: 1504/526, loss: 0.004751682281494141 2023-01-22 15:55:54.219398: step: 1508/526, loss: 0.002415277063846588 2023-01-22 15:55:55.293398: step: 1512/526, loss: 0.016951916739344597 2023-01-22 15:55:56.372500: step: 1516/526, loss: 0.012345204129815102 2023-01-22 15:55:57.453845: step: 1520/526, loss: 0.018035629764199257 2023-01-22 15:55:58.527160: step: 1524/526, loss: 0.007199098821729422 2023-01-22 15:55:59.591146: step: 1528/526, loss: 0.04139915481209755 2023-01-22 15:56:00.679808: step: 1532/526, loss: 0.005168642848730087 2023-01-22 15:56:01.755155: step: 1536/526, loss: 0.010584330186247826 2023-01-22 15:56:02.849447: step: 1540/526, loss: 0.006846735253930092 2023-01-22 15:56:03.929599: step: 1544/526, loss: 0.026852548122406006 2023-01-22 15:56:05.001583: step: 1548/526, loss: 0.002502594143152237 2023-01-22 15:56:06.080271: step: 1552/526, loss: 0.035872578620910645 2023-01-22 15:56:07.147972: step: 1556/526, loss: 0.005310544278472662 2023-01-22 15:56:08.216597: step: 1560/526, loss: 0.017590191215276718 2023-01-22 15:56:09.284958: step: 1564/526, loss: 0.0029978761449456215 2023-01-22 15:56:10.348286: step: 1568/526, loss: 0.01053419429808855 2023-01-22 15:56:11.415705: step: 1572/526, loss: 0.0053388746455311775 2023-01-22 15:56:12.490086: step: 1576/526, loss: 0.010215004906058311 2023-01-22 15:56:13.574759: step: 1580/526, loss: 0.023346543312072754 2023-01-22 15:56:14.635598: step: 1584/526, loss: 0.003912911284714937 2023-01-22 15:56:15.706858: step: 1588/526, loss: 0.00445833895355463 2023-01-22 15:56:16.776503: step: 1592/526, loss: 0.004861005581915379 2023-01-22 15:56:17.838888: step: 1596/526, loss: 0.00711582787334919 2023-01-22 15:56:18.916439: step: 1600/526, loss: 0.017639338970184326 2023-01-22 15:56:20.010830: step: 1604/526, loss: 0.004676608834415674 2023-01-22 15:56:21.086947: step: 1608/526, loss: 0.09627410024404526 2023-01-22 15:56:22.175981: step: 1612/526, loss: 0.007182906847447157 2023-01-22 15:56:23.255641: step: 1616/526, loss: 0.009184567257761955 2023-01-22 15:56:24.322980: step: 1620/526, loss: 0.0014865609118714929 2023-01-22 15:56:25.383591: step: 1624/526, loss: 0.005441845860332251 2023-01-22 15:56:26.433740: step: 1628/526, loss: 0.005753014702349901 2023-01-22 15:56:27.507671: step: 1632/526, loss: 0.005112130660563707 2023-01-22 15:56:28.596855: step: 1636/526, loss: 0.007921729236841202 2023-01-22 15:56:29.665346: step: 1640/526, loss: 0.0006049563526175916 2023-01-22 
15:56:30.741766: step: 1644/526, loss: 0.021332260221242905 2023-01-22 15:56:31.814063: step: 1648/526, loss: 0.010541192255914211 2023-01-22 15:56:32.899385: step: 1652/526, loss: 0.002833027159795165 2023-01-22 15:56:33.962999: step: 1656/526, loss: 0.008171668276190758 2023-01-22 15:56:35.024573: step: 1660/526, loss: 0.027608271688222885 2023-01-22 15:56:36.089884: step: 1664/526, loss: 0.0032521956600248814 2023-01-22 15:56:37.145261: step: 1668/526, loss: 0.0023409565910696983 2023-01-22 15:56:38.216907: step: 1672/526, loss: 0.006066160276532173 2023-01-22 15:56:39.294324: step: 1676/526, loss: 0.023671183735132217 2023-01-22 15:56:40.361514: step: 1680/526, loss: 0.04049052298069 2023-01-22 15:56:41.432037: step: 1684/526, loss: 0.03451811522245407 2023-01-22 15:56:42.506617: step: 1688/526, loss: 0.02045125514268875 2023-01-22 15:56:43.566123: step: 1692/526, loss: 0.0036604974884539843 2023-01-22 15:56:44.635758: step: 1696/526, loss: 0.018069837242364883 2023-01-22 15:56:45.698343: step: 1700/526, loss: 0.03992658108472824 2023-01-22 15:56:46.783705: step: 1704/526, loss: 0.03465672582387924 2023-01-22 15:56:47.859282: step: 1708/526, loss: 0.018688617274165154 2023-01-22 15:56:48.939333: step: 1712/526, loss: 0.026766885071992874 2023-01-22 15:56:49.996354: step: 1716/526, loss: 0.02549033984541893 2023-01-22 15:56:51.050428: step: 1720/526, loss: 0.007111749611794949 2023-01-22 15:56:52.109116: step: 1724/526, loss: 0.014270732179284096 2023-01-22 15:56:53.183692: step: 1728/526, loss: 0.0008265099604614079 2023-01-22 15:56:54.237576: step: 1732/526, loss: 0.023535097017884254 2023-01-22 15:56:55.287310: step: 1736/526, loss: 0.012633095495402813 2023-01-22 15:56:56.354928: step: 1740/526, loss: 0.01793331652879715 2023-01-22 15:56:57.414070: step: 1744/526, loss: 0.00403120880946517 2023-01-22 15:56:58.470586: step: 1748/526, loss: 0.0001715904800221324 2023-01-22 15:56:59.540634: step: 1752/526, loss: 0.0037404517643153667 2023-01-22 15:57:00.592714: step: 1756/526, loss: 0.008556806482374668 2023-01-22 15:57:01.650862: step: 1760/526, loss: 0.07043598592281342 2023-01-22 15:57:02.728769: step: 1764/526, loss: 0.006439608987420797 2023-01-22 15:57:03.784606: step: 1768/526, loss: 0.02019934169948101 2023-01-22 15:57:04.844872: step: 1772/526, loss: 0.007826108485460281 2023-01-22 15:57:05.904562: step: 1776/526, loss: 0.01723911426961422 2023-01-22 15:57:06.984167: step: 1780/526, loss: 0.022307654842734337 2023-01-22 15:57:08.083642: step: 1784/526, loss: 0.010500245727598667 2023-01-22 15:57:09.142026: step: 1788/526, loss: 0.04364487901329994 2023-01-22 15:57:10.209921: step: 1792/526, loss: 0.0074179829098284245 2023-01-22 15:57:11.273382: step: 1796/526, loss: 0.010694097727537155 2023-01-22 15:57:12.343331: step: 1800/526, loss: 0.005141766741871834 2023-01-22 15:57:13.411993: step: 1804/526, loss: 0.023395854979753494 2023-01-22 15:57:14.479676: step: 1808/526, loss: 0.009554535150527954 2023-01-22 15:57:15.545931: step: 1812/526, loss: 0.006269097328186035 2023-01-22 15:57:16.627394: step: 1816/526, loss: 0.004594143014401197 2023-01-22 15:57:17.694843: step: 1820/526, loss: 0.02031327784061432 2023-01-22 15:57:18.766411: step: 1824/526, loss: 0.0019776553381234407 2023-01-22 15:57:19.840873: step: 1828/526, loss: 0.002862253226339817 2023-01-22 15:57:20.925422: step: 1832/526, loss: 0.010650093667209148 2023-01-22 15:57:21.986974: step: 1836/526, loss: 0.04844445735216141 2023-01-22 15:57:23.048036: step: 1840/526, loss: 0.02523704245686531 2023-01-22 
15:57:24.120714: step: 1844/526, loss: 0.019803304225206375 2023-01-22 15:57:25.194842: step: 1848/526, loss: 0.04520859941840172 2023-01-22 15:57:26.262551: step: 1852/526, loss: 0.004723967052996159 2023-01-22 15:57:27.323153: step: 1856/526, loss: 0.01091139018535614 2023-01-22 15:57:28.393663: step: 1860/526, loss: 0.011228218674659729 2023-01-22 15:57:29.474136: step: 1864/526, loss: 0.007062103133648634 2023-01-22 15:57:30.528300: step: 1868/526, loss: 0.008993699215352535 2023-01-22 15:57:31.586418: step: 1872/526, loss: 0.013751370832324028 2023-01-22 15:57:32.660496: step: 1876/526, loss: 0.008922052569687366 2023-01-22 15:57:33.739769: step: 1880/526, loss: 0.0034748322796076536 2023-01-22 15:57:34.801832: step: 1884/526, loss: 0.013804436661303043 2023-01-22 15:57:35.867791: step: 1888/526, loss: 0.0052545093931257725 2023-01-22 15:57:36.941626: step: 1892/526, loss: 0.005114257801324129 2023-01-22 15:57:38.049447: step: 1896/526, loss: 0.006395290605723858 2023-01-22 15:57:39.114046: step: 1900/526, loss: 0.005421869922429323 2023-01-22 15:57:40.176639: step: 1904/526, loss: 0.0018781094113364816 2023-01-22 15:57:41.231318: step: 1908/526, loss: 0.011902541853487492 2023-01-22 15:57:42.290842: step: 1912/526, loss: 0.010770831257104874 2023-01-22 15:57:43.344196: step: 1916/526, loss: 0.02877545729279518 2023-01-22 15:57:44.411328: step: 1920/526, loss: 0.0035146879963576794 2023-01-22 15:57:45.464092: step: 1924/526, loss: 0.03203630819916725 2023-01-22 15:57:46.523833: step: 1928/526, loss: 0.0071519771590828896 2023-01-22 15:57:47.590230: step: 1932/526, loss: 0.005815187469124794 2023-01-22 15:57:48.673353: step: 1936/526, loss: 0.003906155237928033 2023-01-22 15:57:49.725862: step: 1940/526, loss: 0.0051356470212340355 2023-01-22 15:57:50.789265: step: 1944/526, loss: 0.005927410442382097 2023-01-22 15:57:51.882187: step: 1948/526, loss: 0.010274640284478664 2023-01-22 15:57:52.945384: step: 1952/526, loss: 0.0034792316146194935 2023-01-22 15:57:53.997291: step: 1956/526, loss: 0.0031065037474036217 2023-01-22 15:57:55.060615: step: 1960/526, loss: 0.03285900503396988 2023-01-22 15:57:56.130184: step: 1964/526, loss: 0.0037486539222300053 2023-01-22 15:57:57.191231: step: 1968/526, loss: 0.013327890075743198 2023-01-22 15:57:58.258253: step: 1972/526, loss: 0.003992380574345589 2023-01-22 15:57:59.323590: step: 1976/526, loss: 0.002244959818199277 2023-01-22 15:58:00.395468: step: 1980/526, loss: 0.07178151607513428 2023-01-22 15:58:01.478791: step: 1984/526, loss: 0.050462786108255386 2023-01-22 15:58:02.539301: step: 1988/526, loss: 0.007395831402391195 2023-01-22 15:58:03.612816: step: 1992/526, loss: 0.00534437969326973 2023-01-22 15:58:04.666969: step: 1996/526, loss: 0.011167893186211586 2023-01-22 15:58:05.734491: step: 2000/526, loss: 0.004772778134793043 2023-01-22 15:58:06.791269: step: 2004/526, loss: 0.006650469731539488 2023-01-22 15:58:07.853137: step: 2008/526, loss: 0.010275108739733696 2023-01-22 15:58:08.931119: step: 2012/526, loss: 0.010766251012682915 2023-01-22 15:58:10.001189: step: 2016/526, loss: 0.0193684883415699 2023-01-22 15:58:11.067208: step: 2020/526, loss: 0.008803722448647022 2023-01-22 15:58:12.141693: step: 2024/526, loss: 0.002253290032967925 2023-01-22 15:58:13.210248: step: 2028/526, loss: 0.010889406315982342 2023-01-22 15:58:14.267195: step: 2032/526, loss: 0.003997150808572769 2023-01-22 15:58:15.332461: step: 2036/526, loss: 0.006157516036182642 2023-01-22 15:58:16.399535: step: 2040/526, loss: 0.06259596347808838 2023-01-22 
15:58:17.472611: step: 2044/526, loss: 0.0840030312538147 2023-01-22 15:58:18.545097: step: 2048/526, loss: 0.035455912351608276 2023-01-22 15:58:19.626581: step: 2052/526, loss: 0.02177589014172554 2023-01-22 15:58:20.702207: step: 2056/526, loss: 0.007078463677316904 2023-01-22 15:58:21.770975: step: 2060/526, loss: 0.007366360165178776 2023-01-22 15:58:22.854571: step: 2064/526, loss: 0.01395875308662653 2023-01-22 15:58:23.917118: step: 2068/526, loss: 0.013152029365301132 2023-01-22 15:58:25.003494: step: 2072/526, loss: 0.002955480944365263 2023-01-22 15:58:26.058081: step: 2076/526, loss: 0.0292070172727108 2023-01-22 15:58:27.127437: step: 2080/526, loss: 0.011541560292243958 2023-01-22 15:58:28.191984: step: 2084/526, loss: 0.010321681387722492 2023-01-22 15:58:29.264362: step: 2088/526, loss: 0.015211386606097221 2023-01-22 15:58:30.327043: step: 2092/526, loss: 0.004252570681273937 2023-01-22 15:58:31.388237: step: 2096/526, loss: 0.003494285512715578 2023-01-22 15:58:32.440941: step: 2100/526, loss: 0.030963025987148285 2023-01-22 15:58:33.503830: step: 2104/526, loss: 0.007188418880105019
==================================================
Loss: 0.014
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3382711038961039, 'r': 0.2965488614800759, 'f1': 0.31603892821031343}, 'combined': 0.23287078920759935, 'stategy': 1, 'epoch': 2}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.328763235821967, 'r': 0.23964792647828362, 'f1': 0.2772198471549637}, 'combined': 0.15121082572088929, 'stategy': 1, 'epoch': 2}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30640997673065734, 'r': 0.3331554395951929, 'f1': 0.31922348484848484}, 'combined': 0.23521730462519935, 'stategy': 1, 'epoch': 2}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3343915369005925, 'r': 0.26953242818794326, 'f1': 0.2984791732618257}, 'combined': 0.16280682177917766, 'stategy': 1, 'epoch': 2}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32034835913839954, 'r': 0.3312900488243411, 'f1': 0.3257273427806488}, 'combined': 0.24000962099626752, 'stategy': 1, 'epoch': 2}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32768760715407647, 'r': 0.27352189309481817, 'f1': 0.298164731646417}, 'combined': 0.1626353081707729, 'stategy': 1, 'epoch': 2}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3252032520325203, 'r': 0.38095238095238093, 'f1': 0.3508771929824561}, 'combined': 0.23391812865497075, 'stategy': 1, 'epoch': 2}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3425925925925926, 'r': 0.40217391304347827, 'f1': 0.37}, 'combined': 0.185, 'stategy': 1, 'epoch': 2}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 2}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34777565502183405, 'r': 0.30224146110056926, 'f1': 0.3234137055837564}, 'combined': 0.23830483569329416, 'stategy': 1, 'epoch': 1}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33984919066440805, 'r': 0.2362138605442177, 'f1': 0.27870938488847724}, 'combined': 0.1520233008482603, 'stategy': 1, 'epoch': 1}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 1}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31418668446026093, 'r': 0.335052972802024, 'f1': 0.3242845117845118}, 'combined': 0.238946482367535, 'stategy': 1, 'epoch': 1}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34272064379813255, 'r': 0.2649578627716578, 'f1': 0.298863719307969}, 'combined': 0.1630165741679831, 'stategy': 1, 'epoch': 1}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 1}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1}
******************************
Epoch: 3
command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 16:01:23.392174: step: 4/526, loss: 0.01453336887061596 2023-01-22 16:01:24.446129: step: 8/526, loss: 0.00482061505317688 2023-01-22 16:01:25.494766: step: 12/526, loss: 0.019346054643392563 2023-01-22 16:01:26.556134: step: 16/526, loss: 0.0028248419985175133 2023-01-22 16:01:27.605377: step: 20/526, loss: 0.001616541645489633 2023-01-22 16:01:28.708913: step: 24/526, loss: 0.018345719203352928 2023-01-22 16:01:29.765339: step: 28/526, loss: 0.002214467851445079 2023-01-22 16:01:30.815409: step: 32/526, loss: 0.006031076889485121 2023-01-22 16:01:31.876819: step: 36/526, loss: 0.021169841289520264 2023-01-22 16:01:32.934925: step: 40/526, loss: 0.014332202263176441 2023-01-22 16:01:33.996052: step: 44/526, loss: 0.01166242454200983 2023-01-22 16:01:35.061162: step: 48/526, loss: 0.007443530485033989 2023-01-22 16:01:36.136449: step: 52/526, loss: 0.006939054466784 2023-01-22 16:01:37.177956: step: 56/526, loss: 0.00618600333109498 2023-01-22 16:01:38.244930: step: 60/526, loss: 0.049402832984924316 2023-01-22 16:01:39.290305: step: 64/526, loss: 0.003533771028742194 2023-01-22 16:01:40.359189: step: 68/526, loss: 0.006433280650526285 2023-01-22 16:01:41.428150: step: 72/526, loss: 0.015450327657163143 2023-01-22 16:01:42.502552: step: 76/526, loss:
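The 'combined' score in each evaluation block above is consistent with the product of the template F1 and the slot F1, where each F1 is the usual harmonic mean of precision and recall. A minimal sketch that reproduces the epoch-2 Dev Chinese numbers under that reading of the log (the helper below is illustrative, not code from train.py):

```python
def f1(p, r):
    """Harmonic mean of precision and recall (0.0 if both are zero)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Epoch-2 "Dev Chinese" precision/recall values from the block above:
template_f1 = f1(1.0, 0.5833333333333334)             # ~0.7368421052631579
slot_f1 = f1(0.3382711038961039, 0.2965488614800759)  # ~0.3160389...
combined = template_f1 * slot_f1                       # ~0.2328707..., the 'combined' score
print(template_f1, slot_f1, combined)
```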
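Within each epoch the logged step counter advances by 4, matching --accumulate_step 4, and it reaches 2104 = 4 x 526 at the end of the epoch, so the '/526' denominator appears to count optimizer updates rather than consumed batches. A minimal sketch of that standard gradient-accumulation pattern, assuming a typical PyTorch-style loop (the names below are illustrative, not from train.py):

```python
ACCUMULATE_STEP = 4  # matches --accumulate_step in the command above

def train_epoch(model, loader, optimizer):
    """Hypothetical accumulation loop: ~4x more batches than optimizer updates."""
    optimizer.zero_grad()
    for batch_idx, batch in enumerate(loader, start=1):
        loss = model(batch)                  # assumes the model returns a scalar loss
        (loss / ACCUMULATE_STEP).backward()  # scale so accumulated gradients average out
        if batch_idx % ACCUMULATE_STEP == 0:
            optimizer.step()                 # one optimizer update per 4 batches
            optimizer.zero_grad()
```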
0.013647615909576416 2023-01-22 16:01:43.593337: step: 80/526, loss: 0.01516517624258995 2023-01-22 16:01:44.661507: step: 84/526, loss: 0.009910054504871368 2023-01-22 16:01:45.713326: step: 88/526, loss: 0.0041787223890423775 2023-01-22 16:01:46.768143: step: 92/526, loss: 0.007803673390299082 2023-01-22 16:01:47.838294: step: 96/526, loss: 0.004188187420368195 2023-01-22 16:01:48.905982: step: 100/526, loss: 0.03430202230811119 2023-01-22 16:01:49.978920: step: 104/526, loss: 0.01833048276603222 2023-01-22 16:01:51.055076: step: 108/526, loss: 0.008701053448021412 2023-01-22 16:01:52.129069: step: 112/526, loss: 0.005170323420315981 2023-01-22 16:01:53.185174: step: 116/526, loss: 0.0031774239614605904 2023-01-22 16:01:54.260027: step: 120/526, loss: 0.014394068159162998 2023-01-22 16:01:55.341174: step: 124/526, loss: 0.007874553091824055 2023-01-22 16:01:56.406250: step: 128/526, loss: 0.05461150407791138 2023-01-22 16:01:57.457952: step: 132/526, loss: 0.006642511114478111 2023-01-22 16:01:58.531725: step: 136/526, loss: 0.013262495398521423 2023-01-22 16:01:59.600473: step: 140/526, loss: 0.008064507506787777 2023-01-22 16:02:00.674100: step: 144/526, loss: 0.007895958609879017 2023-01-22 16:02:01.755171: step: 148/526, loss: 6.85572813381441e-05 2023-01-22 16:02:02.812096: step: 152/526, loss: 0.0062192995101213455 2023-01-22 16:02:03.897384: step: 156/526, loss: 0.048363275825977325 2023-01-22 16:02:04.954103: step: 160/526, loss: 0.001769882976077497 2023-01-22 16:02:06.038550: step: 164/526, loss: 0.004625684581696987 2023-01-22 16:02:07.092577: step: 168/526, loss: 0.02058299072086811 2023-01-22 16:02:08.168353: step: 172/526, loss: 0.009122991003096104 2023-01-22 16:02:09.232335: step: 176/526, loss: 0.001211250782944262 2023-01-22 16:02:10.304586: step: 180/526, loss: 0.009969021193683147 2023-01-22 16:02:11.380135: step: 184/526, loss: 0.00723232189193368 2023-01-22 16:02:12.431166: step: 188/526, loss: 0.008864911273121834 2023-01-22 16:02:13.522016: step: 192/526, loss: 0.007698389235883951 2023-01-22 16:02:14.614838: step: 196/526, loss: 0.011442573741078377 2023-01-22 16:02:15.694390: step: 200/526, loss: 0.020363764837384224 2023-01-22 16:02:16.760747: step: 204/526, loss: 0.004419893492013216 2023-01-22 16:02:17.821128: step: 208/526, loss: 0.022030601277947426 2023-01-22 16:02:18.892359: step: 212/526, loss: 0.006434077396988869 2023-01-22 16:02:19.978927: step: 216/526, loss: 0.016334280371665955 2023-01-22 16:02:21.088110: step: 220/526, loss: 0.015567542053759098 2023-01-22 16:02:22.149212: step: 224/526, loss: 0.028035342693328857 2023-01-22 16:02:23.229705: step: 228/526, loss: 0.006301909685134888 2023-01-22 16:02:24.297635: step: 232/526, loss: 0.023465782403945923 2023-01-22 16:02:25.358315: step: 236/526, loss: 0.004896972328424454 2023-01-22 16:02:26.434388: step: 240/526, loss: 0.0032686295453459024 2023-01-22 16:02:27.508967: step: 244/526, loss: 0.0016423244960606098 2023-01-22 16:02:28.581188: step: 248/526, loss: 0.005847712513059378 2023-01-22 16:02:29.674072: step: 252/526, loss: 0.017023583874106407 2023-01-22 16:02:30.785623: step: 256/526, loss: 0.02328610047698021 2023-01-22 16:02:31.867850: step: 260/526, loss: 0.006989973597228527 2023-01-22 16:02:32.930127: step: 264/526, loss: 0.02049330808222294 2023-01-22 16:02:33.986430: step: 268/526, loss: 0.01033694576472044 2023-01-22 16:02:35.064101: step: 272/526, loss: 0.003345879027619958 2023-01-22 16:02:36.169822: step: 276/526, loss: 0.027556832879781723 2023-01-22 16:02:37.245688: step: 280/526, 
loss: 0.0013843950582668185 2023-01-22 16:02:38.314970: step: 284/526, loss: 0.00024239910999312997 2023-01-22 16:02:39.379620: step: 288/526, loss: 0.0097554512321949 2023-01-22 16:02:40.456977: step: 292/526, loss: 0.01125571969896555 2023-01-22 16:02:41.507459: step: 296/526, loss: 0.007423019502311945 2023-01-22 16:02:42.587170: step: 300/526, loss: 0.00855428259819746 2023-01-22 16:02:43.680974: step: 304/526, loss: 0.0006459562573581934 2023-01-22 16:02:44.768779: step: 308/526, loss: 0.028279097750782967 2023-01-22 16:02:45.863451: step: 312/526, loss: 0.01437902171164751 2023-01-22 16:02:46.939502: step: 316/526, loss: 0.02789865992963314 2023-01-22 16:02:48.013571: step: 320/526, loss: 0.01503366231918335 2023-01-22 16:02:49.089491: step: 324/526, loss: 0.002005531219765544 2023-01-22 16:02:50.167768: step: 328/526, loss: 0.0030598067678511143 2023-01-22 16:02:51.244466: step: 332/526, loss: 0.018018824979662895 2023-01-22 16:02:52.320182: step: 336/526, loss: 0.006739713251590729 2023-01-22 16:02:53.388109: step: 340/526, loss: 0.024852771311998367 2023-01-22 16:02:54.460117: step: 344/526, loss: 0.004392530769109726 2023-01-22 16:02:55.532599: step: 348/526, loss: 0.017079133540391922 2023-01-22 16:02:56.620274: step: 352/526, loss: 0.013240063562989235 2023-01-22 16:02:57.696616: step: 356/526, loss: 0.006750912871211767 2023-01-22 16:02:58.775808: step: 360/526, loss: 0.0078098042868077755 2023-01-22 16:02:59.854161: step: 364/526, loss: 0.012694346718490124 2023-01-22 16:03:00.946215: step: 368/526, loss: 0.006354122888296843 2023-01-22 16:03:02.018605: step: 372/526, loss: 0.0067864893935620785 2023-01-22 16:03:03.086875: step: 376/526, loss: 0.0327744223177433 2023-01-22 16:03:04.149820: step: 380/526, loss: 0.016337445005774498 2023-01-22 16:03:05.239010: step: 384/526, loss: 0.0034072063863277435 2023-01-22 16:03:06.313897: step: 388/526, loss: 0.00803084671497345 2023-01-22 16:03:07.390213: step: 392/526, loss: 0.050930123776197433 2023-01-22 16:03:08.465115: step: 396/526, loss: 0.009968280792236328 2023-01-22 16:03:09.545295: step: 400/526, loss: 0.008098617196083069 2023-01-22 16:03:10.627817: step: 404/526, loss: 0.004890930373221636 2023-01-22 16:03:11.695856: step: 408/526, loss: 0.006930863484740257 2023-01-22 16:03:12.775536: step: 412/526, loss: 0.012169006280601025 2023-01-22 16:03:13.856025: step: 416/526, loss: 0.04285794496536255 2023-01-22 16:03:14.921654: step: 420/526, loss: 0.003666388802230358 2023-01-22 16:03:16.007594: step: 424/526, loss: 0.012774799950420856 2023-01-22 16:03:17.100322: step: 428/526, loss: 0.007495295722037554 2023-01-22 16:03:18.174592: step: 432/526, loss: 0.014199631288647652 2023-01-22 16:03:19.237007: step: 436/526, loss: 0.009778316132724285 2023-01-22 16:03:20.292272: step: 440/526, loss: 0.060826994478702545 2023-01-22 16:03:21.371068: step: 444/526, loss: 0.004873138852417469 2023-01-22 16:03:22.429441: step: 448/526, loss: 0.013729703612625599 2023-01-22 16:03:23.509555: step: 452/526, loss: 0.05647847056388855 2023-01-22 16:03:24.590800: step: 456/526, loss: 0.009778296574950218 2023-01-22 16:03:25.660079: step: 460/526, loss: 0.006020212080329657 2023-01-22 16:03:26.742371: step: 464/526, loss: 0.012150839902460575 2023-01-22 16:03:27.811689: step: 468/526, loss: 0.006403443403542042 2023-01-22 16:03:28.897175: step: 472/526, loss: 0.02598799578845501 2023-01-22 16:03:29.981218: step: 476/526, loss: 0.004478775896131992 2023-01-22 16:03:31.057364: step: 480/526, loss: 0.022910412400960922 2023-01-22 16:03:32.111052: 
step: 484/526, loss: 0.02777860499918461 2023-01-22 16:03:33.184983: step: 488/526, loss: 0.037520118057727814 2023-01-22 16:03:34.265493: step: 492/526, loss: 0.002860916079953313 2023-01-22 16:03:35.341149: step: 496/526, loss: 0.0039716255851089954 2023-01-22 16:03:36.419845: step: 500/526, loss: 0.03581337258219719 2023-01-22 16:03:37.483336: step: 504/526, loss: 0.0014725279761478305 2023-01-22 16:03:38.560228: step: 508/526, loss: 0.005930229555815458 2023-01-22 16:03:39.623183: step: 512/526, loss: 0.0031955912709236145 2023-01-22 16:03:40.686854: step: 516/526, loss: 0.006874459329992533 2023-01-22 16:03:41.785116: step: 520/526, loss: 0.008637451566755772 2023-01-22 16:03:42.862635: step: 524/526, loss: 0.03221502527594566 2023-01-22 16:03:43.923474: step: 528/526, loss: 0.01700977422297001 2023-01-22 16:03:45.003946: step: 532/526, loss: 0.007120462600141764 2023-01-22 16:03:46.054587: step: 536/526, loss: 0.005697491113096476 2023-01-22 16:03:47.128061: step: 540/526, loss: 0.013995268382132053 2023-01-22 16:03:48.189770: step: 544/526, loss: 0.043236516416072845 2023-01-22 16:03:49.254831: step: 548/526, loss: 0.0010032171849161386 2023-01-22 16:03:50.334872: step: 552/526, loss: 0.003636823734268546 2023-01-22 16:03:51.383152: step: 556/526, loss: 0.003714676247909665 2023-01-22 16:03:52.449371: step: 560/526, loss: 0.06154327467083931 2023-01-22 16:03:53.519096: step: 564/526, loss: 0.0006883384194225073 2023-01-22 16:03:54.582846: step: 568/526, loss: 0.016375605016946793 2023-01-22 16:03:55.655458: step: 572/526, loss: 0.006623808294534683 2023-01-22 16:03:56.718628: step: 576/526, loss: 0.0106643782928586 2023-01-22 16:03:57.775126: step: 580/526, loss: 0.011396318674087524 2023-01-22 16:03:58.844675: step: 584/526, loss: 0.044407010078430176 2023-01-22 16:03:59.909689: step: 588/526, loss: 0.05138601362705231 2023-01-22 16:04:00.968062: step: 592/526, loss: 0.013208644464612007 2023-01-22 16:04:02.029410: step: 596/526, loss: 0.009223378263413906 2023-01-22 16:04:03.114292: step: 600/526, loss: 0.003958255518227816 2023-01-22 16:04:04.183959: step: 604/526, loss: 0.020762505009770393 2023-01-22 16:04:05.236959: step: 608/526, loss: 0.02664206735789776 2023-01-22 16:04:06.300205: step: 612/526, loss: 0.01851213350892067 2023-01-22 16:04:07.367161: step: 616/526, loss: 0.01115064974874258 2023-01-22 16:04:08.436086: step: 620/526, loss: 0.009437215514481068 2023-01-22 16:04:09.498860: step: 624/526, loss: 0.015818025916814804 2023-01-22 16:04:10.561407: step: 628/526, loss: 0.022757917642593384 2023-01-22 16:04:11.630448: step: 632/526, loss: 0.004255269188433886 2023-01-22 16:04:12.695683: step: 636/526, loss: 0.007974151521921158 2023-01-22 16:04:13.769470: step: 640/526, loss: 0.03277970105409622 2023-01-22 16:04:14.840604: step: 644/526, loss: 0.00660000741481781 2023-01-22 16:04:15.886316: step: 648/526, loss: 0.00025135863688774407 2023-01-22 16:04:16.943805: step: 652/526, loss: 0.014811350964009762 2023-01-22 16:04:18.026911: step: 656/526, loss: 0.009015078656375408 2023-01-22 16:04:19.098007: step: 660/526, loss: 0.008203686214983463 2023-01-22 16:04:20.170419: step: 664/526, loss: 0.0030238081235438585 2023-01-22 16:04:21.234962: step: 668/526, loss: 0.027519404888153076 2023-01-22 16:04:22.309429: step: 672/526, loss: 0.009931741282343864 2023-01-22 16:04:23.380713: step: 676/526, loss: 0.00881365966051817 2023-01-22 16:04:24.439784: step: 680/526, loss: 0.0015198359033092856 2023-01-22 16:04:25.494076: step: 684/526, loss: 0.03769846633076668 2023-01-22 
16:04:26.551296: step: 688/526, loss: 0.0 2023-01-22 16:04:27.607987: step: 692/526, loss: 0.01602693274617195 2023-01-22 16:04:28.670746: step: 696/526, loss: 0.01890532486140728 2023-01-22 16:04:29.735137: step: 700/526, loss: 0.05904241278767586 2023-01-22 16:04:30.792213: step: 704/526, loss: 0.0017161288997158408 2023-01-22 16:04:31.860380: step: 708/526, loss: 0.004789257887750864 2023-01-22 16:04:32.938252: step: 712/526, loss: 0.003328604158014059 2023-01-22 16:04:34.004084: step: 716/526, loss: 0.008297652006149292 2023-01-22 16:04:35.084260: step: 720/526, loss: 0.0026028358843177557 2023-01-22 16:04:36.161862: step: 724/526, loss: 0.01887928880751133 2023-01-22 16:04:37.252360: step: 728/526, loss: 0.0021802117116749287 2023-01-22 16:04:38.318234: step: 732/526, loss: 0.006900761742144823 2023-01-22 16:04:39.371602: step: 736/526, loss: 0.017902949824929237 2023-01-22 16:04:40.439295: step: 740/526, loss: 0.007469620555639267 2023-01-22 16:04:41.516570: step: 744/526, loss: 0.007927126251161098 2023-01-22 16:04:42.576880: step: 748/526, loss: 0.004104436840862036 2023-01-22 16:04:43.653079: step: 752/526, loss: 0.007140577770769596 2023-01-22 16:04:44.729176: step: 756/526, loss: 0.0020137031096965075 2023-01-22 16:04:45.803062: step: 760/526, loss: 0.022616535425186157 2023-01-22 16:04:46.876696: step: 764/526, loss: 0.01716579683125019 2023-01-22 16:04:47.958850: step: 768/526, loss: 0.006678692996501923 2023-01-22 16:04:49.023632: step: 772/526, loss: 0.010869510471820831 2023-01-22 16:04:50.087558: step: 776/526, loss: 0.002949516987428069 2023-01-22 16:04:51.139398: step: 780/526, loss: 0.006804764270782471 2023-01-22 16:04:52.207977: step: 784/526, loss: 0.0047178310342133045 2023-01-22 16:04:53.275958: step: 788/526, loss: 0.002043781103566289 2023-01-22 16:04:54.337013: step: 792/526, loss: 0.005039089825004339 2023-01-22 16:04:55.392136: step: 796/526, loss: 0.019487515091896057 2023-01-22 16:04:56.453967: step: 800/526, loss: 0.006454563234001398 2023-01-22 16:04:57.548009: step: 804/526, loss: 0.008772947825491428 2023-01-22 16:04:58.598645: step: 808/526, loss: 0.0012387357419356704 2023-01-22 16:04:59.687437: step: 812/526, loss: 0.019759926944971085 2023-01-22 16:05:00.754433: step: 816/526, loss: 0.011131839826703072 2023-01-22 16:05:01.818586: step: 820/526, loss: 0.0336940661072731 2023-01-22 16:05:02.872708: step: 824/526, loss: 0.0021152161061763763 2023-01-22 16:05:03.928932: step: 828/526, loss: 0.010587775148451328 2023-01-22 16:05:04.984893: step: 832/526, loss: 0.008336754515767097 2023-01-22 16:05:06.055159: step: 836/526, loss: 0.01640806719660759 2023-01-22 16:05:07.110092: step: 840/526, loss: 0.008297319523990154 2023-01-22 16:05:08.170368: step: 844/526, loss: 0.002325724344700575 2023-01-22 16:05:09.225314: step: 848/526, loss: 0.013057859614491463 2023-01-22 16:05:10.284256: step: 852/526, loss: 0.0060117305256426334 2023-01-22 16:05:11.356616: step: 856/526, loss: 0.006096655502915382 2023-01-22 16:05:12.419675: step: 860/526, loss: 0.01787818782031536 2023-01-22 16:05:13.472512: step: 864/526, loss: 0.009486567229032516 2023-01-22 16:05:14.540384: step: 868/526, loss: 0.0019624163396656513 2023-01-22 16:05:15.596043: step: 872/526, loss: 0.0045420927926898 2023-01-22 16:05:16.654811: step: 876/526, loss: 0.010387561284005642 2023-01-22 16:05:17.699565: step: 880/526, loss: 0.029061662033200264 2023-01-22 16:05:18.786835: step: 884/526, loss: 0.010791003704071045 2023-01-22 16:05:19.852930: step: 888/526, loss: 0.011274965479969978 2023-01-22 
16:05:20.921476: step: 892/526, loss: 0.010358316823840141 2023-01-22 16:05:21.974174: step: 896/526, loss: 0.0020715061109513044 2023-01-22 16:05:23.031140: step: 900/526, loss: 0.014876225031912327 2023-01-22 16:05:24.113662: step: 904/526, loss: 0.0020804698579013348 2023-01-22 16:05:25.179719: step: 908/526, loss: 0.0026322698686271906 2023-01-22 16:05:26.265370: step: 912/526, loss: 0.0040582045912742615 2023-01-22 16:05:27.331248: step: 916/526, loss: 0.008920358493924141 2023-01-22 16:05:28.398711: step: 920/526, loss: 0.0029874430038034916 2023-01-22 16:05:29.466976: step: 924/526, loss: 0.009158621542155743 2023-01-22 16:05:30.524472: step: 928/526, loss: 0.00733718229457736 2023-01-22 16:05:31.590900: step: 932/526, loss: 0.007403201889246702 2023-01-22 16:05:32.663648: step: 936/526, loss: 0.03757746145129204 2023-01-22 16:05:33.729981: step: 940/526, loss: 0.018963400274515152 2023-01-22 16:05:34.785206: step: 944/526, loss: 0.054025933146476746 2023-01-22 16:05:35.849324: step: 948/526, loss: 0.006316265556961298 2023-01-22 16:05:36.911498: step: 952/526, loss: 0.040511757135391235 2023-01-22 16:05:37.995959: step: 956/526, loss: 0.02917717956006527 2023-01-22 16:05:39.064154: step: 960/526, loss: 0.0006092271069064736 2023-01-22 16:05:40.155206: step: 964/526, loss: 0.05069291964173317 2023-01-22 16:05:41.230286: step: 968/526, loss: 0.006297879386693239 2023-01-22 16:05:42.284509: step: 972/526, loss: 0.0001518265635240823 2023-01-22 16:05:43.348210: step: 976/526, loss: 0.0017175829270854592 2023-01-22 16:05:44.407740: step: 980/526, loss: 0.00459779379889369 2023-01-22 16:05:45.466891: step: 984/526, loss: 0.00311674945987761 2023-01-22 16:05:46.545285: step: 988/526, loss: 0.007379377260804176 2023-01-22 16:05:47.615972: step: 992/526, loss: 0.004725204780697823 2023-01-22 16:05:48.679353: step: 996/526, loss: 0.01223618071526289 2023-01-22 16:05:49.728512: step: 1000/526, loss: 0.0008432284812442958 2023-01-22 16:05:50.785100: step: 1004/526, loss: 0.0011946444865316153 2023-01-22 16:05:51.841556: step: 1008/526, loss: 0.013188197277486324 2023-01-22 16:05:52.911025: step: 1012/526, loss: 0.03502798080444336 2023-01-22 16:05:53.963338: step: 1016/526, loss: 0.006396422628313303 2023-01-22 16:05:55.038361: step: 1020/526, loss: 0.005777356680482626 2023-01-22 16:05:56.100172: step: 1024/526, loss: 0.02009611576795578 2023-01-22 16:05:57.168813: step: 1028/526, loss: 0.0015835731755942106 2023-01-22 16:05:58.220822: step: 1032/526, loss: 0.0016257904935628176 2023-01-22 16:05:59.305427: step: 1036/526, loss: 0.008096247911453247 2023-01-22 16:06:00.368226: step: 1040/526, loss: 0.00093059241771698 2023-01-22 16:06:01.442479: step: 1044/526, loss: 0.010792925953865051 2023-01-22 16:06:02.514683: step: 1048/526, loss: 0.0051745218224823475 2023-01-22 16:06:03.576148: step: 1052/526, loss: 0.009874873794615269 2023-01-22 16:06:04.644017: step: 1056/526, loss: 0.0026784767396748066 2023-01-22 16:06:05.708653: step: 1060/526, loss: 0.006505837198346853 2023-01-22 16:06:06.766760: step: 1064/526, loss: 0.023519212380051613 2023-01-22 16:06:07.820241: step: 1068/526, loss: 0.013372170738875866 2023-01-22 16:06:08.873780: step: 1072/526, loss: 0.008404867723584175 2023-01-22 16:06:09.929424: step: 1076/526, loss: 0.001452846685424447 2023-01-22 16:06:11.002801: step: 1080/526, loss: 0.014082156121730804 2023-01-22 16:06:12.072475: step: 1084/526, loss: 0.006291474215686321 2023-01-22 16:06:13.149420: step: 1088/526, loss: 0.011282151564955711 2023-01-22 16:06:14.215009: step: 
1092/526, loss: 0.019947927445173264 2023-01-22 16:06:15.284549: step: 1096/526, loss: 0.0006907903007231653 2023-01-22 16:06:16.341299: step: 1100/526, loss: 0.030990425497293472 2023-01-22 16:06:17.400167: step: 1104/526, loss: 0.004109003581106663 2023-01-22 16:06:18.472240: step: 1108/526, loss: 0.009068459272384644 2023-01-22 16:06:19.528245: step: 1112/526, loss: 0.022836284711956978 2023-01-22 16:06:20.585780: step: 1116/526, loss: 0.014025052078068256 2023-01-22 16:06:21.647023: step: 1120/526, loss: 0.00168801739346236 2023-01-22 16:06:22.723348: step: 1124/526, loss: 0.02312871813774109 2023-01-22 16:06:23.795401: step: 1128/526, loss: 0.00856107845902443 2023-01-22 16:06:24.880168: step: 1132/526, loss: 0.03813363239169121 2023-01-22 16:06:25.956961: step: 1136/526, loss: 0.005415665917098522 2023-01-22 16:06:27.023483: step: 1140/526, loss: 0.005216538906097412 2023-01-22 16:06:28.080972: step: 1144/526, loss: 0.01107375044375658 2023-01-22 16:06:29.145103: step: 1148/526, loss: 0.002300586085766554 2023-01-22 16:06:30.217014: step: 1152/526, loss: 0.006091007962822914 2023-01-22 16:06:31.291182: step: 1156/526, loss: 0.006083650980144739 2023-01-22 16:06:32.370068: step: 1160/526, loss: 0.005855833645910025 2023-01-22 16:06:33.432630: step: 1164/526, loss: 0.0070433830842375755 2023-01-22 16:06:34.495380: step: 1168/526, loss: 0.002403890946879983 2023-01-22 16:06:35.566599: step: 1172/526, loss: 0.022314131259918213 2023-01-22 16:06:36.625451: step: 1176/526, loss: 0.0044588735327124596 2023-01-22 16:06:37.690231: step: 1180/526, loss: 0.005917669273912907 2023-01-22 16:06:38.741617: step: 1184/526, loss: 0.0057390546426177025 2023-01-22 16:06:39.816404: step: 1188/526, loss: 0.008226591162383556 2023-01-22 16:06:40.878645: step: 1192/526, loss: 0.010439438745379448 2023-01-22 16:06:41.959007: step: 1196/526, loss: 0.004426050931215286 2023-01-22 16:06:43.056082: step: 1200/526, loss: 0.038710419088602066 2023-01-22 16:06:44.116929: step: 1204/526, loss: 0.00555199570953846 2023-01-22 16:06:45.167236: step: 1208/526, loss: 0.010031957179307938 2023-01-22 16:06:46.243947: step: 1212/526, loss: 0.007387576159089804 2023-01-22 16:06:47.312259: step: 1216/526, loss: 0.003792413743212819 2023-01-22 16:06:48.371735: step: 1220/526, loss: 0.015015135519206524 2023-01-22 16:06:49.427783: step: 1224/526, loss: 0.0019042622298002243 2023-01-22 16:06:50.511261: step: 1228/526, loss: 0.009735428728163242 2023-01-22 16:06:51.584152: step: 1232/526, loss: 0.020870467647910118 2023-01-22 16:06:52.645048: step: 1236/526, loss: 0.008933988399803638 2023-01-22 16:06:53.699848: step: 1240/526, loss: 0.009933794848620892 2023-01-22 16:06:54.763538: step: 1244/526, loss: 0.015355129726231098 2023-01-22 16:06:55.822737: step: 1248/526, loss: 0.002939500380307436 2023-01-22 16:06:56.895800: step: 1252/526, loss: 0.017817232757806778 2023-01-22 16:06:57.962560: step: 1256/526, loss: 0.03595886379480362 2023-01-22 16:06:59.025020: step: 1260/526, loss: 0.0031141669023782015 2023-01-22 16:07:00.100444: step: 1264/526, loss: 0.06592642515897751 2023-01-22 16:07:01.165518: step: 1268/526, loss: 0.0035397219471633434 2023-01-22 16:07:02.245093: step: 1272/526, loss: 0.017418434843420982 2023-01-22 16:07:03.317579: step: 1276/526, loss: 0.005319727584719658 2023-01-22 16:07:04.384107: step: 1280/526, loss: 0.0031767389737069607 2023-01-22 16:07:05.460719: step: 1284/526, loss: 0.05969233810901642 2023-01-22 16:07:06.540049: step: 1288/526, loss: 0.004849054850637913 2023-01-22 16:07:07.601321: step: 
1292/526, loss: 0.013007866218686104 2023-01-22 16:07:08.670720: step: 1296/526, loss: 0.003963765688240528 2023-01-22 16:07:09.743914: step: 1300/526, loss: 0.02162555791437626 2023-01-22 16:07:10.803911: step: 1304/526, loss: 0.0017899292288348079 2023-01-22 16:07:11.870446: step: 1308/526, loss: 0.008302192203700542 2023-01-22 16:07:12.946754: step: 1312/526, loss: 0.0018911845982074738 2023-01-22 16:07:14.012414: step: 1316/526, loss: 0.0028253381606191397 2023-01-22 16:07:15.098674: step: 1320/526, loss: 0.010233358480036259 2023-01-22 16:07:16.164148: step: 1324/526, loss: 0.010742838494479656 2023-01-22 16:07:17.234423: step: 1328/526, loss: 0.0020561402197927237 2023-01-22 16:07:18.298320: step: 1332/526, loss: 0.0062849028035998344 2023-01-22 16:07:19.360595: step: 1336/526, loss: 0.004208073485642672 2023-01-22 16:07:20.422248: step: 1340/526, loss: 0.004627154674381018 2023-01-22 16:07:21.482843: step: 1344/526, loss: 0.009445788338780403 2023-01-22 16:07:22.554461: step: 1348/526, loss: 0.008006146177649498 2023-01-22 16:07:23.624496: step: 1352/526, loss: 0.005335070192813873 2023-01-22 16:07:24.691700: step: 1356/526, loss: 0.00019222882110625505 2023-01-22 16:07:25.763432: step: 1360/526, loss: 0.0024903197772800922 2023-01-22 16:07:26.833161: step: 1364/526, loss: 0.011357102543115616 2023-01-22 16:07:27.894770: step: 1368/526, loss: 0.005335256457328796 2023-01-22 16:07:28.960441: step: 1372/526, loss: 0.005676691420376301 2023-01-22 16:07:30.013891: step: 1376/526, loss: 0.0011054989881813526 2023-01-22 16:07:31.086006: step: 1380/526, loss: 0.004948033485561609 2023-01-22 16:07:32.163196: step: 1384/526, loss: 0.004876416176557541 2023-01-22 16:07:33.235892: step: 1388/526, loss: 0.006801893003284931 2023-01-22 16:07:34.303553: step: 1392/526, loss: 0.0010870686965063214 2023-01-22 16:07:35.360481: step: 1396/526, loss: 0.0039533707313239574 2023-01-22 16:07:36.415628: step: 1400/526, loss: 0.00811266154050827 2023-01-22 16:07:37.476885: step: 1404/526, loss: 0.0011624109465628862 2023-01-22 16:07:38.541446: step: 1408/526, loss: 0.0040072244592010975 2023-01-22 16:07:39.611127: step: 1412/526, loss: 0.004937449935823679 2023-01-22 16:07:40.675686: step: 1416/526, loss: 0.003369309240952134 2023-01-22 16:07:41.719504: step: 1420/526, loss: 0.004999875091016293 2023-01-22 16:07:42.789849: step: 1424/526, loss: 0.02944946102797985 2023-01-22 16:07:43.853949: step: 1428/526, loss: 0.01299162395298481 2023-01-22 16:07:44.927805: step: 1432/526, loss: 0.004025980830192566 2023-01-22 16:07:46.005438: step: 1436/526, loss: 0.012488791719079018 2023-01-22 16:07:47.058436: step: 1440/526, loss: 0.00259688263759017 2023-01-22 16:07:48.131186: step: 1444/526, loss: 0.005833064671605825 2023-01-22 16:07:49.192114: step: 1448/526, loss: 0.01693911850452423 2023-01-22 16:07:50.263900: step: 1452/526, loss: 0.0222051739692688 2023-01-22 16:07:51.331368: step: 1456/526, loss: 0.009521521627902985 2023-01-22 16:07:52.400520: step: 1460/526, loss: 0.006272811908274889 2023-01-22 16:07:53.485575: step: 1464/526, loss: 0.07236727327108383 2023-01-22 16:07:54.556130: step: 1468/526, loss: 0.0027400723192840815 2023-01-22 16:07:55.620298: step: 1472/526, loss: 0.007792161777615547 2023-01-22 16:07:56.682354: step: 1476/526, loss: 0.007038488052785397 2023-01-22 16:07:57.739831: step: 1480/526, loss: 0.004880255553871393 2023-01-22 16:07:58.811731: step: 1484/526, loss: 0.021832432597875595 2023-01-22 16:07:59.882062: step: 1488/526, loss: 0.02342221885919571 2023-01-22 16:08:00.948810: step: 
1492/526, loss: 0.008433393202722073 2023-01-22 16:08:02.014316: step: 1496/526, loss: 0.013123411685228348 2023-01-22 16:08:03.102497: step: 1500/526, loss: 0.029821787029504776 2023-01-22 16:08:04.169511: step: 1504/526, loss: 0.00908041000366211 2023-01-22 16:08:05.262431: step: 1508/526, loss: 0.007355151232331991 2023-01-22 16:08:06.326942: step: 1512/526, loss: 0.010647871531546116 2023-01-22 16:08:07.376160: step: 1516/526, loss: 0.0041663506999611855 2023-01-22 16:08:08.439631: step: 1520/526, loss: 0.0074890777468681335 2023-01-22 16:08:09.505255: step: 1524/526, loss: 0.013116743415594101 2023-01-22 16:08:10.572504: step: 1528/526, loss: 0.010179124772548676 2023-01-22 16:08:11.635811: step: 1532/526, loss: 0.002877237508073449 2023-01-22 16:08:12.720188: step: 1536/526, loss: 0.00595517922192812 2023-01-22 16:08:13.835999: step: 1540/526, loss: 0.002787857549265027 2023-01-22 16:08:14.932267: step: 1544/526, loss: 0.01670055463910103 2023-01-22 16:08:15.986988: step: 1548/526, loss: 0.015345240011811256 2023-01-22 16:08:17.061840: step: 1552/526, loss: 0.006643395870923996 2023-01-22 16:08:18.128312: step: 1556/526, loss: 0.006088084075599909 2023-01-22 16:08:19.203998: step: 1560/526, loss: 0.01892280764877796 2023-01-22 16:08:20.261027: step: 1564/526, loss: 0.0015434101223945618 2023-01-22 16:08:21.332675: step: 1568/526, loss: 0.04486711323261261 2023-01-22 16:08:22.403311: step: 1572/526, loss: 0.007180152926594019 2023-01-22 16:08:23.473267: step: 1576/526, loss: 0.0025067832320928574 2023-01-22 16:08:24.521440: step: 1580/526, loss: 0.0025074954610317945 2023-01-22 16:08:25.591568: step: 1584/526, loss: 0.007054222282022238 2023-01-22 16:08:26.664259: step: 1588/526, loss: 0.008581985719501972 2023-01-22 16:08:27.735267: step: 1592/526, loss: 0.00856317114084959 2023-01-22 16:08:28.801512: step: 1596/526, loss: 0.012064811773598194 2023-01-22 16:08:29.888992: step: 1600/526, loss: 0.00014818670751992613 2023-01-22 16:08:30.962668: step: 1604/526, loss: 0.012100787833333015 2023-01-22 16:08:32.043018: step: 1608/526, loss: 0.003516745986416936 2023-01-22 16:08:33.106485: step: 1612/526, loss: 0.03523869812488556 2023-01-22 16:08:34.187784: step: 1616/526, loss: 0.004461720120161772 2023-01-22 16:08:35.257019: step: 1620/526, loss: 0.032758813351392746 2023-01-22 16:08:36.343044: step: 1624/526, loss: 0.0041448506526649 2023-01-22 16:08:37.420572: step: 1628/526, loss: 0.045187175273895264 2023-01-22 16:08:38.482205: step: 1632/526, loss: 0.009123891592025757 2023-01-22 16:08:39.566478: step: 1636/526, loss: 0.005572086665779352 2023-01-22 16:08:40.630832: step: 1640/526, loss: 0.01416696235537529 2023-01-22 16:08:41.694389: step: 1644/526, loss: 0.004191957879811525 2023-01-22 16:08:42.763496: step: 1648/526, loss: 0.02418801188468933 2023-01-22 16:08:43.829794: step: 1652/526, loss: 0.005827329587191343 2023-01-22 16:08:44.910203: step: 1656/526, loss: 0.005489479750394821 2023-01-22 16:08:45.993389: step: 1660/526, loss: 0.014370226301252842 2023-01-22 16:08:47.072494: step: 1664/526, loss: 0.006896455306559801 2023-01-22 16:08:48.150431: step: 1668/526, loss: 0.009979184716939926 2023-01-22 16:08:49.218518: step: 1672/526, loss: 0.002629706170409918 2023-01-22 16:08:50.305974: step: 1676/526, loss: 0.0019369483925402164 2023-01-22 16:08:51.390289: step: 1680/526, loss: 0.004555763676762581 2023-01-22 16:08:52.469062: step: 1684/526, loss: 0.00010799784649861977 2023-01-22 16:08:53.543563: step: 1688/526, loss: 0.001377445412799716 2023-01-22 16:08:54.621806: step: 
1692/526, loss: 0.007142608053982258 2023-01-22 16:08:55.684027: step: 1696/526, loss: 0.006169511936604977 2023-01-22 16:08:56.761968: step: 1700/526, loss: 0.005927258636802435 2023-01-22 16:08:57.827283: step: 1704/526, loss: 0.022003227844834328 2023-01-22 16:08:58.888267: step: 1708/526, loss: 0.018207816407084465 2023-01-22 16:08:59.955165: step: 1712/526, loss: 0.0013692082138732076 2023-01-22 16:09:01.042778: step: 1716/526, loss: 0.007651047315448523 2023-01-22 16:09:02.124013: step: 1720/526, loss: 0.0025622923858463764 2023-01-22 16:09:03.186300: step: 1724/526, loss: 0.0033063122536987066 2023-01-22 16:09:04.260062: step: 1728/526, loss: 0.0012544144410640001 2023-01-22 16:09:05.335341: step: 1732/526, loss: 0.005951226688921452 2023-01-22 16:09:06.416481: step: 1736/526, loss: 0.0012751303147524595 2023-01-22 16:09:07.494536: step: 1740/526, loss: 0.011261491104960442 2023-01-22 16:09:08.561102: step: 1744/526, loss: 0.01785769872367382 2023-01-22 16:09:09.632678: step: 1748/526, loss: 0.010424482636153698 2023-01-22 16:09:10.689468: step: 1752/526, loss: 0.004392494447529316 2023-01-22 16:09:11.774233: step: 1756/526, loss: 0.005734024103730917 2023-01-22 16:09:12.867168: step: 1760/526, loss: 0.008379615843296051 2023-01-22 16:09:13.938611: step: 1764/526, loss: 0.006811490282416344 2023-01-22 16:09:15.014642: step: 1768/526, loss: 0.004867592826485634 2023-01-22 16:09:16.097002: step: 1772/526, loss: 0.010081914253532887 2023-01-22 16:09:17.157983: step: 1776/526, loss: 0.009950308129191399 2023-01-22 16:09:18.234631: step: 1780/526, loss: 0.005420364905148745 2023-01-22 16:09:19.321585: step: 1784/526, loss: 0.009828265756368637 2023-01-22 16:09:20.410748: step: 1788/526, loss: 0.004393778275698423 2023-01-22 16:09:21.500410: step: 1792/526, loss: 0.021046601235866547 2023-01-22 16:09:22.571790: step: 1796/526, loss: 0.0021652954164892435 2023-01-22 16:09:23.643175: step: 1800/526, loss: 0.02698512002825737 2023-01-22 16:09:24.736534: step: 1804/526, loss: 0.00442785257473588 2023-01-22 16:09:25.820390: step: 1808/526, loss: 0.0035056646447628736 2023-01-22 16:09:26.893282: step: 1812/526, loss: 0.008704300038516521 2023-01-22 16:09:27.969647: step: 1816/526, loss: 0.00465176347643137 2023-01-22 16:09:29.046532: step: 1820/526, loss: 0.005390379577875137 2023-01-22 16:09:30.132911: step: 1824/526, loss: 0.005838509649038315 2023-01-22 16:09:31.219735: step: 1828/526, loss: 0.0067871734499931335 2023-01-22 16:09:32.296964: step: 1832/526, loss: 0.02269057184457779 2023-01-22 16:09:33.408939: step: 1836/526, loss: 0.020564686506986618 2023-01-22 16:09:34.484890: step: 1840/526, loss: 0.00322343735024333 2023-01-22 16:09:35.558182: step: 1844/526, loss: 0.004267917014658451 2023-01-22 16:09:36.634244: step: 1848/526, loss: 0.003914065193384886 2023-01-22 16:09:37.703745: step: 1852/526, loss: 0.014520698226988316 2023-01-22 16:09:38.786375: step: 1856/526, loss: 0.0070778545923531055 2023-01-22 16:09:39.853321: step: 1860/526, loss: 0.004852039739489555 2023-01-22 16:09:40.932975: step: 1864/526, loss: 0.013096362352371216 2023-01-22 16:09:42.003009: step: 1868/526, loss: 0.003364998148754239 2023-01-22 16:09:43.076107: step: 1872/526, loss: 0.017421839758753777 2023-01-22 16:09:44.163268: step: 1876/526, loss: 0.04439467191696167 2023-01-22 16:09:45.230413: step: 1880/526, loss: 0.006587847135961056 2023-01-22 16:09:46.309613: step: 1884/526, loss: 0.0025913419667631388 2023-01-22 16:09:47.391455: step: 1888/526, loss: 0.016689134761691093 2023-01-22 16:09:48.470388: step: 
1892/526, loss: 0.012358699925243855 2023-01-22 16:09:49.545412: step: 1896/526, loss: 0.03405987471342087 2023-01-22 16:09:50.657545: step: 1900/526, loss: 0.016695545986294746 2023-01-22 16:09:51.751692: step: 1904/526, loss: 0.007997590117156506 2023-01-22 16:09:52.832506: step: 1908/526, loss: 0.0005133855156600475 2023-01-22 16:09:53.915151: step: 1912/526, loss: 0.0783570259809494 2023-01-22 16:09:55.000594: step: 1916/526, loss: 0.008114861324429512 2023-01-22 16:09:56.079523: step: 1920/526, loss: 0.008517680689692497 2023-01-22 16:09:57.144349: step: 1924/526, loss: 0.001336141605861485 2023-01-22 16:09:58.235260: step: 1928/526, loss: 0.007499922998249531 2023-01-22 16:09:59.307149: step: 1932/526, loss: 0.0058199567720294 2023-01-22 16:10:00.399225: step: 1936/526, loss: 0.0038512928877025843 2023-01-22 16:10:01.484429: step: 1940/526, loss: 0.01217322051525116 2023-01-22 16:10:02.552356: step: 1944/526, loss: 0.011248461902141571 2023-01-22 16:10:03.630530: step: 1948/526, loss: 0.005188351962715387 2023-01-22 16:10:04.709181: step: 1952/526, loss: 0.0009030320798046887 2023-01-22 16:10:05.781310: step: 1956/526, loss: 0.004988142289221287 2023-01-22 16:10:06.846801: step: 1960/526, loss: 0.005158405285328627 2023-01-22 16:10:07.941007: step: 1964/526, loss: 0.0034259711392223835 2023-01-22 16:10:09.005361: step: 1968/526, loss: 0.007517583202570677 2023-01-22 16:10:10.086673: step: 1972/526, loss: 0.02355273626744747 2023-01-22 16:10:11.166047: step: 1976/526, loss: 0.02196827344596386 2023-01-22 16:10:12.255772: step: 1980/526, loss: 0.008239859715104103 2023-01-22 16:10:13.339941: step: 1984/526, loss: 0.02101517654955387 2023-01-22 16:10:14.419009: step: 1988/526, loss: 0.007755276747047901 2023-01-22 16:10:15.490355: step: 1992/526, loss: 0.0046648369170725346 2023-01-22 16:10:16.582283: step: 1996/526, loss: 0.012993941083550453 2023-01-22 16:10:17.674999: step: 2000/526, loss: 0.006515428423881531 2023-01-22 16:10:18.759262: step: 2004/526, loss: 0.006839878391474485 2023-01-22 16:10:19.823435: step: 2008/526, loss: 0.014127875678241253 2023-01-22 16:10:20.912357: step: 2012/526, loss: 0.009130644612014294 2023-01-22 16:10:21.981942: step: 2016/526, loss: 0.003195829689502716 2023-01-22 16:10:23.056521: step: 2020/526, loss: 0.0185227207839489 2023-01-22 16:10:24.141115: step: 2024/526, loss: 0.006691941060125828 2023-01-22 16:10:25.231564: step: 2028/526, loss: 0.01640489511191845 2023-01-22 16:10:26.301750: step: 2032/526, loss: 0.008451828733086586 2023-01-22 16:10:27.377620: step: 2036/526, loss: 0.016497690230607986 2023-01-22 16:10:28.460987: step: 2040/526, loss: 0.003117659827694297 2023-01-22 16:10:29.520951: step: 2044/526, loss: 0.007110433652997017 2023-01-22 16:10:30.584236: step: 2048/526, loss: 0.01805409975349903 2023-01-22 16:10:31.650076: step: 2052/526, loss: 0.023531576618552208 2023-01-22 16:10:32.720088: step: 2056/526, loss: 0.03242948651313782 2023-01-22 16:10:33.778947: step: 2060/526, loss: 0.002636838238686323 2023-01-22 16:10:34.833970: step: 2064/526, loss: 0.0014066470321267843 2023-01-22 16:10:35.890726: step: 2068/526, loss: 0.008077271282672882 2023-01-22 16:10:36.963503: step: 2072/526, loss: 0.00169633817858994 2023-01-22 16:10:38.035214: step: 2076/526, loss: 0.023290548473596573 2023-01-22 16:10:39.109852: step: 2080/526, loss: 0.009173968806862831 2023-01-22 16:10:40.165769: step: 2084/526, loss: 0.003000599332153797 2023-01-22 16:10:41.232235: step: 2088/526, loss: 0.007806662004441023 2023-01-22 16:10:42.315488: step: 2092/526, 
loss: 0.0024546682834625244 2023-01-22 16:10:43.409918: step: 2096/526, loss: 0.05753093957901001 2023-01-22 16:10:44.472229: step: 2100/526, loss: 0.018630729988217354 2023-01-22 16:10:45.528952: step: 2104/526, loss: 0.008223350159823895 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170727079538555, 'r': 0.3303091397849462, 'f1': 0.32355560718711274}, 'combined': 0.23840939476945147, 'stategy': 1, 'epoch': 3} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.341675319868516, 'r': 0.26289930833432934, 'f1': 0.2971550610231871}, 'combined': 0.16208457873992024, 'stategy': 1, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32605187286858056, 'r': 0.33223881542775663, 'f1': 0.329116270169977}, 'combined': 0.24250672538840412, 'stategy': 1, 'epoch': 3} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340578826215801, 'r': 0.27215203008111954, 'f1': 0.30254464257748825}, 'combined': 0.16502435049681177, 'stategy': 1, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 3} New best chinese model... 
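The template/slot/combined figures in the epoch-3 summary above are internally consistent: each f1 is the harmonic mean of its listed p and r, and combined matches the product of the template and slot F1 scores. A minimal sketch checking this against the Dev Chinese row; the helper name is illustrative and not taken from train.py, only the numbers come from the log:

def f1(p, r):
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.5833333333333334)                # 0.7368421052631579
slot_f1 = f1(0.35344101123595506, 0.29844639468690703)   # ~0.3236239711934157
combined = template_f1 * slot_f1                         # ~0.23845976824777995
print(template_f1, slot_f1, combined)

The same product relation holds for the other Dev/Test rows (e.g. Test Chinese: 0.5454545454545454 * 0.275456607724523 ~ 0.15024905875883074), and the per-language epochs in the "Current best result" block that follows suggest best checkpoints are tracked independently per language by their dev combined score (hence "New best chinese model..." here while the Korean and Russian bests remain at epoch 1).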
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31418668446026093, 'r': 0.335052972802024, 'f1': 0.3242845117845118}, 'combined': 0.238946482367535, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34272064379813255, 'r': 0.2649578627716578, 'f1': 0.298863719307969}, 'combined': 0.1630165741679831, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 4 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 16:13:34.181400: step: 4/526, loss: 0.04561716690659523 2023-01-22 16:13:35.235616: step: 8/526, loss: 0.009583407081663609 2023-01-22 16:13:36.287890: step: 12/526, loss: 0.008871912024915218 2023-01-22 16:13:37.341357: step: 16/526, loss: 0.03346626088023186 2023-01-22 16:13:38.383269: step: 20/526, loss: 0.016370423138141632 2023-01-22 16:13:39.431929: step: 24/526, loss: 0.009490296244621277 2023-01-22 16:13:40.496498: step: 28/526, loss: 0.016934407874941826 2023-01-22 16:13:41.564086: step: 32/526, loss: 0.015565715730190277 2023-01-22 16:13:42.604132: step: 36/526, loss: 0.0012508517829701304 2023-01-22 16:13:43.686209: step: 40/526, loss: 0.008080514147877693 2023-01-22 16:13:44.748956: step: 44/526, loss: 0.004121420439332724 2023-01-22 16:13:45.807485: step: 48/526, loss: 0.0066663045436143875 2023-01-22 16:13:46.859471: step: 52/526, loss: 0.03254383057355881 2023-01-22 16:13:47.929963: step: 56/526, loss: 0.020131584256887436 2023-01-22 16:13:48.986757: step: 60/526, loss: 0.0019100387580692768 2023-01-22 
16:13:50.047387: step: 64/526, loss: 0.004221797920763493 2023-01-22 16:13:51.102977: step: 68/526, loss: 0.0019880777690559626 2023-01-22 16:13:52.175169: step: 72/526, loss: 0.008283071219921112 2023-01-22 16:13:53.252454: step: 76/526, loss: 0.002142650308087468 2023-01-22 16:13:54.316200: step: 80/526, loss: 0.01260561402887106 2023-01-22 16:13:55.394162: step: 84/526, loss: 0.005416129715740681 2023-01-22 16:13:56.462429: step: 88/526, loss: 0.010029232129454613 2023-01-22 16:13:57.525416: step: 92/526, loss: 0.0063294656574726105 2023-01-22 16:13:58.594083: step: 96/526, loss: 0.006487805861979723 2023-01-22 16:13:59.655922: step: 100/526, loss: 0.027292942628264427 2023-01-22 16:14:00.707855: step: 104/526, loss: 0.00549099687486887 2023-01-22 16:14:01.768186: step: 108/526, loss: 0.04029904678463936 2023-01-22 16:14:02.831646: step: 112/526, loss: 0.0190633125603199 2023-01-22 16:14:03.890138: step: 116/526, loss: 0.0027551420498639345 2023-01-22 16:14:04.954450: step: 120/526, loss: 0.008756263181567192 2023-01-22 16:14:06.024372: step: 124/526, loss: 0.02007344178855419 2023-01-22 16:14:07.081081: step: 128/526, loss: 0.005198202561587095 2023-01-22 16:14:08.160844: step: 132/526, loss: 0.018453950062394142 2023-01-22 16:14:09.229211: step: 136/526, loss: 0.009264092892408371 2023-01-22 16:14:10.302914: step: 140/526, loss: 0.012572417967021465 2023-01-22 16:14:11.361502: step: 144/526, loss: 0.0030364994890987873 2023-01-22 16:14:12.438056: step: 148/526, loss: 0.006098009645938873 2023-01-22 16:14:13.509984: step: 152/526, loss: 0.007495851255953312 2023-01-22 16:14:14.561783: step: 156/526, loss: 0.005035730544477701 2023-01-22 16:14:15.622912: step: 160/526, loss: 0.001721342676319182 2023-01-22 16:14:16.698093: step: 164/526, loss: 0.006296331528574228 2023-01-22 16:14:17.754897: step: 168/526, loss: 0.00730692595243454 2023-01-22 16:14:18.828842: step: 172/526, loss: 0.0013020512415096164 2023-01-22 16:14:19.909326: step: 176/526, loss: 0.001522004371508956 2023-01-22 16:14:20.990657: step: 180/526, loss: 0.04031140357255936 2023-01-22 16:14:22.048492: step: 184/526, loss: 0.006029242649674416 2023-01-22 16:14:23.117588: step: 188/526, loss: 0.0065521495416760445 2023-01-22 16:14:24.170315: step: 192/526, loss: 0.029529938474297523 2023-01-22 16:14:25.245983: step: 196/526, loss: 0.006208635400980711 2023-01-22 16:14:26.315190: step: 200/526, loss: 0.020144633948802948 2023-01-22 16:14:27.380646: step: 204/526, loss: 0.007020852528512478 2023-01-22 16:14:28.460072: step: 208/526, loss: 0.04450790584087372 2023-01-22 16:14:29.522611: step: 212/526, loss: 0.015817951411008835 2023-01-22 16:14:30.589063: step: 216/526, loss: 0.008612891659140587 2023-01-22 16:14:31.649345: step: 220/526, loss: 0.005087288562208414 2023-01-22 16:14:32.703502: step: 224/526, loss: 0.002728690393269062 2023-01-22 16:14:33.769283: step: 228/526, loss: 0.015435674227774143 2023-01-22 16:14:34.839348: step: 232/526, loss: 0.006181768141686916 2023-01-22 16:14:35.933609: step: 236/526, loss: 0.011566904373466969 2023-01-22 16:14:37.000822: step: 240/526, loss: 0.005427936092019081 2023-01-22 16:14:38.059959: step: 244/526, loss: 0.0050893500447273254 2023-01-22 16:14:39.120240: step: 248/526, loss: 0.009158079512417316 2023-01-22 16:14:40.195565: step: 252/526, loss: 0.0015035689575597644 2023-01-22 16:14:41.254403: step: 256/526, loss: 0.04388522356748581 2023-01-22 16:14:42.344294: step: 260/526, loss: 0.007679319009184837 2023-01-22 16:14:43.418559: step: 264/526, loss: 0.00824405811727047 
2023-01-22 16:14:44.483982: step: 268/526, loss: 0.012040205299854279 2023-01-22 16:14:45.559300: step: 272/526, loss: 0.002386068692430854 2023-01-22 16:14:46.646544: step: 276/526, loss: 0.004297530744224787 2023-01-22 16:14:47.720263: step: 280/526, loss: 0.011628060601651669 2023-01-22 16:14:48.807446: step: 284/526, loss: 0.005363657139241695 2023-01-22 16:14:49.887499: step: 288/526, loss: 0.00326823559589684 2023-01-22 16:14:50.965502: step: 292/526, loss: 0.0031411077361553907 2023-01-22 16:14:52.049095: step: 296/526, loss: 0.003462613094598055 2023-01-22 16:14:53.128486: step: 300/526, loss: 0.01911255531013012 2023-01-22 16:14:54.202166: step: 304/526, loss: 0.006545764394104481 2023-01-22 16:14:55.258125: step: 308/526, loss: 0.0066093867644667625 2023-01-22 16:14:56.328441: step: 312/526, loss: 0.021443061530590057 2023-01-22 16:14:57.400336: step: 316/526, loss: 0.004921016748994589 2023-01-22 16:14:58.464845: step: 320/526, loss: 0.0036913591902703047 2023-01-22 16:14:59.528687: step: 324/526, loss: 0.005502030253410339 2023-01-22 16:15:00.612285: step: 328/526, loss: 0.014686096459627151 2023-01-22 16:15:01.679013: step: 332/526, loss: 0.0074418387375772 2023-01-22 16:15:02.765234: step: 336/526, loss: 0.011564705520868301 2023-01-22 16:15:03.836947: step: 340/526, loss: 0.02050345204770565 2023-01-22 16:15:04.889808: step: 344/526, loss: 0.00533823249861598 2023-01-22 16:15:05.952590: step: 348/526, loss: 0.014311171136796474 2023-01-22 16:15:07.049316: step: 352/526, loss: 0.003921550698578358 2023-01-22 16:15:08.103676: step: 356/526, loss: 0.0388287752866745 2023-01-22 16:15:09.177107: step: 360/526, loss: 0.004193302243947983 2023-01-22 16:15:10.246570: step: 364/526, loss: 0.009171021170914173 2023-01-22 16:15:11.316840: step: 368/526, loss: 0.006631837692111731 2023-01-22 16:15:12.385552: step: 372/526, loss: 0.03862081840634346 2023-01-22 16:15:13.436876: step: 376/526, loss: 0.005430086050182581 2023-01-22 16:15:14.506824: step: 380/526, loss: 0.005405530333518982 2023-01-22 16:15:15.578454: step: 384/526, loss: 0.04989420250058174 2023-01-22 16:15:16.629626: step: 388/526, loss: 0.003538896329700947 2023-01-22 16:15:17.697222: step: 392/526, loss: 0.012618141248822212 2023-01-22 16:15:18.759211: step: 396/526, loss: 0.00478720897808671 2023-01-22 16:15:19.813497: step: 400/526, loss: 0.012967278249561787 2023-01-22 16:15:20.886682: step: 404/526, loss: 0.0074020931497216225 2023-01-22 16:15:21.949050: step: 408/526, loss: 0.013714803382754326 2023-01-22 16:15:23.019527: step: 412/526, loss: 0.024566100910305977 2023-01-22 16:15:24.087192: step: 416/526, loss: 0.014504571445286274 2023-01-22 16:15:25.173880: step: 420/526, loss: 0.03642435744404793 2023-01-22 16:15:26.236270: step: 424/526, loss: 0.004088229034096003 2023-01-22 16:15:27.296632: step: 428/526, loss: 0.0028984721284359694 2023-01-22 16:15:28.360375: step: 432/526, loss: 0.021795539185404778 2023-01-22 16:15:29.427668: step: 436/526, loss: 0.011544923298060894 2023-01-22 16:15:30.489605: step: 440/526, loss: 0.008165497332811356 2023-01-22 16:15:31.566116: step: 444/526, loss: 0.009896943345665932 2023-01-22 16:15:32.643341: step: 448/526, loss: 0.0070206522941589355 2023-01-22 16:15:33.730095: step: 452/526, loss: 0.003799766767770052 2023-01-22 16:15:34.816709: step: 456/526, loss: 0.01626633293926716 2023-01-22 16:15:35.892032: step: 460/526, loss: 0.00402452889829874 2023-01-22 16:15:36.981119: step: 464/526, loss: 0.008234470151364803 2023-01-22 16:15:38.041571: step: 468/526, loss: 
0.01112395990639925 2023-01-22 16:15:39.112492: step: 472/526, loss: 0.024101847782731056 2023-01-22 16:15:40.178677: step: 476/526, loss: 0.03141447901725769 2023-01-22 16:15:41.239351: step: 480/526, loss: 0.003685676958411932 2023-01-22 16:15:42.319865: step: 484/526, loss: 0.002476579276844859 2023-01-22 16:15:43.396548: step: 488/526, loss: 0.006108688656240702 2023-01-22 16:15:44.465163: step: 492/526, loss: 0.044986505061388016 2023-01-22 16:15:45.533910: step: 496/526, loss: 0.026766033843159676 2023-01-22 16:15:46.590522: step: 500/526, loss: 0.013256989419460297 2023-01-22 16:15:47.654395: step: 504/526, loss: 0.024712851271033287 2023-01-22 16:15:48.722838: step: 508/526, loss: 0.008650749921798706 2023-01-22 16:15:49.790420: step: 512/526, loss: 0.0394333079457283 2023-01-22 16:15:50.856464: step: 516/526, loss: 0.01165279932320118 2023-01-22 16:15:51.953399: step: 520/526, loss: 0.045307960361242294 2023-01-22 16:15:53.025622: step: 524/526, loss: 0.05910734459757805 2023-01-22 16:15:54.091903: step: 528/526, loss: 0.008395752869546413 2023-01-22 16:15:55.156250: step: 532/526, loss: 0.007766869384795427 2023-01-22 16:15:56.226981: step: 536/526, loss: 0.011209053918719292 2023-01-22 16:15:57.297090: step: 540/526, loss: 0.00456573348492384 2023-01-22 16:15:58.358538: step: 544/526, loss: 0.0036347168497741222 2023-01-22 16:15:59.441981: step: 548/526, loss: 0.007989378646016121 2023-01-22 16:16:00.517470: step: 552/526, loss: 0.030643368139863014 2023-01-22 16:16:01.586121: step: 556/526, loss: 0.0010265993187204003 2023-01-22 16:16:02.663316: step: 560/526, loss: 0.008294590748846531 2023-01-22 16:16:03.729698: step: 564/526, loss: 0.03510265052318573 2023-01-22 16:16:04.806261: step: 568/526, loss: 0.010723576880991459 2023-01-22 16:16:05.865725: step: 572/526, loss: 0.00177483179140836 2023-01-22 16:16:06.950889: step: 576/526, loss: 0.005997425876557827 2023-01-22 16:16:08.017750: step: 580/526, loss: 0.021022701635956764 2023-01-22 16:16:09.082872: step: 584/526, loss: 0.013442527502775192 2023-01-22 16:16:10.153863: step: 588/526, loss: 0.005017580930143595 2023-01-22 16:16:11.223966: step: 592/526, loss: 0.010660244151949883 2023-01-22 16:16:12.303267: step: 596/526, loss: 0.0018021485302597284 2023-01-22 16:16:13.391292: step: 600/526, loss: 0.018023859709501266 2023-01-22 16:16:14.441704: step: 604/526, loss: 0.015794062986969948 2023-01-22 16:16:15.525416: step: 608/526, loss: 0.028301579877734184 2023-01-22 16:16:16.597758: step: 612/526, loss: 0.0025440328754484653 2023-01-22 16:16:17.670413: step: 616/526, loss: 0.013181930407881737 2023-01-22 16:16:18.745651: step: 620/526, loss: 0.011142008006572723 2023-01-22 16:16:19.798385: step: 624/526, loss: 0.00868635531514883 2023-01-22 16:16:20.885131: step: 628/526, loss: 0.010363152250647545 2023-01-22 16:16:21.977185: step: 632/526, loss: 0.01566331833600998 2023-01-22 16:16:23.058969: step: 636/526, loss: 0.002800422254949808 2023-01-22 16:16:24.127648: step: 640/526, loss: 0.009345663711428642 2023-01-22 16:16:25.198397: step: 644/526, loss: 0.007596664130687714 2023-01-22 16:16:26.256698: step: 648/526, loss: 0.010002491995692253 2023-01-22 16:16:27.328015: step: 652/526, loss: 0.011854047887027264 2023-01-22 16:16:28.402190: step: 656/526, loss: 0.0031592161394655704 2023-01-22 16:16:29.466685: step: 660/526, loss: 0.005276334006339312 2023-01-22 16:16:30.545232: step: 664/526, loss: 0.04187704622745514 2023-01-22 16:16:31.598928: step: 668/526, loss: 0.008595994673669338 2023-01-22 16:16:32.694632: step: 
672/526, loss: 0.011099678464233875 2023-01-22 16:16:33.766700: step: 676/526, loss: 0.029476739466190338 2023-01-22 16:16:34.848991: step: 680/526, loss: 0.005831962917000055 2023-01-22 16:16:35.908768: step: 684/526, loss: 0.013440776616334915 2023-01-22 16:16:36.975500: step: 688/526, loss: 0.00838993676006794 2023-01-22 16:16:38.035717: step: 692/526, loss: 0.0006554737337864935 2023-01-22 16:16:39.096415: step: 696/526, loss: 0.012389463372528553 2023-01-22 16:16:40.165372: step: 700/526, loss: 0.007408950477838516 2023-01-22 16:16:41.242568: step: 704/526, loss: 0.006197801791131496 2023-01-22 16:16:42.316336: step: 708/526, loss: 0.006031715776771307 2023-01-22 16:16:43.389301: step: 712/526, loss: 0.002810501726344228 2023-01-22 16:16:44.452509: step: 716/526, loss: 0.025104759261012077 2023-01-22 16:16:45.524793: step: 720/526, loss: 0.009968100115656853 2023-01-22 16:16:46.581142: step: 724/526, loss: 0.019871799275279045 2023-01-22 16:16:47.647243: step: 728/526, loss: 0.01130816712975502 2023-01-22 16:16:48.718712: step: 732/526, loss: 0.011507372371852398 2023-01-22 16:16:49.778908: step: 736/526, loss: 0.013298151083290577 2023-01-22 16:16:50.837972: step: 740/526, loss: 0.06474484503269196 2023-01-22 16:16:51.900617: step: 744/526, loss: 0.012374096550047398 2023-01-22 16:16:52.978152: step: 748/526, loss: 0.028651466593146324 2023-01-22 16:16:54.060517: step: 752/526, loss: 0.018522465601563454 2023-01-22 16:16:55.127722: step: 756/526, loss: 0.0014116679085418582 2023-01-22 16:16:56.191197: step: 760/526, loss: 0.012200803495943546 2023-01-22 16:16:57.267570: step: 764/526, loss: 0.0045927297323942184 2023-01-22 16:16:58.340246: step: 768/526, loss: 0.005744764115661383 2023-01-22 16:16:59.415337: step: 772/526, loss: 0.007151265162974596 2023-01-22 16:17:00.474183: step: 776/526, loss: 0.002220354275777936 2023-01-22 16:17:01.540541: step: 780/526, loss: 0.008260620757937431 2023-01-22 16:17:02.612835: step: 784/526, loss: 0.008544021286070347 2023-01-22 16:17:03.700581: step: 788/526, loss: 0.007146183401346207 2023-01-22 16:17:04.782094: step: 792/526, loss: 0.004178935196250677 2023-01-22 16:17:05.867305: step: 796/526, loss: 0.030275003984570503 2023-01-22 16:17:06.959177: step: 800/526, loss: 0.005692994687706232 2023-01-22 16:17:08.032392: step: 804/526, loss: 0.028237413614988327 2023-01-22 16:17:09.115654: step: 808/526, loss: 0.015095150098204613 2023-01-22 16:17:10.187596: step: 812/526, loss: 0.011295042000710964 2023-01-22 16:17:11.262276: step: 816/526, loss: 0.0037475123535841703 2023-01-22 16:17:12.318416: step: 820/526, loss: 0.0073763844557106495 2023-01-22 16:17:13.416818: step: 824/526, loss: 0.002914538374170661 2023-01-22 16:17:14.481815: step: 828/526, loss: 0.0021776368375867605 2023-01-22 16:17:15.545526: step: 832/526, loss: 0.014286899007856846 2023-01-22 16:17:16.618195: step: 836/526, loss: 0.00038108142325654626 2023-01-22 16:17:17.706534: step: 840/526, loss: 0.007071319036185741 2023-01-22 16:17:18.778703: step: 844/526, loss: 0.0065015205182135105 2023-01-22 16:17:19.852603: step: 848/526, loss: 0.00446522980928421 2023-01-22 16:17:20.921616: step: 852/526, loss: 0.01957670785486698 2023-01-22 16:17:22.008137: step: 856/526, loss: 0.02963337115943432 2023-01-22 16:17:23.079211: step: 860/526, loss: 0.009095244109630585 2023-01-22 16:17:24.152095: step: 864/526, loss: 0.023104403167963028 2023-01-22 16:17:25.242466: step: 868/526, loss: 0.005967161152511835 2023-01-22 16:17:26.319993: step: 872/526, loss: 0.0035698008723556995 2023-01-22 
16:17:27.392960: step: 876/526, loss: 0.021016787737607956 2023-01-22 16:17:28.472934: step: 880/526, loss: 0.0043370104394853115 2023-01-22 16:17:29.536318: step: 884/526, loss: 0.012632861733436584 2023-01-22 16:17:30.620528: step: 888/526, loss: 0.014910683035850525 2023-01-22 16:17:31.710002: step: 892/526, loss: 0.010937982238829136 2023-01-22 16:17:32.779040: step: 896/526, loss: 0.002072559203952551 2023-01-22 16:17:33.846326: step: 900/526, loss: 0.019145850092172623 2023-01-22 16:17:34.927146: step: 904/526, loss: 0.004065972287207842 2023-01-22 16:17:35.985154: step: 908/526, loss: 0.0021442428696900606 2023-01-22 16:17:37.045290: step: 912/526, loss: 0.004783442709594965 2023-01-22 16:17:38.119912: step: 916/526, loss: 0.029454268515110016 2023-01-22 16:17:39.197873: step: 920/526, loss: 0.009567839093506336 2023-01-22 16:17:40.289670: step: 924/526, loss: 0.030318789184093475 2023-01-22 16:17:41.361134: step: 928/526, loss: 0.008138017728924751 2023-01-22 16:17:42.437611: step: 932/526, loss: 0.009428447112441063 2023-01-22 16:17:43.510044: step: 936/526, loss: 0.016100618988275528 2023-01-22 16:17:44.569439: step: 940/526, loss: 0.0042785778641700745 2023-01-22 16:17:45.625774: step: 944/526, loss: 0.00715788546949625 2023-01-22 16:17:46.694774: step: 948/526, loss: 0.006330091506242752 2023-01-22 16:17:47.763142: step: 952/526, loss: 0.02403317019343376 2023-01-22 16:17:48.834642: step: 956/526, loss: 0.0019050012342631817 2023-01-22 16:17:49.915690: step: 960/526, loss: 0.009052552282810211 2023-01-22 16:17:51.001320: step: 964/526, loss: 0.022979607805609703 2023-01-22 16:17:52.103390: step: 968/526, loss: 0.026355665177106857 2023-01-22 16:17:53.197948: step: 972/526, loss: 0.005978655070066452 2023-01-22 16:17:54.265491: step: 976/526, loss: 0.024148106575012207 2023-01-22 16:17:55.331235: step: 980/526, loss: 0.012789415195584297 2023-01-22 16:17:56.407131: step: 984/526, loss: 0.008457096293568611 2023-01-22 16:17:57.471624: step: 988/526, loss: 0.0015296322526410222 2023-01-22 16:17:58.540470: step: 992/526, loss: 0.006805008742958307 2023-01-22 16:17:59.607379: step: 996/526, loss: 0.0014696972211822867 2023-01-22 16:18:00.679854: step: 1000/526, loss: 0.004566519986838102 2023-01-22 16:18:01.769633: step: 1004/526, loss: 0.00858213659375906 2023-01-22 16:18:02.840235: step: 1008/526, loss: 0.019987408071756363 2023-01-22 16:18:03.910178: step: 1012/526, loss: 0.008939806371927261 2023-01-22 16:18:04.976758: step: 1016/526, loss: 0.009215113706886768 2023-01-22 16:18:06.053710: step: 1020/526, loss: 0.0041547357104718685 2023-01-22 16:18:07.132416: step: 1024/526, loss: 0.00252532004378736 2023-01-22 16:18:08.202181: step: 1028/526, loss: 0.008638842962682247 2023-01-22 16:18:09.265401: step: 1032/526, loss: 0.0027806328143924475 2023-01-22 16:18:10.336266: step: 1036/526, loss: 0.0036902576684951782 2023-01-22 16:18:11.400021: step: 1040/526, loss: 0.00746373925358057 2023-01-22 16:18:12.464820: step: 1044/526, loss: 0.0027634340804070234 2023-01-22 16:18:13.573094: step: 1048/526, loss: 0.05988109111785889 2023-01-22 16:18:14.638930: step: 1052/526, loss: 0.009773884899914265 2023-01-22 16:18:15.714220: step: 1056/526, loss: 0.009274709969758987 2023-01-22 16:18:16.809030: step: 1060/526, loss: 0.008162522688508034 2023-01-22 16:18:17.889140: step: 1064/526, loss: 0.0061418358236551285 2023-01-22 16:18:18.956557: step: 1068/526, loss: 0.0016014976426959038 2023-01-22 16:18:20.005220: step: 1072/526, loss: 0.0014428264694288373 2023-01-22 16:18:21.105270: step: 
1076/526, loss: 0.006895169150084257 2023-01-22 16:18:22.176549: step: 1080/526, loss: 0.004000116139650345 2023-01-22 16:18:23.239434: step: 1084/526, loss: 0.008304869756102562 2023-01-22 16:18:24.300492: step: 1088/526, loss: 0.006547779776155949 2023-01-22 16:18:25.379492: step: 1092/526, loss: 0.017215324565768242 2023-01-22 16:18:26.472109: step: 1096/526, loss: 0.010768868029117584 2023-01-22 16:18:27.538654: step: 1100/526, loss: 0.003673287807032466 2023-01-22 16:18:28.615874: step: 1104/526, loss: 0.006040376611053944 2023-01-22 16:18:29.700590: step: 1108/526, loss: 0.008560117334127426 2023-01-22 16:18:30.778213: step: 1112/526, loss: 0.007589159067720175 2023-01-22 16:18:31.855880: step: 1116/526, loss: 0.024056492373347282 2023-01-22 16:18:32.917832: step: 1120/526, loss: 0.0018451682990416884 2023-01-22 16:18:33.978610: step: 1124/526, loss: 0.03260701522231102 2023-01-22 16:18:35.036135: step: 1128/526, loss: 0.00204491033218801 2023-01-22 16:18:36.118416: step: 1132/526, loss: 0.0033616709988564253 2023-01-22 16:18:37.194471: step: 1136/526, loss: 0.007189306430518627 2023-01-22 16:18:38.270597: step: 1140/526, loss: 0.00677073560655117 2023-01-22 16:18:39.342270: step: 1144/526, loss: 0.03640957176685333 2023-01-22 16:18:40.416479: step: 1148/526, loss: 0.011349931359291077 2023-01-22 16:18:41.483952: step: 1152/526, loss: 0.009325725957751274 2023-01-22 16:18:42.554132: step: 1156/526, loss: 0.014557684771716595 2023-01-22 16:18:43.640183: step: 1160/526, loss: 0.009800802916288376 2023-01-22 16:18:44.711415: step: 1164/526, loss: 0.004345850553363562 2023-01-22 16:18:45.786592: step: 1168/526, loss: 0.026662593707442284 2023-01-22 16:18:46.860157: step: 1172/526, loss: 0.03165102377533913 2023-01-22 16:18:47.929617: step: 1176/526, loss: 0.004347292240709066 2023-01-22 16:18:49.026503: step: 1180/526, loss: 0.03501264750957489 2023-01-22 16:18:50.090773: step: 1184/526, loss: 0.011472995392978191 2023-01-22 16:18:51.156158: step: 1188/526, loss: 0.006443241611123085 2023-01-22 16:18:52.210500: step: 1192/526, loss: 0.014392136596143246 2023-01-22 16:18:53.305989: step: 1196/526, loss: 0.006001131609082222 2023-01-22 16:18:54.370659: step: 1200/526, loss: 0.008973582647740841 2023-01-22 16:18:55.459481: step: 1204/526, loss: 0.018911825492978096 2023-01-22 16:18:56.535261: step: 1208/526, loss: 0.008818590082228184 2023-01-22 16:18:57.603091: step: 1212/526, loss: 0.005954004358500242 2023-01-22 16:18:58.666337: step: 1216/526, loss: 0.003068016143515706 2023-01-22 16:18:59.727840: step: 1220/526, loss: 0.0067404573783278465 2023-01-22 16:19:00.800932: step: 1224/526, loss: 0.009271468035876751 2023-01-22 16:19:01.882371: step: 1228/526, loss: 0.00014476769138127565 2023-01-22 16:19:02.954879: step: 1232/526, loss: 0.000542073103133589 2023-01-22 16:19:04.039244: step: 1236/526, loss: 0.002124140737578273 2023-01-22 16:19:05.097929: step: 1240/526, loss: 0.005091676954180002 2023-01-22 16:19:06.166354: step: 1244/526, loss: 0.006924670655280352 2023-01-22 16:19:07.239349: step: 1248/526, loss: 0.01914382353425026 2023-01-22 16:19:08.292367: step: 1252/526, loss: 0.010995729826390743 2023-01-22 16:19:09.372849: step: 1256/526, loss: 0.004292840138077736 2023-01-22 16:19:10.456232: step: 1260/526, loss: 0.003048386424779892 2023-01-22 16:19:11.524622: step: 1264/526, loss: 0.0044347262009978294 2023-01-22 16:19:12.597597: step: 1268/526, loss: 0.012820238247513771 2023-01-22 16:19:13.658090: step: 1272/526, loss: 0.00520427105948329 2023-01-22 16:19:14.718176: step: 
1276/526, loss: 0.000263952708337456 2023-01-22 16:19:15.798262: step: 1280/526, loss: 0.0015274424804374576 2023-01-22 16:19:16.898223: step: 1284/526, loss: 0.0005827751010656357 2023-01-22 16:19:17.988361: step: 1288/526, loss: 0.011152703315019608 2023-01-22 16:19:19.086289: step: 1292/526, loss: 0.02018805406987667 2023-01-22 16:19:20.160182: step: 1296/526, loss: 0.0033085145987570286 2023-01-22 16:19:21.244117: step: 1300/526, loss: 0.004105363041162491 2023-01-22 16:19:22.313140: step: 1304/526, loss: 0.003182013053447008 2023-01-22 16:19:23.371340: step: 1308/526, loss: 0.0524308942258358 2023-01-22 16:19:24.481864: step: 1312/526, loss: 0.024237370118498802 2023-01-22 16:19:25.550960: step: 1316/526, loss: 0.00324096716940403 2023-01-22 16:19:26.621224: step: 1320/526, loss: 7.782007742207497e-05 2023-01-22 16:19:27.694882: step: 1324/526, loss: 0.006855267100036144 2023-01-22 16:19:28.766901: step: 1328/526, loss: 0.0 2023-01-22 16:19:29.827462: step: 1332/526, loss: 0.011863539926707745 2023-01-22 16:19:30.917420: step: 1336/526, loss: 0.007088163401931524 2023-01-22 16:19:31.984241: step: 1340/526, loss: 0.011153963394463062 2023-01-22 16:19:33.035975: step: 1344/526, loss: 0.008860246278345585 2023-01-22 16:19:34.105463: step: 1348/526, loss: 0.0036861076951026917 2023-01-22 16:19:35.188208: step: 1352/526, loss: 0.010988442227244377 2023-01-22 16:19:36.250380: step: 1356/526, loss: 0.012536215595901012 2023-01-22 16:19:37.327189: step: 1360/526, loss: 0.0062779588624835014 2023-01-22 16:19:38.378930: step: 1364/526, loss: 0.003220032202079892 2023-01-22 16:19:39.444080: step: 1368/526, loss: 0.018705377355217934 2023-01-22 16:19:40.516867: step: 1372/526, loss: 0.0033201919868588448 2023-01-22 16:19:41.600302: step: 1376/526, loss: 0.002651113783940673 2023-01-22 16:19:42.656843: step: 1380/526, loss: 0.003597776172682643 2023-01-22 16:19:43.713360: step: 1384/526, loss: 0.0337894968688488 2023-01-22 16:19:44.780415: step: 1388/526, loss: 0.009352417662739754 2023-01-22 16:19:45.841225: step: 1392/526, loss: 0.014289254322648048 2023-01-22 16:19:46.911329: step: 1396/526, loss: 0.006687379442155361 2023-01-22 16:19:47.975061: step: 1400/526, loss: 0.03225626051425934 2023-01-22 16:19:49.053910: step: 1404/526, loss: 0.009241082705557346 2023-01-22 16:19:50.142845: step: 1408/526, loss: 0.013390500098466873 2023-01-22 16:19:51.219582: step: 1412/526, loss: 0.048898614943027496 2023-01-22 16:19:52.278044: step: 1416/526, loss: 0.03494878113269806 2023-01-22 16:19:53.334441: step: 1420/526, loss: 0.011786588467657566 2023-01-22 16:19:54.427773: step: 1424/526, loss: 0.003117185551673174 2023-01-22 16:19:55.496142: step: 1428/526, loss: 0.025984065607190132 2023-01-22 16:19:56.570641: step: 1432/526, loss: 0.0054995352402329445 2023-01-22 16:19:57.644994: step: 1436/526, loss: 0.004419961012899876 2023-01-22 16:19:58.728289: step: 1440/526, loss: 0.009918817318975925 2023-01-22 16:19:59.784984: step: 1444/526, loss: 0.040321722626686096 2023-01-22 16:20:00.858982: step: 1448/526, loss: 0.009533429518342018 2023-01-22 16:20:01.935723: step: 1452/526, loss: 0.001707096816971898 2023-01-22 16:20:03.003428: step: 1456/526, loss: 0.009042011573910713 2023-01-22 16:20:04.072837: step: 1460/526, loss: 0.011686024256050587 2023-01-22 16:20:05.146389: step: 1464/526, loss: 0.009117784909904003 2023-01-22 16:20:06.222100: step: 1468/526, loss: 0.0022855165880173445 2023-01-22 16:20:07.320153: step: 1472/526, loss: 0.006800004281103611 2023-01-22 16:20:08.386717: step: 1476/526, loss: 
0.0025610551238059998 2023-01-22 16:20:09.449628: step: 1480/526, loss: 0.00568034965544939 2023-01-22 16:20:10.506496: step: 1484/526, loss: 0.0033750650472939014 2023-01-22 16:20:11.561176: step: 1488/526, loss: 0.006656867917627096 2023-01-22 16:20:12.643459: step: 1492/526, loss: 0.010373681783676147 2023-01-22 16:20:13.704495: step: 1496/526, loss: 0.013088964857161045 2023-01-22 16:20:14.776677: step: 1500/526, loss: 0.005912484135478735 2023-01-22 16:20:15.861181: step: 1504/526, loss: 0.006564716808497906 2023-01-22 16:20:16.945082: step: 1508/526, loss: 0.03038611263036728 2023-01-22 16:20:18.009530: step: 1512/526, loss: 0.004240270704030991 2023-01-22 16:20:19.069683: step: 1516/526, loss: 0.020026564598083496 2023-01-22 16:20:20.163731: step: 1520/526, loss: 0.005455078557133675 2023-01-22 16:20:21.239521: step: 1524/526, loss: 0.009953298605978489 2023-01-22 16:20:22.317041: step: 1528/526, loss: 0.009301455691456795 2023-01-22 16:20:23.398591: step: 1532/526, loss: 0.0074359881691634655 2023-01-22 16:20:24.474722: step: 1536/526, loss: 0.045385610312223434 2023-01-22 16:20:25.530208: step: 1540/526, loss: 0.012754186987876892 2023-01-22 16:20:26.608462: step: 1544/526, loss: 0.0032758621964603662 2023-01-22 16:20:27.683026: step: 1548/526, loss: 0.010862481780350208 2023-01-22 16:20:28.754451: step: 1552/526, loss: 0.03498229756951332 2023-01-22 16:20:29.816400: step: 1556/526, loss: 0.00948853138834238 2023-01-22 16:20:30.874388: step: 1560/526, loss: 0.005905711092054844 2023-01-22 16:20:31.939477: step: 1564/526, loss: 0.036922141909599304 2023-01-22 16:20:33.014584: step: 1568/526, loss: 0.010879021137952805 2023-01-22 16:20:34.073715: step: 1572/526, loss: 0.01162551250308752 2023-01-22 16:20:35.140208: step: 1576/526, loss: 0.003965223673731089 2023-01-22 16:20:36.217633: step: 1580/526, loss: 0.004534436855465174 2023-01-22 16:20:37.277071: step: 1584/526, loss: 0.014405800960958004 2023-01-22 16:20:38.342699: step: 1588/526, loss: 0.006816651206463575 2023-01-22 16:20:39.403163: step: 1592/526, loss: 0.0028753068763762712 2023-01-22 16:20:40.469812: step: 1596/526, loss: 0.007793857250362635 2023-01-22 16:20:41.541486: step: 1600/526, loss: 0.014219994656741619 2023-01-22 16:20:42.593531: step: 1604/526, loss: 0.021587563678622246 2023-01-22 16:20:43.676081: step: 1608/526, loss: 0.014752312563359737 2023-01-22 16:20:44.756114: step: 1612/526, loss: 0.0066559696570038795 2023-01-22 16:20:45.828509: step: 1616/526, loss: 0.009911958128213882 2023-01-22 16:20:46.909897: step: 1620/526, loss: 0.005569026339799166 2023-01-22 16:20:47.972855: step: 1624/526, loss: 0.008063066750764847 2023-01-22 16:20:49.031029: step: 1628/526, loss: 0.009966726414859295 2023-01-22 16:20:50.107431: step: 1632/526, loss: 0.005356654059141874 2023-01-22 16:20:51.167838: step: 1636/526, loss: 0.003811662783846259 2023-01-22 16:20:52.250357: step: 1640/526, loss: 0.0017620675498619676 2023-01-22 16:20:53.330794: step: 1644/526, loss: 0.0027516367845237255 2023-01-22 16:20:54.390343: step: 1648/526, loss: 0.0026889985892921686 2023-01-22 16:20:55.463718: step: 1652/526, loss: 0.012888060882687569 2023-01-22 16:20:56.526909: step: 1656/526, loss: 0.009739807806909084 2023-01-22 16:20:57.604712: step: 1660/526, loss: 0.18117985129356384 2023-01-22 16:20:58.673609: step: 1664/526, loss: 0.0027895020321011543 2023-01-22 16:20:59.726503: step: 1668/526, loss: 0.020923787727952003 2023-01-22 16:21:00.792633: step: 1672/526, loss: 0.040421612560749054 2023-01-22 16:21:01.854774: step: 1676/526, loss: 
0.052533335983753204 2023-01-22 16:21:02.918968: step: 1680/526, loss: 0.005521412938833237 2023-01-22 16:21:03.989469: step: 1684/526, loss: 0.0008487410959787667 2023-01-22 16:21:05.066517: step: 1688/526, loss: 0.006765359081327915 2023-01-22 16:21:06.134938: step: 1692/526, loss: 0.0014767349930480123 2023-01-22 16:21:07.216782: step: 1696/526, loss: 0.04667261987924576 2023-01-22 16:21:08.295777: step: 1700/526, loss: 0.08534608781337738 2023-01-22 16:21:09.383098: step: 1704/526, loss: 0.004594265483319759 2023-01-22 16:21:10.445325: step: 1708/526, loss: 0.005243775900453329 2023-01-22 16:21:11.519966: step: 1712/526, loss: 0.00753947626799345 2023-01-22 16:21:12.579102: step: 1716/526, loss: 0.005114629864692688 2023-01-22 16:21:13.641005: step: 1720/526, loss: 0.006579662673175335 2023-01-22 16:21:14.725002: step: 1724/526, loss: 0.019045347347855568 2023-01-22 16:21:15.798948: step: 1728/526, loss: 0.003987054340541363 2023-01-22 16:21:16.884147: step: 1732/526, loss: 0.004686868283897638 2023-01-22 16:21:17.964364: step: 1736/526, loss: 0.029049349948763847 2023-01-22 16:21:19.031528: step: 1740/526, loss: 0.007744913455098867 2023-01-22 16:21:20.090382: step: 1744/526, loss: 0.006797657813876867 2023-01-22 16:21:21.154883: step: 1748/526, loss: 0.006971979979425669 2023-01-22 16:21:22.234108: step: 1752/526, loss: 0.016374295577406883 2023-01-22 16:21:23.284261: step: 1756/526, loss: 0.011435119435191154 2023-01-22 16:21:24.379261: step: 1760/526, loss: 0.007884442806243896 2023-01-22 16:21:25.445852: step: 1764/526, loss: 0.030283812433481216 2023-01-22 16:21:26.529526: step: 1768/526, loss: 0.010101406835019588 2023-01-22 16:21:27.591314: step: 1772/526, loss: 0.01943332329392433 2023-01-22 16:21:28.660949: step: 1776/526, loss: 0.020612303167581558 2023-01-22 16:21:29.735062: step: 1780/526, loss: 0.0027994103729724884 2023-01-22 16:21:30.812958: step: 1784/526, loss: 0.028179163113236427 2023-01-22 16:21:31.870220: step: 1788/526, loss: 0.0013047674437984824 2023-01-22 16:21:32.943937: step: 1792/526, loss: 0.004112100228667259 2023-01-22 16:21:34.011124: step: 1796/526, loss: 0.007697451859712601 2023-01-22 16:21:35.077965: step: 1800/526, loss: 0.026814941316843033 2023-01-22 16:21:36.144076: step: 1804/526, loss: 0.0 2023-01-22 16:21:37.233101: step: 1808/526, loss: 0.04936361312866211 2023-01-22 16:21:38.302318: step: 1812/526, loss: 0.004547793883830309 2023-01-22 16:21:39.372304: step: 1816/526, loss: 0.005128554534167051 2023-01-22 16:21:40.444542: step: 1820/526, loss: 0.005846178624778986 2023-01-22 16:21:41.527943: step: 1824/526, loss: 0.05496903881430626 2023-01-22 16:21:42.591350: step: 1828/526, loss: 0.0005155499093234539 2023-01-22 16:21:43.663035: step: 1832/526, loss: 0.007163556758314371 2023-01-22 16:21:44.745111: step: 1836/526, loss: 0.014238342642784119 2023-01-22 16:21:45.809669: step: 1840/526, loss: 0.0009088402730412781 2023-01-22 16:21:46.869913: step: 1844/526, loss: 0.010880879126489162 2023-01-22 16:21:47.943375: step: 1848/526, loss: 0.011659126728773117 2023-01-22 16:21:49.016486: step: 1852/526, loss: 0.003695074236020446 2023-01-22 16:21:50.085625: step: 1856/526, loss: 0.0075827776454389095 2023-01-22 16:21:51.171078: step: 1860/526, loss: 0.01746492087841034 2023-01-22 16:21:52.235562: step: 1864/526, loss: 0.0021194086875766516 2023-01-22 16:21:53.315227: step: 1868/526, loss: 0.016487963497638702 2023-01-22 16:21:54.371960: step: 1872/526, loss: 0.010434810072183609 2023-01-22 16:21:55.434440: step: 1876/526, loss: 0.003129052696749568 
2023-01-22 16:21:56.509713: step: 1880/526, loss: 0.02115667052567005 2023-01-22 16:21:57.580901: step: 1884/526, loss: 0.006560357753187418 2023-01-22 16:21:58.647187: step: 1888/526, loss: 0.013516881503164768 2023-01-22 16:21:59.717677: step: 1892/526, loss: 0.0017213800456374884 2023-01-22 16:22:00.783460: step: 1896/526, loss: 0.005959602072834969 2023-01-22 16:22:01.873688: step: 1900/526, loss: 0.0611736923456192 2023-01-22 16:22:02.958500: step: 1904/526, loss: 0.029303476214408875 2023-01-22 16:22:04.036950: step: 1908/526, loss: 0.006181257776916027 2023-01-22 16:22:05.142738: step: 1912/526, loss: 0.0008349527488462627 2023-01-22 16:22:06.211777: step: 1916/526, loss: 0.006332958582788706 2023-01-22 16:22:07.261880: step: 1920/526, loss: 0.0026066889986395836 2023-01-22 16:22:08.353943: step: 1924/526, loss: 0.09447556734085083 2023-01-22 16:22:09.426795: step: 1928/526, loss: 0.00949473213404417 2023-01-22 16:22:10.498533: step: 1932/526, loss: 0.038961321115493774 2023-01-22 16:22:11.563900: step: 1936/526, loss: 0.02917501889169216 2023-01-22 16:22:12.640886: step: 1940/526, loss: 0.0060004303231835365 2023-01-22 16:22:13.711296: step: 1944/526, loss: 0.03455030173063278 2023-01-22 16:22:14.772649: step: 1948/526, loss: 0.024984188377857208 2023-01-22 16:22:15.837367: step: 1952/526, loss: 0.0010579327354207635 2023-01-22 16:22:16.907262: step: 1956/526, loss: 0.012575664557516575 2023-01-22 16:22:17.976271: step: 1960/526, loss: 0.039289750158786774 2023-01-22 16:22:19.043711: step: 1964/526, loss: 0.008157818578183651 2023-01-22 16:22:20.107071: step: 1968/526, loss: 0.0105209955945611 2023-01-22 16:22:21.184040: step: 1972/526, loss: 0.011319060809910297 2023-01-22 16:22:22.262424: step: 1976/526, loss: 0.003198441583663225 2023-01-22 16:22:23.356707: step: 1980/526, loss: 0.008319240994751453 2023-01-22 16:22:24.414288: step: 1984/526, loss: 0.007458826061338186 2023-01-22 16:22:25.473061: step: 1988/526, loss: 0.0017861765809357166 2023-01-22 16:22:26.551775: step: 1992/526, loss: 0.04923545941710472 2023-01-22 16:22:27.618547: step: 1996/526, loss: 0.0041978806257247925 2023-01-22 16:22:28.692933: step: 2000/526, loss: 0.002960208337754011 2023-01-22 16:22:29.794435: step: 2004/526, loss: 0.00424683652818203 2023-01-22 16:22:30.863179: step: 2008/526, loss: 0.0012761231046169996 2023-01-22 16:22:31.938505: step: 2012/526, loss: 0.031041881069540977 2023-01-22 16:22:33.015570: step: 2016/526, loss: 0.04580167308449745 2023-01-22 16:22:34.071543: step: 2020/526, loss: 0.002427198924124241 2023-01-22 16:22:35.132919: step: 2024/526, loss: 0.009471496567130089 2023-01-22 16:22:36.208617: step: 2028/526, loss: 0.02107635699212551 2023-01-22 16:22:37.273950: step: 2032/526, loss: 0.00196745409630239 2023-01-22 16:22:38.333884: step: 2036/526, loss: 0.004283446352928877 2023-01-22 16:22:39.412846: step: 2040/526, loss: 0.004115102346986532 2023-01-22 16:22:40.479683: step: 2044/526, loss: 0.008813424035906792 2023-01-22 16:22:41.566276: step: 2048/526, loss: 0.010435586795210838 2023-01-22 16:22:42.635623: step: 2052/526, loss: 0.008322176523506641 2023-01-22 16:22:43.702708: step: 2056/526, loss: 0.0054909116588532925 2023-01-22 16:22:44.775278: step: 2060/526, loss: 0.0046149869449436665 2023-01-22 16:22:45.846843: step: 2064/526, loss: 0.06510575860738754 2023-01-22 16:22:46.918596: step: 2068/526, loss: 0.009735976345837116 2023-01-22 16:22:47.986045: step: 2072/526, loss: 0.037499263882637024 2023-01-22 16:22:49.050776: step: 2076/526, loss: 0.037874024361371994 
2023-01-22 16:22:50.155199: step: 2080/526, loss: 0.003954877145588398 2023-01-22 16:22:51.225153: step: 2084/526, loss: 0.011672727763652802 2023-01-22 16:22:52.295307: step: 2088/526, loss: 0.00925710890442133 2023-01-22 16:22:53.361176: step: 2092/526, loss: 0.03238888457417488 2023-01-22 16:22:54.435376: step: 2096/526, loss: 0.00904457550495863 2023-01-22 16:22:55.493173: step: 2100/526, loss: 0.006982952356338501 2023-01-22 16:22:56.562917: step: 2104/526, loss: 0.019799476489424706 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3247536307053942, 'r': 0.29702324478178366, 'f1': 0.3102700693756194}, 'combined': 0.22862005111887745, 'stategy': 1, 'epoch': 4} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32957716272349646, 'r': 0.23851375637171612, 'f1': 0.2767468531998787}, 'combined': 0.15095282901811563, 'stategy': 1, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3042858174465627, 'r': 0.3331554395951929, 'f1': 0.31806687801932365}, 'combined': 0.23436506801423845, 'stategy': 1, 'epoch': 4} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3360426113142122, 'r': 0.26655896066735774, 'f1': 0.2972948408259408}, 'combined': 0.162160822268695, 'stategy': 1, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3177294697845545, 'r': 0.33461073193629554, 'f1': 0.32595167417823984}, 'combined': 0.24017491781554512, 'stategy': 1, 'epoch': 4} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3295651634683159, 'r': 0.2723803405834026, 'f1': 0.29825647293882585}, 'combined': 0.16268534887572317, 'stategy': 1, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'stategy': 1, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.3017241379310345, 'f1': 0.3723404255319149}, 'combined': 0.2482269503546099, 'stategy': 1, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31418668446026093, 'r': 0.335052972802024, 'f1': 0.3242845117845118}, 'combined': 0.238946482367535, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34272064379813255, 'r': 0.2649578627716578, 'f1': 0.298863719307969}, 'combined': 0.1630165741679831, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 5 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 16:25:47.069744: step: 4/526, loss: 0.004179727286100388 2023-01-22 16:25:48.146375: step: 8/526, loss: 0.009753537364304066 2023-01-22 16:25:49.208914: step: 12/526, loss: 0.012803785502910614 2023-01-22 16:25:50.283456: step: 16/526, loss: 0.022175660356879234 2023-01-22 16:25:51.345810: step: 20/526, loss: 0.010767531581223011 2023-01-22 16:25:52.419276: step: 24/526, loss: 0.008397763594985008 2023-01-22 16:25:53.496196: step: 28/526, loss: 0.013322950340807438 2023-01-22 16:25:54.590971: step: 32/526, loss: 0.017810318619012833 2023-01-22 16:25:55.640426: step: 36/526, loss: 0.0019666124135255814 2023-01-22 16:25:56.697996: step: 40/526, loss: 0.0003568820538930595 2023-01-22 16:25:57.745771: step: 44/526, loss: 0.008955714292824268 2023-01-22 16:25:58.817271: step: 48/526, loss: 0.027219904586672783 2023-01-22 16:25:59.877800: step: 52/526, loss: 0.005599867086857557 2023-01-22 16:26:00.938522: step: 56/526, loss: 0.012424616143107414 2023-01-22 16:26:02.007621: step: 60/526, loss: 0.01338270679116249 2023-01-22 16:26:03.063676: step: 64/526, loss: 0.011990762315690517 2023-01-22 16:26:04.136180: step: 68/526, loss: 0.026391271501779556 2023-01-22 16:26:05.196137: step: 72/526, loss: 0.009049460291862488 2023-01-22 16:26:06.242786: step: 76/526, loss: 0.0063121626153588295 2023-01-22 16:26:07.305429: step: 80/526, loss: 0.01013687252998352 2023-01-22 16:26:08.379960: step: 84/526, loss: 0.007272184826433659 2023-01-22 16:26:09.459778: step: 88/526, loss: 0.005076649133116007 2023-01-22 16:26:10.541617: step: 92/526, loss: 0.006101812701672316 2023-01-22 16:26:11.596827: step: 96/526, loss: 0.0061790477484464645 2023-01-22 16:26:12.673126: step: 100/526, loss: 0.010714037343859673 2023-01-22 16:26:13.765372: step: 104/526, loss: 0.0031410446390509605 2023-01-22 16:26:14.837529: step: 108/526, loss: 0.010581405833363533 2023-01-22 16:26:15.902732: step: 112/526, loss: 
0.005645041819661856 2023-01-22 16:26:16.968971: step: 116/526, loss: 0.011688296683132648 2023-01-22 16:26:18.060606: step: 120/526, loss: 0.008537991903722286 2023-01-22 16:26:19.120871: step: 124/526, loss: 0.003914898727089167 2023-01-22 16:26:20.192537: step: 128/526, loss: 0.019281161949038506 2023-01-22 16:26:21.254791: step: 132/526, loss: 0.009023968130350113 2023-01-22 16:26:22.338906: step: 136/526, loss: 0.019723786041140556 2023-01-22 16:26:23.408013: step: 140/526, loss: 0.0014853033935651183 2023-01-22 16:26:24.472673: step: 144/526, loss: 0.0001279439020436257 2023-01-22 16:26:25.553043: step: 148/526, loss: 0.0023864794056862593 2023-01-22 16:26:26.624496: step: 152/526, loss: 0.008895018137991428 2023-01-22 16:26:27.683922: step: 156/526, loss: 0.005564650055021048 2023-01-22 16:26:28.761015: step: 160/526, loss: 0.012530139647424221 2023-01-22 16:26:29.843884: step: 164/526, loss: 0.003924074117094278 2023-01-22 16:26:30.921189: step: 168/526, loss: 0.012075257487595081 2023-01-22 16:26:31.990354: step: 172/526, loss: 0.006730757653713226 2023-01-22 16:26:33.072256: step: 176/526, loss: 0.005005224607884884 2023-01-22 16:26:34.147617: step: 180/526, loss: 0.007279685698449612 2023-01-22 16:26:35.218884: step: 184/526, loss: 0.029033029451966286 2023-01-22 16:26:36.291440: step: 188/526, loss: 0.0035127755254507065 2023-01-22 16:26:37.369176: step: 192/526, loss: 0.018917713314294815 2023-01-22 16:26:38.439706: step: 196/526, loss: 0.0007094627362675965 2023-01-22 16:26:39.510416: step: 200/526, loss: 0.015020279213786125 2023-01-22 16:26:40.590078: step: 204/526, loss: 0.010218324139714241 2023-01-22 16:26:41.685998: step: 208/526, loss: 0.015337863937020302 2023-01-22 16:26:42.757124: step: 212/526, loss: 0.004920123610645533 2023-01-22 16:26:43.817954: step: 216/526, loss: 0.003300401382148266 2023-01-22 16:26:44.887428: step: 220/526, loss: 0.019213594496250153 2023-01-22 16:26:45.993260: step: 224/526, loss: 0.0035804021172225475 2023-01-22 16:26:47.066827: step: 228/526, loss: 0.00969106424599886 2023-01-22 16:26:48.134827: step: 232/526, loss: 0.013144612312316895 2023-01-22 16:26:49.213975: step: 236/526, loss: 0.0018158546881750226 2023-01-22 16:26:50.290918: step: 240/526, loss: 0.007170368451625109 2023-01-22 16:26:51.357920: step: 244/526, loss: 0.004397746175527573 2023-01-22 16:26:52.425737: step: 248/526, loss: 0.0020392145961523056 2023-01-22 16:26:53.514804: step: 252/526, loss: 0.017630061134696007 2023-01-22 16:26:54.576174: step: 256/526, loss: 0.005034159403294325 2023-01-22 16:26:55.642238: step: 260/526, loss: 0.012424739077687263 2023-01-22 16:26:56.721094: step: 264/526, loss: 0.00014680727326776832 2023-01-22 16:26:57.815189: step: 268/526, loss: 0.009586957283318043 2023-01-22 16:26:58.901313: step: 272/526, loss: 0.005599708762019873 2023-01-22 16:26:59.979508: step: 276/526, loss: 0.0 2023-01-22 16:27:01.067369: step: 280/526, loss: 0.0060498532839119434 2023-01-22 16:27:02.154613: step: 284/526, loss: 0.008151538670063019 2023-01-22 16:27:03.245453: step: 288/526, loss: 0.0022136939223855734 2023-01-22 16:27:04.325987: step: 292/526, loss: 0.006422633770853281 2023-01-22 16:27:05.392928: step: 296/526, loss: 0.01070362702012062 2023-01-22 16:27:06.474273: step: 300/526, loss: 0.016296258196234703 2023-01-22 16:27:07.548391: step: 304/526, loss: 0.015589741058647633 2023-01-22 16:27:08.622565: step: 308/526, loss: 0.011069845408201218 2023-01-22 16:27:09.702530: step: 312/526, loss: 0.008630359545350075 2023-01-22 16:27:10.773548: step: 
316/526, loss: 0.02647574618458748 2023-01-22 16:27:11.833877: step: 320/526, loss: 0.014696372672915459 2023-01-22 16:27:12.927574: step: 324/526, loss: 0.005338984541594982 2023-01-22 16:27:14.003310: step: 328/526, loss: 0.032988324761390686 2023-01-22 16:27:15.119476: step: 332/526, loss: 0.0032164589501917362 2023-01-22 16:27:16.197580: step: 336/526, loss: 0.006383334752172232 2023-01-22 16:27:17.277147: step: 340/526, loss: 0.027826670557260513 2023-01-22 16:27:18.395951: step: 344/526, loss: 0.0038276338018476963 2023-01-22 16:27:19.490785: step: 348/526, loss: 0.005485524423420429 2023-01-22 16:27:20.546264: step: 352/526, loss: 0.021132487803697586 2023-01-22 16:27:21.609659: step: 356/526, loss: 0.021186646074056625 2023-01-22 16:27:22.678946: step: 360/526, loss: 0.00475958501920104 2023-01-22 16:27:23.767321: step: 364/526, loss: 0.006460696458816528 2023-01-22 16:27:24.844622: step: 368/526, loss: 0.008834296837449074 2023-01-22 16:27:25.912801: step: 372/526, loss: 0.021211925894021988 2023-01-22 16:27:26.986620: step: 376/526, loss: 0.07888615876436234 2023-01-22 16:27:28.061777: step: 380/526, loss: 0.004614879377186298 2023-01-22 16:27:29.158445: step: 384/526, loss: 0.005075387191027403 2023-01-22 16:27:30.227718: step: 388/526, loss: 0.022108184173703194 2023-01-22 16:27:31.308585: step: 392/526, loss: 0.0026717737782746553 2023-01-22 16:27:32.383156: step: 396/526, loss: 0.009287252090871334 2023-01-22 16:27:33.449139: step: 400/526, loss: 0.016370058059692383 2023-01-22 16:27:34.516366: step: 404/526, loss: 0.007083205506205559 2023-01-22 16:27:35.585995: step: 408/526, loss: 0.00928169209510088 2023-01-22 16:27:36.664699: step: 412/526, loss: 0.00436942745000124 2023-01-22 16:27:37.738612: step: 416/526, loss: 0.01769758202135563 2023-01-22 16:27:38.805161: step: 420/526, loss: 0.001189651433378458 2023-01-22 16:27:39.879824: step: 424/526, loss: 0.01525026559829712 2023-01-22 16:27:40.956506: step: 428/526, loss: 0.00677567208185792 2023-01-22 16:27:42.008121: step: 432/526, loss: 0.020197875797748566 2023-01-22 16:27:43.093743: step: 436/526, loss: 0.0060077751986682415 2023-01-22 16:27:44.167403: step: 440/526, loss: 0.007549986243247986 2023-01-22 16:27:45.235970: step: 444/526, loss: 0.002993487287312746 2023-01-22 16:27:46.305787: step: 448/526, loss: 0.05734114721417427 2023-01-22 16:27:47.364476: step: 452/526, loss: 0.003148432355374098 2023-01-22 16:27:48.454499: step: 456/526, loss: 0.006887455936521292 2023-01-22 16:27:49.518576: step: 460/526, loss: 0.004125951323658228 2023-01-22 16:27:50.605293: step: 464/526, loss: 0.010618254542350769 2023-01-22 16:27:51.657508: step: 468/526, loss: 0.011702966876327991 2023-01-22 16:27:52.709013: step: 472/526, loss: 0.006534098647534847 2023-01-22 16:27:53.806775: step: 476/526, loss: 0.0038337104488164186 2023-01-22 16:27:54.863954: step: 480/526, loss: 0.0068637914955616 2023-01-22 16:27:55.932498: step: 484/526, loss: 0.0014776729512959719 2023-01-22 16:27:56.978189: step: 488/526, loss: 0.024531979113817215 2023-01-22 16:27:58.040236: step: 492/526, loss: 0.0010675002122297883 2023-01-22 16:27:59.106801: step: 496/526, loss: 0.004829818848520517 2023-01-22 16:28:00.180028: step: 500/526, loss: 0.017555639147758484 2023-01-22 16:28:01.256184: step: 504/526, loss: 0.0015893825329840183 2023-01-22 16:28:02.333159: step: 508/526, loss: 0.0165556650608778 2023-01-22 16:28:03.398897: step: 512/526, loss: 0.014914087019860744 2023-01-22 16:28:04.476272: step: 516/526, loss: 0.0004235340456943959 2023-01-22 
16:28:05.531989: step: 520/526, loss: 0.004362288862466812 2023-01-22 16:28:06.581144: step: 524/526, loss: 0.003972609061747789 2023-01-22 16:28:07.657515: step: 528/526, loss: 0.008093221113085747 2023-01-22 16:28:08.709962: step: 532/526, loss: 0.005594367161393166 2023-01-22 16:28:09.804521: step: 536/526, loss: 0.013115398585796356 2023-01-22 16:28:10.887078: step: 540/526, loss: 0.002219150774180889 2023-01-22 16:28:11.964677: step: 544/526, loss: 0.0037508830428123474 2023-01-22 16:28:13.056016: step: 548/526, loss: 0.01808079145848751 2023-01-22 16:28:14.112174: step: 552/526, loss: 0.02747494913637638 2023-01-22 16:28:15.165849: step: 556/526, loss: 0.0013319260906428099 2023-01-22 16:28:16.224759: step: 560/526, loss: 0.003826683387160301 2023-01-22 16:28:17.288762: step: 564/526, loss: 0.0022062547504901886 2023-01-22 16:28:18.353533: step: 568/526, loss: 0.028814613819122314 2023-01-22 16:28:19.417321: step: 572/526, loss: 5.136105028213933e-05 2023-01-22 16:28:20.496307: step: 576/526, loss: 0.0 2023-01-22 16:28:21.555993: step: 580/526, loss: 0.0004023563815280795 2023-01-22 16:28:22.611582: step: 584/526, loss: 0.00519277760758996 2023-01-22 16:28:23.679308: step: 588/526, loss: 0.02121228538453579 2023-01-22 16:28:24.747794: step: 592/526, loss: 0.00031139684142544866 2023-01-22 16:28:25.825468: step: 596/526, loss: 0.007170545868575573 2023-01-22 16:28:26.893720: step: 600/526, loss: 0.05462522432208061 2023-01-22 16:28:27.963517: step: 604/526, loss: 0.006987426895648241 2023-01-22 16:28:29.025921: step: 608/526, loss: 0.015615695156157017 2023-01-22 16:28:30.092617: step: 612/526, loss: 0.008923091925680637 2023-01-22 16:28:31.156137: step: 616/526, loss: 0.014183570630848408 2023-01-22 16:28:32.231343: step: 620/526, loss: 0.00889973621815443 2023-01-22 16:28:33.332309: step: 624/526, loss: 0.02296479046344757 2023-01-22 16:28:34.437881: step: 628/526, loss: 0.04363659769296646 2023-01-22 16:28:35.499697: step: 632/526, loss: 0.002031927229836583 2023-01-22 16:28:36.574808: step: 636/526, loss: 0.007867912761867046 2023-01-22 16:28:37.644220: step: 640/526, loss: 0.01001647487282753 2023-01-22 16:28:38.724026: step: 644/526, loss: 0.03372461348772049 2023-01-22 16:28:39.793100: step: 648/526, loss: 0.030671315267682076 2023-01-22 16:28:40.867589: step: 652/526, loss: 0.004191583022475243 2023-01-22 16:28:41.940510: step: 656/526, loss: 0.0054128230549395084 2023-01-22 16:28:42.993371: step: 660/526, loss: 0.009346045553684235 2023-01-22 16:28:44.068449: step: 664/526, loss: 0.017420567572116852 2023-01-22 16:28:45.141327: step: 668/526, loss: 0.008749575354158878 2023-01-22 16:28:46.206972: step: 672/526, loss: 0.006593168713152409 2023-01-22 16:28:47.268664: step: 676/526, loss: 0.0041855741292238235 2023-01-22 16:28:48.322551: step: 680/526, loss: 0.0022743400186300278 2023-01-22 16:28:49.408589: step: 684/526, loss: 0.008821789175271988 2023-01-22 16:28:50.485619: step: 688/526, loss: 0.00741354376077652 2023-01-22 16:28:51.544072: step: 692/526, loss: 0.000775563414208591 2023-01-22 16:28:52.617981: step: 696/526, loss: 0.002990931738168001 2023-01-22 16:28:53.680004: step: 700/526, loss: 0.006738306023180485 2023-01-22 16:28:54.740163: step: 704/526, loss: 0.0076050241477787495 2023-01-22 16:28:55.798272: step: 708/526, loss: 0.000842511944938451 2023-01-22 16:28:56.866895: step: 712/526, loss: 0.007258014753460884 2023-01-22 16:28:57.927115: step: 716/526, loss: 0.004061924759298563 2023-01-22 16:28:59.000374: step: 720/526, loss: 0.01440152432769537 2023-01-22 
16:29:00.064849: step: 724/526, loss: 0.004295687656849623 2023-01-22 16:29:01.125077: step: 728/526, loss: 0.029901940375566483 2023-01-22 16:29:02.185151: step: 732/526, loss: 0.011741629801690578 2023-01-22 16:29:03.253913: step: 736/526, loss: 0.015979982912540436 2023-01-22 16:29:04.299254: step: 740/526, loss: 0.003879495430737734 2023-01-22 16:29:05.364819: step: 744/526, loss: 0.0013728139456361532 2023-01-22 16:29:06.428988: step: 748/526, loss: 0.0025152782909572124 2023-01-22 16:29:07.514473: step: 752/526, loss: 0.00845578033477068 2023-01-22 16:29:08.592949: step: 756/526, loss: 0.015433473512530327 2023-01-22 16:29:09.658211: step: 760/526, loss: 0.012670719064772129 2023-01-22 16:29:10.719093: step: 764/526, loss: 0.013586791232228279 2023-01-22 16:29:11.777668: step: 768/526, loss: 0.004927001893520355 2023-01-22 16:29:12.868148: step: 772/526, loss: 0.005283229984343052 2023-01-22 16:29:13.951406: step: 776/526, loss: 0.002940341830253601 2023-01-22 16:29:15.010418: step: 780/526, loss: 0.017641184851527214 2023-01-22 16:29:16.089092: step: 784/526, loss: 0.015966741368174553 2023-01-22 16:29:17.133612: step: 788/526, loss: 1.613814129086677e-05 2023-01-22 16:29:18.202498: step: 792/526, loss: 0.014263163320720196 2023-01-22 16:29:19.280447: step: 796/526, loss: 0.0054789320565760136 2023-01-22 16:29:20.333336: step: 800/526, loss: 0.01663241907954216 2023-01-22 16:29:21.384467: step: 804/526, loss: 0.0024871286004781723 2023-01-22 16:29:22.458402: step: 808/526, loss: 0.0029280565213412046 2023-01-22 16:29:23.517546: step: 812/526, loss: 0.004747483879327774 2023-01-22 16:29:24.607134: step: 816/526, loss: 0.017158519476652145 2023-01-22 16:29:25.677749: step: 820/526, loss: 0.004833065904676914 2023-01-22 16:29:26.739994: step: 824/526, loss: 0.0025594686158001423 2023-01-22 16:29:27.844205: step: 828/526, loss: 0.004094590898603201 2023-01-22 16:29:28.909176: step: 832/526, loss: 0.00923250149935484 2023-01-22 16:29:30.004892: step: 836/526, loss: 0.006786765996366739 2023-01-22 16:29:31.089016: step: 840/526, loss: 0.020672639831900597 2023-01-22 16:29:32.156364: step: 844/526, loss: 0.037219345569610596 2023-01-22 16:29:33.247776: step: 848/526, loss: 0.0044114491902291775 2023-01-22 16:29:34.319750: step: 852/526, loss: 0.0033491034992039204 2023-01-22 16:29:35.396361: step: 856/526, loss: 0.005475528072565794 2023-01-22 16:29:36.442674: step: 860/526, loss: 0.09362396597862244 2023-01-22 16:29:37.496705: step: 864/526, loss: 0.004950490314513445 2023-01-22 16:29:38.556080: step: 868/526, loss: 0.039368707686662674 2023-01-22 16:29:39.624697: step: 872/526, loss: 0.008840540423989296 2023-01-22 16:29:40.675051: step: 876/526, loss: 0.0014541647396981716 2023-01-22 16:29:41.755272: step: 880/526, loss: 0.005111478269100189 2023-01-22 16:29:42.847277: step: 884/526, loss: 0.015759732574224472 2023-01-22 16:29:43.901159: step: 888/526, loss: 0.004627150017768145 2023-01-22 16:29:44.958652: step: 892/526, loss: 0.004093928728252649 2023-01-22 16:29:46.017921: step: 896/526, loss: 0.0012903203023597598 2023-01-22 16:29:47.106371: step: 900/526, loss: 0.005743737798184156 2023-01-22 16:29:48.177585: step: 904/526, loss: 0.002530170138925314 2023-01-22 16:29:49.233175: step: 908/526, loss: 0.0028703827410936356 2023-01-22 16:29:50.321798: step: 912/526, loss: 0.03889927640557289 2023-01-22 16:29:51.398276: step: 916/526, loss: 0.009523030370473862 2023-01-22 16:29:52.455268: step: 920/526, loss: 0.0016209364403039217 2023-01-22 16:29:53.514013: step: 924/526, loss: 
0.02691265381872654 2023-01-22 16:29:54.583321: step: 928/526, loss: 0.004761083982884884 2023-01-22 16:29:55.651335: step: 932/526, loss: 0.0039682695642113686 2023-01-22 16:29:56.708255: step: 936/526, loss: 0.0037177246995270252 2023-01-22 16:29:57.776969: step: 940/526, loss: 0.020434020087122917 2023-01-22 16:29:58.845343: step: 944/526, loss: 0.025928188115358353 2023-01-22 16:29:59.896945: step: 948/526, loss: 0.006867233198136091 2023-01-22 16:30:00.964240: step: 952/526, loss: 0.005631602369248867 2023-01-22 16:30:02.060970: step: 956/526, loss: 0.022991664707660675 2023-01-22 16:30:03.123230: step: 960/526, loss: 0.011087313294410706 2023-01-22 16:30:04.185936: step: 964/526, loss: 0.06224283203482628 2023-01-22 16:30:05.259983: step: 968/526, loss: 0.012012952007353306 2023-01-22 16:30:06.324177: step: 972/526, loss: 0.03288606181740761 2023-01-22 16:30:07.384927: step: 976/526, loss: 0.011233695782721043 2023-01-22 16:30:08.442937: step: 980/526, loss: 0.014032876119017601 2023-01-22 16:30:09.510018: step: 984/526, loss: 0.0059246402233839035 2023-01-22 16:30:10.573071: step: 988/526, loss: 0.0023051395546644926 2023-01-22 16:30:11.650013: step: 992/526, loss: 0.011434398591518402 2023-01-22 16:30:12.713629: step: 996/526, loss: 0.00955211091786623 2023-01-22 16:30:13.808910: step: 1000/526, loss: 0.008953534997999668 2023-01-22 16:30:14.867601: step: 1004/526, loss: 0.0033022526185959578 2023-01-22 16:30:15.943578: step: 1008/526, loss: 0.0066132317297160625 2023-01-22 16:30:17.009581: step: 1012/526, loss: 0.00418096873909235 2023-01-22 16:30:18.094484: step: 1016/526, loss: 0.0013415899593383074 2023-01-22 16:30:19.154660: step: 1020/526, loss: 0.0038441969081759453 2023-01-22 16:30:20.221780: step: 1024/526, loss: 0.010874784551560879 2023-01-22 16:30:21.301176: step: 1028/526, loss: 0.00413973443210125 2023-01-22 16:30:22.352944: step: 1032/526, loss: 0.03429199382662773 2023-01-22 16:30:23.396617: step: 1036/526, loss: 0.007938297465443611 2023-01-22 16:30:24.465098: step: 1040/526, loss: 0.0060849254950881 2023-01-22 16:30:25.536947: step: 1044/526, loss: 0.009710460901260376 2023-01-22 16:30:26.591508: step: 1048/526, loss: 0.01117774099111557 2023-01-22 16:30:27.664375: step: 1052/526, loss: 0.0043641552329063416 2023-01-22 16:30:28.741262: step: 1056/526, loss: 0.028264088556170464 2023-01-22 16:30:29.800708: step: 1060/526, loss: 0.009676550514996052 2023-01-22 16:30:30.858039: step: 1064/526, loss: 0.005418520886451006 2023-01-22 16:30:31.912103: step: 1068/526, loss: 0.012382950633764267 2023-01-22 16:30:32.982916: step: 1072/526, loss: 0.01254848763346672 2023-01-22 16:30:34.044501: step: 1076/526, loss: 5.0700480642262846e-05 2023-01-22 16:30:35.112172: step: 1080/526, loss: 0.0033137549180537462 2023-01-22 16:30:36.189545: step: 1084/526, loss: 0.008108108304440975 2023-01-22 16:30:37.245690: step: 1088/526, loss: 0.014603527262806892 2023-01-22 16:30:38.329537: step: 1092/526, loss: 0.010312979109585285 2023-01-22 16:30:39.400806: step: 1096/526, loss: 0.005603244062513113 2023-01-22 16:30:40.461082: step: 1100/526, loss: 0.025438936427235603 2023-01-22 16:30:41.509221: step: 1104/526, loss: 0.0005932244821451604 2023-01-22 16:30:42.559474: step: 1108/526, loss: 0.026346363127231598 2023-01-22 16:30:43.629150: step: 1112/526, loss: 0.0018432587385177612 2023-01-22 16:30:44.684624: step: 1116/526, loss: 0.007865209132432938 2023-01-22 16:30:45.739278: step: 1120/526, loss: 0.014973930083215237 2023-01-22 16:30:46.812489: step: 1124/526, loss: 
0.005887983366847038 2023-01-22 16:30:47.891789: step: 1128/526, loss: 0.0100959911942482 2023-01-22 16:30:48.949999: step: 1132/526, loss: 0.0026406978722661734 2023-01-22 16:30:50.044783: step: 1136/526, loss: 0.0018007908947765827 2023-01-22 16:30:51.119098: step: 1140/526, loss: 0.014743267558515072 2023-01-22 16:30:52.182290: step: 1144/526, loss: 0.004292478319257498 2023-01-22 16:30:53.260396: step: 1148/526, loss: 0.00288110482506454 2023-01-22 16:30:54.316496: step: 1152/526, loss: 0.015256117098033428 2023-01-22 16:30:55.390014: step: 1156/526, loss: 0.01817067340016365 2023-01-22 16:30:56.454784: step: 1160/526, loss: 4.900084968539886e-05 2023-01-22 16:30:57.519054: step: 1164/526, loss: 0.011496701277792454 2023-01-22 16:30:58.585290: step: 1168/526, loss: 0.0056101372465491295 2023-01-22 16:30:59.646438: step: 1172/526, loss: 0.016931403428316116 2023-01-22 16:31:00.733106: step: 1176/526, loss: 0.04071391746401787 2023-01-22 16:31:01.791261: step: 1180/526, loss: 0.0033191132824867964 2023-01-22 16:31:02.852939: step: 1184/526, loss: 0.021475009620189667 2023-01-22 16:31:03.929820: step: 1188/526, loss: 0.0073226201348006725 2023-01-22 16:31:04.984476: step: 1192/526, loss: 0.0016866996884346008 2023-01-22 16:31:06.042055: step: 1196/526, loss: 0.006712088827043772 2023-01-22 16:31:07.107746: step: 1200/526, loss: 0.020988360047340393 2023-01-22 16:31:08.179687: step: 1204/526, loss: 0.00301780691370368 2023-01-22 16:31:09.229952: step: 1208/526, loss: 0.0002655574062373489 2023-01-22 16:31:10.289864: step: 1212/526, loss: 0.004946304950863123 2023-01-22 16:31:11.352679: step: 1216/526, loss: 0.01819589175283909 2023-01-22 16:31:12.435532: step: 1220/526, loss: 0.06364208459854126 2023-01-22 16:31:13.502559: step: 1224/526, loss: 0.01923818141222 2023-01-22 16:31:14.575214: step: 1228/526, loss: 0.0029363848734647036 2023-01-22 16:31:15.640843: step: 1232/526, loss: 0.003072553314268589 2023-01-22 16:31:16.702255: step: 1236/526, loss: 0.00881422683596611 2023-01-22 16:31:17.764689: step: 1240/526, loss: 0.015196265652775764 2023-01-22 16:31:18.821812: step: 1244/526, loss: 0.00371684436686337 2023-01-22 16:31:19.868676: step: 1248/526, loss: 0.004852376878261566 2023-01-22 16:31:20.928435: step: 1252/526, loss: 0.016384728252887726 2023-01-22 16:31:21.990009: step: 1256/526, loss: 0.0049131265841424465 2023-01-22 16:31:23.059499: step: 1260/526, loss: 0.026592087000608444 2023-01-22 16:31:24.122584: step: 1264/526, loss: 0.0047128344886004925 2023-01-22 16:31:25.190815: step: 1268/526, loss: 0.04022995010018349 2023-01-22 16:31:26.260211: step: 1272/526, loss: 0.042646054178476334 2023-01-22 16:31:27.332454: step: 1276/526, loss: 0.005116072949022055 2023-01-22 16:31:28.393366: step: 1280/526, loss: 0.005358300171792507 2023-01-22 16:31:29.465623: step: 1284/526, loss: 0.0066224741749465466 2023-01-22 16:31:30.533031: step: 1288/526, loss: 0.02043723315000534 2023-01-22 16:31:31.606308: step: 1292/526, loss: 0.033883560448884964 2023-01-22 16:31:32.666577: step: 1296/526, loss: 0.04861082509160042 2023-01-22 16:31:33.741805: step: 1300/526, loss: 0.005043553188443184 2023-01-22 16:31:34.810106: step: 1304/526, loss: 0.014144033193588257 2023-01-22 16:31:35.877698: step: 1308/526, loss: 0.03527822718024254 2023-01-22 16:31:36.924503: step: 1312/526, loss: 0.015718458220362663 2023-01-22 16:31:38.004736: step: 1316/526, loss: 0.0011573919327929616 2023-01-22 16:31:39.073045: step: 1320/526, loss: 0.010420053265988827 2023-01-22 16:31:40.126324: step: 1324/526, loss: 
0.008654913865029812 2023-01-22 16:31:41.180748: step: 1328/526, loss: 0.01659526489675045 2023-01-22 16:31:42.264778: step: 1332/526, loss: 0.007835150696337223 2023-01-22 16:31:43.337395: step: 1336/526, loss: 0.0 2023-01-22 16:31:44.396530: step: 1340/526, loss: 0.004185378551483154 2023-01-22 16:31:45.468856: step: 1344/526, loss: 0.0023382902145385742 2023-01-22 16:31:46.528344: step: 1348/526, loss: 0.0475771389901638 2023-01-22 16:31:47.584197: step: 1352/526, loss: 0.0010176255600526929 2023-01-22 16:31:48.642272: step: 1356/526, loss: 0.004088009241968393 2023-01-22 16:31:49.711181: step: 1360/526, loss: 0.0069541484117507935 2023-01-22 16:31:50.772339: step: 1364/526, loss: 0.0013889835681766272 2023-01-22 16:31:51.827759: step: 1368/526, loss: 0.030257955193519592 2023-01-22 16:31:52.906045: step: 1372/526, loss: 0.005183276254683733 2023-01-22 16:31:53.967234: step: 1376/526, loss: 0.04388915374875069 2023-01-22 16:31:55.036813: step: 1380/526, loss: 0.007960905320942402 2023-01-22 16:31:56.119900: step: 1384/526, loss: 0.015411360189318657 2023-01-22 16:31:57.203746: step: 1388/526, loss: 0.011983797885477543 2023-01-22 16:31:58.275155: step: 1392/526, loss: 0.0038152674678713083 2023-01-22 16:31:59.346760: step: 1396/526, loss: 0.0023523501586169004 2023-01-22 16:32:00.430462: step: 1400/526, loss: 0.009872794151306152 2023-01-22 16:32:01.507852: step: 1404/526, loss: 0.045891087502241135 2023-01-22 16:32:02.566915: step: 1408/526, loss: 0.0015547976363450289 2023-01-22 16:32:03.638532: step: 1412/526, loss: 0.004930392373353243 2023-01-22 16:32:04.706127: step: 1416/526, loss: 0.0009030046639963984 2023-01-22 16:32:05.766181: step: 1420/526, loss: 0.004493011627346277 2023-01-22 16:32:06.830572: step: 1424/526, loss: 0.04012685269117355 2023-01-22 16:32:07.911087: step: 1428/526, loss: 0.002666450571268797 2023-01-22 16:32:08.959642: step: 1432/526, loss: 0.004061040468513966 2023-01-22 16:32:10.024028: step: 1436/526, loss: 0.00790204294025898 2023-01-22 16:32:11.095845: step: 1440/526, loss: 0.007304815575480461 2023-01-22 16:32:12.170682: step: 1444/526, loss: 0.027478881180286407 2023-01-22 16:32:13.229896: step: 1448/526, loss: 0.003109800862148404 2023-01-22 16:32:14.304743: step: 1452/526, loss: 0.005814953241497278 2023-01-22 16:32:15.380209: step: 1456/526, loss: 0.03077506460249424 2023-01-22 16:32:16.449372: step: 1460/526, loss: 0.031066054478287697 2023-01-22 16:32:17.516956: step: 1464/526, loss: 0.004671929404139519 2023-01-22 16:32:18.597397: step: 1468/526, loss: 0.009432507678866386 2023-01-22 16:32:19.664170: step: 1472/526, loss: 0.004772165324538946 2023-01-22 16:32:20.732956: step: 1476/526, loss: 0.008943991735577583 2023-01-22 16:32:21.803280: step: 1480/526, loss: 0.022126391530036926 2023-01-22 16:32:22.872359: step: 1484/526, loss: 0.00010280396963935345 2023-01-22 16:32:23.938553: step: 1488/526, loss: 0.0018554840935394168 2023-01-22 16:32:25.013669: step: 1492/526, loss: 0.0035776132717728615 2023-01-22 16:32:26.075581: step: 1496/526, loss: 0.0030853047501295805 2023-01-22 16:32:27.145018: step: 1500/526, loss: 0.0024554000701755285 2023-01-22 16:32:28.226572: step: 1504/526, loss: 0.015467529185116291 2023-01-22 16:32:29.287358: step: 1508/526, loss: 0.01205506268888712 2023-01-22 16:32:30.347570: step: 1512/526, loss: 0.011453481391072273 2023-01-22 16:32:31.420575: step: 1516/526, loss: 0.02545788325369358 2023-01-22 16:32:32.488192: step: 1520/526, loss: 0.002958260476589203 2023-01-22 16:32:33.553374: step: 1524/526, loss: 
0.01944192685186863 2023-01-22 16:32:34.613781: step: 1528/526, loss: 0.022666987031698227 2023-01-22 16:32:35.669229: step: 1532/526, loss: 0.028071008622646332 2023-01-22 16:32:36.723784: step: 1536/526, loss: 0.011643345467746258 2023-01-22 16:32:37.798227: step: 1540/526, loss: 0.005877944175153971 2023-01-22 16:32:38.860816: step: 1544/526, loss: 0.0032326069194823503 2023-01-22 16:32:39.933824: step: 1548/526, loss: 0.004629484377801418 2023-01-22 16:32:40.981448: step: 1552/526, loss: 0.0 2023-01-22 16:32:42.038260: step: 1556/526, loss: 0.007165815215557814 2023-01-22 16:32:43.105016: step: 1560/526, loss: 0.0036267568357288837 2023-01-22 16:32:44.172444: step: 1564/526, loss: 0.0032491059973835945 2023-01-22 16:32:45.246349: step: 1568/526, loss: 0.0034492635168135166 2023-01-22 16:32:46.312463: step: 1572/526, loss: 0.0032850925344973803 2023-01-22 16:32:47.383579: step: 1576/526, loss: 0.004286310635507107 2023-01-22 16:32:48.476229: step: 1580/526, loss: 0.004146732855588198 2023-01-22 16:32:49.554979: step: 1584/526, loss: 0.004961901810020208 2023-01-22 16:32:50.621490: step: 1588/526, loss: 0.000617970887105912 2023-01-22 16:32:51.686926: step: 1592/526, loss: 0.023812079802155495 2023-01-22 16:32:52.756194: step: 1596/526, loss: 0.009973493404686451 2023-01-22 16:32:53.810582: step: 1600/526, loss: 0.005224699154496193 2023-01-22 16:32:54.868256: step: 1604/526, loss: 0.013935333117842674 2023-01-22 16:32:55.943910: step: 1608/526, loss: 0.007755911909043789 2023-01-22 16:32:57.014986: step: 1612/526, loss: 0.009887021966278553 2023-01-22 16:32:58.061844: step: 1616/526, loss: 0.002896562684327364 2023-01-22 16:32:59.137395: step: 1620/526, loss: 0.00788772851228714 2023-01-22 16:33:00.229945: step: 1624/526, loss: 0.0056227995082736015 2023-01-22 16:33:01.301186: step: 1628/526, loss: 0.0067305234260857105 2023-01-22 16:33:02.367689: step: 1632/526, loss: 0.03223757445812225 2023-01-22 16:33:03.423812: step: 1636/526, loss: 0.01878163404762745 2023-01-22 16:33:04.511638: step: 1640/526, loss: 0.003956172615289688 2023-01-22 16:33:05.582309: step: 1644/526, loss: 0.006648341193795204 2023-01-22 16:33:06.648924: step: 1648/526, loss: 0.010734629817306995 2023-01-22 16:33:07.719965: step: 1652/526, loss: 0.006555869244039059 2023-01-22 16:33:08.791512: step: 1656/526, loss: 0.016228245571255684 2023-01-22 16:33:09.862448: step: 1660/526, loss: 0.015174386091530323 2023-01-22 16:33:10.912067: step: 1664/526, loss: 0.005016935057938099 2023-01-22 16:33:11.983149: step: 1668/526, loss: 0.008743592537939548 2023-01-22 16:33:13.073430: step: 1672/526, loss: 0.005999026820063591 2023-01-22 16:33:14.152125: step: 1676/526, loss: 0.0069392831064760685 2023-01-22 16:33:15.220091: step: 1680/526, loss: 0.00973892118781805 2023-01-22 16:33:16.271288: step: 1684/526, loss: 0.035884082317352295 2023-01-22 16:33:17.328873: step: 1688/526, loss: 0.0021622362546622753 2023-01-22 16:33:18.383298: step: 1692/526, loss: 0.005209111142903566 2023-01-22 16:33:19.453267: step: 1696/526, loss: 0.005372251849621534 2023-01-22 16:33:20.526396: step: 1700/526, loss: 0.0032869232818484306 2023-01-22 16:33:21.607667: step: 1704/526, loss: 0.020500414073467255 2023-01-22 16:33:22.655693: step: 1708/526, loss: 0.03416214883327484 2023-01-22 16:33:23.718130: step: 1712/526, loss: 0.0037869231309741735 2023-01-22 16:33:24.779873: step: 1716/526, loss: 0.009350288659334183 2023-01-22 16:33:25.837991: step: 1720/526, loss: 0.0017497781664133072 2023-01-22 16:33:26.911507: step: 1724/526, loss: 
0.00040508943493478 2023-01-22 16:33:27.973676: step: 1728/526, loss: 0.016947848722338676 2023-01-22 16:33:29.051569: step: 1732/526, loss: 0.00021597662998829037 2023-01-22 16:33:30.109726: step: 1736/526, loss: 0.003410627832636237 2023-01-22 16:33:31.162274: step: 1740/526, loss: 0.0011602664599195123 2023-01-22 16:33:32.207006: step: 1744/526, loss: 0.005013324320316315 2023-01-22 16:33:33.283029: step: 1748/526, loss: 0.004208932165056467 2023-01-22 16:33:34.342739: step: 1752/526, loss: 0.01655876263976097 2023-01-22 16:33:35.401119: step: 1756/526, loss: 0.007851890288293362 2023-01-22 16:33:36.466679: step: 1760/526, loss: 0.18781514465808868 2023-01-22 16:33:37.543853: step: 1764/526, loss: 0.029294034466147423 2023-01-22 16:33:38.631424: step: 1768/526, loss: 0.015067586675286293 2023-01-22 16:33:39.694800: step: 1772/526, loss: 0.0035023908130824566 2023-01-22 16:33:40.761111: step: 1776/526, loss: 0.009105285629630089 2023-01-22 16:33:41.816004: step: 1780/526, loss: 0.0038787275552749634 2023-01-22 16:33:42.904623: step: 1784/526, loss: 0.006649780087172985 2023-01-22 16:33:43.976488: step: 1788/526, loss: 0.009858941659331322 2023-01-22 16:33:45.042931: step: 1792/526, loss: 0.001505280495621264 2023-01-22 16:33:46.123929: step: 1796/526, loss: 0.007555418182164431 2023-01-22 16:33:47.195260: step: 1800/526, loss: 0.0028510408010333776 2023-01-22 16:33:48.257948: step: 1804/526, loss: 0.0017563748406246305 2023-01-22 16:33:49.323416: step: 1808/526, loss: 0.001117513282224536 2023-01-22 16:33:50.409845: step: 1812/526, loss: 0.006782998330891132 2023-01-22 16:33:51.464267: step: 1816/526, loss: 0.0010308542987331748 2023-01-22 16:33:52.530654: step: 1820/526, loss: 0.009075156413018703 2023-01-22 16:33:53.598489: step: 1824/526, loss: 0.01912175677716732 2023-01-22 16:33:54.649467: step: 1828/526, loss: 0.013318387791514397 2023-01-22 16:33:55.719403: step: 1832/526, loss: 0.005430066492408514 2023-01-22 16:33:56.788276: step: 1836/526, loss: 0.0027415018994361162 2023-01-22 16:33:57.867881: step: 1840/526, loss: 0.03963654115796089 2023-01-22 16:33:58.934033: step: 1844/526, loss: 0.025179734453558922 2023-01-22 16:34:00.006397: step: 1848/526, loss: 0.0015749731101095676 2023-01-22 16:34:01.059246: step: 1852/526, loss: 0.012397871352732182 2023-01-22 16:34:02.123183: step: 1856/526, loss: 0.04175456240773201 2023-01-22 16:34:03.187692: step: 1860/526, loss: 0.006538006942719221 2023-01-22 16:34:04.260110: step: 1864/526, loss: 0.0031106697861105204 2023-01-22 16:34:05.311188: step: 1868/526, loss: 0.004724698141217232 2023-01-22 16:34:06.377894: step: 1872/526, loss: 0.012229977175593376 2023-01-22 16:34:07.440534: step: 1876/526, loss: 0.008140761405229568 2023-01-22 16:34:08.512331: step: 1880/526, loss: 0.0005193506949581206 2023-01-22 16:34:09.582195: step: 1884/526, loss: 0.015678822994232178 2023-01-22 16:34:10.643474: step: 1888/526, loss: 0.013109872117638588 2023-01-22 16:34:11.703135: step: 1892/526, loss: 0.007637638133019209 2023-01-22 16:34:12.775782: step: 1896/526, loss: 0.008921423926949501 2023-01-22 16:34:13.838051: step: 1900/526, loss: 0.00119243492372334 2023-01-22 16:34:14.907859: step: 1904/526, loss: 0.011440916918218136 2023-01-22 16:34:15.968625: step: 1908/526, loss: 0.011566330678761005 2023-01-22 16:34:17.026187: step: 1912/526, loss: 0.022982196882367134 2023-01-22 16:34:18.076429: step: 1916/526, loss: 0.020851679146289825 2023-01-22 16:34:19.170183: step: 1920/526, loss: 0.011146592907607555 2023-01-22 16:34:20.239545: step: 1924/526, loss: 
0.0020736802835017443 2023-01-22 16:34:21.301875: step: 1928/526, loss: 0.0029388205148279667 2023-01-22 16:34:22.358424: step: 1932/526, loss: 0.007810981944203377 2023-01-22 16:34:23.433267: step: 1936/526, loss: 0.010792273096740246 2023-01-22 16:34:24.486939: step: 1940/526, loss: 0.009290199726819992 2023-01-22 16:34:25.554791: step: 1944/526, loss: 0.014095891267061234 2023-01-22 16:34:26.629431: step: 1948/526, loss: 0.018211590126156807 2023-01-22 16:34:27.720907: step: 1952/526, loss: 0.08417269587516785 2023-01-22 16:34:28.783522: step: 1956/526, loss: 0.003627112368121743 2023-01-22 16:34:29.858547: step: 1960/526, loss: 0.00410380307585001 2023-01-22 16:34:30.915644: step: 1964/526, loss: 0.003400822402909398 2023-01-22 16:34:31.994122: step: 1968/526, loss: 0.007164774928241968 2023-01-22 16:34:33.054075: step: 1972/526, loss: 0.006590539589524269 2023-01-22 16:34:34.109344: step: 1976/526, loss: 0.004770446568727493 2023-01-22 16:34:35.172287: step: 1980/526, loss: 0.0013130620354786515 2023-01-22 16:34:36.250804: step: 1984/526, loss: 0.006059564184397459 2023-01-22 16:34:37.303027: step: 1988/526, loss: 0.016506999731063843 2023-01-22 16:34:38.370574: step: 1992/526, loss: 0.003427164163440466 2023-01-22 16:34:39.445082: step: 1996/526, loss: 0.04940352216362953 2023-01-22 16:34:40.502521: step: 2000/526, loss: 0.002892750781029463 2023-01-22 16:34:41.573042: step: 2004/526, loss: 0.0016225673025473952 2023-01-22 16:34:42.647471: step: 2008/526, loss: 0.007531145587563515 2023-01-22 16:34:43.717232: step: 2012/526, loss: 0.008236940950155258 2023-01-22 16:34:44.792040: step: 2016/526, loss: 0.002024533925577998 2023-01-22 16:34:45.866027: step: 2020/526, loss: 0.007904056459665298 2023-01-22 16:34:46.940112: step: 2024/526, loss: 0.01853630505502224 2023-01-22 16:34:47.997679: step: 2028/526, loss: 0.0016017219750210643 2023-01-22 16:34:49.060719: step: 2032/526, loss: 0.006075717508792877 2023-01-22 16:34:50.139977: step: 2036/526, loss: 0.014267859980463982 2023-01-22 16:34:51.210447: step: 2040/526, loss: 0.000822831760160625 2023-01-22 16:34:52.290874: step: 2044/526, loss: 0.03987755998969078 2023-01-22 16:34:53.364227: step: 2048/526, loss: 0.003301649121567607 2023-01-22 16:34:54.442860: step: 2052/526, loss: 0.005291596986353397 2023-01-22 16:34:55.499907: step: 2056/526, loss: 0.010310320183634758 2023-01-22 16:34:56.574867: step: 2060/526, loss: 0.0006486689671874046 2023-01-22 16:34:57.648066: step: 2064/526, loss: 0.012938452884554863 2023-01-22 16:34:58.734002: step: 2068/526, loss: 0.027604874223470688 2023-01-22 16:34:59.799971: step: 2072/526, loss: 0.00320068234577775 2023-01-22 16:35:00.880853: step: 2076/526, loss: 0.006932328920811415 2023-01-22 16:35:01.962652: step: 2080/526, loss: 0.004462625365704298 2023-01-22 16:35:03.044548: step: 2084/526, loss: 0.005529774818569422 2023-01-22 16:35:04.122616: step: 2088/526, loss: 0.023342285305261612 2023-01-22 16:35:05.193862: step: 2092/526, loss: 0.03901619091629982 2023-01-22 16:35:06.254432: step: 2096/526, loss: 0.026070749387145042 2023-01-22 16:35:07.314905: step: 2100/526, loss: 0.00875986646860838 2023-01-22 16:35:08.395179: step: 2104/526, loss: 0.008785846643149853 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3461376404494382, 'r': 0.2922794117647059, 'f1': 0.3169367283950617}, 'combined': 0.23353232618583492, 'stategy': 1, 'epoch': 5} Test Chinese: 
{'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3359785341776867, 'r': 0.23598492281527997, 'f1': 0.2772410282025667}, 'combined': 0.15122237901958183, 'stategy': 1, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31206986834231, 'r': 0.32983475648323846, 'f1': 0.32070648831488313}, 'combined': 0.23631004402149283, 'stategy': 1, 'epoch': 5} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3387122152156982, 'r': 0.2649578627716578, 'f1': 0.2973295113033079}, 'combined': 0.16217973343816794, 'stategy': 1, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3266195800195277, 'r': 0.3309579805131457, 'f1': 0.3287744688603728}, 'combined': 0.24225487179185365, 'stategy': 1, 'epoch': 5} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3335280910447985, 'r': 0.2707821670674209, 'f1': 0.2988976541722035}, 'combined': 0.16303508409392917, 'stategy': 1, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'stategy': 1, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.3017241379310345, 'f1': 0.3723404255319149}, 'combined': 0.2482269503546099, 'stategy': 1, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31418668446026093, 'r': 0.335052972802024, 'f1': 0.3242845117845118}, 'combined': 0.238946482367535, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34272064379813255, 'r': 0.2649578627716578, 'f1': 0.298863719307969}, 'combined': 0.1630165741679831, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 
'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 6 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 16:37:51.542038: step: 4/526, loss: 0.03056020848453045 2023-01-22 16:37:52.588713: step: 8/526, loss: 0.005550703965127468 2023-01-22 16:37:53.660394: step: 12/526, loss: 0.007741289678961039 2023-01-22 16:37:54.726624: step: 16/526, loss: 0.07527825981378555 2023-01-22 16:37:55.789644: step: 20/526, loss: 0.004757534712553024 2023-01-22 16:37:56.857221: step: 24/526, loss: 0.0036126484628766775 2023-01-22 16:37:57.917029: step: 28/526, loss: 0.005891560576856136 2023-01-22 16:37:58.985606: step: 32/526, loss: 0.01705353893339634 2023-01-22 16:38:00.051069: step: 36/526, loss: 0.009754710830748081 2023-01-22 16:38:01.113475: step: 40/526, loss: 0.02737494930624962 2023-01-22 16:38:02.172915: step: 44/526, loss: 0.0036318402271717787 2023-01-22 16:38:03.233236: step: 48/526, loss: 0.0021759348455816507 2023-01-22 16:38:04.285430: step: 52/526, loss: 0.01103932224214077 2023-01-22 16:38:05.356543: step: 56/526, loss: 0.007948559708893299 2023-01-22 16:38:06.429176: step: 60/526, loss: 0.009094356559216976 2023-01-22 16:38:07.514481: step: 64/526, loss: 0.008030201308429241 2023-01-22 16:38:08.587394: step: 68/526, loss: 0.007121507078409195 2023-01-22 16:38:09.646691: step: 72/526, loss: 0.0044227358885109425 2023-01-22 16:38:10.727804: step: 76/526, loss: 0.007090034894645214 2023-01-22 16:38:11.790007: step: 80/526, loss: 0.0025880110915750265 2023-01-22 16:38:12.886177: step: 84/526, loss: 0.013140284456312656 2023-01-22 16:38:13.958487: step: 88/526, loss: 0.010930776596069336 2023-01-22 16:38:15.025330: step: 92/526, loss: 0.00831281952559948 2023-01-22 16:38:16.090121: step: 96/526, loss: 0.003119352040812373 2023-01-22 16:38:17.159787: step: 100/526, loss: 0.015038120560348034 2023-01-22 16:38:18.226125: step: 104/526, loss: 0.004123392049223185 2023-01-22 16:38:19.310297: step: 108/526, loss: 0.005310199223458767 2023-01-22 16:38:20.390608: step: 112/526, loss: 0.0037086547818034887 2023-01-22 16:38:21.458883: step: 116/526, loss: 0.01209168415516615 2023-01-22 16:38:22.524971: step: 120/526, loss: 0.00614965008571744 2023-01-22 16:38:23.592992: step: 124/526, loss: 0.022402137517929077 2023-01-22 16:38:24.655116: step: 128/526, loss: 0.015202345326542854 2023-01-22 16:38:25.715185: step: 132/526, loss: 0.004186494275927544 2023-01-22 16:38:26.797043: step: 136/526, loss: 0.014832055196166039 2023-01-22 16:38:27.869083: step: 140/526, loss: 0.007426561322063208 2023-01-22 16:38:28.954808: step: 144/526, loss: 0.00980204064399004 2023-01-22 16:38:30.021386: step: 148/526, loss: 0.006996991112828255 2023-01-22 16:38:31.077706: step: 152/526, loss: 0.004004724323749542 2023-01-22 16:38:32.145459: step: 156/526, loss: 0.007940438576042652 2023-01-22 16:38:33.206765: step: 160/526, loss: 
0.008022377267479897 2023-01-22 16:38:34.279585: step: 164/526, loss: 0.002451209584251046 2023-01-22 16:38:35.337305: step: 168/526, loss: 0.008931722491979599 2023-01-22 16:38:36.414339: step: 172/526, loss: 0.002556374529376626 2023-01-22 16:38:37.498432: step: 176/526, loss: 0.015206307172775269 2023-01-22 16:38:38.575004: step: 180/526, loss: 0.013622324913740158 2023-01-22 16:38:39.649583: step: 184/526, loss: 0.005480619613081217 2023-01-22 16:38:40.717280: step: 188/526, loss: 0.0 2023-01-22 16:38:41.792250: step: 192/526, loss: 0.0005863471305929124 2023-01-22 16:38:42.897204: step: 196/526, loss: 0.027543986216187477 2023-01-22 16:38:43.984416: step: 200/526, loss: 0.00865589827299118 2023-01-22 16:38:45.065787: step: 204/526, loss: 0.0020030769519507885 2023-01-22 16:38:46.132137: step: 208/526, loss: 0.009460987523198128 2023-01-22 16:38:47.186069: step: 212/526, loss: 0.003951487131416798 2023-01-22 16:38:48.271169: step: 216/526, loss: 0.004548159893602133 2023-01-22 16:38:49.356182: step: 220/526, loss: 0.006977055687457323 2023-01-22 16:38:50.422243: step: 224/526, loss: 0.006539938040077686 2023-01-22 16:38:51.484528: step: 228/526, loss: 0.01938641257584095 2023-01-22 16:38:52.572501: step: 232/526, loss: 0.024029238149523735 2023-01-22 16:38:53.648017: step: 236/526, loss: 0.0006959624006412923 2023-01-22 16:38:54.701087: step: 240/526, loss: 0.004568490665405989 2023-01-22 16:38:55.770490: step: 244/526, loss: 0.0032213397789746523 2023-01-22 16:38:56.834694: step: 248/526, loss: 0.0056900824420154095 2023-01-22 16:38:57.897441: step: 252/526, loss: 0.01235982496291399 2023-01-22 16:38:58.995493: step: 256/526, loss: 0.003292555920779705 2023-01-22 16:39:00.063458: step: 260/526, loss: 0.0020116984378546476 2023-01-22 16:39:01.136257: step: 264/526, loss: 0.004956017713993788 2023-01-22 16:39:02.200981: step: 268/526, loss: 0.004580358974635601 2023-01-22 16:39:03.290359: step: 272/526, loss: 0.009182633832097054 2023-01-22 16:39:04.359451: step: 276/526, loss: 0.0059911152347922325 2023-01-22 16:39:05.441571: step: 280/526, loss: 0.014766387641429901 2023-01-22 16:39:06.511829: step: 284/526, loss: 0.0010674602817744017 2023-01-22 16:39:07.593157: step: 288/526, loss: 0.010107072070240974 2023-01-22 16:39:08.663276: step: 292/526, loss: 0.008091469295322895 2023-01-22 16:39:09.741327: step: 296/526, loss: 0.00393038522452116 2023-01-22 16:39:10.817979: step: 300/526, loss: 0.0021983629558235407 2023-01-22 16:39:11.888392: step: 304/526, loss: 0.009886692278087139 2023-01-22 16:39:12.960461: step: 308/526, loss: 0.034597739577293396 2023-01-22 16:39:14.023745: step: 312/526, loss: 0.020144330337643623 2023-01-22 16:39:15.088940: step: 316/526, loss: 0.008640216663479805 2023-01-22 16:39:16.158546: step: 320/526, loss: 0.0056711360812187195 2023-01-22 16:39:17.242750: step: 324/526, loss: 0.004932489711791277 2023-01-22 16:39:18.315608: step: 328/526, loss: 0.0008659413433633745 2023-01-22 16:39:19.384919: step: 332/526, loss: 0.00555976340547204 2023-01-22 16:39:20.465801: step: 336/526, loss: 0.006461212877184153 2023-01-22 16:39:21.567567: step: 340/526, loss: 0.007345697842538357 2023-01-22 16:39:22.646949: step: 344/526, loss: 0.0035378271713852882 2023-01-22 16:39:23.747697: step: 348/526, loss: 0.0037171063013374805 2023-01-22 16:39:24.815827: step: 352/526, loss: 0.007670004386454821 2023-01-22 16:39:25.886927: step: 356/526, loss: 0.008089261129498482 2023-01-22 16:39:26.975986: step: 360/526, loss: 0.008118961937725544 2023-01-22 16:39:28.065882: step: 364/526, 
loss: 0.014272321946918964 2023-01-22 16:39:29.144857: step: 368/526, loss: 0.002501644194126129 2023-01-22 16:39:30.199291: step: 372/526, loss: 0.0013490453129634261 2023-01-22 16:39:31.262918: step: 376/526, loss: 0.02342231012880802 2023-01-22 16:39:32.337137: step: 380/526, loss: 0.005635560490190983 2023-01-22 16:39:33.420035: step: 384/526, loss: 0.0032323936466127634 2023-01-22 16:39:34.480686: step: 388/526, loss: 0.007500453852117062 2023-01-22 16:39:35.539606: step: 392/526, loss: 0.006203955505043268 2023-01-22 16:39:36.601109: step: 396/526, loss: 0.0024333924520760775 2023-01-22 16:39:37.674842: step: 400/526, loss: 0.012974249199032784 2023-01-22 16:39:38.753744: step: 404/526, loss: 0.005696407984942198 2023-01-22 16:39:39.836567: step: 408/526, loss: 0.0061072902753949165 2023-01-22 16:39:40.896372: step: 412/526, loss: 0.008957736194133759 2023-01-22 16:39:41.967594: step: 416/526, loss: 0.001270537730306387 2023-01-22 16:39:43.046259: step: 420/526, loss: 0.028876159340143204 2023-01-22 16:39:44.109094: step: 424/526, loss: 0.007041980978101492 2023-01-22 16:39:45.172473: step: 428/526, loss: 0.011806683614850044 2023-01-22 16:39:46.253873: step: 432/526, loss: 0.014579186215996742 2023-01-22 16:39:47.337521: step: 436/526, loss: 0.02383432164788246 2023-01-22 16:39:48.400853: step: 440/526, loss: 0.003819467034190893 2023-01-22 16:39:49.472355: step: 444/526, loss: 0.001048753154464066 2023-01-22 16:39:50.565042: step: 448/526, loss: 0.004892498254776001 2023-01-22 16:39:51.634696: step: 452/526, loss: 0.025785304605960846 2023-01-22 16:39:52.706143: step: 456/526, loss: 0.0007533314055763185 2023-01-22 16:39:53.759649: step: 460/526, loss: 0.0004199769755359739 2023-01-22 16:39:54.833952: step: 464/526, loss: 0.009533392265439034 2023-01-22 16:39:55.892771: step: 468/526, loss: 4.4106909626862034e-05 2023-01-22 16:39:56.969678: step: 472/526, loss: 0.009595033712685108 2023-01-22 16:39:58.027911: step: 476/526, loss: 2.0704645066871308e-05 2023-01-22 16:39:59.111389: step: 480/526, loss: 0.03623385727405548 2023-01-22 16:40:00.192121: step: 484/526, loss: 0.011518680490553379 2023-01-22 16:40:01.279724: step: 488/526, loss: 0.047334909439086914 2023-01-22 16:40:02.342515: step: 492/526, loss: 0.004484002012759447 2023-01-22 16:40:03.398300: step: 496/526, loss: 0.004415825009346008 2023-01-22 16:40:04.463556: step: 500/526, loss: 0.0012383628636598587 2023-01-22 16:40:05.538241: step: 504/526, loss: 0.007894769310951233 2023-01-22 16:40:06.605364: step: 508/526, loss: 0.007927427999675274 2023-01-22 16:40:07.669464: step: 512/526, loss: 0.034742943942546844 2023-01-22 16:40:08.740913: step: 516/526, loss: 0.0035664604511111975 2023-01-22 16:40:09.797877: step: 520/526, loss: 0.0035678299609571695 2023-01-22 16:40:10.854509: step: 524/526, loss: 0.0065514277666807175 2023-01-22 16:40:11.931556: step: 528/526, loss: 0.001496276119723916 2023-01-22 16:40:12.998177: step: 532/526, loss: 0.00012217026960570365 2023-01-22 16:40:14.050684: step: 536/526, loss: 0.03548990190029144 2023-01-22 16:40:15.122298: step: 540/526, loss: 0.01503920741379261 2023-01-22 16:40:16.193149: step: 544/526, loss: 0.018218837678432465 2023-01-22 16:40:17.252246: step: 548/526, loss: 0.011441313661634922 2023-01-22 16:40:18.322021: step: 552/526, loss: 0.014774641953408718 2023-01-22 16:40:19.398195: step: 556/526, loss: 0.007690586615353823 2023-01-22 16:40:20.483817: step: 560/526, loss: 0.03551023080945015 2023-01-22 16:40:21.552259: step: 564/526, loss: 0.015166421420872211 2023-01-22 
16:40:22.622713: step: 568/526, loss: 0.02888898365199566 2023-01-22 16:40:23.699518: step: 572/526, loss: 0.003510272828862071 2023-01-22 16:40:24.764796: step: 576/526, loss: 0.0005080753471702337 2023-01-22 16:40:25.821501: step: 580/526, loss: 0.007521493826061487 2023-01-22 16:40:26.890137: step: 584/526, loss: 0.04921819269657135 2023-01-22 16:40:27.973588: step: 588/526, loss: 0.012841240502893925 2023-01-22 16:40:29.032714: step: 592/526, loss: 0.008358056657016277 2023-01-22 16:40:30.112255: step: 596/526, loss: 0.012431146577000618 2023-01-22 16:40:31.176446: step: 600/526, loss: 0.006740952841937542 2023-01-22 16:40:32.246059: step: 604/526, loss: 0.0030201685149222612 2023-01-22 16:40:33.317935: step: 608/526, loss: 0.008728429675102234 2023-01-22 16:40:34.398599: step: 612/526, loss: 0.01645864173769951 2023-01-22 16:40:35.458560: step: 616/526, loss: 0.0011229512747377157 2023-01-22 16:40:36.530410: step: 620/526, loss: 0.002545175841078162 2023-01-22 16:40:37.591122: step: 624/526, loss: 0.012281525880098343 2023-01-22 16:40:38.671246: step: 628/526, loss: 0.020619850605726242 2023-01-22 16:40:39.745541: step: 632/526, loss: 0.016358500346541405 2023-01-22 16:40:40.811973: step: 636/526, loss: 0.0026365816593170166 2023-01-22 16:40:41.868383: step: 640/526, loss: 0.005131471436470747 2023-01-22 16:40:42.936119: step: 644/526, loss: 0.015510068275034428 2023-01-22 16:40:44.000591: step: 648/526, loss: 0.017619166523218155 2023-01-22 16:40:45.059680: step: 652/526, loss: 0.0033808285370469093 2023-01-22 16:40:46.131005: step: 656/526, loss: 0.0013684448786079884 2023-01-22 16:40:47.176242: step: 660/526, loss: 0.006766584236174822 2023-01-22 16:40:48.240410: step: 664/526, loss: 0.005997321102768183 2023-01-22 16:40:49.316717: step: 668/526, loss: 0.017537444829940796 2023-01-22 16:40:50.362703: step: 672/526, loss: 0.004844771698117256 2023-01-22 16:40:51.410976: step: 676/526, loss: 0.0007653268403373659 2023-01-22 16:40:52.487603: step: 680/526, loss: 0.0016080039786174893 2023-01-22 16:40:53.548605: step: 684/526, loss: 0.0026193975936621428 2023-01-22 16:40:54.612042: step: 688/526, loss: 0.003294553142040968 2023-01-22 16:40:55.686863: step: 692/526, loss: 0.014027602039277554 2023-01-22 16:40:56.772842: step: 696/526, loss: 0.014805924147367477 2023-01-22 16:40:57.844153: step: 700/526, loss: 0.008626986294984818 2023-01-22 16:40:58.924586: step: 704/526, loss: 0.05580979585647583 2023-01-22 16:40:59.979478: step: 708/526, loss: 0.0016504075611010194 2023-01-22 16:41:01.032527: step: 712/526, loss: 0.026155177503824234 2023-01-22 16:41:02.095589: step: 716/526, loss: 0.0008136740070767701 2023-01-22 16:41:03.183577: step: 720/526, loss: 0.005017417948693037 2023-01-22 16:41:04.230146: step: 724/526, loss: 0.01748201623558998 2023-01-22 16:41:05.294367: step: 728/526, loss: 0.016648339107632637 2023-01-22 16:41:06.352522: step: 732/526, loss: 0.00246058264747262 2023-01-22 16:41:07.413356: step: 736/526, loss: 0.004434761591255665 2023-01-22 16:41:08.481750: step: 740/526, loss: 0.00963085237890482 2023-01-22 16:41:09.539097: step: 744/526, loss: 0.005070148501545191 2023-01-22 16:41:10.620258: step: 748/526, loss: 0.0008291593985632062 2023-01-22 16:41:11.676155: step: 752/526, loss: 0.0025919857434928417 2023-01-22 16:41:12.733442: step: 756/526, loss: 0.000503011979162693 2023-01-22 16:41:13.829515: step: 760/526, loss: 0.0016447021625936031 2023-01-22 16:41:14.899979: step: 764/526, loss: 0.002962449798360467 2023-01-22 16:41:15.946656: step: 768/526, loss: 
1.1187505151610821e-05 2023-01-22 16:41:17.011891: step: 772/526, loss: 0.0026745335198938847 2023-01-22 16:41:18.095968: step: 776/526, loss: 0.008055642247200012 2023-01-22 16:41:19.149940: step: 780/526, loss: 0.0012769351014867425 2023-01-22 16:41:20.204745: step: 784/526, loss: 0.001039610942825675 2023-01-22 16:41:21.280722: step: 788/526, loss: 0.013878018595278263 2023-01-22 16:41:22.355543: step: 792/526, loss: 0.030783390626311302 2023-01-22 16:41:23.420564: step: 796/526, loss: 9.280815902457107e-06 2023-01-22 16:41:24.485400: step: 800/526, loss: 0.008469845168292522 2023-01-22 16:41:25.551033: step: 804/526, loss: 0.00041606550803408027 2023-01-22 16:41:26.614965: step: 808/526, loss: 0.0457325279712677 2023-01-22 16:41:27.691260: step: 812/526, loss: 0.004375222604721785 2023-01-22 16:41:28.753657: step: 816/526, loss: 0.0010926228715106845 2023-01-22 16:41:29.808385: step: 820/526, loss: 0.0020371773280203342 2023-01-22 16:41:30.881143: step: 824/526, loss: 0.00791111309081316 2023-01-22 16:41:31.940606: step: 828/526, loss: 0.017372306436300278 2023-01-22 16:41:32.994573: step: 832/526, loss: 0.001993887359276414 2023-01-22 16:41:34.050111: step: 836/526, loss: 0.002606706228107214 2023-01-22 16:41:35.138018: step: 840/526, loss: 0.03999406844377518 2023-01-22 16:41:36.202895: step: 844/526, loss: 0.00800907053053379 2023-01-22 16:41:37.269963: step: 848/526, loss: 0.010427029803395271 2023-01-22 16:41:38.373626: step: 852/526, loss: 0.0029530322644859552 2023-01-22 16:41:39.462086: step: 856/526, loss: 0.03662458434700966 2023-01-22 16:41:40.529216: step: 860/526, loss: 0.009072757326066494 2023-01-22 16:41:41.609658: step: 864/526, loss: 0.00900545809417963 2023-01-22 16:41:42.676146: step: 868/526, loss: 0.003854315495118499 2023-01-22 16:41:43.743305: step: 872/526, loss: 0.0015699114883318543 2023-01-22 16:41:44.824815: step: 876/526, loss: 0.03132845088839531 2023-01-22 16:41:45.896425: step: 880/526, loss: 0.026862921193242073 2023-01-22 16:41:46.966085: step: 884/526, loss: 0.008050457574427128 2023-01-22 16:41:48.025742: step: 888/526, loss: 0.013367857784032822 2023-01-22 16:41:49.098545: step: 892/526, loss: 0.0027895092498511076 2023-01-22 16:41:50.157503: step: 896/526, loss: 0.006650068331509829 2023-01-22 16:41:51.218021: step: 900/526, loss: 0.002781215589493513 2023-01-22 16:41:52.282602: step: 904/526, loss: 0.004364557098597288 2023-01-22 16:41:53.349226: step: 908/526, loss: 0.007103148382157087 2023-01-22 16:41:54.437106: step: 912/526, loss: 0.035328458994627 2023-01-22 16:41:55.519438: step: 916/526, loss: 0.006334730423986912 2023-01-22 16:41:56.580459: step: 920/526, loss: 0.005595427006483078 2023-01-22 16:41:57.656722: step: 924/526, loss: 0.0027736076153814793 2023-01-22 16:41:58.728927: step: 928/526, loss: 0.006798542104661465 2023-01-22 16:41:59.796485: step: 932/526, loss: 0.0187666155397892 2023-01-22 16:42:00.874035: step: 936/526, loss: 0.0067521752789616585 2023-01-22 16:42:01.947712: step: 940/526, loss: 0.0026990140322595835 2023-01-22 16:42:03.020799: step: 944/526, loss: 0.019883237779140472 2023-01-22 16:42:04.079270: step: 948/526, loss: 0.000835177197586745 2023-01-22 16:42:05.141007: step: 952/526, loss: 0.007185594644397497 2023-01-22 16:42:06.201027: step: 956/526, loss: 0.013472693040966988 2023-01-22 16:42:07.266734: step: 960/526, loss: 0.003318228991702199 2023-01-22 16:42:08.359227: step: 964/526, loss: 0.02671235240995884 2023-01-22 16:42:09.421965: step: 968/526, loss: 0.00032923376420512795 2023-01-22 16:42:10.474526: 
step: 972/526, loss: 0.0039630127139389515 2023-01-22 16:42:11.545401: step: 976/526, loss: 0.0021149753592908382 2023-01-22 16:42:12.611774: step: 980/526, loss: 0.0038768374361097813 2023-01-22 16:42:13.694460: step: 984/526, loss: 0.009342167526483536 2023-01-22 16:42:14.792754: step: 988/526, loss: 0.02566170133650303 2023-01-22 16:42:15.870497: step: 992/526, loss: 0.0455283522605896 2023-01-22 16:42:16.953482: step: 996/526, loss: 0.003501344006508589 2023-01-22 16:42:18.029639: step: 1000/526, loss: 0.021333366632461548 2023-01-22 16:42:19.082938: step: 1004/526, loss: 0.006034106016159058 2023-01-22 16:42:20.167782: step: 1008/526, loss: 0.0113820256665349 2023-01-22 16:42:21.249110: step: 1012/526, loss: 0.0076561542227864265 2023-01-22 16:42:22.314959: step: 1016/526, loss: 0.0024924827739596367 2023-01-22 16:42:23.384721: step: 1020/526, loss: 0.037824634462594986 2023-01-22 16:42:24.438409: step: 1024/526, loss: 0.00809899065643549 2023-01-22 16:42:25.499821: step: 1028/526, loss: 0.007951125502586365 2023-01-22 16:42:26.573828: step: 1032/526, loss: 0.004146024119108915 2023-01-22 16:42:27.639884: step: 1036/526, loss: 0.040169648826122284 2023-01-22 16:42:28.713546: step: 1040/526, loss: 0.014608575031161308 2023-01-22 16:42:29.790741: step: 1044/526, loss: 0.005859078839421272 2023-01-22 16:42:30.839857: step: 1048/526, loss: 0.0035897556226700544 2023-01-22 16:42:31.910341: step: 1052/526, loss: 0.006033977959305048 2023-01-22 16:42:32.969468: step: 1056/526, loss: 0.0017644037725403905 2023-01-22 16:42:34.029926: step: 1060/526, loss: 0.046444620937108994 2023-01-22 16:42:35.094687: step: 1064/526, loss: 0.003138477448374033 2023-01-22 16:42:36.158804: step: 1068/526, loss: 0.0003456490230746567 2023-01-22 16:42:37.237558: step: 1072/526, loss: 0.0021224389784038067 2023-01-22 16:42:38.312787: step: 1076/526, loss: 0.004722969140857458 2023-01-22 16:42:39.377779: step: 1080/526, loss: 0.022206544876098633 2023-01-22 16:42:40.464899: step: 1084/526, loss: 0.01786890998482704 2023-01-22 16:42:41.529962: step: 1088/526, loss: 0.005798814352601767 2023-01-22 16:42:42.612822: step: 1092/526, loss: 0.010083862580358982 2023-01-22 16:42:43.682192: step: 1096/526, loss: 0.06651788204908371 2023-01-22 16:42:44.759316: step: 1100/526, loss: 0.00998808816075325 2023-01-22 16:42:45.830353: step: 1104/526, loss: 0.002352374140173197 2023-01-22 16:42:46.905878: step: 1108/526, loss: 0.007733537815511227 2023-01-22 16:42:47.981157: step: 1112/526, loss: 0.01831551268696785 2023-01-22 16:42:49.047035: step: 1116/526, loss: 0.005822804290801287 2023-01-22 16:42:50.137906: step: 1120/526, loss: 0.0307039525359869 2023-01-22 16:42:51.208519: step: 1124/526, loss: 0.00418469263240695 2023-01-22 16:42:52.286851: step: 1128/526, loss: 0.003654086496680975 2023-01-22 16:42:53.353112: step: 1132/526, loss: 0.04210471361875534 2023-01-22 16:42:54.428686: step: 1136/526, loss: 0.007529860362410545 2023-01-22 16:42:55.481199: step: 1140/526, loss: 0.004805960692465305 2023-01-22 16:42:56.543773: step: 1144/526, loss: 0.003602338721975684 2023-01-22 16:42:57.602575: step: 1148/526, loss: 0.010789827443659306 2023-01-22 16:42:58.673485: step: 1152/526, loss: 0.002499269088730216 2023-01-22 16:42:59.737074: step: 1156/526, loss: 0.005996648222208023 2023-01-22 16:43:00.788215: step: 1160/526, loss: 0.009914937429130077 2023-01-22 16:43:01.871865: step: 1164/526, loss: 0.004271358251571655 2023-01-22 16:43:02.940305: step: 1168/526, loss: 0.055827584117650986 2023-01-22 16:43:04.012796: step: 1172/526, 
loss: 0.021684378385543823 2023-01-22 16:43:05.079164: step: 1176/526, loss: 0.016439255326986313 2023-01-22 16:43:06.148214: step: 1180/526, loss: 0.0057567209005355835 2023-01-22 16:43:07.247495: step: 1184/526, loss: 0.0607495978474617 2023-01-22 16:43:08.311540: step: 1188/526, loss: 0.024399401620030403 2023-01-22 16:43:09.381670: step: 1192/526, loss: 0.0054026078432798386 2023-01-22 16:43:10.458632: step: 1196/526, loss: 0.012957009486854076 2023-01-22 16:43:11.529422: step: 1200/526, loss: 0.02486690692603588 2023-01-22 16:43:12.606979: step: 1204/526, loss: 0.0027699570637196302 2023-01-22 16:43:13.694087: step: 1208/526, loss: 0.01208297349512577 2023-01-22 16:43:14.766820: step: 1212/526, loss: 0.0010945043759420514 2023-01-22 16:43:15.828920: step: 1216/526, loss: 0.006781335920095444 2023-01-22 16:43:16.887179: step: 1220/526, loss: 0.001397983287461102 2023-01-22 16:43:17.956707: step: 1224/526, loss: 0.0023020459339022636 2023-01-22 16:43:19.049152: step: 1228/526, loss: 0.006321605294942856 2023-01-22 16:43:20.118556: step: 1232/526, loss: 0.0024445573799312115 2023-01-22 16:43:21.202041: step: 1236/526, loss: 0.0010653740027919412 2023-01-22 16:43:22.267966: step: 1240/526, loss: 0.0010765297338366508 2023-01-22 16:43:23.363654: step: 1244/526, loss: 0.00206439895555377 2023-01-22 16:43:24.431561: step: 1248/526, loss: 0.0053231497295200825 2023-01-22 16:43:25.515535: step: 1252/526, loss: 0.015214274637401104 2023-01-22 16:43:26.575076: step: 1256/526, loss: 0.0002356090844841674 2023-01-22 16:43:27.645659: step: 1260/526, loss: 0.00901406817138195 2023-01-22 16:43:28.715453: step: 1264/526, loss: 0.004142566584050655 2023-01-22 16:43:29.779640: step: 1268/526, loss: 0.004701070953160524 2023-01-22 16:43:30.868649: step: 1272/526, loss: 0.0008033206686377525 2023-01-22 16:43:31.972970: step: 1276/526, loss: 0.013277271762490273 2023-01-22 16:43:33.038214: step: 1280/526, loss: 0.002346867462620139 2023-01-22 16:43:34.115809: step: 1284/526, loss: 0.013487554155290127 2023-01-22 16:43:35.194849: step: 1288/526, loss: 0.020648233592510223 2023-01-22 16:43:36.265074: step: 1292/526, loss: 0.0020224498584866524 2023-01-22 16:43:37.343478: step: 1296/526, loss: 0.002598815131932497 2023-01-22 16:43:38.429537: step: 1300/526, loss: 0.006972632370889187 2023-01-22 16:43:39.509201: step: 1304/526, loss: 0.03460879623889923 2023-01-22 16:43:40.592020: step: 1308/526, loss: 0.010561812669038773 2023-01-22 16:43:41.685721: step: 1312/526, loss: 0.017433661967515945 2023-01-22 16:43:42.735349: step: 1316/526, loss: 0.0009524445631541312 2023-01-22 16:43:43.823179: step: 1320/526, loss: 0.017058931291103363 2023-01-22 16:43:44.893561: step: 1324/526, loss: 0.00552911963313818 2023-01-22 16:43:45.979516: step: 1328/526, loss: 0.07649354636669159 2023-01-22 16:43:47.058958: step: 1332/526, loss: 0.004486635327339172 2023-01-22 16:43:48.118691: step: 1336/526, loss: 0.03997723385691643 2023-01-22 16:43:49.178902: step: 1340/526, loss: 0.007076819892972708 2023-01-22 16:43:50.258798: step: 1344/526, loss: 0.0127756642177701 2023-01-22 16:43:51.345149: step: 1348/526, loss: 0.007796027697622776 2023-01-22 16:43:52.413679: step: 1352/526, loss: 0.012270638719201088 2023-01-22 16:43:53.500941: step: 1356/526, loss: 0.003149248892441392 2023-01-22 16:43:54.572875: step: 1360/526, loss: 0.004529505502432585 2023-01-22 16:43:55.658582: step: 1364/526, loss: 0.002220523776486516 2023-01-22 16:43:56.739594: step: 1368/526, loss: 0.0028114519082009792 2023-01-22 16:43:57.813644: step: 1372/526, 
loss: 0.0045374296605587006 2023-01-22 16:43:58.893842: step: 1376/526, loss: 0.007525020278990269 2023-01-22 16:43:59.960295: step: 1380/526, loss: 0.004115311894565821 2023-01-22 16:44:01.040374: step: 1384/526, loss: 0.011983995325863361 2023-01-22 16:44:02.125960: step: 1388/526, loss: 0.014218327589333057 2023-01-22 16:44:03.228843: step: 1392/526, loss: 0.0334528349339962 2023-01-22 16:44:04.303768: step: 1396/526, loss: 0.038804586976766586 2023-01-22 16:44:05.368203: step: 1400/526, loss: 0.004139270633459091 2023-01-22 16:44:06.435049: step: 1404/526, loss: 0.02219662442803383 2023-01-22 16:44:07.526233: step: 1408/526, loss: 0.026016244664788246 2023-01-22 16:44:08.592410: step: 1412/526, loss: 0.01674872264266014 2023-01-22 16:44:09.666196: step: 1416/526, loss: 0.01808893494307995 2023-01-22 16:44:10.742664: step: 1420/526, loss: 0.002857440384104848 2023-01-22 16:44:11.824270: step: 1424/526, loss: 0.0011322400532662868 2023-01-22 16:44:12.903690: step: 1428/526, loss: 0.004235779866576195 2023-01-22 16:44:13.970930: step: 1432/526, loss: 0.0038578875828534365 2023-01-22 16:44:15.037118: step: 1436/526, loss: 0.006223521661013365 2023-01-22 16:44:16.117554: step: 1440/526, loss: 0.003061942756175995 2023-01-22 16:44:17.180240: step: 1444/526, loss: 0.015388989821076393 2023-01-22 16:44:18.264255: step: 1448/526, loss: 0.018897997215390205 2023-01-22 16:44:19.349112: step: 1452/526, loss: 0.01957865059375763 2023-01-22 16:44:20.435185: step: 1456/526, loss: 0.021583186462521553 2023-01-22 16:44:21.512835: step: 1460/526, loss: 0.005597573705017567 2023-01-22 16:44:22.577061: step: 1464/526, loss: 0.00262489914894104 2023-01-22 16:44:23.653646: step: 1468/526, loss: 0.019800467416644096 2023-01-22 16:44:24.735430: step: 1472/526, loss: 0.0027936387341469526 2023-01-22 16:44:25.811576: step: 1476/526, loss: 0.007734362501651049 2023-01-22 16:44:26.884872: step: 1480/526, loss: 0.002555450890213251 2023-01-22 16:44:27.956115: step: 1484/526, loss: 0.03706897050142288 2023-01-22 16:44:29.035073: step: 1488/526, loss: 0.006347267888486385 2023-01-22 16:44:30.096639: step: 1492/526, loss: 0.019081544131040573 2023-01-22 16:44:31.185256: step: 1496/526, loss: 0.00281783239915967 2023-01-22 16:44:32.294627: step: 1500/526, loss: 0.006851533427834511 2023-01-22 16:44:33.364213: step: 1504/526, loss: 0.005703883245587349 2023-01-22 16:44:34.450911: step: 1508/526, loss: 0.0031425543129444122 2023-01-22 16:44:35.513864: step: 1512/526, loss: 0.009820224717259407 2023-01-22 16:44:36.578813: step: 1516/526, loss: 0.0020063077099621296 2023-01-22 16:44:37.642742: step: 1520/526, loss: 0.07253638654947281 2023-01-22 16:44:38.719134: step: 1524/526, loss: 0.11937223374843597 2023-01-22 16:44:39.785502: step: 1528/526, loss: 0.00813398975878954 2023-01-22 16:44:40.864947: step: 1532/526, loss: 0.0008280671900138259 2023-01-22 16:44:41.932513: step: 1536/526, loss: 0.00029721410828642547 2023-01-22 16:44:43.010657: step: 1540/526, loss: 0.014271453022956848 2023-01-22 16:44:44.080937: step: 1544/526, loss: 0.004293354693800211 2023-01-22 16:44:45.157114: step: 1548/526, loss: 0.011623783968389034 2023-01-22 16:44:46.232545: step: 1552/526, loss: 0.011222359724342823 2023-01-22 16:44:47.318785: step: 1556/526, loss: 0.035718921571969986 2023-01-22 16:44:48.390974: step: 1560/526, loss: 0.017994921654462814 2023-01-22 16:44:49.460771: step: 1564/526, loss: 0.0064345127902925014 2023-01-22 16:44:50.547800: step: 1568/526, loss: 0.013070063665509224 2023-01-22 16:44:51.602761: step: 1572/526, loss: 
0.009704822674393654 2023-01-22 16:44:52.668740: step: 1576/526, loss: 0.014365030452609062 2023-01-22 16:44:53.729379: step: 1580/526, loss: 0.02319900505244732 2023-01-22 16:44:54.792677: step: 1584/526, loss: 0.00878023449331522 2023-01-22 16:44:55.865860: step: 1588/526, loss: 0.01395992562174797 2023-01-22 16:44:56.931546: step: 1592/526, loss: 0.0020557951647788286 2023-01-22 16:44:58.022740: step: 1596/526, loss: 0.009000587277114391 2023-01-22 16:44:59.094682: step: 1600/526, loss: 0.004282928537577391 2023-01-22 16:45:00.160625: step: 1604/526, loss: 0.0016594675835222006 2023-01-22 16:45:01.232127: step: 1608/526, loss: 0.005927237682044506 2023-01-22 16:45:02.315825: step: 1612/526, loss: 0.003080021822825074 2023-01-22 16:45:03.390643: step: 1616/526, loss: 0.0035863418597728014 2023-01-22 16:45:04.459638: step: 1620/526, loss: 0.0049010468646883965 2023-01-22 16:45:05.518447: step: 1624/526, loss: 0.0029122792184352875 2023-01-22 16:45:06.572619: step: 1628/526, loss: 0.007731992285698652 2023-01-22 16:45:07.638755: step: 1632/526, loss: 0.0012869666097685695 2023-01-22 16:45:08.718650: step: 1636/526, loss: 0.002036880701780319 2023-01-22 16:45:09.802018: step: 1640/526, loss: 0.024723196402192116 2023-01-22 16:45:10.863403: step: 1644/526, loss: 0.0007505103712901473 2023-01-22 16:45:11.939175: step: 1648/526, loss: 0.006666247732937336 2023-01-22 16:45:12.996996: step: 1652/526, loss: 0.002535420935600996 2023-01-22 16:45:14.056945: step: 1656/526, loss: 0.017096517607569695 2023-01-22 16:45:15.120284: step: 1660/526, loss: 0.014411868527531624 2023-01-22 16:45:16.188832: step: 1664/526, loss: 0.004635794088244438 2023-01-22 16:45:17.274857: step: 1668/526, loss: 0.006019299384206533 2023-01-22 16:45:18.348274: step: 1672/526, loss: 0.005905449390411377 2023-01-22 16:45:19.417296: step: 1676/526, loss: 0.027766354382038116 2023-01-22 16:45:20.497478: step: 1680/526, loss: 0.0011953338980674744 2023-01-22 16:45:21.579539: step: 1684/526, loss: 0.004011579789221287 2023-01-22 16:45:22.656974: step: 1688/526, loss: 0.0030191901605576277 2023-01-22 16:45:23.719871: step: 1692/526, loss: 0.05773008614778519 2023-01-22 16:45:24.799415: step: 1696/526, loss: 0.014474649913609028 2023-01-22 16:45:25.870916: step: 1700/526, loss: 0.002133729634806514 2023-01-22 16:45:26.941193: step: 1704/526, loss: 0.02012203447520733 2023-01-22 16:45:28.021258: step: 1708/526, loss: 0.0069390772841870785 2023-01-22 16:45:29.084887: step: 1712/526, loss: 0.06547851860523224 2023-01-22 16:45:30.150298: step: 1716/526, loss: 0.01818758435547352 2023-01-22 16:45:31.234717: step: 1720/526, loss: 0.016970161348581314 2023-01-22 16:45:32.294391: step: 1724/526, loss: 0.003974339924752712 2023-01-22 16:45:33.362839: step: 1728/526, loss: 0.0018917451379820704 2023-01-22 16:45:34.441905: step: 1732/526, loss: 0.01116181816905737 2023-01-22 16:45:35.523629: step: 1736/526, loss: 0.016343293711543083 2023-01-22 16:45:36.606179: step: 1740/526, loss: 0.0036974104586988688 2023-01-22 16:45:37.684541: step: 1744/526, loss: 0.007054630666971207 2023-01-22 16:45:38.782677: step: 1748/526, loss: 0.09989713877439499 2023-01-22 16:45:39.847496: step: 1752/526, loss: 0.007389947306364775 2023-01-22 16:45:40.894883: step: 1756/526, loss: 0.003911925479769707 2023-01-22 16:45:41.948779: step: 1760/526, loss: 0.0023412429727613926 2023-01-22 16:45:43.038261: step: 1764/526, loss: 0.005940565839409828 2023-01-22 16:45:44.121493: step: 1768/526, loss: 0.0019187636207789183 2023-01-22 16:45:45.198300: step: 1772/526, loss: 
0.0025826399214565754 2023-01-22 16:45:46.264636: step: 1776/526, loss: 0.004844007547944784 2023-01-22 16:45:47.323376: step: 1780/526, loss: 0.002479694550856948 2023-01-22 16:45:48.386495: step: 1784/526, loss: 0.02238212339580059 2023-01-22 16:45:49.453424: step: 1788/526, loss: 0.00439298665151 2023-01-22 16:45:50.540187: step: 1792/526, loss: 0.01564399152994156 2023-01-22 16:45:51.624700: step: 1796/526, loss: 0.012087401002645493 2023-01-22 16:45:52.715154: step: 1800/526, loss: 0.006518587935715914 2023-01-22 16:45:53.780405: step: 1804/526, loss: 0.01234168466180563 2023-01-22 16:45:54.839968: step: 1808/526, loss: 0.04436902329325676 2023-01-22 16:45:55.892550: step: 1812/526, loss: 0.010153175331652164 2023-01-22 16:45:56.949403: step: 1816/526, loss: 0.007166760042309761 2023-01-22 16:45:58.008788: step: 1820/526, loss: 0.028396114706993103 2023-01-22 16:45:59.057465: step: 1824/526, loss: 0.0012265837285667658 2023-01-22 16:46:00.125187: step: 1828/526, loss: 0.0029733222909271717 2023-01-22 16:46:01.207256: step: 1832/526, loss: 0.010580579750239849 2023-01-22 16:46:02.262940: step: 1836/526, loss: 0.006216373760253191 2023-01-22 16:46:03.325443: step: 1840/526, loss: 0.0009228076669387519 2023-01-22 16:46:04.375390: step: 1844/526, loss: 0.006918182130903006 2023-01-22 16:46:05.440710: step: 1848/526, loss: 0.003109990619122982 2023-01-22 16:46:06.517782: step: 1852/526, loss: 0.010111591778695583 2023-01-22 16:46:07.611902: step: 1856/526, loss: 0.0370626263320446 2023-01-22 16:46:08.677107: step: 1860/526, loss: 0.0003386267344467342 2023-01-22 16:46:09.739239: step: 1864/526, loss: 0.00941428728401661 2023-01-22 16:46:10.809100: step: 1868/526, loss: 0.0021819707471877337 2023-01-22 16:46:11.886670: step: 1872/526, loss: 0.03399588167667389 2023-01-22 16:46:12.951346: step: 1876/526, loss: 0.006060609593987465 2023-01-22 16:46:14.014194: step: 1880/526, loss: 0.0033985571935772896 2023-01-22 16:46:15.063948: step: 1884/526, loss: 0.009023567661643028 2023-01-22 16:46:16.141410: step: 1888/526, loss: 0.001036790432408452 2023-01-22 16:46:17.201809: step: 1892/526, loss: 0.006437205243855715 2023-01-22 16:46:18.270589: step: 1896/526, loss: 0.0008709206013008952 2023-01-22 16:46:19.327032: step: 1900/526, loss: 0.0014735977165400982 2023-01-22 16:46:20.380674: step: 1904/526, loss: 0.003793305018916726 2023-01-22 16:46:21.438770: step: 1908/526, loss: 0.0007709282217547297 2023-01-22 16:46:22.506858: step: 1912/526, loss: 0.027955088764429092 2023-01-22 16:46:23.583489: step: 1916/526, loss: 0.003278878750279546 2023-01-22 16:46:24.660358: step: 1920/526, loss: 0.004938701633363962 2023-01-22 16:46:25.722344: step: 1924/526, loss: 0.03605610132217407 2023-01-22 16:46:26.814191: step: 1928/526, loss: 0.0028789339121431112 2023-01-22 16:46:27.884739: step: 1932/526, loss: 0.007453788537532091 2023-01-22 16:46:28.969828: step: 1936/526, loss: 0.013742885552346706 2023-01-22 16:46:30.032600: step: 1940/526, loss: 0.0036725637037307024 2023-01-22 16:46:31.105945: step: 1944/526, loss: 0.008672547526657581 2023-01-22 16:46:32.170106: step: 1948/526, loss: 0.05057437717914581 2023-01-22 16:46:33.232934: step: 1952/526, loss: 0.04378332570195198 2023-01-22 16:46:34.299973: step: 1956/526, loss: 0.003826582571491599 2023-01-22 16:46:35.376454: step: 1960/526, loss: 0.01907321624457836 2023-01-22 16:46:36.450862: step: 1964/526, loss: 0.016609249636530876 2023-01-22 16:46:37.512870: step: 1968/526, loss: 0.007328836712986231 2023-01-22 16:46:38.581323: step: 1972/526, loss: 
0.007735108491033316 2023-01-22 16:46:39.633495: step: 1976/526, loss: 0.01149886753410101 2023-01-22 16:46:40.695174: step: 1980/526, loss: 0.001523140468634665 2023-01-22 16:46:41.767873: step: 1984/526, loss: 0.006447824649512768 2023-01-22 16:46:42.851624: step: 1988/526, loss: 0.010037658736109734 2023-01-22 16:46:43.900628: step: 1992/526, loss: 0.0008003299008123577 2023-01-22 16:46:44.990439: step: 1996/526, loss: 0.06361684203147888 2023-01-22 16:46:46.056361: step: 2000/526, loss: 0.010843368247151375 2023-01-22 16:46:47.115680: step: 2004/526, loss: 0.004187176004052162 2023-01-22 16:46:48.175843: step: 2008/526, loss: 0.01911861076951027 2023-01-22 16:46:49.246936: step: 2012/526, loss: 0.003816250478848815 2023-01-22 16:46:50.317847: step: 2016/526, loss: 0.005473429337143898 2023-01-22 16:46:51.406683: step: 2020/526, loss: 0.0051955487579107285 2023-01-22 16:46:52.472476: step: 2024/526, loss: 0.0020086413715034723 2023-01-22 16:46:53.539046: step: 2028/526, loss: 0.03298501297831535 2023-01-22 16:46:54.597640: step: 2032/526, loss: 0.05014680325984955 2023-01-22 16:46:55.676798: step: 2036/526, loss: 0.03912079334259033 2023-01-22 16:46:56.745355: step: 2040/526, loss: 0.013243984431028366 2023-01-22 16:46:57.811040: step: 2044/526, loss: 0.006227582227438688 2023-01-22 16:46:58.859623: step: 2048/526, loss: 0.007000616751611233 2023-01-22 16:46:59.917726: step: 2052/526, loss: 0.05694907158613205 2023-01-22 16:47:00.994536: step: 2056/526, loss: 0.011590370908379555 2023-01-22 16:47:02.067467: step: 2060/526, loss: 0.018102457746863365 2023-01-22 16:47:03.120932: step: 2064/526, loss: 0.0022255312651395798 2023-01-22 16:47:04.173355: step: 2068/526, loss: 0.0034415286500006914 2023-01-22 16:47:05.235158: step: 2072/526, loss: 0.00653142761439085 2023-01-22 16:47:06.300796: step: 2076/526, loss: 0.00302940234541893 2023-01-22 16:47:07.355485: step: 2080/526, loss: 0.006129369605332613 2023-01-22 16:47:08.426770: step: 2084/526, loss: 0.007726317271590233 2023-01-22 16:47:09.497900: step: 2088/526, loss: 0.009634272195398808 2023-01-22 16:47:10.564261: step: 2092/526, loss: 0.013455990701913834 2023-01-22 16:47:11.620464: step: 2096/526, loss: 0.010858725756406784 2023-01-22 16:47:12.673575: step: 2100/526, loss: 0.01281960029155016 2023-01-22 16:47:13.740091: step: 2104/526, loss: 0.003467496484518051 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3465716704288939, 'r': 0.2913306451612903, 'f1': 0.3165592783505155}, 'combined': 0.23325420510037984, 'stategy': 1, 'epoch': 6} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3404238746279762, 'r': 0.2394189887493459, 'f1': 0.28112423195084485}, 'combined': 0.15334049015500628, 'stategy': 1, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.311847793679189, 'r': 0.330783523086654, 'f1': 0.32103667894413745}, 'combined': 0.23655334237989073, 'stategy': 1, 'epoch': 6} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33992808481675146, 'r': 0.2640429496884007, 'f1': 0.2972182739540907}, 'combined': 0.16211905852041308, 'stategy': 1, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32479450695975326, 'r': 
0.3303412822209255, 'f1': 0.32754441341566837}, 'combined': 0.2413485151483872, 'stategy': 1, 'epoch': 6} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3403007595246342, 'r': 0.275348377113083, 'f1': 0.3043982563743825}, 'combined': 0.166035412567845, 'stategy': 1, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31418668446026093, 'r': 0.335052972802024, 'f1': 0.3242845117845118}, 'combined': 0.238946482367535, 'stategy': 1, 'epoch': 1} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34272064379813255, 'r': 0.2649578627716578, 'f1': 0.298863719307969}, 'combined': 0.1630165741679831, 'stategy': 1, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 7 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 16:49:58.527634: step: 4/526, 
loss: 0.005681129172444344 2023-01-22 16:49:59.585788: step: 8/526, loss: 0.0010143601102754474 2023-01-22 16:50:00.625339: step: 12/526, loss: 0.005159999709576368 2023-01-22 16:50:01.696220: step: 16/526, loss: 0.011183780618011951 2023-01-22 16:50:02.759576: step: 20/526, loss: 0.0037257422227412462 2023-01-22 16:50:03.819174: step: 24/526, loss: 0.001278422074392438 2023-01-22 16:50:04.885083: step: 28/526, loss: 0.0008670516544952989 2023-01-22 16:50:05.951450: step: 32/526, loss: 0.005145782604813576 2023-01-22 16:50:07.023863: step: 36/526, loss: 0.002572249621152878 2023-01-22 16:50:08.079696: step: 40/526, loss: 0.00873457733541727 2023-01-22 16:50:09.151847: step: 44/526, loss: 0.003323943819850683 2023-01-22 16:50:10.204701: step: 48/526, loss: 0.015720967203378677 2023-01-22 16:50:11.253541: step: 52/526, loss: 0.003789663314819336 2023-01-22 16:50:12.328601: step: 56/526, loss: 0.004524076823145151 2023-01-22 16:50:13.397340: step: 60/526, loss: 0.0029734980780631304 2023-01-22 16:50:14.452868: step: 64/526, loss: 0.008790159597992897 2023-01-22 16:50:15.519260: step: 68/526, loss: 0.008159545250236988 2023-01-22 16:50:16.580502: step: 72/526, loss: 0.00930866226553917 2023-01-22 16:50:17.646032: step: 76/526, loss: 0.003312517888844013 2023-01-22 16:50:18.711994: step: 80/526, loss: 0.0054994067177176476 2023-01-22 16:50:19.773874: step: 84/526, loss: 0.0047514731995761395 2023-01-22 16:50:20.864243: step: 88/526, loss: 0.0011768946424126625 2023-01-22 16:50:21.935776: step: 92/526, loss: 0.005265416111797094 2023-01-22 16:50:23.030134: step: 96/526, loss: 0.009324588812887669 2023-01-22 16:50:24.087104: step: 100/526, loss: 0.003282174002379179 2023-01-22 16:50:25.165863: step: 104/526, loss: 0.0016753204399719834 2023-01-22 16:50:26.242186: step: 108/526, loss: 0.0028524647932499647 2023-01-22 16:50:27.315225: step: 112/526, loss: 0.0067680226638913155 2023-01-22 16:50:28.385223: step: 116/526, loss: 0.001611438812687993 2023-01-22 16:50:29.444672: step: 120/526, loss: 0.00673902640119195 2023-01-22 16:50:30.518758: step: 124/526, loss: 0.008489931933581829 2023-01-22 16:50:31.569698: step: 128/526, loss: 0.007176227401942015 2023-01-22 16:50:32.629236: step: 132/526, loss: 0.016124073415994644 2023-01-22 16:50:33.687887: step: 136/526, loss: 0.00031997408950701356 2023-01-22 16:50:34.756258: step: 140/526, loss: 0.0012919764267280698 2023-01-22 16:50:35.833017: step: 144/526, loss: 0.004090574570000172 2023-01-22 16:50:36.910467: step: 148/526, loss: 0.012636663392186165 2023-01-22 16:50:37.968705: step: 152/526, loss: 0.0058204373344779015 2023-01-22 16:50:39.051206: step: 156/526, loss: 0.004791403189301491 2023-01-22 16:50:40.118071: step: 160/526, loss: 0.0048788427375257015 2023-01-22 16:50:41.193851: step: 164/526, loss: 0.008330618031322956 2023-01-22 16:50:42.266274: step: 168/526, loss: 0.016464125365018845 2023-01-22 16:50:43.349379: step: 172/526, loss: 0.04827934503555298 2023-01-22 16:50:44.433221: step: 176/526, loss: 0.003101169364526868 2023-01-22 16:50:45.508897: step: 180/526, loss: 0.0015956700081005692 2023-01-22 16:50:46.585532: step: 184/526, loss: 0.004257692955434322 2023-01-22 16:50:47.645789: step: 188/526, loss: 0.0006411054637283087 2023-01-22 16:50:48.710319: step: 192/526, loss: 0.0002515445521567017 2023-01-22 16:50:49.783471: step: 196/526, loss: 0.005419893655925989 2023-01-22 16:50:50.879893: step: 200/526, loss: 0.0034781487192958593 2023-01-22 16:50:51.971830: step: 204/526, loss: 0.0037832753732800484 2023-01-22 16:50:53.040178: step: 
208/526, loss: 0.0065412214025855064 2023-01-22 16:50:54.119513: step: 212/526, loss: 0.022938640788197517 2023-01-22 16:50:55.185314: step: 216/526, loss: 0.00046589982230216265 2023-01-22 16:50:56.257793: step: 220/526, loss: 0.003605988807976246 2023-01-22 16:50:57.331276: step: 224/526, loss: 0.010898103006184101 2023-01-22 16:50:58.419059: step: 228/526, loss: 0.0004372690455056727 2023-01-22 16:50:59.499700: step: 232/526, loss: 0.00962343905121088 2023-01-22 16:51:00.558614: step: 236/526, loss: 0.0 2023-01-22 16:51:01.631701: step: 240/526, loss: 0.020165417343378067 2023-01-22 16:51:02.702741: step: 244/526, loss: 0.004791875369846821 2023-01-22 16:51:03.777284: step: 248/526, loss: 0.0031337961554527283 2023-01-22 16:51:04.849240: step: 252/526, loss: 0.0110794547945261 2023-01-22 16:51:05.906352: step: 256/526, loss: 0.008565178140997887 2023-01-22 16:51:06.975578: step: 260/526, loss: 0.0027253078296780586 2023-01-22 16:51:08.044588: step: 264/526, loss: 0.0019073592266067863 2023-01-22 16:51:09.133507: step: 268/526, loss: 0.0028594681061804295 2023-01-22 16:51:10.205517: step: 272/526, loss: 0.008038518019020557 2023-01-22 16:51:11.273306: step: 276/526, loss: 0.008343706838786602 2023-01-22 16:51:12.338223: step: 280/526, loss: 0.014430110342800617 2023-01-22 16:51:13.416744: step: 284/526, loss: 0.010138564743101597 2023-01-22 16:51:14.484511: step: 288/526, loss: 0.012738027609884739 2023-01-22 16:51:15.550025: step: 292/526, loss: 0.002907796995714307 2023-01-22 16:51:16.643722: step: 296/526, loss: 0.006457481998950243 2023-01-22 16:51:17.717659: step: 300/526, loss: 0.004271854646503925 2023-01-22 16:51:18.794325: step: 304/526, loss: 0.00224136165343225 2023-01-22 16:51:19.860777: step: 308/526, loss: 0.0009568085661157966 2023-01-22 16:51:20.929762: step: 312/526, loss: 0.007775954902172089 2023-01-22 16:51:22.021591: step: 316/526, loss: 0.002744965488091111 2023-01-22 16:51:23.095945: step: 320/526, loss: 0.0028619111981242895 2023-01-22 16:51:24.188407: step: 324/526, loss: 0.0018685284303501248 2023-01-22 16:51:25.253562: step: 328/526, loss: 0.0035534941125661135 2023-01-22 16:51:26.317168: step: 332/526, loss: 0.01011749915778637 2023-01-22 16:51:27.388823: step: 336/526, loss: 0.00327878980897367 2023-01-22 16:51:28.454682: step: 340/526, loss: 0.006491495296359062 2023-01-22 16:51:29.533912: step: 344/526, loss: 0.004422201309353113 2023-01-22 16:51:30.592902: step: 348/526, loss: 0.0009063204634003341 2023-01-22 16:51:31.653036: step: 352/526, loss: 0.0024999917950481176 2023-01-22 16:51:32.713409: step: 356/526, loss: 4.173920751782134e-05 2023-01-22 16:51:33.802545: step: 360/526, loss: 0.009568093344569206 2023-01-22 16:51:34.866433: step: 364/526, loss: 0.001386543270200491 2023-01-22 16:51:35.942122: step: 368/526, loss: 0.018805434927344322 2023-01-22 16:51:37.023777: step: 372/526, loss: 0.004321052227169275 2023-01-22 16:51:38.084316: step: 376/526, loss: 0.0011334537994116545 2023-01-22 16:51:39.169413: step: 380/526, loss: 0.008020251989364624 2023-01-22 16:51:40.236226: step: 384/526, loss: 0.003865952370688319 2023-01-22 16:51:41.308965: step: 388/526, loss: 0.002532330574467778 2023-01-22 16:51:42.368613: step: 392/526, loss: 0.013063295744359493 2023-01-22 16:51:43.441580: step: 396/526, loss: 0.0056519233621656895 2023-01-22 16:51:44.498315: step: 400/526, loss: 0.004078799858689308 2023-01-22 16:51:45.585009: step: 404/526, loss: 0.009090702049434185 2023-01-22 16:51:46.663422: step: 408/526, loss: 0.01338796503841877 2023-01-22 
16:51:47.732277: step: 412/526, loss: 0.023754622787237167 2023-01-22 16:51:48.791398: step: 416/526, loss: 0.0008065896690823138 2023-01-22 16:51:49.856889: step: 420/526, loss: 0.005122347269207239 2023-01-22 16:51:50.915953: step: 424/526, loss: 0.005691178143024445 2023-01-22 16:51:51.994225: step: 428/526, loss: 0.0075169154442846775 2023-01-22 16:51:53.055318: step: 432/526, loss: 0.005046722944825888 2023-01-22 16:51:54.119323: step: 436/526, loss: 0.003507403889670968 2023-01-22 16:51:55.199836: step: 440/526, loss: 0.0069351098500192165 2023-01-22 16:51:56.295847: step: 444/526, loss: 0.07562882453203201 2023-01-22 16:51:57.366722: step: 448/526, loss: 0.009906035847961903 2023-01-22 16:51:58.428901: step: 452/526, loss: 0.0033758808858692646 2023-01-22 16:51:59.516584: step: 456/526, loss: 0.020844178274273872 2023-01-22 16:52:00.593209: step: 460/526, loss: 0.0024903956800699234 2023-01-22 16:52:01.655333: step: 464/526, loss: 0.0015259014908224344 2023-01-22 16:52:02.710296: step: 468/526, loss: 0.0006897325511090457 2023-01-22 16:52:03.782864: step: 472/526, loss: 0.0056253401562571526 2023-01-22 16:52:04.856603: step: 476/526, loss: 0.0004885842208750546 2023-01-22 16:52:05.916664: step: 480/526, loss: 0.010557063855230808 2023-01-22 16:52:06.989445: step: 484/526, loss: 0.03432700037956238 2023-01-22 16:52:08.058439: step: 488/526, loss: 0.009193593636155128 2023-01-22 16:52:09.143539: step: 492/526, loss: 0.0029847752302885056 2023-01-22 16:52:10.197530: step: 496/526, loss: 0.006365698296576738 2023-01-22 16:52:11.264091: step: 500/526, loss: 0.009834758006036282 2023-01-22 16:52:12.350620: step: 504/526, loss: 0.038854554295539856 2023-01-22 16:52:13.429161: step: 508/526, loss: 0.001355179469101131 2023-01-22 16:52:14.513674: step: 512/526, loss: 0.004993292968720198 2023-01-22 16:52:15.579944: step: 516/526, loss: 0.008177582174539566 2023-01-22 16:52:16.632858: step: 520/526, loss: 0.0015152118867263198 2023-01-22 16:52:17.708809: step: 524/526, loss: 0.014209181070327759 2023-01-22 16:52:18.786640: step: 528/526, loss: 0.021385325118899345 2023-01-22 16:52:19.861248: step: 532/526, loss: 0.0015365779399871826 2023-01-22 16:52:20.951896: step: 536/526, loss: 0.006883626338094473 2023-01-22 16:52:22.019590: step: 540/526, loss: 0.0012072596000507474 2023-01-22 16:52:23.107603: step: 544/526, loss: 0.0013109153369441628 2023-01-22 16:52:24.195190: step: 548/526, loss: 0.0209902822971344 2023-01-22 16:52:25.280726: step: 552/526, loss: 0.005961552262306213 2023-01-22 16:52:26.345127: step: 556/526, loss: 0.0029851722065359354 2023-01-22 16:52:27.400488: step: 560/526, loss: 0.004833499900996685 2023-01-22 16:52:28.482021: step: 564/526, loss: 0.0039832089096307755 2023-01-22 16:52:29.532851: step: 568/526, loss: 0.0007380950846709311 2023-01-22 16:52:30.597108: step: 572/526, loss: 0.0030625511426478624 2023-01-22 16:52:31.675695: step: 576/526, loss: 0.004741715732961893 2023-01-22 16:52:32.748769: step: 580/526, loss: 0.0008771279826760292 2023-01-22 16:52:33.833563: step: 584/526, loss: 0.004129146225750446 2023-01-22 16:52:34.891436: step: 588/526, loss: 0.001908286940306425 2023-01-22 16:52:35.961624: step: 592/526, loss: 0.0031311430502682924 2023-01-22 16:52:37.029388: step: 596/526, loss: 0.0024689885322004557 2023-01-22 16:52:38.099282: step: 600/526, loss: 0.012600594200193882 2023-01-22 16:52:39.168102: step: 604/526, loss: 0.005509055685251951 2023-01-22 16:52:40.228898: step: 608/526, loss: 0.0007588063599541783 2023-01-22 16:52:41.292176: step: 612/526, 
loss: 0.0021500359289348125 2023-01-22 16:52:42.372187: step: 616/526, loss: 0.004716811235994101 2023-01-22 16:52:43.449929: step: 620/526, loss: 0.024872159585356712 2023-01-22 16:52:44.520149: step: 624/526, loss: 0.004087517037987709 2023-01-22 16:52:45.585256: step: 628/526, loss: 0.024557167664170265 2023-01-22 16:52:46.649188: step: 632/526, loss: 0.0034598500933498144 2023-01-22 16:52:47.715356: step: 636/526, loss: 0.005095295608043671 2023-01-22 16:52:48.800120: step: 640/526, loss: 0.005925626493990421 2023-01-22 16:52:49.861324: step: 644/526, loss: 0.004243495874106884 2023-01-22 16:52:50.922994: step: 648/526, loss: 0.023387502878904343 2023-01-22 16:52:51.996166: step: 652/526, loss: 0.0025512438733130693 2023-01-22 16:52:53.063607: step: 656/526, loss: 0.0022255745716392994 2023-01-22 16:52:54.123816: step: 660/526, loss: 0.01528609823435545 2023-01-22 16:52:55.208702: step: 664/526, loss: 0.012744559906423092 2023-01-22 16:52:56.283218: step: 668/526, loss: 0.020494695752859116 2023-01-22 16:52:57.342465: step: 672/526, loss: 0.00991932675242424 2023-01-22 16:52:58.415169: step: 676/526, loss: 0.01480784360319376 2023-01-22 16:52:59.490563: step: 680/526, loss: 0.008984953165054321 2023-01-22 16:53:00.554489: step: 684/526, loss: 0.007365328259766102 2023-01-22 16:53:01.630675: step: 688/526, loss: 0.0004520653164945543 2023-01-22 16:53:02.684074: step: 692/526, loss: 0.0025731660425662994 2023-01-22 16:53:03.741958: step: 696/526, loss: 0.003520233789458871 2023-01-22 16:53:04.801895: step: 700/526, loss: 0.017229948192834854 2023-01-22 16:53:05.877541: step: 704/526, loss: 0.01180350687354803 2023-01-22 16:53:06.941054: step: 708/526, loss: 0.024067580699920654 2023-01-22 16:53:08.009561: step: 712/526, loss: 0.005620887968689203 2023-01-22 16:53:09.076163: step: 716/526, loss: 0.008405043743550777 2023-01-22 16:53:10.134456: step: 720/526, loss: 0.006112702656537294 2023-01-22 16:53:11.195902: step: 724/526, loss: 0.027457116171717644 2023-01-22 16:53:12.304273: step: 728/526, loss: 0.010533414781093597 2023-01-22 16:53:13.393246: step: 732/526, loss: 0.003400026587769389 2023-01-22 16:53:14.454479: step: 736/526, loss: 0.012767207808792591 2023-01-22 16:53:15.529347: step: 740/526, loss: 0.06059146672487259 2023-01-22 16:53:16.600477: step: 744/526, loss: 0.013861028477549553 2023-01-22 16:53:17.659918: step: 748/526, loss: 0.004016069695353508 2023-01-22 16:53:18.726424: step: 752/526, loss: 0.010700277052819729 2023-01-22 16:53:19.788334: step: 756/526, loss: 0.011772260069847107 2023-01-22 16:53:20.849180: step: 760/526, loss: 0.007367943413555622 2023-01-22 16:53:21.913420: step: 764/526, loss: 0.0037237280048429966 2023-01-22 16:53:22.982738: step: 768/526, loss: 0.01740916818380356 2023-01-22 16:53:24.052030: step: 772/526, loss: 0.0037708294112235308 2023-01-22 16:53:25.133941: step: 776/526, loss: 0.0017007385613396764 2023-01-22 16:53:26.198949: step: 780/526, loss: 0.005401583854109049 2023-01-22 16:53:27.267644: step: 784/526, loss: 0.006413219962269068 2023-01-22 16:53:28.341064: step: 788/526, loss: 0.005851361434906721 2023-01-22 16:53:29.417123: step: 792/526, loss: 0.021040815860033035 2023-01-22 16:53:30.487623: step: 796/526, loss: 0.020898278802633286 2023-01-22 16:53:31.560014: step: 800/526, loss: 0.002156848320737481 2023-01-22 16:53:32.634513: step: 804/526, loss: 0.009684685617685318 2023-01-22 16:53:33.703758: step: 808/526, loss: 0.0011495487997308373 2023-01-22 16:53:34.791545: step: 812/526, loss: 0.0066055599600076675 2023-01-22 
16:53:35.853046: step: 816/526, loss: 0.004500295501202345 2023-01-22 16:53:36.944488: step: 820/526, loss: 0.0009703778778202832 2023-01-22 16:53:38.008355: step: 824/526, loss: 0.0006261609960347414 2023-01-22 16:53:39.072736: step: 828/526, loss: 0.0043951706029474735 2023-01-22 16:53:40.139423: step: 832/526, loss: 0.009856266900897026 2023-01-22 16:53:41.194636: step: 836/526, loss: 0.007887708023190498 2023-01-22 16:53:42.253621: step: 840/526, loss: 0.0017930191243067384 2023-01-22 16:53:43.326714: step: 844/526, loss: 0.005878266412764788 2023-01-22 16:53:44.385368: step: 848/526, loss: 0.009804030880331993 2023-01-22 16:53:45.444292: step: 852/526, loss: 0.0026165081653743982 2023-01-22 16:53:46.507403: step: 856/526, loss: 0.0004242129507474601 2023-01-22 16:53:47.559998: step: 860/526, loss: 0.004261369351297617 2023-01-22 16:53:48.626313: step: 864/526, loss: 0.025270730257034302 2023-01-22 16:53:49.679195: step: 868/526, loss: 2.5716413802001625e-05 2023-01-22 16:53:50.738806: step: 872/526, loss: 0.0009899325668811798 2023-01-22 16:53:51.791082: step: 876/526, loss: 0.013461158610880375 2023-01-22 16:53:52.847342: step: 880/526, loss: 0.021764567121863365 2023-01-22 16:53:53.902278: step: 884/526, loss: 0.0035536361392587423 2023-01-22 16:53:54.978272: step: 888/526, loss: 0.009750930592417717 2023-01-22 16:53:56.059022: step: 892/526, loss: 0.00548756355419755 2023-01-22 16:53:57.113599: step: 896/526, loss: 0.10667015612125397 2023-01-22 16:53:58.191715: step: 900/526, loss: 0.0003715291095431894 2023-01-22 16:53:59.245503: step: 904/526, loss: 0.014691632241010666 2023-01-22 16:54:00.308077: step: 908/526, loss: 0.002819499233737588 2023-01-22 16:54:01.370343: step: 912/526, loss: 0.01883510872721672 2023-01-22 16:54:02.437129: step: 916/526, loss: 0.00994692463427782 2023-01-22 16:54:03.527913: step: 920/526, loss: 0.005518668331205845 2023-01-22 16:54:04.609516: step: 924/526, loss: 0.029395248740911484 2023-01-22 16:54:05.689914: step: 928/526, loss: 0.00489407405257225 2023-01-22 16:54:06.764805: step: 932/526, loss: 0.0037236004136502743 2023-01-22 16:54:07.844770: step: 936/526, loss: 0.054493267089128494 2023-01-22 16:54:08.892748: step: 940/526, loss: 0.005480606108903885 2023-01-22 16:54:09.947564: step: 944/526, loss: 0.008045137859880924 2023-01-22 16:54:11.006910: step: 948/526, loss: 0.00538345193490386 2023-01-22 16:54:12.060925: step: 952/526, loss: 0.001313667744398117 2023-01-22 16:54:13.146251: step: 956/526, loss: 0.016213377937674522 2023-01-22 16:54:14.212543: step: 960/526, loss: 0.012696747668087482 2023-01-22 16:54:15.283943: step: 964/526, loss: 1.2140513717895374e-05 2023-01-22 16:54:16.361328: step: 968/526, loss: 0.0113800885155797 2023-01-22 16:54:17.442028: step: 972/526, loss: 0.009258749894797802 2023-01-22 16:54:18.490764: step: 976/526, loss: 0.01386339496821165 2023-01-22 16:54:19.550834: step: 980/526, loss: 0.0015523829497396946 2023-01-22 16:54:20.639669: step: 984/526, loss: 0.000966435472946614 2023-01-22 16:54:21.709788: step: 988/526, loss: 0.0010665992740541697 2023-01-22 16:54:22.767902: step: 992/526, loss: 0.00042779018986038864 2023-01-22 16:54:23.834721: step: 996/526, loss: 0.007841132581233978 2023-01-22 16:54:24.890897: step: 1000/526, loss: 0.0045336452312767506 2023-01-22 16:54:25.969169: step: 1004/526, loss: 0.0038128553424030542 2023-01-22 16:54:27.050380: step: 1008/526, loss: 0.0010101242223754525 2023-01-22 16:54:28.123366: step: 1012/526, loss: 0.006563783623278141 2023-01-22 16:54:29.190167: step: 1016/526, loss: 
0.001392957055941224 2023-01-22 16:54:30.254241: step: 1020/526, loss: 1.8566330254543573e-05 2023-01-22 16:54:31.321470: step: 1024/526, loss: 0.03251112997531891 2023-01-22 16:54:32.378510: step: 1028/526, loss: 0.0006096226279623806 2023-01-22 16:54:33.465205: step: 1032/526, loss: 0.0047867437824606895 2023-01-22 16:54:34.546558: step: 1036/526, loss: 0.0029120836406946182 2023-01-22 16:54:35.596591: step: 1040/526, loss: 0.005305441562086344 2023-01-22 16:54:36.666693: step: 1044/526, loss: 0.010105530731379986 2023-01-22 16:54:37.733632: step: 1048/526, loss: 0.0018843625439330935 2023-01-22 16:54:38.810077: step: 1052/526, loss: 0.0015610286500304937 2023-01-22 16:54:39.864984: step: 1056/526, loss: 0.0012386649614199996 2023-01-22 16:54:40.930044: step: 1060/526, loss: 4.018219988211058e-05 2023-01-22 16:54:42.011825: step: 1064/526, loss: 0.005092525854706764 2023-01-22 16:54:43.081485: step: 1068/526, loss: 0.026332106441259384 2023-01-22 16:54:44.174836: step: 1072/526, loss: 0.012815611436963081 2023-01-22 16:54:45.233315: step: 1076/526, loss: 0.007480602245777845 2023-01-22 16:54:46.307988: step: 1080/526, loss: 0.012437943369150162 2023-01-22 16:54:47.360637: step: 1084/526, loss: 0.000634836673270911 2023-01-22 16:54:48.432684: step: 1088/526, loss: 0.008440871722996235 2023-01-22 16:54:49.488552: step: 1092/526, loss: 0.013713391497731209 2023-01-22 16:54:50.563695: step: 1096/526, loss: 0.003165673464536667 2023-01-22 16:54:51.620998: step: 1100/526, loss: 0.0009543290361762047 2023-01-22 16:54:52.698615: step: 1104/526, loss: 0.004830463789403439 2023-01-22 16:54:53.767493: step: 1108/526, loss: 0.004033912904560566 2023-01-22 16:54:54.839064: step: 1112/526, loss: 0.03303450345993042 2023-01-22 16:54:55.921646: step: 1116/526, loss: 0.026137851178646088 2023-01-22 16:54:56.981508: step: 1120/526, loss: 0.013627216219902039 2023-01-22 16:54:58.087054: step: 1124/526, loss: 0.004265769384801388 2023-01-22 16:54:59.161926: step: 1128/526, loss: 0.010463309474289417 2023-01-22 16:55:00.229176: step: 1132/526, loss: 0.0004418915486894548 2023-01-22 16:55:01.305667: step: 1136/526, loss: 0.03460986912250519 2023-01-22 16:55:02.352420: step: 1140/526, loss: 0.004669278860092163 2023-01-22 16:55:03.436732: step: 1144/526, loss: 0.0035937430802732706 2023-01-22 16:55:04.529244: step: 1148/526, loss: 0.006591061595827341 2023-01-22 16:55:05.591687: step: 1152/526, loss: 0.0002037236699834466 2023-01-22 16:55:06.649044: step: 1156/526, loss: 0.003959314897656441 2023-01-22 16:55:07.703905: step: 1160/526, loss: 0.005870916415005922 2023-01-22 16:55:08.775560: step: 1164/526, loss: 0.01081240177154541 2023-01-22 16:55:09.827787: step: 1168/526, loss: 0.001602742588147521 2023-01-22 16:55:10.909336: step: 1172/526, loss: 0.015274214558303356 2023-01-22 16:55:11.979916: step: 1176/526, loss: 0.0009931731037795544 2023-01-22 16:55:13.066415: step: 1180/526, loss: 0.006461134180426598 2023-01-22 16:55:14.123204: step: 1184/526, loss: 0.00555135915055871 2023-01-22 16:55:15.194472: step: 1188/526, loss: 0.0007979444926604629 2023-01-22 16:55:16.248680: step: 1192/526, loss: 0.003826882690191269 2023-01-22 16:55:17.317118: step: 1196/526, loss: 0.0037616360932588577 2023-01-22 16:55:18.398584: step: 1200/526, loss: 0.004110532347112894 2023-01-22 16:55:19.474474: step: 1204/526, loss: 0.006867086514830589 2023-01-22 16:55:20.533704: step: 1208/526, loss: 0.005057360976934433 2023-01-22 16:55:21.615251: step: 1212/526, loss: 0.010423664003610611 2023-01-22 16:55:22.680140: step: 1216/526, 
loss: 0.0005080632981844246 2023-01-22 16:55:23.740346: step: 1220/526, loss: 0.0029604537412524223 2023-01-22 16:55:24.800891: step: 1224/526, loss: 0.0038718911819159985 2023-01-22 16:55:25.874448: step: 1228/526, loss: 0.057234667241573334 2023-01-22 16:55:26.938699: step: 1232/526, loss: 0.037851374596357346 2023-01-22 16:55:28.002247: step: 1236/526, loss: 0.0024671857245266438 2023-01-22 16:55:29.074713: step: 1240/526, loss: 0.0037177084013819695 2023-01-22 16:55:30.159544: step: 1244/526, loss: 0.002856529550626874 2023-01-22 16:55:31.237022: step: 1248/526, loss: 0.0014308544341474771 2023-01-22 16:55:32.307456: step: 1252/526, loss: 0.0039054774679243565 2023-01-22 16:55:33.364801: step: 1256/526, loss: 0.0091661112383008 2023-01-22 16:55:34.421003: step: 1260/526, loss: 0.002865250688046217 2023-01-22 16:55:35.478146: step: 1264/526, loss: 0.014015741646289825 2023-01-22 16:55:36.548656: step: 1268/526, loss: 0.01665918529033661 2023-01-22 16:55:37.605155: step: 1272/526, loss: 0.003201608546078205 2023-01-22 16:55:38.678480: step: 1276/526, loss: 0.0025636558420956135 2023-01-22 16:55:39.764065: step: 1280/526, loss: 0.004745763260871172 2023-01-22 16:55:40.823816: step: 1284/526, loss: 0.00475140567868948 2023-01-22 16:55:41.878697: step: 1288/526, loss: 0.002388492226600647 2023-01-22 16:55:42.943833: step: 1292/526, loss: 0.0016680208500474691 2023-01-22 16:55:44.002115: step: 1296/526, loss: 0.005548520013689995 2023-01-22 16:55:45.071284: step: 1300/526, loss: 0.00956171564757824 2023-01-22 16:55:46.133068: step: 1304/526, loss: 0.008312403224408627 2023-01-22 16:55:47.192095: step: 1308/526, loss: 0.0041682836599648 2023-01-22 16:55:48.258552: step: 1312/526, loss: 0.0015386065933853388 2023-01-22 16:55:49.333561: step: 1316/526, loss: 0.004809641279280186 2023-01-22 16:55:50.409252: step: 1320/526, loss: 0.0071939947083592415 2023-01-22 16:55:51.465830: step: 1324/526, loss: 0.0008508415194228292 2023-01-22 16:55:52.541266: step: 1328/526, loss: 0.03499884903430939 2023-01-22 16:55:53.600091: step: 1332/526, loss: 0.012733954936265945 2023-01-22 16:55:54.682125: step: 1336/526, loss: 0.0029565789736807346 2023-01-22 16:55:55.752403: step: 1340/526, loss: 0.0044716433621943 2023-01-22 16:55:56.826455: step: 1344/526, loss: 7.427403033943847e-05 2023-01-22 16:55:57.897766: step: 1348/526, loss: 0.007862232625484467 2023-01-22 16:55:58.993888: step: 1352/526, loss: 0.0009374048677273095 2023-01-22 16:56:00.062345: step: 1356/526, loss: 0.009015649557113647 2023-01-22 16:56:01.106873: step: 1360/526, loss: 0.021186884492635727 2023-01-22 16:56:02.182661: step: 1364/526, loss: 0.029067885130643845 2023-01-22 16:56:03.250725: step: 1368/526, loss: 0.0068560135550796986 2023-01-22 16:56:04.338531: step: 1372/526, loss: 0.013239089399576187 2023-01-22 16:56:05.416048: step: 1376/526, loss: 0.005027777049690485 2023-01-22 16:56:06.485916: step: 1380/526, loss: 0.09782104194164276 2023-01-22 16:56:07.566586: step: 1384/526, loss: 0.020495202392339706 2023-01-22 16:56:08.640172: step: 1388/526, loss: 0.0003649875579867512 2023-01-22 16:56:09.712050: step: 1392/526, loss: 0.0015582585474476218 2023-01-22 16:56:10.771243: step: 1396/526, loss: 0.006838055327534676 2023-01-22 16:56:11.854364: step: 1400/526, loss: 0.003772750962525606 2023-01-22 16:56:12.912130: step: 1404/526, loss: 0.0011949347099289298 2023-01-22 16:56:13.997625: step: 1408/526, loss: 0.0057137152180075645 2023-01-22 16:56:15.067522: step: 1412/526, loss: 0.006231048610061407 2023-01-22 16:56:16.131444: step: 
1416/526, loss: 0.008469206281006336 2023-01-22 16:56:17.186422: step: 1420/526, loss: 0.020381668582558632 2023-01-22 16:56:18.252504: step: 1424/526, loss: 0.003002460580319166 2023-01-22 16:56:19.305417: step: 1428/526, loss: 0.004201877862215042 2023-01-22 16:56:20.370962: step: 1432/526, loss: 0.011313398368656635 2023-01-22 16:56:21.441889: step: 1436/526, loss: 0.053479019552469254 2023-01-22 16:56:22.497950: step: 1440/526, loss: 0.0016285593155771494 2023-01-22 16:56:23.564764: step: 1444/526, loss: 0.01658998243510723 2023-01-22 16:56:24.625656: step: 1448/526, loss: 0.0026117165107280016 2023-01-22 16:56:25.711197: step: 1452/526, loss: 0.002124648541212082 2023-01-22 16:56:26.782790: step: 1456/526, loss: 0.017595259472727776 2023-01-22 16:56:27.856656: step: 1460/526, loss: 0.0003547028172761202 2023-01-22 16:56:28.931412: step: 1464/526, loss: 0.0035118036903440952 2023-01-22 16:56:29.988769: step: 1468/526, loss: 0.008548315614461899 2023-01-22 16:56:31.053925: step: 1472/526, loss: 0.005818708799779415 2023-01-22 16:56:32.132511: step: 1476/526, loss: 0.009090069681406021 2023-01-22 16:56:33.209288: step: 1480/526, loss: 0.005407553631812334 2023-01-22 16:56:34.287191: step: 1484/526, loss: 0.005108366720378399 2023-01-22 16:56:35.351027: step: 1488/526, loss: 0.025298217311501503 2023-01-22 16:56:36.416890: step: 1492/526, loss: 0.004251240286976099 2023-01-22 16:56:37.487804: step: 1496/526, loss: 0.03473653271794319 2023-01-22 16:56:38.543140: step: 1500/526, loss: 0.013348586857318878 2023-01-22 16:56:39.621340: step: 1504/526, loss: 0.006600175052881241 2023-01-22 16:56:40.675300: step: 1508/526, loss: 0.008938651531934738 2023-01-22 16:56:41.745928: step: 1512/526, loss: 0.002422742312774062 2023-01-22 16:56:42.800682: step: 1516/526, loss: 0.003078901208937168 2023-01-22 16:56:43.895509: step: 1520/526, loss: 0.016226626932621002 2023-01-22 16:56:44.966390: step: 1524/526, loss: 0.0006655099568888545 2023-01-22 16:56:46.029246: step: 1528/526, loss: 0.0012265611439943314 2023-01-22 16:56:47.094546: step: 1532/526, loss: 0.005088868085294962 2023-01-22 16:56:48.151820: step: 1536/526, loss: 0.0010596613865345716 2023-01-22 16:56:49.218855: step: 1540/526, loss: 0.009277921169996262 2023-01-22 16:56:50.277931: step: 1544/526, loss: 0.014644542708992958 2023-01-22 16:56:51.339460: step: 1548/526, loss: 0.005800127983093262 2023-01-22 16:56:52.422671: step: 1552/526, loss: 0.011130206286907196 2023-01-22 16:56:53.478864: step: 1556/526, loss: 0.002271250355988741 2023-01-22 16:56:54.543138: step: 1560/526, loss: 0.004625855945050716 2023-01-22 16:56:55.602394: step: 1564/526, loss: 0.00019734085071831942 2023-01-22 16:56:56.674679: step: 1568/526, loss: 0.002903470303863287 2023-01-22 16:56:57.755825: step: 1572/526, loss: 0.0055061206221580505 2023-01-22 16:56:58.816204: step: 1576/526, loss: 0.014441757462918758 2023-01-22 16:56:59.879469: step: 1580/526, loss: 0.0029819512274116278 2023-01-22 16:57:00.927206: step: 1584/526, loss: 0.015545613132417202 2023-01-22 16:57:02.014614: step: 1588/526, loss: 0.0005880399839952588 2023-01-22 16:57:03.089232: step: 1592/526, loss: 0.005186446476727724 2023-01-22 16:57:04.165098: step: 1596/526, loss: 0.043246544897556305 2023-01-22 16:57:05.230280: step: 1600/526, loss: 0.007097621913999319 2023-01-22 16:57:06.316156: step: 1604/526, loss: 0.006098034791648388 2023-01-22 16:57:07.392047: step: 1608/526, loss: 0.007078444119542837 2023-01-22 16:57:08.463492: step: 1612/526, loss: 0.017605388537049294 2023-01-22 16:57:09.533911: 
step: 1616/526, loss: 0.004377124365419149 2023-01-22 16:57:10.600782: step: 1620/526, loss: 0.0048618135042488575 2023-01-22 16:57:11.660896: step: 1624/526, loss: 0.0027774160262197256 2023-01-22 16:57:12.736608: step: 1628/526, loss: 0.004077346064150333 2023-01-22 16:57:13.811780: step: 1632/526, loss: 0.038192201405763626 2023-01-22 16:57:14.858479: step: 1636/526, loss: 0.017146332189440727 2023-01-22 16:57:15.936311: step: 1640/526, loss: 0.014185166917741299 2023-01-22 16:57:16.981740: step: 1644/526, loss: 0.002837585285305977 2023-01-22 16:57:18.054568: step: 1648/526, loss: 0.005212290212512016 2023-01-22 16:57:19.118133: step: 1652/526, loss: 0.0020114383660256863 2023-01-22 16:57:20.186783: step: 1656/526, loss: 0.008721740916371346 2023-01-22 16:57:21.244980: step: 1660/526, loss: 0.004408359527587891 2023-01-22 16:57:22.318284: step: 1664/526, loss: 0.004980864934623241 2023-01-22 16:57:23.404452: step: 1668/526, loss: 0.004344732966274023 2023-01-22 16:57:24.478022: step: 1672/526, loss: 0.000978962634690106 2023-01-22 16:57:25.573746: step: 1676/526, loss: 0.0012102506589144468 2023-01-22 16:57:26.645261: step: 1680/526, loss: 0.00945677887648344 2023-01-22 16:57:27.713092: step: 1684/526, loss: 0.004169594030827284 2023-01-22 16:57:28.758238: step: 1688/526, loss: 1.1368526429578196e-05 2023-01-22 16:57:29.817907: step: 1692/526, loss: 0.0006582618225365877 2023-01-22 16:57:30.890422: step: 1696/526, loss: 0.007216115947812796 2023-01-22 16:57:31.960838: step: 1700/526, loss: 0.008537952788174152 2023-01-22 16:57:33.012583: step: 1704/526, loss: 0.008632871322333813 2023-01-22 16:57:34.089856: step: 1708/526, loss: 0.0029105464927852154 2023-01-22 16:57:35.152322: step: 1712/526, loss: 0.001449619885534048 2023-01-22 16:57:36.207523: step: 1716/526, loss: 0.00844323169440031 2023-01-22 16:57:37.276806: step: 1720/526, loss: 0.014320994727313519 2023-01-22 16:57:38.331273: step: 1724/526, loss: 0.0012402012944221497 2023-01-22 16:57:39.400145: step: 1728/526, loss: 0.0024995682761073112 2023-01-22 16:57:40.482397: step: 1732/526, loss: 0.002082785591483116 2023-01-22 16:57:41.536778: step: 1736/526, loss: 0.00018656860629562289 2023-01-22 16:57:42.631374: step: 1740/526, loss: 0.0029906679410487413 2023-01-22 16:57:43.714439: step: 1744/526, loss: 0.007322310004383326 2023-01-22 16:57:44.796193: step: 1748/526, loss: 0.011403138749301434 2023-01-22 16:57:45.877446: step: 1752/526, loss: 0.01549022737890482 2023-01-22 16:57:46.939090: step: 1756/526, loss: 0.05550411343574524 2023-01-22 16:57:48.010989: step: 1760/526, loss: 0.006561277899891138 2023-01-22 16:57:49.078376: step: 1764/526, loss: 0.01718747429549694 2023-01-22 16:57:50.150006: step: 1768/526, loss: 0.0062029482796788216 2023-01-22 16:57:51.217273: step: 1772/526, loss: 0.015745092183351517 2023-01-22 16:57:52.287384: step: 1776/526, loss: 0.023954901844263077 2023-01-22 16:57:53.348387: step: 1780/526, loss: 0.006017810199409723 2023-01-22 16:57:54.414869: step: 1784/526, loss: 0.012075243517756462 2023-01-22 16:57:55.481007: step: 1788/526, loss: 0.005847611464560032 2023-01-22 16:57:56.553783: step: 1792/526, loss: 0.002950123278424144 2023-01-22 16:57:57.609865: step: 1796/526, loss: 0.009506269358098507 2023-01-22 16:57:58.679700: step: 1800/526, loss: 0.0029114210046827793 2023-01-22 16:57:59.760285: step: 1804/526, loss: 0.0005668816738761961 2023-01-22 16:58:00.819425: step: 1808/526, loss: 0.010006722062826157 2023-01-22 16:58:01.882840: step: 1812/526, loss: 0.0024292946327477694 2023-01-22 
16:58:02.958459: step: 1816/526, loss: 0.005452687386423349 2023-01-22 16:58:04.016454: step: 1820/526, loss: 0.0046021295711398125 2023-01-22 16:58:05.067289: step: 1824/526, loss: 0.007638941053301096 2023-01-22 16:58:06.129012: step: 1828/526, loss: 0.0063230618834495544 2023-01-22 16:58:07.209170: step: 1832/526, loss: 0.005664732772856951 2023-01-22 16:58:08.277407: step: 1836/526, loss: 0.004440494813024998 2023-01-22 16:58:09.333723: step: 1840/526, loss: 0.015246671624481678 2023-01-22 16:58:10.376710: step: 1844/526, loss: 0.004059979692101479 2023-01-22 16:58:11.444547: step: 1848/526, loss: 5.105124364490621e-05 2023-01-22 16:58:12.513918: step: 1852/526, loss: 0.007765565533190966 2023-01-22 16:58:13.571750: step: 1856/526, loss: 0.00982578843832016 2023-01-22 16:58:14.650070: step: 1860/526, loss: 0.005411851219832897 2023-01-22 16:58:15.721563: step: 1864/526, loss: 0.02710109017789364 2023-01-22 16:58:16.796202: step: 1868/526, loss: 0.005173692479729652 2023-01-22 16:58:17.854184: step: 1872/526, loss: 0.008736705407500267 2023-01-22 16:58:18.920708: step: 1876/526, loss: 0.00696840463206172 2023-01-22 16:58:19.981897: step: 1880/526, loss: 0.01092579960823059 2023-01-22 16:58:21.071888: step: 1884/526, loss: 0.0038575599901378155 2023-01-22 16:58:22.118753: step: 1888/526, loss: 0.00013975257752463222 2023-01-22 16:58:23.188214: step: 1892/526, loss: 0.006790732499212027 2023-01-22 16:58:24.264516: step: 1896/526, loss: 0.006647312548011541 2023-01-22 16:58:25.323440: step: 1900/526, loss: 0.007207388058304787 2023-01-22 16:58:26.388931: step: 1904/526, loss: 0.030093692243099213 2023-01-22 16:58:27.454793: step: 1908/526, loss: 0.054702356457710266 2023-01-22 16:58:28.529555: step: 1912/526, loss: 0.0034478632733225822 2023-01-22 16:58:29.603493: step: 1916/526, loss: 0.007571808993816376 2023-01-22 16:58:30.679743: step: 1920/526, loss: 0.013936576433479786 2023-01-22 16:58:31.750515: step: 1924/526, loss: 0.005216843914240599 2023-01-22 16:58:32.810115: step: 1928/526, loss: 0.0012667336268350482 2023-01-22 16:58:33.869892: step: 1932/526, loss: 0.01398126594722271 2023-01-22 16:58:34.946509: step: 1936/526, loss: 0.0038864300586283207 2023-01-22 16:58:36.012638: step: 1940/526, loss: 0.004495956469327211 2023-01-22 16:58:37.093916: step: 1944/526, loss: 0.012851502746343613 2023-01-22 16:58:38.165188: step: 1948/526, loss: 0.0078731132671237 2023-01-22 16:58:39.249602: step: 1952/526, loss: 0.003647000063210726 2023-01-22 16:58:40.323093: step: 1956/526, loss: 0.00452793063595891 2023-01-22 16:58:41.399375: step: 1960/526, loss: 0.005784343462437391 2023-01-22 16:58:42.465601: step: 1964/526, loss: 0.006865252275019884 2023-01-22 16:58:43.565896: step: 1968/526, loss: 0.00446565356105566 2023-01-22 16:58:44.616286: step: 1972/526, loss: 0.0005312049761414528 2023-01-22 16:58:45.683361: step: 1976/526, loss: 0.01868703030049801 2023-01-22 16:58:46.741063: step: 1980/526, loss: 0.008483445271849632 2023-01-22 16:58:47.818338: step: 1984/526, loss: 0.05349397659301758 2023-01-22 16:58:48.886254: step: 1988/526, loss: 0.004792788997292519 2023-01-22 16:58:49.956098: step: 1992/526, loss: 0.0016174135962501168 2023-01-22 16:58:51.016208: step: 1996/526, loss: 0.004809586331248283 2023-01-22 16:58:52.074581: step: 2000/526, loss: 0.00432139215990901 2023-01-22 16:58:53.147970: step: 2004/526, loss: 0.006920506712049246 2023-01-22 16:58:54.216977: step: 2008/526, loss: 0.007075745612382889 2023-01-22 16:58:55.284962: step: 2012/526, loss: 0.0050687906332314014 2023-01-22 
16:58:56.366869: step: 2016/526, loss: 0.017767786979675293 2023-01-22 16:58:57.448112: step: 2020/526, loss: 0.004493189509958029 2023-01-22 16:58:58.515508: step: 2024/526, loss: 0.004642934072762728 2023-01-22 16:58:59.579723: step: 2028/526, loss: 0.004403568338602781 2023-01-22 16:59:00.639163: step: 2032/526, loss: 0.006857232190668583 2023-01-22 16:59:01.703561: step: 2036/526, loss: 0.012456074357032776 2023-01-22 16:59:02.778207: step: 2040/526, loss: 0.012994619086384773 2023-01-22 16:59:03.862142: step: 2044/526, loss: 0.005447262432426214 2023-01-22 16:59:04.921923: step: 2048/526, loss: 0.01025354117155075 2023-01-22 16:59:05.994414: step: 2052/526, loss: 0.053243450820446014 2023-01-22 16:59:07.053274: step: 2056/526, loss: 0.019626379013061523 2023-01-22 16:59:08.110893: step: 2060/526, loss: 0.07240425050258636 2023-01-22 16:59:09.167028: step: 2064/526, loss: 0.006490351632237434 2023-01-22 16:59:10.239945: step: 2068/526, loss: 0.030568212270736694 2023-01-22 16:59:11.315294: step: 2072/526, loss: 0.01846432127058506 2023-01-22 16:59:12.394442: step: 2076/526, loss: 0.002376555697992444 2023-01-22 16:59:13.456870: step: 2080/526, loss: 0.0010407415684312582 2023-01-22 16:59:14.520450: step: 2084/526, loss: 0.005833109840750694 2023-01-22 16:59:15.582943: step: 2088/526, loss: 0.006362421438097954 2023-01-22 16:59:16.662578: step: 2092/526, loss: 0.005434426013380289 2023-01-22 16:59:17.720193: step: 2096/526, loss: 0.003740588901564479 2023-01-22 16:59:18.803778: step: 2100/526, loss: 0.0039010117761790752 2023-01-22 16:59:19.873299: step: 2104/526, loss: 0.03432996943593025 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3461813501144165, 'r': 0.2870611954459203, 'f1': 0.31386151452282157}, 'combined': 0.23126637912207904, 'stategy': 1, 'epoch': 7} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3408077896512936, 'r': 0.2378164246467818, 'f1': 0.2801462089690245}, 'combined': 0.15280702307401334, 'stategy': 1, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31464680406212664, 'r': 0.3331554395951929, 'f1': 0.323636712749616}, 'combined': 0.23846915676287492, 'stategy': 1, 'epoch': 7} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33946620592831006, 'r': 0.2624418517927008, 'f1': 0.2960257420117874}, 'combined': 0.161468586551884, 'stategy': 1, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32847142590646744, 'r': 0.3303412822209255, 'f1': 0.3294037005306107}, 'combined': 0.24271851618044996, 'stategy': 1, 'epoch': 7} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3381592663705509, 'r': 0.2723803405834026, 'f1': 0.3017263256841941}, 'combined': 0.16457799582774224, 'stategy': 1, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3252032520325203, 'r': 0.38095238095238093, 'f1': 0.3508771929824561}, 'combined': 0.23391812865497075, 'stategy': 1, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31418668446026093, 'r': 0.335052972802024, 'f1': 0.3242845117845118}, 'combined': 0.238946482367535, 'stategy': 1, 'epoch': 1}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34272064379813255, 'r': 0.2649578627716578, 'f1': 0.298863719307969}, 'combined': 0.1630165741679831, 'stategy': 1, 'epoch': 1}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 1}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1}
******************************
Epoch: 8
command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 17:02:03.058214: step: 4/526, loss: 0.010899268090724945 2023-01-22 17:02:04.114576: step: 8/526, loss: 0.0030616391450166702 2023-01-22 17:02:05.192135: step: 12/526, loss: 0.01314435712993145 2023-01-22 17:02:06.240047: step: 16/526, loss: 0.0002661587204784155 2023-01-22 17:02:07.305942: step: 20/526, loss: 0.008434689603745937 2023-01-22 17:02:08.359037: step: 24/526, loss: 0.005727472715079784 2023-01-22 17:02:09.413042: step: 28/526, loss: 0.005028573330491781 2023-01-22 17:02:10.486499: step: 32/526, loss: 0.019856305792927742 2023-01-22 17:02:11.564059: step: 36/526, loss: 0.002529808785766363 2023-01-22 17:02:12.632697: step: 40/526, loss: 0.002600078471004963 2023-01-22 17:02:13.701937: step: 44/526, loss: 0.022507553920149803 2023-01-22 17:02:14.758272: step: 48/526,
loss: 0.006190139334648848 2023-01-22 17:02:15.825995: step: 52/526, loss: 0.019749416038393974 2023-01-22 17:02:16.881415: step: 56/526, loss: 0.0069786179810762405 2023-01-22 17:02:17.943468: step: 60/526, loss: 0.003600344993174076 2023-01-22 17:02:19.017249: step: 64/526, loss: 0.007230598945170641 2023-01-22 17:02:20.078083: step: 68/526, loss: 0.005291781388223171 2023-01-22 17:02:21.149164: step: 72/526, loss: 0.0035029686987400055 2023-01-22 17:02:22.207004: step: 76/526, loss: 0.0026197240222245455 2023-01-22 17:02:23.278292: step: 80/526, loss: 0.007565547712147236 2023-01-22 17:02:24.335259: step: 84/526, loss: 0.0021991266403347254 2023-01-22 17:02:25.397407: step: 88/526, loss: 0.000680313678458333 2023-01-22 17:02:26.467554: step: 92/526, loss: 0.01031088549643755 2023-01-22 17:02:27.514123: step: 96/526, loss: 0.0050867400132119656 2023-01-22 17:02:28.562499: step: 100/526, loss: 0.0 2023-01-22 17:02:29.635068: step: 104/526, loss: 0.003087876131758094 2023-01-22 17:02:30.695072: step: 108/526, loss: 0.003220552345737815 2023-01-22 17:02:31.771953: step: 112/526, loss: 0.028040826320648193 2023-01-22 17:02:32.835758: step: 116/526, loss: 0.004460204858332872 2023-01-22 17:02:33.913972: step: 120/526, loss: 0.004889460280537605 2023-01-22 17:02:35.005364: step: 124/526, loss: 0.004017074126750231 2023-01-22 17:02:36.067600: step: 128/526, loss: 0.008606400340795517 2023-01-22 17:02:37.120945: step: 132/526, loss: 0.0019883138593286276 2023-01-22 17:02:38.197849: step: 136/526, loss: 0.0025338460691273212 2023-01-22 17:02:39.273778: step: 140/526, loss: 0.019969364628195763 2023-01-22 17:02:40.349866: step: 144/526, loss: 0.007962181232869625 2023-01-22 17:02:41.431098: step: 148/526, loss: 0.0053929053246974945 2023-01-22 17:02:42.503059: step: 152/526, loss: 0.004747800063341856 2023-01-22 17:02:43.580925: step: 156/526, loss: 0.006381009239703417 2023-01-22 17:02:44.642418: step: 160/526, loss: 0.0004650416085496545 2023-01-22 17:02:45.704668: step: 164/526, loss: 0.0014457793440669775 2023-01-22 17:02:46.763778: step: 168/526, loss: 0.01151053886860609 2023-01-22 17:02:47.830306: step: 172/526, loss: 0.004655364900827408 2023-01-22 17:02:48.917757: step: 176/526, loss: 0.004202236421406269 2023-01-22 17:02:49.994273: step: 180/526, loss: 0.0007839004974812269 2023-01-22 17:02:51.058707: step: 184/526, loss: 0.0025609065778553486 2023-01-22 17:02:52.128527: step: 188/526, loss: 0.0008630530210211873 2023-01-22 17:02:53.198620: step: 192/526, loss: 0.008424773812294006 2023-01-22 17:02:54.266184: step: 196/526, loss: 0.0018581899348646402 2023-01-22 17:02:55.346058: step: 200/526, loss: 0.008963003754615784 2023-01-22 17:02:56.421099: step: 204/526, loss: 0.0008392927120439708 2023-01-22 17:02:57.494960: step: 208/526, loss: 0.0008842563256621361 2023-01-22 17:02:58.597352: step: 212/526, loss: 0.005269532557576895 2023-01-22 17:02:59.679132: step: 216/526, loss: 0.04264936223626137 2023-01-22 17:03:00.765159: step: 220/526, loss: 0.03305068239569664 2023-01-22 17:03:01.825461: step: 224/526, loss: 0.0007441497873514891 2023-01-22 17:03:02.891327: step: 228/526, loss: 0.02527272142469883 2023-01-22 17:03:03.974741: step: 232/526, loss: 0.00832196231931448 2023-01-22 17:03:05.075295: step: 236/526, loss: 0.004788657650351524 2023-01-22 17:03:06.185516: step: 240/526, loss: 0.005434014834463596 2023-01-22 17:03:07.259272: step: 244/526, loss: 0.001945069176144898 2023-01-22 17:03:08.341033: step: 248/526, loss: 0.0 2023-01-22 17:03:09.419318: step: 252/526, loss: 
0.0004295838880352676 2023-01-22 17:03:10.490947: step: 256/526, loss: 0.0034755917731672525 2023-01-22 17:03:11.571264: step: 260/526, loss: 0.00377582386136055 2023-01-22 17:03:12.649755: step: 264/526, loss: 0.048546332865953445 2023-01-22 17:03:13.726042: step: 268/526, loss: 0.0016245838487520814 2023-01-22 17:03:14.799416: step: 272/526, loss: 0.002566190203651786 2023-01-22 17:03:15.859292: step: 276/526, loss: 0.002763283671811223 2023-01-22 17:03:16.925807: step: 280/526, loss: 6.963249325053766e-05 2023-01-22 17:03:18.006481: step: 284/526, loss: 0.009911938570439816 2023-01-22 17:03:19.091054: step: 288/526, loss: 0.026897506788372993 2023-01-22 17:03:20.174452: step: 292/526, loss: 0.007379285991191864 2023-01-22 17:03:21.258664: step: 296/526, loss: 0.01772451400756836 2023-01-22 17:03:22.336785: step: 300/526, loss: 0.01802189089357853 2023-01-22 17:03:23.414492: step: 304/526, loss: 0.0012491790112107992 2023-01-22 17:03:24.492226: step: 308/526, loss: 0.0023968906607478857 2023-01-22 17:03:25.568898: step: 312/526, loss: 0.013670278713107109 2023-01-22 17:03:26.638939: step: 316/526, loss: 0.003918391186743975 2023-01-22 17:03:27.707304: step: 320/526, loss: 0.001893170876428485 2023-01-22 17:03:28.781996: step: 324/526, loss: 0.007293326780200005 2023-01-22 17:03:29.841267: step: 328/526, loss: 0.021721050143241882 2023-01-22 17:03:30.907442: step: 332/526, loss: 0.0015705113764852285 2023-01-22 17:03:31.979648: step: 336/526, loss: 0.007181911263614893 2023-01-22 17:03:33.047987: step: 340/526, loss: 0.0011621711309999228 2023-01-22 17:03:34.141742: step: 344/526, loss: 0.009121734648942947 2023-01-22 17:03:35.213909: step: 348/526, loss: 0.019756240770220757 2023-01-22 17:03:36.288502: step: 352/526, loss: 0.03507305681705475 2023-01-22 17:03:37.358654: step: 356/526, loss: 0.0035847711842507124 2023-01-22 17:03:38.424022: step: 360/526, loss: 0.0020180491264909506 2023-01-22 17:03:39.495607: step: 364/526, loss: 0.004328557290136814 2023-01-22 17:03:40.560436: step: 368/526, loss: 0.0011874393094331026 2023-01-22 17:03:41.625607: step: 372/526, loss: 0.0040312777273356915 2023-01-22 17:03:42.717513: step: 376/526, loss: 0.0044716899283230305 2023-01-22 17:03:43.815835: step: 380/526, loss: 0.010534740053117275 2023-01-22 17:03:44.878984: step: 384/526, loss: 0.0006007138290442526 2023-01-22 17:03:45.956789: step: 388/526, loss: 0.0030399055685847998 2023-01-22 17:03:47.021272: step: 392/526, loss: 0.0009213355951942503 2023-01-22 17:03:48.080553: step: 396/526, loss: 0.004745754878968 2023-01-22 17:03:49.127165: step: 400/526, loss: 0.002493172651156783 2023-01-22 17:03:50.188541: step: 404/526, loss: 0.0021334898192435503 2023-01-22 17:03:51.258671: step: 408/526, loss: 0.007485872600227594 2023-01-22 17:03:52.320119: step: 412/526, loss: 0.004724218510091305 2023-01-22 17:03:53.386403: step: 416/526, loss: 0.013886557891964912 2023-01-22 17:03:54.455404: step: 420/526, loss: 0.0035828452091664076 2023-01-22 17:03:55.516456: step: 424/526, loss: 0.00639519514515996 2023-01-22 17:03:56.580543: step: 428/526, loss: 0.0055870735086500645 2023-01-22 17:03:57.634235: step: 432/526, loss: 0.001009528641588986 2023-01-22 17:03:58.683447: step: 436/526, loss: 0.012413972988724709 2023-01-22 17:03:59.763712: step: 440/526, loss: 0.0059785377234220505 2023-01-22 17:04:00.822360: step: 444/526, loss: 0.015663810074329376 2023-01-22 17:04:01.879913: step: 448/526, loss: 0.014677578583359718 2023-01-22 17:04:02.930080: step: 452/526, loss: 0.003707374446094036 2023-01-22 
17:04:03.991373: step: 456/526, loss: 0.0025234143249690533 2023-01-22 17:04:05.060046: step: 460/526, loss: 0.00508946692571044 2023-01-22 17:04:06.129000: step: 464/526, loss: 0.011581063270568848 2023-01-22 17:04:07.192649: step: 468/526, loss: 0.004123955499380827 2023-01-22 17:04:08.279473: step: 472/526, loss: 0.002478219335898757 2023-01-22 17:04:09.345330: step: 476/526, loss: 0.009361781179904938 2023-01-22 17:04:10.435239: step: 480/526, loss: 0.0031668555457144976 2023-01-22 17:04:11.491986: step: 484/526, loss: 0.01846126839518547 2023-01-22 17:04:12.559342: step: 488/526, loss: 0.00579203013330698 2023-01-22 17:04:13.636866: step: 492/526, loss: 0.004004118964076042 2023-01-22 17:04:14.716969: step: 496/526, loss: 0.010433213785290718 2023-01-22 17:04:15.790804: step: 500/526, loss: 0.0012413881486281753 2023-01-22 17:04:16.860200: step: 504/526, loss: 0.052186258137226105 2023-01-22 17:04:17.940499: step: 508/526, loss: 0.004598609171807766 2023-01-22 17:04:19.000296: step: 512/526, loss: 0.00733589380979538 2023-01-22 17:04:20.085582: step: 516/526, loss: 0.011689831502735615 2023-01-22 17:04:21.143265: step: 520/526, loss: 0.00664775725454092 2023-01-22 17:04:22.235127: step: 524/526, loss: 0.012676513753831387 2023-01-22 17:04:23.310634: step: 528/526, loss: 0.013543674722313881 2023-01-22 17:04:24.372775: step: 532/526, loss: 0.00021971965907141566 2023-01-22 17:04:25.442835: step: 536/526, loss: 0.0045703742653131485 2023-01-22 17:04:26.492777: step: 540/526, loss: 0.0020279698073863983 2023-01-22 17:04:27.572791: step: 544/526, loss: 0.019192779436707497 2023-01-22 17:04:28.636096: step: 548/526, loss: 0.006866115611046553 2023-01-22 17:04:29.697880: step: 552/526, loss: 0.008568652905523777 2023-01-22 17:04:30.763700: step: 556/526, loss: 0.002849880140274763 2023-01-22 17:04:31.835431: step: 560/526, loss: 0.007152759935706854 2023-01-22 17:04:32.918451: step: 564/526, loss: 0.002433264395222068 2023-01-22 17:04:33.984987: step: 568/526, loss: 0.005239705555140972 2023-01-22 17:04:35.048141: step: 572/526, loss: 0.008249285630881786 2023-01-22 17:04:36.128935: step: 576/526, loss: 0.0074399858713150024 2023-01-22 17:04:37.183537: step: 580/526, loss: 0.007843276485800743 2023-01-22 17:04:38.239277: step: 584/526, loss: 0.009459164924919605 2023-01-22 17:04:39.290868: step: 588/526, loss: 0.012332292273640633 2023-01-22 17:04:40.351115: step: 592/526, loss: 0.0019126208499073982 2023-01-22 17:04:41.424225: step: 596/526, loss: 0.004313490819185972 2023-01-22 17:04:42.476232: step: 600/526, loss: 0.0034413503017276525 2023-01-22 17:04:43.568238: step: 604/526, loss: 0.0028735999949276447 2023-01-22 17:04:44.654665: step: 608/526, loss: 0.008208543993532658 2023-01-22 17:04:45.723140: step: 612/526, loss: 0.00790444202721119 2023-01-22 17:04:46.780027: step: 616/526, loss: 0.0035831709392368793 2023-01-22 17:04:47.843196: step: 620/526, loss: 0.004880182910710573 2023-01-22 17:04:48.903267: step: 624/526, loss: 0.0027925942558795214 2023-01-22 17:04:49.978445: step: 628/526, loss: 0.005777599755674601 2023-01-22 17:04:51.049555: step: 632/526, loss: 0.007344453129917383 2023-01-22 17:04:52.113157: step: 636/526, loss: 0.009829001501202583 2023-01-22 17:04:53.190269: step: 640/526, loss: 0.0023153286892920732 2023-01-22 17:04:54.276150: step: 644/526, loss: 0.037738338112831116 2023-01-22 17:04:55.353552: step: 648/526, loss: 0.012994375079870224 2023-01-22 17:04:56.427806: step: 652/526, loss: 0.0015481819864362478 2023-01-22 17:04:57.487542: step: 656/526, loss: 
0.0017289051320403814 2023-01-22 17:04:58.559185: step: 660/526, loss: 0.002095303498208523 2023-01-22 17:04:59.642487: step: 664/526, loss: 0.008841686882078648 2023-01-22 17:05:00.694483: step: 668/526, loss: 0.0016738995909690857 2023-01-22 17:05:01.761158: step: 672/526, loss: 0.008282607421278954 2023-01-22 17:05:02.839507: step: 676/526, loss: 0.0016179295489564538 2023-01-22 17:05:03.904339: step: 680/526, loss: 0.005020576529204845 2023-01-22 17:05:04.970731: step: 684/526, loss: 0.015881359577178955 2023-01-22 17:05:06.043527: step: 688/526, loss: 0.001501933904364705 2023-01-22 17:05:07.094228: step: 692/526, loss: 0.009338192641735077 2023-01-22 17:05:08.161191: step: 696/526, loss: 0.05601728707551956 2023-01-22 17:05:09.220411: step: 700/526, loss: 0.007577328477054834 2023-01-22 17:05:10.294292: step: 704/526, loss: 0.0 2023-01-22 17:05:11.375721: step: 708/526, loss: 0.028329677879810333 2023-01-22 17:05:12.436701: step: 712/526, loss: 0.016650890931487083 2023-01-22 17:05:13.507002: step: 716/526, loss: 0.0027997170109301805 2023-01-22 17:05:14.565054: step: 720/526, loss: 0.003414830658584833 2023-01-22 17:05:15.640394: step: 724/526, loss: 0.004446730948984623 2023-01-22 17:05:16.714589: step: 728/526, loss: 0.006720058619976044 2023-01-22 17:05:17.794013: step: 732/526, loss: 0.006368011236190796 2023-01-22 17:05:18.849732: step: 736/526, loss: 0.0004205916484352201 2023-01-22 17:05:19.911486: step: 740/526, loss: 0.00021764103439636528 2023-01-22 17:05:21.002954: step: 744/526, loss: 0.005215159151703119 2023-01-22 17:05:22.068249: step: 748/526, loss: 0.011008137837052345 2023-01-22 17:05:23.139903: step: 752/526, loss: 0.0044187502935528755 2023-01-22 17:05:24.198921: step: 756/526, loss: 0.0068278443068265915 2023-01-22 17:05:25.283076: step: 760/526, loss: 0.008587944321334362 2023-01-22 17:05:26.331272: step: 764/526, loss: 0.000590951123740524 2023-01-22 17:05:27.391784: step: 768/526, loss: 0.0019527932163327932 2023-01-22 17:05:28.444198: step: 772/526, loss: 0.0036437748931348324 2023-01-22 17:05:29.510527: step: 776/526, loss: 0.008449382148683071 2023-01-22 17:05:30.578075: step: 780/526, loss: 0.006135961506515741 2023-01-22 17:05:31.645577: step: 784/526, loss: 0.004804146941751242 2023-01-22 17:05:32.698017: step: 788/526, loss: 0.0021569302771240473 2023-01-22 17:05:33.767817: step: 792/526, loss: 0.007811501156538725 2023-01-22 17:05:34.825285: step: 796/526, loss: 0.01714414358139038 2023-01-22 17:05:35.892305: step: 800/526, loss: 0.024840185418725014 2023-01-22 17:05:36.954073: step: 804/526, loss: 0.00048400156083516777 2023-01-22 17:05:38.032701: step: 808/526, loss: 0.007126195821911097 2023-01-22 17:05:39.095520: step: 812/526, loss: 8.099444676190615e-05 2023-01-22 17:05:40.154351: step: 816/526, loss: 0.0003011475782841444 2023-01-22 17:05:41.239733: step: 820/526, loss: 0.003924433141946793 2023-01-22 17:05:42.317758: step: 824/526, loss: 0.0013155933702364564 2023-01-22 17:05:43.400486: step: 828/526, loss: 0.0024910878855735064 2023-01-22 17:05:44.482762: step: 832/526, loss: 0.000884887995198369 2023-01-22 17:05:45.541024: step: 836/526, loss: 0.004059998784214258 2023-01-22 17:05:46.644642: step: 840/526, loss: 0.017620541155338287 2023-01-22 17:05:47.721030: step: 844/526, loss: 0.00626614922657609 2023-01-22 17:05:48.776152: step: 848/526, loss: 0.002911080839112401 2023-01-22 17:05:49.842825: step: 852/526, loss: 0.0018006776226684451 2023-01-22 17:05:50.920481: step: 856/526, loss: 0.001855318434536457 2023-01-22 17:05:51.979820: step: 
860/526, loss: 0.007864564657211304 2023-01-22 17:05:53.064617: step: 864/526, loss: 0.0021635969169437885 2023-01-22 17:05:54.137309: step: 868/526, loss: 0.0037115311715751886 2023-01-22 17:05:55.200528: step: 872/526, loss: 9.951701940735802e-05 2023-01-22 17:05:56.268393: step: 876/526, loss: 0.0038975137285888195 2023-01-22 17:05:57.325379: step: 880/526, loss: 0.00393241411074996 2023-01-22 17:05:58.386344: step: 884/526, loss: 0.0 2023-01-22 17:05:59.444566: step: 888/526, loss: 0.0008587830816395581 2023-01-22 17:06:00.505303: step: 892/526, loss: 0.03417181223630905 2023-01-22 17:06:01.579020: step: 896/526, loss: 0.0010484495433047414 2023-01-22 17:06:02.645875: step: 900/526, loss: 0.0039267041720449924 2023-01-22 17:06:03.699462: step: 904/526, loss: 0.0013858022866770625 2023-01-22 17:06:04.755645: step: 908/526, loss: 0.004585203714668751 2023-01-22 17:06:05.808592: step: 912/526, loss: 0.00661693187430501 2023-01-22 17:06:06.863950: step: 916/526, loss: 0.006822461262345314 2023-01-22 17:06:07.934683: step: 920/526, loss: 0.004754720255732536 2023-01-22 17:06:09.009129: step: 924/526, loss: 0.006811104714870453 2023-01-22 17:06:10.067439: step: 928/526, loss: 0.006537212990224361 2023-01-22 17:06:11.132460: step: 932/526, loss: 0.003616190515458584 2023-01-22 17:06:12.185853: step: 936/526, loss: 0.0010003787465393543 2023-01-22 17:06:13.241164: step: 940/526, loss: 0.0031409617513418198 2023-01-22 17:06:14.318812: step: 944/526, loss: 0.001413355697877705 2023-01-22 17:06:15.378156: step: 948/526, loss: 0.0005370237049646676 2023-01-22 17:06:16.440670: step: 952/526, loss: 0.002327698515728116 2023-01-22 17:06:17.495205: step: 956/526, loss: 0.004209585953503847 2023-01-22 17:06:18.570060: step: 960/526, loss: 0.001094841631129384 2023-01-22 17:06:19.651235: step: 964/526, loss: 0.005400075577199459 2023-01-22 17:06:20.718339: step: 968/526, loss: 0.012738915160298347 2023-01-22 17:06:21.802425: step: 972/526, loss: 0.003163806861266494 2023-01-22 17:06:22.887692: step: 976/526, loss: 0.010877230204641819 2023-01-22 17:06:23.955101: step: 980/526, loss: 0.0035219392739236355 2023-01-22 17:06:25.029596: step: 984/526, loss: 0.01758062280714512 2023-01-22 17:06:26.113926: step: 988/526, loss: 0.004554011858999729 2023-01-22 17:06:27.171671: step: 992/526, loss: 0.0012336608488112688 2023-01-22 17:06:28.230988: step: 996/526, loss: 0.0016016424633562565 2023-01-22 17:06:29.275134: step: 1000/526, loss: 0.005224517080932856 2023-01-22 17:06:30.353120: step: 1004/526, loss: 0.0032092106994241476 2023-01-22 17:06:31.424101: step: 1008/526, loss: 0.0040969569236040115 2023-01-22 17:06:32.495468: step: 1012/526, loss: 0.03528813272714615 2023-01-22 17:06:33.559501: step: 1016/526, loss: 0.00044168398017063737 2023-01-22 17:06:34.634775: step: 1020/526, loss: 0.006870711222290993 2023-01-22 17:06:35.712687: step: 1024/526, loss: 0.00038013941957615316 2023-01-22 17:06:36.757874: step: 1028/526, loss: 0.00493632210418582 2023-01-22 17:06:37.832882: step: 1032/526, loss: 0.0026533312629908323 2023-01-22 17:06:38.889655: step: 1036/526, loss: 0.007120381575077772 2023-01-22 17:06:39.946204: step: 1040/526, loss: 0.008523966185748577 2023-01-22 17:06:41.023814: step: 1044/526, loss: 0.004000928718596697 2023-01-22 17:06:42.108605: step: 1048/526, loss: 0.0037398335989564657 2023-01-22 17:06:43.161017: step: 1052/526, loss: 0.003333235392346978 2023-01-22 17:06:44.226927: step: 1056/526, loss: 0.0012805245351046324 2023-01-22 17:06:45.290113: step: 1060/526, loss: 0.001410696073435247 
2023-01-22 17:06:46.369171: step: 1064/526, loss: 0.007828780449926853 2023-01-22 17:06:47.424882: step: 1068/526, loss: 0.0003510605019982904 2023-01-22 17:06:48.481640: step: 1072/526, loss: 0.01994696818292141 2023-01-22 17:06:49.562939: step: 1076/526, loss: 0.0049826474860310555 2023-01-22 17:06:50.623212: step: 1080/526, loss: 0.003470906987786293 2023-01-22 17:06:51.695994: step: 1084/526, loss: 0.002952217124402523 2023-01-22 17:06:52.758686: step: 1088/526, loss: 0.004679102450609207 2023-01-22 17:06:53.815104: step: 1092/526, loss: 0.0037274339701980352 2023-01-22 17:06:54.893042: step: 1096/526, loss: 0.006447460502386093 2023-01-22 17:06:55.950042: step: 1100/526, loss: 0.007627236191183329 2023-01-22 17:06:57.003725: step: 1104/526, loss: 0.00705467164516449 2023-01-22 17:06:58.071882: step: 1108/526, loss: 0.004185685887932777 2023-01-22 17:06:59.145671: step: 1112/526, loss: 0.008962001651525497 2023-01-22 17:07:00.236010: step: 1116/526, loss: 0.011833866126835346 2023-01-22 17:07:01.310002: step: 1120/526, loss: 0.0022730305790901184 2023-01-22 17:07:02.369735: step: 1124/526, loss: 0.0030844039283692837 2023-01-22 17:07:03.434542: step: 1128/526, loss: 0.0021184836514294147 2023-01-22 17:07:04.483533: step: 1132/526, loss: 5.327435064828023e-05 2023-01-22 17:07:05.562847: step: 1136/526, loss: 0.013489339500665665 2023-01-22 17:07:06.628674: step: 1140/526, loss: 0.0339508093893528 2023-01-22 17:07:07.678503: step: 1144/526, loss: 0.0004990124143660069 2023-01-22 17:07:08.745794: step: 1148/526, loss: 0.006240393966436386 2023-01-22 17:07:09.819692: step: 1152/526, loss: 0.008802542462944984 2023-01-22 17:07:10.892362: step: 1156/526, loss: 0.00857494119554758 2023-01-22 17:07:11.953457: step: 1160/526, loss: 0.0029678912833333015 2023-01-22 17:07:13.041602: step: 1164/526, loss: 0.0014113453216850758 2023-01-22 17:07:14.090594: step: 1168/526, loss: 0.0037383323069661856 2023-01-22 17:07:15.165192: step: 1172/526, loss: 0.0032509141601622105 2023-01-22 17:07:16.239838: step: 1176/526, loss: 0.002277893014252186 2023-01-22 17:07:17.305386: step: 1180/526, loss: 0.0013324364554136992 2023-01-22 17:07:18.369940: step: 1184/526, loss: 0.0028657233342528343 2023-01-22 17:07:19.432796: step: 1188/526, loss: 0.004632764030247927 2023-01-22 17:07:20.512804: step: 1192/526, loss: 0.007884617894887924 2023-01-22 17:07:21.577602: step: 1196/526, loss: 0.0036501542199403048 2023-01-22 17:07:22.645400: step: 1200/526, loss: 0.004037424921989441 2023-01-22 17:07:23.700189: step: 1204/526, loss: 0.009743054397404194 2023-01-22 17:07:24.773133: step: 1208/526, loss: 0.0020211022347211838 2023-01-22 17:07:25.838200: step: 1212/526, loss: 0.002254490740597248 2023-01-22 17:07:26.905474: step: 1216/526, loss: 0.00022705357696395367 2023-01-22 17:07:27.968586: step: 1220/526, loss: 0.0018595204455778003 2023-01-22 17:07:29.036059: step: 1224/526, loss: 0.0066552553325891495 2023-01-22 17:07:30.115087: step: 1228/526, loss: 0.0007397474837489426 2023-01-22 17:07:31.177539: step: 1232/526, loss: 0.00040422313031740487 2023-01-22 17:07:32.248206: step: 1236/526, loss: 0.0009621197823435068 2023-01-22 17:07:33.306103: step: 1240/526, loss: 0.010266823694109917 2023-01-22 17:07:34.373511: step: 1244/526, loss: 0.0008489550091326237 2023-01-22 17:07:35.431768: step: 1248/526, loss: 0.0022352978121489286 2023-01-22 17:07:36.470477: step: 1252/526, loss: 0.0009734187624417245 2023-01-22 17:07:37.538670: step: 1256/526, loss: 0.05258942395448685 2023-01-22 17:07:38.630313: step: 1260/526, loss: 
0.0009521223837509751 2023-01-22 17:07:39.698815: step: 1264/526, loss: 0.0014694444835186005 2023-01-22 17:07:40.762116: step: 1268/526, loss: 0.0028021077159792185 2023-01-22 17:07:41.828284: step: 1272/526, loss: 0.011146968230605125 2023-01-22 17:07:42.903659: step: 1276/526, loss: 0.004251406062394381 2023-01-22 17:07:43.981982: step: 1280/526, loss: 0.00867058802396059 2023-01-22 17:07:45.051200: step: 1284/526, loss: 0.004412802401930094 2023-01-22 17:07:46.121083: step: 1288/526, loss: 0.001694925012998283 2023-01-22 17:07:47.187611: step: 1292/526, loss: 0.002022380940616131 2023-01-22 17:07:48.259651: step: 1296/526, loss: 0.008119679987430573 2023-01-22 17:07:49.332947: step: 1300/526, loss: 0.010428724810481071 2023-01-22 17:07:50.384024: step: 1304/526, loss: 0.00814066082239151 2023-01-22 17:07:51.455405: step: 1308/526, loss: 0.02768310345709324 2023-01-22 17:07:52.518933: step: 1312/526, loss: 0.0021256140898913145 2023-01-22 17:07:53.593563: step: 1316/526, loss: 0.008671700023114681 2023-01-22 17:07:54.647582: step: 1320/526, loss: 0.00039050879422575235 2023-01-22 17:07:55.691568: step: 1324/526, loss: 0.00012860716378781945 2023-01-22 17:07:56.756604: step: 1328/526, loss: 0.0033445963636040688 2023-01-22 17:07:57.834546: step: 1332/526, loss: 0.0023296927101910114 2023-01-22 17:07:58.906744: step: 1336/526, loss: 0.004069339018315077 2023-01-22 17:07:59.975820: step: 1340/526, loss: 0.0023620116990059614 2023-01-22 17:08:01.047005: step: 1344/526, loss: 0.007237502373754978 2023-01-22 17:08:02.108262: step: 1348/526, loss: 0.0007343686302192509 2023-01-22 17:08:03.186512: step: 1352/526, loss: 0.0001781665050657466 2023-01-22 17:08:04.253875: step: 1356/526, loss: 0.0065128314308822155 2023-01-22 17:08:05.319679: step: 1360/526, loss: 0.006090397015213966 2023-01-22 17:08:06.379480: step: 1364/526, loss: 0.009424989111721516 2023-01-22 17:08:07.448934: step: 1368/526, loss: 0.0043399520218372345 2023-01-22 17:08:08.511799: step: 1372/526, loss: 0.012488430365920067 2023-01-22 17:08:09.592625: step: 1376/526, loss: 0.005547517444938421 2023-01-22 17:08:10.667106: step: 1380/526, loss: 0.004449707921594381 2023-01-22 17:08:11.739220: step: 1384/526, loss: 0.018844323232769966 2023-01-22 17:08:12.806053: step: 1388/526, loss: 0.0033769886940717697 2023-01-22 17:08:13.883066: step: 1392/526, loss: 0.008900512009859085 2023-01-22 17:08:14.956019: step: 1396/526, loss: 0.007564301136881113 2023-01-22 17:08:16.036731: step: 1400/526, loss: 0.0005988328484818339 2023-01-22 17:08:17.092244: step: 1404/526, loss: 0.0028724120929837227 2023-01-22 17:08:18.161061: step: 1408/526, loss: 0.007799883838742971 2023-01-22 17:08:19.230207: step: 1412/526, loss: 0.001433442928828299 2023-01-22 17:08:20.304535: step: 1416/526, loss: 0.0012137835146859288 2023-01-22 17:08:21.369845: step: 1420/526, loss: 0.00638975203037262 2023-01-22 17:08:22.428015: step: 1424/526, loss: 0.0019523389637470245 2023-01-22 17:08:23.501303: step: 1428/526, loss: 0.015957552939653397 2023-01-22 17:08:24.552384: step: 1432/526, loss: 0.010864854790270329 2023-01-22 17:08:25.616949: step: 1436/526, loss: 0.000659624463878572 2023-01-22 17:08:26.678930: step: 1440/526, loss: 0.0093257250264287 2023-01-22 17:08:27.762039: step: 1444/526, loss: 0.03805235028266907 2023-01-22 17:08:28.813447: step: 1448/526, loss: 0.0027287560515105724 2023-01-22 17:08:29.874656: step: 1452/526, loss: 0.004940278362482786 2023-01-22 17:08:30.949711: step: 1456/526, loss: 0.008313498459756374 2023-01-22 17:08:32.026244: step: 
1460/526, loss: 0.023574357852339745 2023-01-22 17:08:33.091539: step: 1464/526, loss: 0.006596646271646023 2023-01-22 17:08:34.153176: step: 1468/526, loss: 0.008456969633698463 2023-01-22 17:08:35.216785: step: 1472/526, loss: 0.004637174773961306 2023-01-22 17:08:36.269239: step: 1476/526, loss: 0.009196712635457516 2023-01-22 17:08:37.334632: step: 1480/526, loss: 0.013665327802300453 2023-01-22 17:08:38.399766: step: 1484/526, loss: 0.0036045322194695473 2023-01-22 17:08:39.491024: step: 1488/526, loss: 0.030869755893945694 2023-01-22 17:08:40.545781: step: 1492/526, loss: 0.004974499810487032 2023-01-22 17:08:41.608377: step: 1496/526, loss: 0.00295928749255836 2023-01-22 17:08:42.666430: step: 1500/526, loss: 0.004706934560090303 2023-01-22 17:08:43.755129: step: 1504/526, loss: 0.030107242986559868 2023-01-22 17:08:44.823197: step: 1508/526, loss: 0.004500477574765682 2023-01-22 17:08:45.893754: step: 1512/526, loss: 0.015397071838378906 2023-01-22 17:08:46.961542: step: 1516/526, loss: 0.002677349839359522 2023-01-22 17:08:48.009504: step: 1520/526, loss: 0.001125907525420189 2023-01-22 17:08:49.071182: step: 1524/526, loss: 0.00018671000725589693 2023-01-22 17:08:50.124852: step: 1528/526, loss: 0.008221160620450974 2023-01-22 17:08:51.177987: step: 1532/526, loss: 0.0026178406551480293 2023-01-22 17:08:52.239676: step: 1536/526, loss: 0.03792737051844597 2023-01-22 17:08:53.305966: step: 1540/526, loss: 0.00810444075614214 2023-01-22 17:08:54.362346: step: 1544/526, loss: 0.03169369697570801 2023-01-22 17:08:55.432549: step: 1548/526, loss: 0.014530322514474392 2023-01-22 17:08:56.497950: step: 1552/526, loss: 0.003152405144646764 2023-01-22 17:08:57.567229: step: 1556/526, loss: 0.0044007617980241776 2023-01-22 17:08:58.641450: step: 1560/526, loss: 0.006703834515064955 2023-01-22 17:08:59.714844: step: 1564/526, loss: 0.004929485265165567 2023-01-22 17:09:00.791853: step: 1568/526, loss: 0.013477531261742115 2023-01-22 17:09:01.870331: step: 1572/526, loss: 0.0037053553387522697 2023-01-22 17:09:02.939259: step: 1576/526, loss: 0.0017258892767131329 2023-01-22 17:09:03.995634: step: 1580/526, loss: 0.03574566915631294 2023-01-22 17:09:05.065529: step: 1584/526, loss: 0.002785998862236738 2023-01-22 17:09:06.139971: step: 1588/526, loss: 0.004753216169774532 2023-01-22 17:09:07.218230: step: 1592/526, loss: 0.00606357678771019 2023-01-22 17:09:08.308108: step: 1596/526, loss: 0.0009245178662240505 2023-01-22 17:09:09.370157: step: 1600/526, loss: 0.0007811725372448564 2023-01-22 17:09:10.430696: step: 1604/526, loss: 0.005165122915059328 2023-01-22 17:09:11.494208: step: 1608/526, loss: 0.012915108352899551 2023-01-22 17:09:12.582235: step: 1612/526, loss: 0.0039015228394418955 2023-01-22 17:09:13.667954: step: 1616/526, loss: 0.016677485778927803 2023-01-22 17:09:14.721414: step: 1620/526, loss: 0.003994401078671217 2023-01-22 17:09:15.806823: step: 1624/526, loss: 0.002671940019354224 2023-01-22 17:09:16.859112: step: 1628/526, loss: 0.00498681515455246 2023-01-22 17:09:17.923664: step: 1632/526, loss: 0.04805762320756912 2023-01-22 17:09:18.976342: step: 1636/526, loss: 0.05779241770505905 2023-01-22 17:09:20.032739: step: 1640/526, loss: 0.0033233652357012033 2023-01-22 17:09:21.111166: step: 1644/526, loss: 0.012875787913799286 2023-01-22 17:09:22.177326: step: 1648/526, loss: 0.004377391654998064 2023-01-22 17:09:23.245379: step: 1652/526, loss: 0.0019602095708251 2023-01-22 17:09:24.289891: step: 1656/526, loss: 0.013964900746941566 2023-01-22 17:09:25.340454: step: 
1660/526, loss: 0.003034943016245961 2023-01-22 17:09:26.408526: step: 1664/526, loss: 0.002081464510411024 2023-01-22 17:09:27.472459: step: 1668/526, loss: 0.004796476569026709 2023-01-22 17:09:28.535526: step: 1672/526, loss: 0.011374261230230331 2023-01-22 17:09:29.596146: step: 1676/526, loss: 0.006944901309907436 2023-01-22 17:09:30.653812: step: 1680/526, loss: 0.00426824577152729 2023-01-22 17:09:31.727599: step: 1684/526, loss: 0.04126424714922905 2023-01-22 17:09:32.815323: step: 1688/526, loss: 0.005446398630738258 2023-01-22 17:09:33.884765: step: 1692/526, loss: 4.939519726576691e-07 2023-01-22 17:09:34.942784: step: 1696/526, loss: 0.008905082941055298 2023-01-22 17:09:36.014284: step: 1700/526, loss: 0.007130885496735573 2023-01-22 17:09:37.081419: step: 1704/526, loss: 0.002106334315612912 2023-01-22 17:09:38.169539: step: 1708/526, loss: 0.004011321812868118 2023-01-22 17:09:39.246746: step: 1712/526, loss: 0.010999292135238647 2023-01-22 17:09:40.318205: step: 1716/526, loss: 0.00357854668982327 2023-01-22 17:09:41.392004: step: 1720/526, loss: 0.008787340484559536 2023-01-22 17:09:42.453750: step: 1724/526, loss: 0.01996440440416336 2023-01-22 17:09:43.542006: step: 1728/526, loss: 0.006099092308431864 2023-01-22 17:09:44.633818: step: 1732/526, loss: 0.01982984133064747 2023-01-22 17:09:45.716515: step: 1736/526, loss: 0.0032578238751739264 2023-01-22 17:09:46.786046: step: 1740/526, loss: 0.011187322437763214 2023-01-22 17:09:47.878058: step: 1744/526, loss: 0.030422843992710114 2023-01-22 17:09:48.946421: step: 1748/526, loss: 0.00736046489328146 2023-01-22 17:09:50.025765: step: 1752/526, loss: 0.0023059165105223656 2023-01-22 17:09:51.092765: step: 1756/526, loss: 0.01954231783747673 2023-01-22 17:09:52.167804: step: 1760/526, loss: 0.004743980243802071 2023-01-22 17:09:53.231635: step: 1764/526, loss: 0.0045912181958556175 2023-01-22 17:09:54.317744: step: 1768/526, loss: 0.009715816006064415 2023-01-22 17:09:55.372208: step: 1772/526, loss: 0.007264117244631052 2023-01-22 17:09:56.449049: step: 1776/526, loss: 0.011482124216854572 2023-01-22 17:09:57.518019: step: 1780/526, loss: 0.011981969699263573 2023-01-22 17:09:58.596900: step: 1784/526, loss: 0.004446870181709528 2023-01-22 17:09:59.668048: step: 1788/526, loss: 0.0013532961020246148 2023-01-22 17:10:00.746458: step: 1792/526, loss: 0.008777124807238579 2023-01-22 17:10:01.813436: step: 1796/526, loss: 0.002468526130542159 2023-01-22 17:10:02.890154: step: 1800/526, loss: 0.00039130181539803743 2023-01-22 17:10:03.964182: step: 1804/526, loss: 0.004155596252530813 2023-01-22 17:10:05.029342: step: 1808/526, loss: 0.0017546814633533359 2023-01-22 17:10:06.094823: step: 1812/526, loss: 0.00041980567038990557 2023-01-22 17:10:07.161527: step: 1816/526, loss: 0.0015959583688527346 2023-01-22 17:10:08.228816: step: 1820/526, loss: 0.005487230606377125 2023-01-22 17:10:09.296067: step: 1824/526, loss: 0.014632657170295715 2023-01-22 17:10:10.348892: step: 1828/526, loss: 0.00432452792301774 2023-01-22 17:10:11.433414: step: 1832/526, loss: 0.00875879917293787 2023-01-22 17:10:12.506928: step: 1836/526, loss: 0.003968818578869104 2023-01-22 17:10:13.573807: step: 1840/526, loss: 0.0019335468532517552 2023-01-22 17:10:14.630471: step: 1844/526, loss: 0.004339843522757292 2023-01-22 17:10:15.703008: step: 1848/526, loss: 0.005686920136213303 2023-01-22 17:10:16.782051: step: 1852/526, loss: 0.0038829019758850336 2023-01-22 17:10:17.862974: step: 1856/526, loss: 0.001413828693330288 2023-01-22 17:10:18.920686: step: 
1860/526, loss: 0.009554093703627586 2023-01-22 17:10:19.993051: step: 1864/526, loss: 0.008032902143895626 2023-01-22 17:10:21.068434: step: 1868/526, loss: 0.0013813204132020473 2023-01-22 17:10:22.144042: step: 1872/526, loss: 0.00010282945731887594 2023-01-22 17:10:23.210951: step: 1876/526, loss: 0.0037616966292262077 2023-01-22 17:10:24.282985: step: 1880/526, loss: 0.007655742112547159 2023-01-22 17:10:25.358414: step: 1884/526, loss: 0.02388540841639042 2023-01-22 17:10:26.434831: step: 1888/526, loss: 0.022810786962509155 2023-01-22 17:10:27.524628: step: 1892/526, loss: 0.02503994293510914 2023-01-22 17:10:28.599096: step: 1896/526, loss: 0.00503999600186944 2023-01-22 17:10:29.687260: step: 1900/526, loss: 0.02420971542596817 2023-01-22 17:10:30.745820: step: 1904/526, loss: 0.0004633065254893154 2023-01-22 17:10:31.814025: step: 1908/526, loss: 0.0007413079147227108 2023-01-22 17:10:32.892696: step: 1912/526, loss: 0.00847114808857441 2023-01-22 17:10:33.969165: step: 1916/526, loss: 0.039886489510536194 2023-01-22 17:10:35.036017: step: 1920/526, loss: 0.018483810126781464 2023-01-22 17:10:36.099715: step: 1924/526, loss: 0.0064291395246982574 2023-01-22 17:10:37.180615: step: 1928/526, loss: 0.004161641001701355 2023-01-22 17:10:38.268825: step: 1932/526, loss: 0.004157512914389372 2023-01-22 17:10:39.338956: step: 1936/526, loss: 0.008234788663685322 2023-01-22 17:10:40.409757: step: 1940/526, loss: 0.002967260777950287 2023-01-22 17:10:41.469004: step: 1944/526, loss: 0.010103443637490273 2023-01-22 17:10:42.544062: step: 1948/526, loss: 0.001317432732321322 2023-01-22 17:10:43.633188: step: 1952/526, loss: 0.006541980430483818 2023-01-22 17:10:44.705803: step: 1956/526, loss: 0.004061629064381123 2023-01-22 17:10:45.790880: step: 1960/526, loss: 0.0044001564383506775 2023-01-22 17:10:46.876746: step: 1964/526, loss: 0.003215089440345764 2023-01-22 17:10:47.934970: step: 1968/526, loss: 0.0030986550264060497 2023-01-22 17:10:49.011337: step: 1972/526, loss: 0.004986410494893789 2023-01-22 17:10:50.084609: step: 1976/526, loss: 0.008318000473082066 2023-01-22 17:10:51.149019: step: 1980/526, loss: 0.00040151720168069005 2023-01-22 17:10:52.236115: step: 1984/526, loss: 0.005011571105569601 2023-01-22 17:10:53.336037: step: 1988/526, loss: 0.003120447276160121 2023-01-22 17:10:54.394455: step: 1992/526, loss: 0.011333759874105453 2023-01-22 17:10:55.461071: step: 1996/526, loss: 0.0038796989247202873 2023-01-22 17:10:56.519520: step: 2000/526, loss: 0.00019722431898117065 2023-01-22 17:10:57.584876: step: 2004/526, loss: 0.008071818388998508 2023-01-22 17:10:58.653279: step: 2008/526, loss: 0.010678013786673546 2023-01-22 17:10:59.719272: step: 2012/526, loss: 0.01557554304599762 2023-01-22 17:11:00.785693: step: 2016/526, loss: 0.019661106169223785 2023-01-22 17:11:01.846219: step: 2020/526, loss: 0.002549381460994482 2023-01-22 17:11:02.906546: step: 2024/526, loss: 0.0063002691604197025 2023-01-22 17:11:03.976013: step: 2028/526, loss: 0.0005513711948879063 2023-01-22 17:11:05.044647: step: 2032/526, loss: 0.000553164747543633 2023-01-22 17:11:06.119624: step: 2036/526, loss: 0.006509793922305107 2023-01-22 17:11:07.197964: step: 2040/526, loss: 0.0066698892042040825 2023-01-22 17:11:08.273560: step: 2044/526, loss: 0.01583942212164402 2023-01-22 17:11:09.350136: step: 2048/526, loss: 0.01418278831988573 2023-01-22 17:11:10.425711: step: 2052/526, loss: 0.005408493336290121 2023-01-22 17:11:11.499480: step: 2056/526, loss: 0.0005478183156810701 2023-01-22 17:11:12.561166: 
step: 2060/526, loss: 0.0040389057248830795 2023-01-22 17:11:13.666356: step: 2064/526, loss: 0.01307224202901125 2023-01-22 17:11:14.729672: step: 2068/526, loss: 0.009238021448254585 2023-01-22 17:11:15.818916: step: 2072/526, loss: 0.011187410913407803 2023-01-22 17:11:16.891921: step: 2076/526, loss: 0.003588580060750246 2023-01-22 17:11:17.980350: step: 2080/526, loss: 0.0046479483135044575 2023-01-22 17:11:19.047273: step: 2084/526, loss: 0.004652297589927912 2023-01-22 17:11:20.124457: step: 2088/526, loss: 0.00234913919121027 2023-01-22 17:11:21.219224: step: 2092/526, loss: 0.006776631344109774 2023-01-22 17:11:22.296919: step: 2096/526, loss: 0.033366456627845764 2023-01-22 17:11:23.366941: step: 2100/526, loss: 0.0011444361880421638 2023-01-22 17:11:24.417343: step: 2104/526, loss: 0.0001746313355397433
==================================================
Loss: 0.008
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.352418903803132, 'r': 0.2989207779886148, 'f1': 0.32347279260780293}, 'combined': 0.23834837350048635, 'stategy': 1, 'epoch': 8}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3417378278920551, 'r': 0.23690067373103088, 'f1': 0.2798221046125319}, 'combined': 0.15263023887956284, 'stategy': 1, 'epoch': 8}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3269112160648357, 'r': 0.32939251561751, 'f1': 0.32814717529381426}, 'combined': 0.2417926554796526, 'stategy': 1, 'epoch': 8}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34000739764437243, 'r': 0.2716954090765533, 'f1': 0.3020370283642902}, 'combined': 0.16474747001688556, 'stategy': 1, 'epoch': 8}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 8}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 8}
New best korean model...
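The "New best korean model..." message lines up with the epoch-8 Dev Korean 'combined' score (0.23970876...) overtaking the previous Korean best from epoch 1 (0.23894648...), while the Chinese (epoch 3) and Russian (epoch 1) entries in the block below stay unchanged. A minimal sketch of that per-language bookkeeping, under the assumption that train.py selects checkpoints on the dev 'combined' score (the save path and function name are illustrative):

# Sketch only: illustrates the per-language best-model tracking implied by the log,
# not the actual train.py implementation.
import torch

best_dev_combined = {'chinese': 0.0, 'korean': 0.0, 'russian': 0.0}

def update_best(language: str, dev_result: dict, model, epoch: int) -> bool:
    """Save a checkpoint whenever the dev 'combined' score improves for a language."""
    score = dev_result['combined']
    if score > best_dev_combined[language]:
        best_dev_combined[language] = score
        torch.save(model.state_dict(), f'best_{language}_model.pt')  # hypothetical filename
        print(f'New best {language} model...')
        return True
    return False

# Epoch 8 above: Dev Korean 'combined' 0.23970876... beats the epoch-1 value of
# 0.23894648..., so only the Korean rows in "Current best result" move to epoch 8.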
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1}
******************************
Epoch: 9
command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 17:14:12.388384: step: 4/526, loss: 0.013079594820737839 2023-01-22 17:14:13.449018: step: 8/526, loss: 0.00904446467757225 2023-01-22 17:14:14.509602: step: 12/526, loss: 0.024451300501823425 2023-01-22 17:14:15.575980: step: 16/526, loss: 0.021447081118822098 2023-01-22 17:14:16.633177: step: 20/526, loss: 0.0033413756173104048 2023-01-22 17:14:17.699178: step: 24/526, loss: 0.0016094744205474854 2023-01-22 17:14:18.763694: step: 28/526, loss: 0.009556883946061134 2023-01-22 17:14:19.812233: step: 32/526, loss: 0.003605821868404746 2023-01-22 17:14:20.889903: step: 36/526, loss: 0.0031286971643567085 2023-01-22 17:14:21.961642: step: 40/526, loss: 0.003452236531302333 2023-01-22 17:14:23.021775: step: 44/526, loss: 0.00021833633945789188 2023-01-22 17:14:24.083659: step: 48/526, loss: 0.000872007803991437 2023-01-22 17:14:25.132029: step: 52/526, loss: 0.008121087215840816 2023-01-22 17:14:26.208520: step: 56/526, loss: 0.0017759572947397828 2023-01-22 17:14:27.272843: step: 60/526, loss: 0.001073908293619752 2023-01-22
17:14:28.329683: step: 64/526, loss: 0.011140496470034122 2023-01-22 17:14:29.395292: step: 68/526, loss: 0.0017967929597944021 2023-01-22 17:14:30.461551: step: 72/526, loss: 0.005892232526093721 2023-01-22 17:14:31.525043: step: 76/526, loss: 0.004117245320230722 2023-01-22 17:14:32.579479: step: 80/526, loss: 0.006771462503820658 2023-01-22 17:14:33.640101: step: 84/526, loss: 0.0037146317772567272 2023-01-22 17:14:34.700861: step: 88/526, loss: 0.0035161625128239393 2023-01-22 17:14:35.760426: step: 92/526, loss: 0.004980398807674646 2023-01-22 17:14:36.824562: step: 96/526, loss: 0.0006558881141245365 2023-01-22 17:14:37.879961: step: 100/526, loss: 0.0010797990253195167 2023-01-22 17:14:38.951345: step: 104/526, loss: 0.008615699596703053 2023-01-22 17:14:40.028679: step: 108/526, loss: 0.004968350287526846 2023-01-22 17:14:41.101176: step: 112/526, loss: 0.029111092910170555 2023-01-22 17:14:42.165139: step: 116/526, loss: 0.018352240324020386 2023-01-22 17:14:43.228271: step: 120/526, loss: 0.009935823269188404 2023-01-22 17:14:44.300773: step: 124/526, loss: 0.0024441261775791645 2023-01-22 17:14:45.365635: step: 128/526, loss: 0.00545929791405797 2023-01-22 17:14:46.441030: step: 132/526, loss: 0.00011647606879705563 2023-01-22 17:14:47.509415: step: 136/526, loss: 0.007359283044934273 2023-01-22 17:14:48.555488: step: 140/526, loss: 0.008807172998785973 2023-01-22 17:14:49.643079: step: 144/526, loss: 0.002639482729136944 2023-01-22 17:14:50.710465: step: 148/526, loss: 0.0006521404720842838 2023-01-22 17:14:51.792227: step: 152/526, loss: 0.00625608628615737 2023-01-22 17:14:52.869752: step: 156/526, loss: 0.013787361793220043 2023-01-22 17:14:53.960796: step: 160/526, loss: 0.002106620231643319 2023-01-22 17:14:55.017106: step: 164/526, loss: 0.0066810185089707375 2023-01-22 17:14:56.084639: step: 168/526, loss: 0.002057743724435568 2023-01-22 17:14:57.154527: step: 172/526, loss: 0.00016635513748042285 2023-01-22 17:14:58.227918: step: 176/526, loss: 0.007131034974008799 2023-01-22 17:14:59.305276: step: 180/526, loss: 0.0017956976080313325 2023-01-22 17:15:00.386550: step: 184/526, loss: 0.013966855593025684 2023-01-22 17:15:01.451851: step: 188/526, loss: 0.0027393600903451443 2023-01-22 17:15:02.522566: step: 192/526, loss: 0.0012498266296461225 2023-01-22 17:15:03.590909: step: 196/526, loss: 0.00817183218896389 2023-01-22 17:15:04.660909: step: 200/526, loss: 0.0004959365469403565 2023-01-22 17:15:05.725096: step: 204/526, loss: 0.02556758187711239 2023-01-22 17:15:06.791905: step: 208/526, loss: 0.008016317151486874 2023-01-22 17:15:07.851744: step: 212/526, loss: 0.015316873788833618 2023-01-22 17:15:08.901514: step: 216/526, loss: 0.0010514894966036081 2023-01-22 17:15:09.978583: step: 220/526, loss: 0.007979926653206348 2023-01-22 17:15:11.062836: step: 224/526, loss: 0.004599989857524633 2023-01-22 17:15:12.130311: step: 228/526, loss: 0.0018322113901376724 2023-01-22 17:15:13.217646: step: 232/526, loss: 0.029261555522680283 2023-01-22 17:15:14.286485: step: 236/526, loss: 0.010123688727617264 2023-01-22 17:15:15.355427: step: 240/526, loss: 0.005275878123939037 2023-01-22 17:15:16.428384: step: 244/526, loss: 0.03178130462765694 2023-01-22 17:15:17.494878: step: 248/526, loss: 0.000533056678250432 2023-01-22 17:15:18.573096: step: 252/526, loss: 0.005214234348386526 2023-01-22 17:15:19.649146: step: 256/526, loss: 0.0017743855714797974 2023-01-22 17:15:20.718235: step: 260/526, loss: 0.0033710047136992216 2023-01-22 17:15:21.793200: step: 264/526, loss: 
0.009170453064143658 2023-01-22 17:15:22.900932: step: 268/526, loss: 0.007401072420179844 2023-01-22 17:15:23.968475: step: 272/526, loss: 3.725290076417309e-10 2023-01-22 17:15:25.033101: step: 276/526, loss: 0.007131366524845362 2023-01-22 17:15:26.098996: step: 280/526, loss: 0.005157087463885546 2023-01-22 17:15:27.168351: step: 284/526, loss: 0.014146647416055202 2023-01-22 17:15:28.236256: step: 288/526, loss: 0.005302580539137125 2023-01-22 17:15:29.300590: step: 292/526, loss: 0.003181050531566143 2023-01-22 17:15:30.375506: step: 296/526, loss: 0.010133090429008007 2023-01-22 17:15:31.468718: step: 300/526, loss: 0.003045728662982583 2023-01-22 17:15:32.568702: step: 304/526, loss: 0.04759509861469269 2023-01-22 17:15:33.635247: step: 308/526, loss: 0.0030888644978404045 2023-01-22 17:15:34.715581: step: 312/526, loss: 0.0013615991920232773 2023-01-22 17:15:35.777867: step: 316/526, loss: 0.003262763377279043 2023-01-22 17:15:36.844992: step: 320/526, loss: 1.9154898836859502e-05 2023-01-22 17:15:37.895154: step: 324/526, loss: 0.0016366096679121256 2023-01-22 17:15:38.957330: step: 328/526, loss: 0.0009521624888293445 2023-01-22 17:15:40.019354: step: 332/526, loss: 0.002164134057238698 2023-01-22 17:15:41.110595: step: 336/526, loss: 0.004773444961756468 2023-01-22 17:15:42.178068: step: 340/526, loss: 0.00251379469409585 2023-01-22 17:15:43.246433: step: 344/526, loss: 0.0005149704520590603 2023-01-22 17:15:44.319398: step: 348/526, loss: 0.021555345505475998 2023-01-22 17:15:45.399721: step: 352/526, loss: 0.0031156721524894238 2023-01-22 17:15:46.480034: step: 356/526, loss: 0.002873737830668688 2023-01-22 17:15:47.544845: step: 360/526, loss: 8.233762491727248e-05 2023-01-22 17:15:48.630359: step: 364/526, loss: 0.005617564544081688 2023-01-22 17:15:49.689293: step: 368/526, loss: 0.001380230882205069 2023-01-22 17:15:50.758443: step: 372/526, loss: 0.000144410616485402 2023-01-22 17:15:51.830131: step: 376/526, loss: 0.0014523608842864633 2023-01-22 17:15:52.898124: step: 380/526, loss: 0.0016333027742803097 2023-01-22 17:15:53.961314: step: 384/526, loss: 0.015328395180404186 2023-01-22 17:15:55.024882: step: 388/526, loss: 0.004146549850702286 2023-01-22 17:15:56.079513: step: 392/526, loss: 5.721750540033099e-07 2023-01-22 17:15:57.169316: step: 396/526, loss: 0.005526612978428602 2023-01-22 17:15:58.241725: step: 400/526, loss: 0.016683924943208694 2023-01-22 17:15:59.287537: step: 404/526, loss: 0.003495132550597191 2023-01-22 17:16:00.351581: step: 408/526, loss: 0.003528200089931488 2023-01-22 17:16:01.416432: step: 412/526, loss: 0.0008445320418104529 2023-01-22 17:16:02.476771: step: 416/526, loss: 0.004156423266977072 2023-01-22 17:16:03.542432: step: 420/526, loss: 0.006627610418945551 2023-01-22 17:16:04.618087: step: 424/526, loss: 0.00634584529325366 2023-01-22 17:16:05.668118: step: 428/526, loss: 0.011854343116283417 2023-01-22 17:16:06.731824: step: 432/526, loss: 0.0012294589541852474 2023-01-22 17:16:07.786838: step: 436/526, loss: 0.0006020539440214634 2023-01-22 17:16:08.851411: step: 440/526, loss: 0.007180261891335249 2023-01-22 17:16:09.909493: step: 444/526, loss: 0.002214729320257902 2023-01-22 17:16:10.970790: step: 448/526, loss: 0.004700632765889168 2023-01-22 17:16:12.055210: step: 452/526, loss: 0.022810641676187515 2023-01-22 17:16:13.129621: step: 456/526, loss: 8.707172673894092e-05 2023-01-22 17:16:14.195227: step: 460/526, loss: 0.0009464430040679872 2023-01-22 17:16:15.259144: step: 464/526, loss: 0.0012505368795245886 2023-01-22 
17:16:16.310538: step: 468/526, loss: 0.020038196817040443 2023-01-22 17:16:17.394561: step: 472/526, loss: 0.0027339826337993145 2023-01-22 17:16:18.463503: step: 476/526, loss: 0.0011264992645010352 2023-01-22 17:16:19.532988: step: 480/526, loss: 0.005158867686986923 2023-01-22 17:16:20.614254: step: 484/526, loss: 0.0008930271724238992 2023-01-22 17:16:21.667832: step: 488/526, loss: 0.00048052764032036066 2023-01-22 17:16:22.727449: step: 492/526, loss: 0.0013856820296496153 2023-01-22 17:16:23.800142: step: 496/526, loss: 0.003456941805779934 2023-01-22 17:16:24.877815: step: 500/526, loss: 0.005218749865889549 2023-01-22 17:16:25.943763: step: 504/526, loss: 0.011373475193977356 2023-01-22 17:16:27.019553: step: 508/526, loss: 0.0020359831396490335 2023-01-22 17:16:28.083750: step: 512/526, loss: 0.0016010890249162912 2023-01-22 17:16:29.128021: step: 516/526, loss: 0.004258866887539625 2023-01-22 17:16:30.174835: step: 520/526, loss: 4.085959153599106e-05 2023-01-22 17:16:31.232356: step: 524/526, loss: 0.008100834675133228 2023-01-22 17:16:32.294149: step: 528/526, loss: 0.002264282898977399 2023-01-22 17:16:33.375090: step: 532/526, loss: 0.004935526754707098 2023-01-22 17:16:34.438826: step: 536/526, loss: 0.0016905742231756449 2023-01-22 17:16:35.509526: step: 540/526, loss: 0.0064620026387274265 2023-01-22 17:16:36.575326: step: 544/526, loss: 0.009874554350972176 2023-01-22 17:16:37.634703: step: 548/526, loss: 0.004197982605546713 2023-01-22 17:16:38.706966: step: 552/526, loss: 0.019544487819075584 2023-01-22 17:16:39.757831: step: 556/526, loss: 0.003573755966499448 2023-01-22 17:16:40.821371: step: 560/526, loss: 0.001996786566451192 2023-01-22 17:16:41.883921: step: 564/526, loss: 0.0069455611519515514 2023-01-22 17:16:42.952673: step: 568/526, loss: 0.0006901415181346238 2023-01-22 17:16:44.034546: step: 572/526, loss: 0.002882111119106412 2023-01-22 17:16:45.094196: step: 576/526, loss: 0.0006492440588772297 2023-01-22 17:16:46.162213: step: 580/526, loss: 0.012737701646983624 2023-01-22 17:16:47.209175: step: 584/526, loss: 0.0023327190428972244 2023-01-22 17:16:48.279987: step: 588/526, loss: 0.008727316744625568 2023-01-22 17:16:49.350399: step: 592/526, loss: 0.007423246745020151 2023-01-22 17:16:50.429136: step: 596/526, loss: 0.0008849871228449047 2023-01-22 17:16:51.508163: step: 600/526, loss: 0.003652324201539159 2023-01-22 17:16:52.570343: step: 604/526, loss: 0.015387420542538166 2023-01-22 17:16:53.625586: step: 608/526, loss: 5.522069113794714e-05 2023-01-22 17:16:54.693465: step: 612/526, loss: 0.008015790954232216 2023-01-22 17:16:55.758111: step: 616/526, loss: 0.004074361640959978 2023-01-22 17:16:56.819117: step: 620/526, loss: 0.0012326088035479188 2023-01-22 17:16:57.873845: step: 624/526, loss: 0.0021513698156923056 2023-01-22 17:16:58.932506: step: 628/526, loss: 0.0036216145381331444 2023-01-22 17:17:00.024009: step: 632/526, loss: 0.0009492533281445503 2023-01-22 17:17:01.096308: step: 636/526, loss: 0.011502153240144253 2023-01-22 17:17:02.145635: step: 640/526, loss: 0.002579902298748493 2023-01-22 17:17:03.208621: step: 644/526, loss: 0.00262247771024704 2023-01-22 17:17:04.274715: step: 648/526, loss: 0.007683777250349522 2023-01-22 17:17:05.331795: step: 652/526, loss: 0.0014152165967971087 2023-01-22 17:17:06.399563: step: 656/526, loss: 0.001773946569301188 2023-01-22 17:17:07.473024: step: 660/526, loss: 0.010569152422249317 2023-01-22 17:17:08.532234: step: 664/526, loss: 0.0003729837480932474 2023-01-22 17:17:09.596101: step: 668/526, 
loss: 0.002267500152811408 2023-01-22 17:17:10.655557: step: 672/526, loss: 0.0009689492871984839 2023-01-22 17:17:11.746090: step: 676/526, loss: 4.996197822038084e-05 2023-01-22 17:17:12.795656: step: 680/526, loss: 0.005962591152638197 2023-01-22 17:17:13.892792: step: 684/526, loss: 0.0032020793296396732 2023-01-22 17:17:14.945563: step: 688/526, loss: 0.002128659514710307 2023-01-22 17:17:16.018009: step: 692/526, loss: 2.5682535124360584e-05 2023-01-22 17:17:17.096815: step: 696/526, loss: 0.0033702075015753508 2023-01-22 17:17:18.183869: step: 700/526, loss: 0.004174541216343641 2023-01-22 17:17:19.235669: step: 704/526, loss: 0.00545587157830596 2023-01-22 17:17:20.290336: step: 708/526, loss: 0.00704062869772315 2023-01-22 17:17:21.347555: step: 712/526, loss: 0.0009306574356742203 2023-01-22 17:17:22.410908: step: 716/526, loss: 0.004401668906211853 2023-01-22 17:17:23.464348: step: 720/526, loss: 0.0038557122461497784 2023-01-22 17:17:24.515182: step: 724/526, loss: 0.0022898337338119745 2023-01-22 17:17:25.592392: step: 728/526, loss: 0.00023781249183230102 2023-01-22 17:17:26.657422: step: 732/526, loss: 0.0037209673319011927 2023-01-22 17:17:27.729967: step: 736/526, loss: 0.015409648418426514 2023-01-22 17:17:28.801726: step: 740/526, loss: 0.0037468383088707924 2023-01-22 17:17:29.861345: step: 744/526, loss: 0.003940398804843426 2023-01-22 17:17:30.928948: step: 748/526, loss: 0.0016385382041335106 2023-01-22 17:17:32.007801: step: 752/526, loss: 0.0033500271383672953 2023-01-22 17:17:33.079013: step: 756/526, loss: 0.0003862368466798216 2023-01-22 17:17:34.152547: step: 760/526, loss: 0.0006065507768653333 2023-01-22 17:17:35.226487: step: 764/526, loss: 0.00435351999476552 2023-01-22 17:17:36.286784: step: 768/526, loss: 0.001354056061245501 2023-01-22 17:17:37.360873: step: 772/526, loss: 0.00980560015887022 2023-01-22 17:17:38.444323: step: 776/526, loss: 0.0006540766917169094 2023-01-22 17:17:39.507035: step: 780/526, loss: 0.005372548010200262 2023-01-22 17:17:40.569070: step: 784/526, loss: 0.005511634983122349 2023-01-22 17:17:41.648640: step: 788/526, loss: 0.0032642828300595284 2023-01-22 17:17:42.698966: step: 792/526, loss: 0.02014615200459957 2023-01-22 17:17:43.769037: step: 796/526, loss: 0.008796419017016888 2023-01-22 17:17:44.855841: step: 800/526, loss: 0.0008439103839918971 2023-01-22 17:17:45.920680: step: 804/526, loss: 0.009275809861719608 2023-01-22 17:17:46.997474: step: 808/526, loss: 0.013660918921232224 2023-01-22 17:17:48.056712: step: 812/526, loss: 0.003361928276717663 2023-01-22 17:17:49.125688: step: 816/526, loss: 0.005954174790531397 2023-01-22 17:17:50.190351: step: 820/526, loss: 0.0007312035304494202 2023-01-22 17:17:51.250865: step: 824/526, loss: 0.004792365711182356 2023-01-22 17:17:52.308421: step: 828/526, loss: 0.0075009847059845924 2023-01-22 17:17:53.354501: step: 832/526, loss: 0.0026808949187397957 2023-01-22 17:17:54.417526: step: 836/526, loss: 0.02192048355937004 2023-01-22 17:17:55.478146: step: 840/526, loss: 0.018550509586930275 2023-01-22 17:17:56.548093: step: 844/526, loss: 0.002833773149177432 2023-01-22 17:17:57.602084: step: 848/526, loss: 0.0017575517995283008 2023-01-22 17:17:58.669187: step: 852/526, loss: 0.007136870641261339 2023-01-22 17:17:59.743987: step: 856/526, loss: 0.005001316778361797 2023-01-22 17:18:00.801844: step: 860/526, loss: 0.00017664016922935843 2023-01-22 17:18:01.858749: step: 864/526, loss: 0.0011570138158276677 2023-01-22 17:18:02.915682: step: 868/526, loss: 0.009741325862705708 
2023-01-22 17:18:03.981642: step: 872/526, loss: 0.00023128798056859523 2023-01-22 17:18:05.051886: step: 876/526, loss: 0.0032117220107465982 2023-01-22 17:18:06.112433: step: 880/526, loss: 0.00822259671986103 2023-01-22 17:18:07.171504: step: 884/526, loss: 0.04778725653886795 2023-01-22 17:18:08.218078: step: 888/526, loss: 0.02158493548631668 2023-01-22 17:18:09.276912: step: 892/526, loss: 0.001879831776022911 2023-01-22 17:18:10.347676: step: 896/526, loss: 0.0036572597455233335 2023-01-22 17:18:11.428070: step: 900/526, loss: 0.013711349107325077 2023-01-22 17:18:12.481003: step: 904/526, loss: 0.0041593159548938274 2023-01-22 17:18:13.549339: step: 908/526, loss: 0.00014704930072184652 2023-01-22 17:18:14.622882: step: 912/526, loss: 0.010209574364125729 2023-01-22 17:18:15.686806: step: 916/526, loss: 0.0013150412123650312 2023-01-22 17:18:16.750016: step: 920/526, loss: 0.003515928518027067 2023-01-22 17:18:17.831153: step: 924/526, loss: 0.0009061343735083938 2023-01-22 17:18:18.918557: step: 928/526, loss: 0.025345806032419205 2023-01-22 17:18:19.958928: step: 932/526, loss: 0.00040686913416720927 2023-01-22 17:18:21.021868: step: 936/526, loss: 0.00034084025537595153 2023-01-22 17:18:22.077515: step: 940/526, loss: 0.003772574011236429 2023-01-22 17:18:23.151199: step: 944/526, loss: 0.006722453981637955 2023-01-22 17:18:24.215187: step: 948/526, loss: 0.007665977813303471 2023-01-22 17:18:25.265237: step: 952/526, loss: 0.005467934533953667 2023-01-22 17:18:26.344199: step: 956/526, loss: 0.0015559265157207847 2023-01-22 17:18:27.423473: step: 960/526, loss: 0.0033845871221274137 2023-01-22 17:18:28.497413: step: 964/526, loss: 0.0061858962289988995 2023-01-22 17:18:29.567328: step: 968/526, loss: 0.005246929358690977 2023-01-22 17:18:30.637169: step: 972/526, loss: 0.004883466754108667 2023-01-22 17:18:31.682534: step: 976/526, loss: 0.003289029933512211 2023-01-22 17:18:32.738365: step: 980/526, loss: 0.004805135540664196 2023-01-22 17:18:33.810157: step: 984/526, loss: 0.002463815500959754 2023-01-22 17:18:34.883232: step: 988/526, loss: 0.023306015878915787 2023-01-22 17:18:35.949498: step: 992/526, loss: 0.006063970737159252 2023-01-22 17:18:37.011812: step: 996/526, loss: 0.00376648991368711 2023-01-22 17:18:38.074796: step: 1000/526, loss: 0.020093783736228943 2023-01-22 17:18:39.149227: step: 1004/526, loss: 0.005251782014966011 2023-01-22 17:18:40.207631: step: 1008/526, loss: 0.0038181315176188946 2023-01-22 17:18:41.268993: step: 1012/526, loss: 0.0173733439296484 2023-01-22 17:18:42.355431: step: 1016/526, loss: 0.003919048700481653 2023-01-22 17:18:43.418367: step: 1020/526, loss: 1.0426172138977563e-06 2023-01-22 17:18:44.480442: step: 1024/526, loss: 0.006582462694495916 2023-01-22 17:18:45.533185: step: 1028/526, loss: 0.009237326681613922 2023-01-22 17:18:46.581535: step: 1032/526, loss: 0.001396065461449325 2023-01-22 17:18:47.671936: step: 1036/526, loss: 0.009620334953069687 2023-01-22 17:18:48.728692: step: 1040/526, loss: 0.003203710075467825 2023-01-22 17:18:49.779953: step: 1044/526, loss: 0.009948832914233208 2023-01-22 17:18:50.857774: step: 1048/526, loss: 0.001592310843989253 2023-01-22 17:18:51.943694: step: 1052/526, loss: 0.001643832423724234 2023-01-22 17:18:53.017304: step: 1056/526, loss: 0.005469069816172123 2023-01-22 17:18:54.079097: step: 1060/526, loss: 0.006859211251139641 2023-01-22 17:18:55.148760: step: 1064/526, loss: 0.0056801168248057365 2023-01-22 17:18:56.204637: step: 1068/526, loss: 0.0005950412596575916 2023-01-22 
17:18:57.281661: step: 1072/526, loss: 0.007422159891575575 2023-01-22 17:18:58.338603: step: 1076/526, loss: 0.002564589260146022 2023-01-22 17:18:59.399532: step: 1080/526, loss: 0.0018462928710505366 2023-01-22 17:19:00.472490: step: 1084/526, loss: 0.00596190569922328 2023-01-22 17:19:01.535769: step: 1088/526, loss: 0.010247442871332169 2023-01-22 17:19:02.597634: step: 1092/526, loss: 0.0011546171735972166 2023-01-22 17:19:03.659328: step: 1096/526, loss: 0.012049159035086632 2023-01-22 17:19:04.725559: step: 1100/526, loss: 0.0021646914537996054 2023-01-22 17:19:05.787438: step: 1104/526, loss: 0.011634151451289654 2023-01-22 17:19:06.855967: step: 1108/526, loss: 0.0017849744763225317 2023-01-22 17:19:07.912483: step: 1112/526, loss: 0.004648881033062935 2023-01-22 17:19:08.970904: step: 1116/526, loss: 0.0010674720397219062 2023-01-22 17:19:10.046293: step: 1120/526, loss: 0.0037717395462095737 2023-01-22 17:19:11.117393: step: 1124/526, loss: 0.001733866287395358 2023-01-22 17:19:12.190627: step: 1128/526, loss: 0.011036127805709839 2023-01-22 17:19:13.253512: step: 1132/526, loss: 0.0 2023-01-22 17:19:14.315099: step: 1136/526, loss: 0.012006633915007114 2023-01-22 17:19:15.370366: step: 1140/526, loss: 0.00693461624905467 2023-01-22 17:19:16.467548: step: 1144/526, loss: 0.008088336326181889 2023-01-22 17:19:17.527298: step: 1148/526, loss: 0.007631760556250811 2023-01-22 17:19:18.593061: step: 1152/526, loss: 0.008798143826425076 2023-01-22 17:19:19.677996: step: 1156/526, loss: 0.005036241374909878 2023-01-22 17:19:20.738618: step: 1160/526, loss: 0.000274105928838253 2023-01-22 17:19:21.802419: step: 1164/526, loss: 0.008674328215420246 2023-01-22 17:19:22.869921: step: 1168/526, loss: 0.0009908691281452775 2023-01-22 17:19:23.931216: step: 1172/526, loss: 0.0033133842516690493 2023-01-22 17:19:24.992122: step: 1176/526, loss: 0.023453017696738243 2023-01-22 17:19:26.062558: step: 1180/526, loss: 0.0027759382501244545 2023-01-22 17:19:27.114974: step: 1184/526, loss: 9.883051097858697e-05 2023-01-22 17:19:28.180353: step: 1188/526, loss: 0.005009297281503677 2023-01-22 17:19:29.254041: step: 1192/526, loss: 0.001685858704149723 2023-01-22 17:19:30.312017: step: 1196/526, loss: 0.008580422960221767 2023-01-22 17:19:31.379384: step: 1200/526, loss: 0.011451034806668758 2023-01-22 17:19:32.435219: step: 1204/526, loss: 0.000860861677210778 2023-01-22 17:19:33.516467: step: 1208/526, loss: 0.000365014944691211 2023-01-22 17:19:34.578502: step: 1212/526, loss: 0.014941053465008736 2023-01-22 17:19:35.653046: step: 1216/526, loss: 0.009724997915327549 2023-01-22 17:19:36.729730: step: 1220/526, loss: 0.0015168003737926483 2023-01-22 17:19:37.805831: step: 1224/526, loss: 0.0019922954961657524 2023-01-22 17:19:38.886598: step: 1228/526, loss: 0.021697305142879486 2023-01-22 17:19:39.978481: step: 1232/526, loss: 0.0006142150377854705 2023-01-22 17:19:41.045345: step: 1236/526, loss: 0.0014331192942336202 2023-01-22 17:19:42.097323: step: 1240/526, loss: 0.004747698549181223 2023-01-22 17:19:43.166420: step: 1244/526, loss: 0.005667760502547026 2023-01-22 17:19:44.228321: step: 1248/526, loss: 0.002338648308068514 2023-01-22 17:19:45.304170: step: 1252/526, loss: 0.005550717934966087 2023-01-22 17:19:46.382245: step: 1256/526, loss: 0.03065328486263752 2023-01-22 17:19:47.437853: step: 1260/526, loss: 0.0 2023-01-22 17:19:48.501742: step: 1264/526, loss: 0.0031456623692065477 2023-01-22 17:19:49.575092: step: 1268/526, loss: 0.008177438750863075 2023-01-22 17:19:50.640386: step: 
1272/526, loss: 0.007453497499227524 2023-01-22 17:19:51.695725: step: 1276/526, loss: 0.00031914940336719155 2023-01-22 17:19:52.766854: step: 1280/526, loss: 0.007359337527304888 2023-01-22 17:19:53.835935: step: 1284/526, loss: 0.012301357463002205 2023-01-22 17:19:54.935763: step: 1288/526, loss: 0.0032113343477249146 2023-01-22 17:19:56.007673: step: 1292/526, loss: 0.0004054347809869796 2023-01-22 17:19:57.059331: step: 1296/526, loss: 4.1995637729996815e-05 2023-01-22 17:19:58.124146: step: 1300/526, loss: 0.005636436864733696 2023-01-22 17:19:59.177983: step: 1304/526, loss: 0.0032630146015435457 2023-01-22 17:20:00.225339: step: 1308/526, loss: 0.00539812259376049 2023-01-22 17:20:01.283773: step: 1312/526, loss: 0.0077217682264745235 2023-01-22 17:20:02.358865: step: 1316/526, loss: 0.0014386394759640098 2023-01-22 17:20:03.418963: step: 1320/526, loss: 0.007581560406833887 2023-01-22 17:20:04.515786: step: 1324/526, loss: 0.006626219023019075 2023-01-22 17:20:05.579869: step: 1328/526, loss: 0.003095823572948575 2023-01-22 17:20:06.655694: step: 1332/526, loss: 0.0029299117159098387 2023-01-22 17:20:07.735988: step: 1336/526, loss: 0.029629366472363472 2023-01-22 17:20:08.794407: step: 1340/526, loss: 0.008206835947930813 2023-01-22 17:20:09.855039: step: 1344/526, loss: 0.00363155291415751 2023-01-22 17:20:10.899807: step: 1348/526, loss: 0.009168574586510658 2023-01-22 17:20:11.976469: step: 1352/526, loss: 0.0009880515281111002 2023-01-22 17:20:13.076169: step: 1356/526, loss: 0.0007103616371750832 2023-01-22 17:20:14.138264: step: 1360/526, loss: 0.0025437932927161455 2023-01-22 17:20:15.215490: step: 1364/526, loss: 0.0289238803088665 2023-01-22 17:20:16.298669: step: 1368/526, loss: 0.00441761314868927 2023-01-22 17:20:17.364738: step: 1372/526, loss: 0.0003461152664385736 2023-01-22 17:20:18.430413: step: 1376/526, loss: 0.002913940232247114 2023-01-22 17:20:19.484014: step: 1380/526, loss: 0.0017914064228534698 2023-01-22 17:20:20.543654: step: 1384/526, loss: 0.00041023417725227773 2023-01-22 17:20:21.592744: step: 1388/526, loss: 0.015686744824051857 2023-01-22 17:20:22.661331: step: 1392/526, loss: 0.017496313899755478 2023-01-22 17:20:23.743151: step: 1396/526, loss: 0.002222283510491252 2023-01-22 17:20:24.810438: step: 1400/526, loss: 0.0009443407179787755 2023-01-22 17:20:25.880836: step: 1404/526, loss: 0.005772572476416826 2023-01-22 17:20:26.943790: step: 1408/526, loss: 0.002379060024395585 2023-01-22 17:20:28.023783: step: 1412/526, loss: 0.010205735452473164 2023-01-22 17:20:29.097494: step: 1416/526, loss: 0.01043506246060133 2023-01-22 17:20:30.162103: step: 1420/526, loss: 0.0020910585299134254 2023-01-22 17:20:31.236738: step: 1424/526, loss: 0.00906625296920538 2023-01-22 17:20:32.309710: step: 1428/526, loss: 0.0006995234871283174 2023-01-22 17:20:33.382857: step: 1432/526, loss: 0.00631823530420661 2023-01-22 17:20:34.437875: step: 1436/526, loss: 0.003414412261918187 2023-01-22 17:20:35.510036: step: 1440/526, loss: 0.003993874415755272 2023-01-22 17:20:36.571162: step: 1444/526, loss: 0.007262456230819225 2023-01-22 17:20:37.629697: step: 1448/526, loss: 0.004374999552965164 2023-01-22 17:20:38.722747: step: 1452/526, loss: 0.0030179843306541443 2023-01-22 17:20:39.793342: step: 1456/526, loss: 0.005736122373491526 2023-01-22 17:20:40.880633: step: 1460/526, loss: 0.0027171254623681307 2023-01-22 17:20:41.938498: step: 1464/526, loss: 0.00468910438939929 2023-01-22 17:20:42.986371: step: 1468/526, loss: 0.0038674722891300917 2023-01-22 
17:20:44.050836: step: 1472/526, loss: 0.0009095493005588651 2023-01-22 17:20:45.103609: step: 1476/526, loss: 0.001541096600703895 2023-01-22 17:20:46.172342: step: 1480/526, loss: 0.004774812143296003 2023-01-22 17:20:47.233611: step: 1484/526, loss: 0.0020774423610419035 2023-01-22 17:20:48.302668: step: 1488/526, loss: 0.008269724436104298 2023-01-22 17:20:49.369661: step: 1492/526, loss: 0.003047727979719639 2023-01-22 17:20:50.435640: step: 1496/526, loss: 0.006301156245172024 2023-01-22 17:20:51.510797: step: 1500/526, loss: 0.017137933522462845 2023-01-22 17:20:52.580546: step: 1504/526, loss: 0.05789651721715927 2023-01-22 17:20:53.636834: step: 1508/526, loss: 0.019466396421194077 2023-01-22 17:20:54.698342: step: 1512/526, loss: 0.0030354063492268324 2023-01-22 17:20:55.765766: step: 1516/526, loss: 0.007000172510743141 2023-01-22 17:20:56.838179: step: 1520/526, loss: 0.010465754196047783 2023-01-22 17:20:57.900856: step: 1524/526, loss: 0.0016235969960689545 2023-01-22 17:20:58.969445: step: 1528/526, loss: 0.0052039301954209805 2023-01-22 17:21:00.031492: step: 1532/526, loss: 0.006547519937157631 2023-01-22 17:21:01.112814: step: 1536/526, loss: 0.0036778971552848816 2023-01-22 17:21:02.183310: step: 1540/526, loss: 0.002224015537649393 2023-01-22 17:21:03.244363: step: 1544/526, loss: 0.011120656505227089 2023-01-22 17:21:04.335781: step: 1548/526, loss: 0.004064811393618584 2023-01-22 17:21:05.395462: step: 1552/526, loss: 0.0008163480670191348 2023-01-22 17:21:06.464618: step: 1556/526, loss: 0.0027425058651715517 2023-01-22 17:21:07.540320: step: 1560/526, loss: 0.0034983966033905745 2023-01-22 17:21:08.599836: step: 1564/526, loss: 0.006434377748519182 2023-01-22 17:21:09.671743: step: 1568/526, loss: 0.001518317381851375 2023-01-22 17:21:10.736981: step: 1572/526, loss: 0.00011459342204034328 2023-01-22 17:21:11.819058: step: 1576/526, loss: 0.0024109946098178625 2023-01-22 17:21:12.911343: step: 1580/526, loss: 0.0051181805320084095 2023-01-22 17:21:13.995695: step: 1584/526, loss: 0.0038799168542027473 2023-01-22 17:21:15.069159: step: 1588/526, loss: 0.0027905148454010487 2023-01-22 17:21:16.136354: step: 1592/526, loss: 9.780994150787592e-05 2023-01-22 17:21:17.213303: step: 1596/526, loss: 0.005336377769708633 2023-01-22 17:21:18.309964: step: 1600/526, loss: 0.0069270809181034565 2023-01-22 17:21:19.386331: step: 1604/526, loss: 0.010091659612953663 2023-01-22 17:21:20.459823: step: 1608/526, loss: 0.0038561539258807898 2023-01-22 17:21:21.526182: step: 1612/526, loss: 0.0020094066858291626 2023-01-22 17:21:22.613586: step: 1616/526, loss: 0.02275482937693596 2023-01-22 17:21:23.702097: step: 1620/526, loss: 0.0001950414734892547 2023-01-22 17:21:24.764792: step: 1624/526, loss: 0.005835406016558409 2023-01-22 17:21:25.829740: step: 1628/526, loss: 0.008719498291611671 2023-01-22 17:21:26.893593: step: 1632/526, loss: 0.006848846096545458 2023-01-22 17:21:27.959903: step: 1636/526, loss: 0.005393713712692261 2023-01-22 17:21:29.052565: step: 1640/526, loss: 0.004651104565709829 2023-01-22 17:21:30.122781: step: 1644/526, loss: 0.0002404949045740068 2023-01-22 17:21:31.188480: step: 1648/526, loss: 0.0042755152098834515 2023-01-22 17:21:32.256381: step: 1652/526, loss: 0.006463938392698765 2023-01-22 17:21:33.323340: step: 1656/526, loss: 0.008712255395948887 2023-01-22 17:21:34.394041: step: 1660/526, loss: 0.006245963275432587 2023-01-22 17:21:35.478314: step: 1664/526, loss: 0.005003716796636581 2023-01-22 17:21:36.570132: step: 1668/526, loss: 
0.005037671886384487 2023-01-22 17:21:37.637660: step: 1672/526, loss: 0.00518902437761426 2023-01-22 17:21:38.719569: step: 1676/526, loss: 0.006831640377640724 2023-01-22 17:21:39.794882: step: 1680/526, loss: 0.011011738330125809 2023-01-22 17:21:40.872522: step: 1684/526, loss: 0.0005441741086542606 2023-01-22 17:21:41.930726: step: 1688/526, loss: 0.001533876871690154 2023-01-22 17:21:43.006281: step: 1692/526, loss: 0.009481044486165047 2023-01-22 17:21:44.078914: step: 1696/526, loss: 0.00797145627439022 2023-01-22 17:21:45.166924: step: 1700/526, loss: 0.004354438278824091 2023-01-22 17:21:46.234867: step: 1704/526, loss: 0.004230857361108065 2023-01-22 17:21:47.315718: step: 1708/526, loss: 0.0026106340810656548 2023-01-22 17:21:48.399073: step: 1712/526, loss: 0.03682756796479225 2023-01-22 17:21:49.467467: step: 1716/526, loss: 0.00035873689921572804 2023-01-22 17:21:50.536164: step: 1720/526, loss: 0.008056238293647766 2023-01-22 17:21:51.608406: step: 1724/526, loss: 0.0027793829794973135 2023-01-22 17:21:52.697510: step: 1728/526, loss: 0.02800150029361248 2023-01-22 17:21:53.773252: step: 1732/526, loss: 0.004113492090255022 2023-01-22 17:21:54.831135: step: 1736/526, loss: 0.0018264194950461388 2023-01-22 17:21:55.896075: step: 1740/526, loss: 0.001853841356933117 2023-01-22 17:21:56.965239: step: 1744/526, loss: 0.0014736369485035539 2023-01-22 17:21:58.039059: step: 1748/526, loss: 0.002336955862119794 2023-01-22 17:21:59.112884: step: 1752/526, loss: 0.029125962406396866 2023-01-22 17:22:00.178957: step: 1756/526, loss: 0.00979869719594717 2023-01-22 17:22:01.250319: step: 1760/526, loss: 0.0052991947159171104 2023-01-22 17:22:02.306983: step: 1764/526, loss: 1.6575935660512187e-05 2023-01-22 17:22:03.374048: step: 1768/526, loss: 0.0003262819955125451 2023-01-22 17:22:04.445569: step: 1772/526, loss: 0.002665152307599783 2023-01-22 17:22:05.519477: step: 1776/526, loss: 0.007335955277085304 2023-01-22 17:22:06.592856: step: 1780/526, loss: 0.009206417948007584 2023-01-22 17:22:07.661046: step: 1784/526, loss: 0.004034833982586861 2023-01-22 17:22:08.752741: step: 1788/526, loss: 0.041636377573013306 2023-01-22 17:22:09.831369: step: 1792/526, loss: 0.0008861317182891071 2023-01-22 17:22:10.902055: step: 1796/526, loss: 0.005037650465965271 2023-01-22 17:22:11.971079: step: 1800/526, loss: 0.0004444077785592526 2023-01-22 17:22:13.054386: step: 1804/526, loss: 0.00010646445298334584 2023-01-22 17:22:14.117576: step: 1808/526, loss: 0.0034417728893458843 2023-01-22 17:22:15.200115: step: 1812/526, loss: 0.003349489765241742 2023-01-22 17:22:16.257154: step: 1816/526, loss: 0.004526312462985516 2023-01-22 17:22:17.338292: step: 1820/526, loss: 0.00029587274184450507 2023-01-22 17:22:18.403658: step: 1824/526, loss: 0.0013475136365741491 2023-01-22 17:22:19.484813: step: 1828/526, loss: 0.0009167612879537046 2023-01-22 17:22:20.548288: step: 1832/526, loss: 0.004182068165391684 2023-01-22 17:22:21.617107: step: 1836/526, loss: 0.005378655157983303 2023-01-22 17:22:22.678108: step: 1840/526, loss: 0.000523278780747205 2023-01-22 17:22:23.750487: step: 1844/526, loss: 0.020257247611880302 2023-01-22 17:22:24.828134: step: 1848/526, loss: 0.0006278672954067588 2023-01-22 17:22:25.897966: step: 1852/526, loss: 0.0011107685277238488 2023-01-22 17:22:26.988084: step: 1856/526, loss: 0.016520028933882713 2023-01-22 17:22:28.050553: step: 1860/526, loss: 0.0008798314956948161 2023-01-22 17:22:29.121557: step: 1864/526, loss: 0.0024338355287909508 2023-01-22 17:22:30.192508: step: 
1868/526, loss: 0.002423889935016632 2023-01-22 17:22:31.255190: step: 1872/526, loss: 0.0014391009462997317 2023-01-22 17:22:32.326993: step: 1876/526, loss: 0.0032042853999882936 2023-01-22 17:22:33.391700: step: 1880/526, loss: 0.0013594884658232331 2023-01-22 17:22:34.462490: step: 1884/526, loss: 0.002975313924252987 2023-01-22 17:22:35.554412: step: 1888/526, loss: 0.006427662447094917 2023-01-22 17:22:36.625846: step: 1892/526, loss: 0.006068871356546879 2023-01-22 17:22:37.704177: step: 1896/526, loss: 0.0007641970296390355 2023-01-22 17:22:38.756605: step: 1900/526, loss: 0.0006615650490857661 2023-01-22 17:22:39.813393: step: 1904/526, loss: 0.0011104480363428593 2023-01-22 17:22:40.892156: step: 1908/526, loss: 0.0028059682808816433 2023-01-22 17:22:41.980389: step: 1912/526, loss: 0.0027646261733025312 2023-01-22 17:22:43.074116: step: 1916/526, loss: 0.0099010169506073 2023-01-22 17:22:44.147003: step: 1920/526, loss: 0.00015959309530444443 2023-01-22 17:22:45.217292: step: 1924/526, loss: 0.0035765361972153187 2023-01-22 17:22:46.284514: step: 1928/526, loss: 0.008528263308107853 2023-01-22 17:22:47.372574: step: 1932/526, loss: 0.0027974469121545553 2023-01-22 17:22:48.455560: step: 1936/526, loss: 0.0065225595608353615 2023-01-22 17:22:49.536185: step: 1940/526, loss: 0.006657288875430822 2023-01-22 17:22:50.597588: step: 1944/526, loss: 0.011110926046967506 2023-01-22 17:22:51.675299: step: 1948/526, loss: 0.0020117724779993296 2023-01-22 17:22:52.760097: step: 1952/526, loss: 0.0007542409002780914 2023-01-22 17:22:53.833276: step: 1956/526, loss: 0.0035255032125860453 2023-01-22 17:22:54.888645: step: 1960/526, loss: 0.012512335553765297 2023-01-22 17:22:55.974929: step: 1964/526, loss: 0.0010673401411622763 2023-01-22 17:22:57.054134: step: 1968/526, loss: 0.00037337830872274935 2023-01-22 17:22:58.141210: step: 1972/526, loss: 0.01309991255402565 2023-01-22 17:22:59.212217: step: 1976/526, loss: 0.0011735076550394297 2023-01-22 17:23:00.279922: step: 1980/526, loss: 0.016302626579999924 2023-01-22 17:23:01.346714: step: 1984/526, loss: 0.0008477665251120925 2023-01-22 17:23:02.418273: step: 1988/526, loss: 0.003587673883885145 2023-01-22 17:23:03.488171: step: 1992/526, loss: 0.011979344300925732 2023-01-22 17:23:04.565815: step: 1996/526, loss: 0.0028485064394772053 2023-01-22 17:23:05.631339: step: 2000/526, loss: 0.018039528280496597 2023-01-22 17:23:06.707341: step: 2004/526, loss: 0.0019866209477186203 2023-01-22 17:23:07.768042: step: 2008/526, loss: 0.008165537379682064 2023-01-22 17:23:08.829334: step: 2012/526, loss: 0.007037199102342129 2023-01-22 17:23:09.897968: step: 2016/526, loss: 0.0003728357551153749 2023-01-22 17:23:10.986707: step: 2020/526, loss: 0.0006959430756978691 2023-01-22 17:23:12.071811: step: 2024/526, loss: 0.05053370073437691 2023-01-22 17:23:13.156591: step: 2028/526, loss: 0.006268172059208155 2023-01-22 17:23:14.239106: step: 2032/526, loss: 0.00017043150728568435 2023-01-22 17:23:15.315193: step: 2036/526, loss: 0.008184626698493958 2023-01-22 17:23:16.366976: step: 2040/526, loss: 0.01348032895475626 2023-01-22 17:23:17.431363: step: 2044/526, loss: 0.003239545039832592 2023-01-22 17:23:18.504487: step: 2048/526, loss: 0.0024152989499270916 2023-01-22 17:23:19.585195: step: 2052/526, loss: 0.005631963722407818 2023-01-22 17:23:20.645561: step: 2056/526, loss: 0.0002426598803140223 2023-01-22 17:23:21.703654: step: 2060/526, loss: 0.00610897783190012 2023-01-22 17:23:22.769612: step: 2064/526, loss: 0.0058256033807992935 2023-01-22 
17:23:23.833392: step: 2068/526, loss: 0.015423965640366077 2023-01-22 17:23:24.904240: step: 2072/526, loss: 0.002549290657043457 2023-01-22 17:23:25.964976: step: 2076/526, loss: 0.001205370295792818 2023-01-22 17:23:27.036135: step: 2080/526, loss: 0.033404458314180374 2023-01-22 17:23:28.126864: step: 2084/526, loss: 0.0018363238777965307 2023-01-22 17:23:29.211855: step: 2088/526, loss: 0.018268831074237823 2023-01-22 17:23:30.274064: step: 2092/526, loss: 0.002536095678806305 2023-01-22 17:23:31.358214: step: 2096/526, loss: 0.014475345611572266 2023-01-22 17:23:32.440742: step: 2100/526, loss: 0.0019569203723222017 2023-01-22 17:23:33.511489: step: 2104/526, loss: 0.01226720493286848 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3441751700680272, 'r': 0.2880099620493359, 'f1': 0.3135976239669422}, 'combined': 0.2310719334493258, 'stategy': 1, 'epoch': 9} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.341375704887218, 'r': 0.23758748691784407, 'f1': 0.28017876427028693}, 'combined': 0.1528247805110656, 'stategy': 1, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3135304256594724, 'r': 0.330783523086654, 'f1': 0.3219259772237611}, 'combined': 0.23720861479645552, 'stategy': 1, 'epoch': 9} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34083799528300474, 'r': 0.2638142214175864, 'f1': 0.2974202620004352}, 'combined': 0.1622292338184192, 'stategy': 1, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32413830661128795, 'r': 0.3284437490140944, 'f1': 0.32627682512804485}, 'combined': 0.24041450272592776, 'stategy': 1, 'epoch': 9} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3350355626427411, 'r': 0.2707821670674209, 'f1': 0.2995014878169958}, 'combined': 0.1633644479001795, 'stategy': 1, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3252032520325203, 'r': 0.38095238095238093, 'f1': 0.3508771929824561}, 'combined': 0.23391812865497075, 'stategy': 1, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3425925925925926, 'r': 0.40217391304347827, 'f1': 0.37}, 'combined': 0.185, 'stategy': 1, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 
0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 10 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 17:26:15.400113: step: 4/526, loss: 0.03914384916424751 2023-01-22 17:26:16.468933: step: 8/526, loss: 0.012458334676921368 2023-01-22 17:26:17.523175: step: 12/526, loss: 0.0011874176561832428 2023-01-22 17:26:18.591009: step: 16/526, loss: 0.006760450545698404 2023-01-22 17:26:19.658138: step: 20/526, loss: 0.003804833395406604 2023-01-22 17:26:20.715419: step: 24/526, loss: 0.003031475469470024 2023-01-22 17:26:21.765150: step: 28/526, loss: 0.0024960062000900507 2023-01-22 17:26:22.817677: step: 32/526, loss: 0.011792642995715141 2023-01-22 17:26:23.890613: step: 36/526, loss: 0.003637844929471612 2023-01-22 17:26:24.974663: step: 40/526, loss: 0.001144232926890254 2023-01-22 17:26:26.055497: step: 44/526, loss: 0.009493201971054077 2023-01-22 17:26:27.116216: step: 48/526, loss: 0.0037126510869711637 2023-01-22 17:26:28.195026: step: 52/526, loss: 0.08734021335840225 2023-01-22 17:26:29.263900: step: 56/526, loss: 0.020190024748444557 2023-01-22 17:26:30.334621: step: 60/526, loss: 0.0010884770890697837 2023-01-22 17:26:31.385596: step: 64/526, loss: 0.00432793190702796 2023-01-22 17:26:32.473103: step: 68/526, loss: 0.014436259865760803 2023-01-22 17:26:33.553472: step: 72/526, loss: 0.0028390262741595507 2023-01-22 17:26:34.613397: step: 76/526, loss: 0.004512321203947067 2023-01-22 17:26:35.679923: step: 80/526, loss: 0.0027164120692759752 2023-01-22 17:26:36.742406: step: 84/526, loss: 0.0013908883556723595 2023-01-22 17:26:37.796072: step: 88/526, loss: 0.004421636927872896 2023-01-22 17:26:38.857873: step: 92/526, loss: 0.009385128505527973 2023-01-22 17:26:39.905818: step: 96/526, loss: 0.0009007321204990149 2023-01-22 17:26:40.969095: step: 100/526, loss: 0.015045412816107273 2023-01-22 
17:26:42.040171: step: 104/526, loss: 0.005501618143171072 2023-01-22 17:26:43.146666: step: 108/526, loss: 0.005629522260278463 2023-01-22 17:26:44.196023: step: 112/526, loss: 0.001161363790743053 2023-01-22 17:26:45.250285: step: 116/526, loss: 0.0034099631011486053 2023-01-22 17:26:46.321399: step: 120/526, loss: 0.0011534694349393249 2023-01-22 17:26:47.401677: step: 124/526, loss: 0.014831599779427052 2023-01-22 17:26:48.494353: step: 128/526, loss: 0.0006340779364109039 2023-01-22 17:26:49.566393: step: 132/526, loss: 0.0061704134568572044 2023-01-22 17:26:50.637243: step: 136/526, loss: 0.006142008118331432 2023-01-22 17:26:51.724258: step: 140/526, loss: 0.03993793576955795 2023-01-22 17:26:52.795280: step: 144/526, loss: 0.0008726411615498364 2023-01-22 17:26:53.854061: step: 148/526, loss: 0.00754851708188653 2023-01-22 17:26:54.933431: step: 152/526, loss: 0.011415963061153889 2023-01-22 17:26:55.999410: step: 156/526, loss: 0.0063007972203195095 2023-01-22 17:26:57.073796: step: 160/526, loss: 0.014107774011790752 2023-01-22 17:26:58.167291: step: 164/526, loss: 0.010831790044903755 2023-01-22 17:26:59.228189: step: 168/526, loss: 0.0032410998828709126 2023-01-22 17:27:00.298572: step: 172/526, loss: 0.0034866398200392723 2023-01-22 17:27:01.361076: step: 176/526, loss: 0.019249439239501953 2023-01-22 17:27:02.431643: step: 180/526, loss: 0.002934157382696867 2023-01-22 17:27:03.500994: step: 184/526, loss: 3.073999687330797e-05 2023-01-22 17:27:04.576887: step: 188/526, loss: 0.0021913948003202677 2023-01-22 17:27:05.651553: step: 192/526, loss: 1.5047981833049562e-05 2023-01-22 17:27:06.744628: step: 196/526, loss: 0.00964015256613493 2023-01-22 17:27:07.814105: step: 200/526, loss: 0.0052771237678825855 2023-01-22 17:27:08.902615: step: 204/526, loss: 0.0024171725381165743 2023-01-22 17:27:09.985620: step: 208/526, loss: 0.029271699488162994 2023-01-22 17:27:11.068391: step: 212/526, loss: 0.014002806507050991 2023-01-22 17:27:12.135667: step: 216/526, loss: 0.004147626459598541 2023-01-22 17:27:13.204073: step: 220/526, loss: 0.002527667907997966 2023-01-22 17:27:14.265022: step: 224/526, loss: 0.010924090631306171 2023-01-22 17:27:15.330074: step: 228/526, loss: 0.0022656205110251904 2023-01-22 17:27:16.393427: step: 232/526, loss: 0.002628905698657036 2023-01-22 17:27:17.469312: step: 236/526, loss: 0.0029345862567424774 2023-01-22 17:27:18.557275: step: 240/526, loss: 0.004847095813602209 2023-01-22 17:27:19.622067: step: 244/526, loss: 0.0006241805385798216 2023-01-22 17:27:20.709886: step: 248/526, loss: 0.04192202538251877 2023-01-22 17:27:21.788570: step: 252/526, loss: 0.0073339734226465225 2023-01-22 17:27:22.862316: step: 256/526, loss: 0.004048834089189768 2023-01-22 17:27:23.923593: step: 260/526, loss: 0.0018592940177768469 2023-01-22 17:27:25.004779: step: 264/526, loss: 0.003509767819195986 2023-01-22 17:27:26.072994: step: 268/526, loss: 0.007049216888844967 2023-01-22 17:27:27.132901: step: 272/526, loss: 0.000458940165117383 2023-01-22 17:27:28.204198: step: 276/526, loss: 0.0013739545829594135 2023-01-22 17:27:29.284214: step: 280/526, loss: 0.009642442688345909 2023-01-22 17:27:30.366089: step: 284/526, loss: 8.788464037934318e-05 2023-01-22 17:27:31.428355: step: 288/526, loss: 0.00039300654316321015 2023-01-22 17:27:32.486143: step: 292/526, loss: 0.001044073374941945 2023-01-22 17:27:33.560264: step: 296/526, loss: 0.00024214394215960056 2023-01-22 17:27:34.615256: step: 300/526, loss: 0.0010353511897847056 2023-01-22 17:27:35.683252: step: 304/526, 
loss: 0.0053918128833174706 2023-01-22 17:27:36.751442: step: 308/526, loss: 0.004577489569783211 2023-01-22 17:27:37.820759: step: 312/526, loss: 0.002369215711951256 2023-01-22 17:27:38.903236: step: 316/526, loss: 0.007408234756439924 2023-01-22 17:27:39.972625: step: 320/526, loss: 0.005347894504666328 2023-01-22 17:27:41.047523: step: 324/526, loss: 0.0003589502302929759 2023-01-22 17:27:42.116116: step: 328/526, loss: 0.009502967819571495 2023-01-22 17:27:43.179784: step: 332/526, loss: 0.004050052259117365 2023-01-22 17:27:44.233993: step: 336/526, loss: 0.0026822916697710752 2023-01-22 17:27:45.285845: step: 340/526, loss: 0.006174871698021889 2023-01-22 17:27:46.350916: step: 344/526, loss: 0.003229816211387515 2023-01-22 17:27:47.410484: step: 348/526, loss: 0.00220097741112113 2023-01-22 17:27:48.484748: step: 352/526, loss: 0.005181090906262398 2023-01-22 17:27:49.542322: step: 356/526, loss: 0.02471998706459999 2023-01-22 17:27:50.623247: step: 360/526, loss: 0.002874291967600584 2023-01-22 17:27:51.675603: step: 364/526, loss: 0.0009685191907919943 2023-01-22 17:27:52.738300: step: 368/526, loss: 0.0014047357253730297 2023-01-22 17:27:53.818147: step: 372/526, loss: 0.0039881691336631775 2023-01-22 17:27:54.887540: step: 376/526, loss: 0.0036766391713172197 2023-01-22 17:27:55.953093: step: 380/526, loss: 0.004806995391845703 2023-01-22 17:27:57.028206: step: 384/526, loss: 0.018413497135043144 2023-01-22 17:27:58.093114: step: 388/526, loss: 0.0006281603127717972 2023-01-22 17:27:59.142844: step: 392/526, loss: 0.0018995624268427491 2023-01-22 17:28:00.211676: step: 396/526, loss: 0.02252894453704357 2023-01-22 17:28:01.276830: step: 400/526, loss: 0.0031219327356666327 2023-01-22 17:28:02.345191: step: 404/526, loss: 0.004498603288084269 2023-01-22 17:28:03.416087: step: 408/526, loss: 0.004424775019288063 2023-01-22 17:28:04.479560: step: 412/526, loss: 0.014513521455228329 2023-01-22 17:28:05.565487: step: 416/526, loss: 0.02892647683620453 2023-01-22 17:28:06.638383: step: 420/526, loss: 0.0039602783508598804 2023-01-22 17:28:07.701396: step: 424/526, loss: 0.018903126940131187 2023-01-22 17:28:08.787121: step: 428/526, loss: 0.006359412334859371 2023-01-22 17:28:09.855437: step: 432/526, loss: 0.0012890842044726014 2023-01-22 17:28:10.922368: step: 436/526, loss: 0.007926391437649727 2023-01-22 17:28:11.989109: step: 440/526, loss: 0.009608851745724678 2023-01-22 17:28:13.072922: step: 444/526, loss: 0.007655289489775896 2023-01-22 17:28:14.150485: step: 448/526, loss: 0.005288000218570232 2023-01-22 17:28:15.229937: step: 452/526, loss: 0.0030700673814862967 2023-01-22 17:28:16.303181: step: 456/526, loss: 0.01592688076198101 2023-01-22 17:28:17.357291: step: 460/526, loss: 0.004105378873646259 2023-01-22 17:28:18.423072: step: 464/526, loss: 0.0009433595114387572 2023-01-22 17:28:19.480976: step: 468/526, loss: 0.0019469358958303928 2023-01-22 17:28:20.535134: step: 472/526, loss: 0.005649545695632696 2023-01-22 17:28:21.610203: step: 476/526, loss: 0.009048526175320148 2023-01-22 17:28:22.670273: step: 480/526, loss: 0.002955965232104063 2023-01-22 17:28:23.734096: step: 484/526, loss: 0.0016291955253109336 2023-01-22 17:28:24.809648: step: 488/526, loss: 0.004816431552171707 2023-01-22 17:28:25.880357: step: 492/526, loss: 0.005789854098111391 2023-01-22 17:28:26.941461: step: 496/526, loss: 0.014252807945013046 2023-01-22 17:28:28.010499: step: 500/526, loss: 1.580585740157403e-05 2023-01-22 17:28:29.068192: step: 504/526, loss: 0.003073624335229397 2023-01-22 
17:28:30.134041: step: 508/526, loss: 0.018114915117621422 2023-01-22 17:28:31.218801: step: 512/526, loss: 0.00632064463570714 2023-01-22 17:28:32.300972: step: 516/526, loss: 0.009451929479837418 2023-01-22 17:28:33.360580: step: 520/526, loss: 0.003335179528221488 2023-01-22 17:28:34.421274: step: 524/526, loss: 0.013502035290002823 2023-01-22 17:28:35.473324: step: 528/526, loss: 0.010950354859232903 2023-01-22 17:28:36.553607: step: 532/526, loss: 0.0017414387548342347 2023-01-22 17:28:37.616659: step: 536/526, loss: 0.007401375100016594 2023-01-22 17:28:38.678013: step: 540/526, loss: 0.0005550369969569147 2023-01-22 17:28:39.731749: step: 544/526, loss: 0.003469782182946801 2023-01-22 17:28:40.795736: step: 548/526, loss: 0.002681322628632188 2023-01-22 17:28:41.869875: step: 552/526, loss: 0.0065278890542685986 2023-01-22 17:28:42.943547: step: 556/526, loss: 0.002598665887489915 2023-01-22 17:28:43.998877: step: 560/526, loss: 0.011027329601347446 2023-01-22 17:28:45.079323: step: 564/526, loss: 0.004067003261297941 2023-01-22 17:28:46.144229: step: 568/526, loss: 0.0017140146810561419 2023-01-22 17:28:47.216063: step: 572/526, loss: 0.014103577472269535 2023-01-22 17:28:48.286536: step: 576/526, loss: 0.005832192953675985 2023-01-22 17:28:49.348420: step: 580/526, loss: 0.0025809190701693296 2023-01-22 17:28:50.412386: step: 584/526, loss: 0.008384406566619873 2023-01-22 17:28:51.472200: step: 588/526, loss: 0.000408686202717945 2023-01-22 17:28:52.530862: step: 592/526, loss: 0.004185400903224945 2023-01-22 17:28:53.609606: step: 596/526, loss: 0.0025841384194791317 2023-01-22 17:28:54.685437: step: 600/526, loss: 0.02498718723654747 2023-01-22 17:28:55.748249: step: 604/526, loss: 0.0011584153398871422 2023-01-22 17:28:56.804511: step: 608/526, loss: 0.0002652000111993402 2023-01-22 17:28:57.874276: step: 612/526, loss: 0.004533391445875168 2023-01-22 17:28:58.936512: step: 616/526, loss: 0.0021636730525642633 2023-01-22 17:29:00.001189: step: 620/526, loss: 0.00018159067258238792 2023-01-22 17:29:01.080337: step: 624/526, loss: 0.0067629567347466946 2023-01-22 17:29:02.136237: step: 628/526, loss: 4.668658220907673e-05 2023-01-22 17:29:03.206187: step: 632/526, loss: 0.012922360561788082 2023-01-22 17:29:04.282308: step: 636/526, loss: 0.00164447957649827 2023-01-22 17:29:05.342447: step: 640/526, loss: 0.009876989759504795 2023-01-22 17:29:06.405507: step: 644/526, loss: 0.006384863518178463 2023-01-22 17:29:07.468590: step: 648/526, loss: 0.001361687434837222 2023-01-22 17:29:08.541221: step: 652/526, loss: 0.0016008722595870495 2023-01-22 17:29:09.608924: step: 656/526, loss: 0.0030787885189056396 2023-01-22 17:29:10.657344: step: 660/526, loss: 0.000887411879375577 2023-01-22 17:29:11.741931: step: 664/526, loss: 0.0016765184700489044 2023-01-22 17:29:12.842269: step: 668/526, loss: 0.00010889178520301357 2023-01-22 17:29:13.892219: step: 672/526, loss: 0.009153757244348526 2023-01-22 17:29:14.954244: step: 676/526, loss: 0.005686105694621801 2023-01-22 17:29:16.019736: step: 680/526, loss: 0.0020400178618729115 2023-01-22 17:29:17.087000: step: 684/526, loss: 0.008220906369388103 2023-01-22 17:29:18.145538: step: 688/526, loss: 0.011818169616162777 2023-01-22 17:29:19.220035: step: 692/526, loss: 0.010587401688098907 2023-01-22 17:29:20.295947: step: 696/526, loss: 0.0010475068120285869 2023-01-22 17:29:21.361255: step: 700/526, loss: 0.0034396848641335964 2023-01-22 17:29:22.448875: step: 704/526, loss: 0.030698630958795547 2023-01-22 17:29:23.499512: step: 708/526, 
loss: 0.0010491388384252787 2023-01-22 17:29:24.574361: step: 712/526, loss: 0.0016570492880418897 2023-01-22 17:29:25.644713: step: 716/526, loss: 0.0022557477932423353 2023-01-22 17:29:26.692553: step: 720/526, loss: 0.0025538038462400436 2023-01-22 17:29:27.765031: step: 724/526, loss: 0.012905474752187729 2023-01-22 17:29:28.817010: step: 728/526, loss: 0.0001540376542834565 2023-01-22 17:29:29.895056: step: 732/526, loss: 0.009049699641764164 2023-01-22 17:29:30.973756: step: 736/526, loss: 0.0014287405647337437 2023-01-22 17:29:32.042639: step: 740/526, loss: 0.00016093575686682016 2023-01-22 17:29:33.095578: step: 744/526, loss: 0.0012757738586515188 2023-01-22 17:29:34.166292: step: 748/526, loss: 0.0018830905901268125 2023-01-22 17:29:35.231305: step: 752/526, loss: 0.004661615937948227 2023-01-22 17:29:36.296821: step: 756/526, loss: 0.005450590513646603 2023-01-22 17:29:37.367960: step: 760/526, loss: 0.0035836242605000734 2023-01-22 17:29:38.437610: step: 764/526, loss: 0.0036698803305625916 2023-01-22 17:29:39.478735: step: 768/526, loss: 0.00018407157040201128 2023-01-22 17:29:40.524810: step: 772/526, loss: 0.006386274006217718 2023-01-22 17:29:41.596976: step: 776/526, loss: 0.005379513371735811 2023-01-22 17:29:42.671220: step: 780/526, loss: 0.005845651030540466 2023-01-22 17:29:43.730459: step: 784/526, loss: 7.169665332185104e-05 2023-01-22 17:29:44.782551: step: 788/526, loss: 0.008214220404624939 2023-01-22 17:29:45.846677: step: 792/526, loss: 0.002594218822196126 2023-01-22 17:29:46.914345: step: 796/526, loss: 0.007495261263102293 2023-01-22 17:29:47.996944: step: 800/526, loss: 0.001038069138303399 2023-01-22 17:29:49.057740: step: 804/526, loss: 0.0034643833059817553 2023-01-22 17:29:50.111241: step: 808/526, loss: 0.003417538944631815 2023-01-22 17:29:51.177205: step: 812/526, loss: 0.003827321110293269 2023-01-22 17:29:52.245364: step: 816/526, loss: 0.003469871822744608 2023-01-22 17:29:53.298624: step: 820/526, loss: 0.0008538606343790889 2023-01-22 17:29:54.361247: step: 824/526, loss: 0.007968323305249214 2023-01-22 17:29:55.428150: step: 828/526, loss: 0.013973237946629524 2023-01-22 17:29:56.467183: step: 832/526, loss: 0.003516615368425846 2023-01-22 17:29:57.537889: step: 836/526, loss: 9.520177627564408e-06 2023-01-22 17:29:58.602080: step: 840/526, loss: 0.016389090567827225 2023-01-22 17:29:59.679029: step: 844/526, loss: 0.0018904394237324595 2023-01-22 17:30:00.754558: step: 848/526, loss: 0.004854640457779169 2023-01-22 17:30:01.817907: step: 852/526, loss: 0.00011276135046500713 2023-01-22 17:30:02.877545: step: 856/526, loss: 0.000125417675008066 2023-01-22 17:30:03.955402: step: 860/526, loss: 0.0028902566991746426 2023-01-22 17:30:04.999528: step: 864/526, loss: 0.0005327682010829449 2023-01-22 17:30:06.077777: step: 868/526, loss: 0.008473590947687626 2023-01-22 17:30:07.143672: step: 872/526, loss: 0.0011575708631426096 2023-01-22 17:30:08.210885: step: 876/526, loss: 0.007531928364187479 2023-01-22 17:30:09.281739: step: 880/526, loss: 0.002268790500238538 2023-01-22 17:30:10.349367: step: 884/526, loss: 0.01531816367059946 2023-01-22 17:30:11.414912: step: 888/526, loss: 0.008446996100246906 2023-01-22 17:30:12.485141: step: 892/526, loss: 0.016245784237980843 2023-01-22 17:30:13.570593: step: 896/526, loss: 0.0008730721310712397 2023-01-22 17:30:14.647811: step: 900/526, loss: 0.002750067040324211 2023-01-22 17:30:15.719848: step: 904/526, loss: 0.005723040085285902 2023-01-22 17:30:16.801459: step: 908/526, loss: 0.003289812942966819 
2023-01-22 17:30:17.851276: step: 912/526, loss: 0.0006030689692124724 2023-01-22 17:30:18.911940: step: 916/526, loss: 0.005199831910431385 2023-01-22 17:30:19.969878: step: 920/526, loss: 0.013157131150364876 2023-01-22 17:30:21.041138: step: 924/526, loss: 0.0037393078673630953 2023-01-22 17:30:22.096400: step: 928/526, loss: 0.0006819416303187609 2023-01-22 17:30:23.174870: step: 932/526, loss: 0.0004163504345342517 2023-01-22 17:30:24.227899: step: 936/526, loss: 0.003460089908912778 2023-01-22 17:30:25.303239: step: 940/526, loss: 0.003104201750829816 2023-01-22 17:30:26.378157: step: 944/526, loss: 0.008607184514403343 2023-01-22 17:30:27.441099: step: 948/526, loss: 0.008992396295070648 2023-01-22 17:30:28.487084: step: 952/526, loss: 0.0016876272857189178 2023-01-22 17:30:29.567970: step: 956/526, loss: 0.008374501019716263 2023-01-22 17:30:30.630411: step: 960/526, loss: 0.0006010061479173601 2023-01-22 17:30:31.679634: step: 964/526, loss: 0.00041502172825857997 2023-01-22 17:30:32.738518: step: 968/526, loss: 0.002488055732101202 2023-01-22 17:30:33.796952: step: 972/526, loss: 0.0030510660726577044 2023-01-22 17:30:34.857276: step: 976/526, loss: 0.006629373412579298 2023-01-22 17:30:35.937639: step: 980/526, loss: 0.000984479789622128 2023-01-22 17:30:37.016828: step: 984/526, loss: 0.009197777137160301 2023-01-22 17:30:38.074881: step: 988/526, loss: 0.002648509806022048 2023-01-22 17:30:39.140933: step: 992/526, loss: 0.008000624366104603 2023-01-22 17:30:40.197637: step: 996/526, loss: 0.006690055597573519 2023-01-22 17:30:41.256878: step: 1000/526, loss: 0.00349241541698575 2023-01-22 17:30:42.342080: step: 1004/526, loss: 0.007179903332144022 2023-01-22 17:30:43.422087: step: 1008/526, loss: 0.0016613035695627332 2023-01-22 17:30:44.493903: step: 1012/526, loss: 0.0054354192689061165 2023-01-22 17:30:45.554094: step: 1016/526, loss: 0.002949904650449753 2023-01-22 17:30:46.634656: step: 1020/526, loss: 0.0014861278468742967 2023-01-22 17:30:47.697743: step: 1024/526, loss: 0.005480223800987005 2023-01-22 17:30:48.761865: step: 1028/526, loss: 0.00015086343046277761 2023-01-22 17:30:49.818382: step: 1032/526, loss: 0.00649261474609375 2023-01-22 17:30:50.889750: step: 1036/526, loss: 0.007078051567077637 2023-01-22 17:30:51.972899: step: 1040/526, loss: 0.001564691192470491 2023-01-22 17:30:53.053583: step: 1044/526, loss: 0.005240923259407282 2023-01-22 17:30:54.109889: step: 1048/526, loss: 0.0001548617146909237 2023-01-22 17:30:55.175884: step: 1052/526, loss: 0.0038983726408332586 2023-01-22 17:30:56.234215: step: 1056/526, loss: 0.001131076947785914 2023-01-22 17:30:57.312824: step: 1060/526, loss: 0.010171943344175816 2023-01-22 17:30:58.406264: step: 1064/526, loss: 0.004994820803403854 2023-01-22 17:30:59.469868: step: 1068/526, loss: 0.0004371219838503748 2023-01-22 17:31:00.531095: step: 1072/526, loss: 0.0031380902510136366 2023-01-22 17:31:01.615240: step: 1076/526, loss: 0.004172165412455797 2023-01-22 17:31:02.691691: step: 1080/526, loss: 0.0037502821069210768 2023-01-22 17:31:03.755834: step: 1084/526, loss: 7.080791692715138e-05 2023-01-22 17:31:04.841984: step: 1088/526, loss: 0.00041992071783170104 2023-01-22 17:31:05.916460: step: 1092/526, loss: 0.009625636041164398 2023-01-22 17:31:06.992341: step: 1096/526, loss: 0.006246599834412336 2023-01-22 17:31:08.060893: step: 1100/526, loss: 0.0034431691747158766 2023-01-22 17:31:09.131337: step: 1104/526, loss: 0.0004512475279625505 2023-01-22 17:31:10.202078: step: 1108/526, loss: 0.008080788888037205 
2023-01-22 17:31:11.296417: step: 1112/526, loss: 0.008534091524779797 2023-01-22 17:31:12.365738: step: 1116/526, loss: 0.003935400862246752 2023-01-22 17:31:13.434613: step: 1120/526, loss: 0.007640754338353872 2023-01-22 17:31:14.500715: step: 1124/526, loss: 0.0037388226483017206 2023-01-22 17:31:15.561488: step: 1128/526, loss: 0.006042583379894495 2023-01-22 17:31:16.620048: step: 1132/526, loss: 0.0033820010721683502 2023-01-22 17:31:17.662362: step: 1136/526, loss: 0.004182777367532253 2023-01-22 17:31:18.728302: step: 1140/526, loss: 0.009561761282384396 2023-01-22 17:31:19.804734: step: 1144/526, loss: 0.00428747246041894 2023-01-22 17:31:20.860849: step: 1148/526, loss: 0.001208893721923232 2023-01-22 17:31:21.922748: step: 1152/526, loss: 0.015343909151852131 2023-01-22 17:31:23.004815: step: 1156/526, loss: 0.00758409732952714 2023-01-22 17:31:24.086280: step: 1160/526, loss: 0.0031642108224332333 2023-01-22 17:31:25.142570: step: 1164/526, loss: 0.008916519582271576 2023-01-22 17:31:26.217284: step: 1168/526, loss: 0.0031977524049580097 2023-01-22 17:31:27.275666: step: 1172/526, loss: 0.006974969524890184 2023-01-22 17:31:28.365730: step: 1176/526, loss: 0.0029876295011490583 2023-01-22 17:31:29.429187: step: 1180/526, loss: 0.004767078440636396 2023-01-22 17:31:30.498389: step: 1184/526, loss: 0.002089911140501499 2023-01-22 17:31:31.559246: step: 1188/526, loss: 0.00031690316973254085 2023-01-22 17:31:32.640520: step: 1192/526, loss: 0.0023573071230202913 2023-01-22 17:31:33.703487: step: 1196/526, loss: 0.004057617392390966 2023-01-22 17:31:34.778795: step: 1200/526, loss: 0.016314206644892693 2023-01-22 17:31:35.858880: step: 1204/526, loss: 0.002020896878093481 2023-01-22 17:31:36.918389: step: 1208/526, loss: 0.005386164877563715 2023-01-22 17:31:37.975583: step: 1212/526, loss: 0.00048268295358866453 2023-01-22 17:31:39.044014: step: 1216/526, loss: 0.0036476771347224712 2023-01-22 17:31:40.109066: step: 1220/526, loss: 0.0019774893298745155 2023-01-22 17:31:41.173930: step: 1224/526, loss: 0.0013882461935281754 2023-01-22 17:31:42.252499: step: 1228/526, loss: 0.002059339312836528 2023-01-22 17:31:43.309049: step: 1232/526, loss: 0.0005826130509376526 2023-01-22 17:31:44.361484: step: 1236/526, loss: 0.00014105670561548322 2023-01-22 17:31:45.429658: step: 1240/526, loss: 0.0026437670458108187 2023-01-22 17:31:46.489885: step: 1244/526, loss: 0.021763278171420097 2023-01-22 17:31:47.562402: step: 1248/526, loss: 0.0022564467508345842 2023-01-22 17:31:48.632405: step: 1252/526, loss: 0.004126391373574734 2023-01-22 17:31:49.684448: step: 1256/526, loss: 0.010915586724877357 2023-01-22 17:31:50.739710: step: 1260/526, loss: 0.0025179910007864237 2023-01-22 17:31:51.824897: step: 1264/526, loss: 0.0007698491681367159 2023-01-22 17:31:52.884053: step: 1268/526, loss: 0.0037589343264698982 2023-01-22 17:31:53.943789: step: 1272/526, loss: 0.0009809480980038643 2023-01-22 17:31:55.001595: step: 1276/526, loss: 0.002951584756374359 2023-01-22 17:31:56.060787: step: 1280/526, loss: 0.00807819701731205 2023-01-22 17:31:57.130910: step: 1284/526, loss: 0.0036871337797492743 2023-01-22 17:31:58.185569: step: 1288/526, loss: 0.0027246992103755474 2023-01-22 17:31:59.252386: step: 1292/526, loss: 0.0006611350690945983 2023-01-22 17:32:00.321580: step: 1296/526, loss: 0.0006722900434397161 2023-01-22 17:32:01.402517: step: 1300/526, loss: 0.005657564383000135 2023-01-22 17:32:02.471523: step: 1304/526, loss: 0.0008522614371031523 2023-01-22 17:32:03.550808: step: 1308/526, loss: 
0.002993806032463908 2023-01-22 17:32:04.623563: step: 1312/526, loss: 0.001959262415766716 2023-01-22 17:32:05.734114: step: 1316/526, loss: 0.0008473132620565593 2023-01-22 17:32:06.787707: step: 1320/526, loss: 0.021236548200249672 2023-01-22 17:32:07.859821: step: 1324/526, loss: 0.008937397040426731 2023-01-22 17:32:08.917743: step: 1328/526, loss: 0.023066446185112 2023-01-22 17:32:09.987913: step: 1332/526, loss: 0.002669062465429306 2023-01-22 17:32:11.051289: step: 1336/526, loss: 0.009775707498192787 2023-01-22 17:32:12.139113: step: 1340/526, loss: 0.00567167392000556 2023-01-22 17:32:13.215895: step: 1344/526, loss: 0.0010038410546258092 2023-01-22 17:32:14.275439: step: 1348/526, loss: 0.003936221357434988 2023-01-22 17:32:15.353112: step: 1352/526, loss: 0.014694486744701862 2023-01-22 17:32:16.435703: step: 1356/526, loss: 0.002703920006752014 2023-01-22 17:32:17.494352: step: 1360/526, loss: 0.0029441246297210455 2023-01-22 17:32:18.574592: step: 1364/526, loss: 0.006541743408888578 2023-01-22 17:32:19.656625: step: 1368/526, loss: 0.0013960616197437048 2023-01-22 17:32:20.713688: step: 1372/526, loss: 0.0019162222743034363 2023-01-22 17:32:21.778318: step: 1376/526, loss: 0.0029492340981960297 2023-01-22 17:32:22.844872: step: 1380/526, loss: 0.00548131437972188 2023-01-22 17:32:23.905854: step: 1384/526, loss: 0.0013217887608334422 2023-01-22 17:32:24.976065: step: 1388/526, loss: 0.010780309326946735 2023-01-22 17:32:26.049046: step: 1392/526, loss: 0.002327525522559881 2023-01-22 17:32:27.114977: step: 1396/526, loss: 0.005291208159178495 2023-01-22 17:32:28.179107: step: 1400/526, loss: 0.045353829860687256 2023-01-22 17:32:29.247740: step: 1404/526, loss: 0.012458308599889278 2023-01-22 17:32:30.329128: step: 1408/526, loss: 0.0007240973063744605 2023-01-22 17:32:31.395125: step: 1412/526, loss: 0.006956041324883699 2023-01-22 17:32:32.486278: step: 1416/526, loss: 0.0014333715662360191 2023-01-22 17:32:33.540577: step: 1420/526, loss: 1.070375446943217e-06 2023-01-22 17:32:34.601387: step: 1424/526, loss: 0.0012360136024653912 2023-01-22 17:32:35.659137: step: 1428/526, loss: 0.0007054863963276148 2023-01-22 17:32:36.730876: step: 1432/526, loss: 0.0022484958171844482 2023-01-22 17:32:37.797277: step: 1436/526, loss: 0.004151784814894199 2023-01-22 17:32:38.855212: step: 1440/526, loss: 0.00780200120061636 2023-01-22 17:32:39.922920: step: 1444/526, loss: 5.254819188849069e-05 2023-01-22 17:32:40.997516: step: 1448/526, loss: 0.0012296994682401419 2023-01-22 17:32:42.079887: step: 1452/526, loss: 0.009779248386621475 2023-01-22 17:32:43.153870: step: 1456/526, loss: 0.025997979566454887 2023-01-22 17:32:44.211775: step: 1460/526, loss: 0.0006762303528375924 2023-01-22 17:32:45.283678: step: 1464/526, loss: 0.0008575352840125561 2023-01-22 17:32:46.378271: step: 1468/526, loss: 0.001646463293582201 2023-01-22 17:32:47.443754: step: 1472/526, loss: 0.007763678673654795 2023-01-22 17:32:48.520366: step: 1476/526, loss: 0.0030321392696350813 2023-01-22 17:32:49.584723: step: 1480/526, loss: 0.007338542956858873 2023-01-22 17:32:50.663859: step: 1484/526, loss: 0.004938645288348198 2023-01-22 17:32:51.725548: step: 1488/526, loss: 0.011469047516584396 2023-01-22 17:32:52.783978: step: 1492/526, loss: 0.006655020639300346 2023-01-22 17:32:53.840756: step: 1496/526, loss: 0.02961951494216919 2023-01-22 17:32:54.903450: step: 1500/526, loss: 0.0001534883485874161 2023-01-22 17:32:55.962746: step: 1504/526, loss: 0.003928833641111851 2023-01-22 17:32:57.014069: step: 
1508/526, loss: 0.018733810633420944 2023-01-22 17:32:58.060363: step: 1512/526, loss: 0.0005932717467658222 2023-01-22 17:32:59.106307: step: 1516/526, loss: 0.0012880591675639153 2023-01-22 17:33:00.186058: step: 1520/526, loss: 0.0016507901018485427 2023-01-22 17:33:01.242022: step: 1524/526, loss: 0.007693939842283726 2023-01-22 17:33:02.294314: step: 1528/526, loss: 0.001705571310594678 2023-01-22 17:33:03.354461: step: 1532/526, loss: 0.005585390608757734 2023-01-22 17:33:04.430507: step: 1536/526, loss: 0.001855135546065867 2023-01-22 17:33:05.499417: step: 1540/526, loss: 0.002898991806432605 2023-01-22 17:33:06.556211: step: 1544/526, loss: 0.006664915941655636 2023-01-22 17:33:07.614174: step: 1548/526, loss: 0.001996766310185194 2023-01-22 17:33:08.699313: step: 1552/526, loss: 0.0051386235281825066 2023-01-22 17:33:09.745050: step: 1556/526, loss: 0.0005143631133250892 2023-01-22 17:33:10.823312: step: 1560/526, loss: 0.012425909750163555 2023-01-22 17:33:11.890915: step: 1564/526, loss: 0.009358774870634079 2023-01-22 17:33:12.966910: step: 1568/526, loss: 0.007750874850898981 2023-01-22 17:33:14.044123: step: 1572/526, loss: 3.952288534492254e-05 2023-01-22 17:33:15.101208: step: 1576/526, loss: 2.425097136438126e-07 2023-01-22 17:33:16.172570: step: 1580/526, loss: 0.004351557698100805 2023-01-22 17:33:17.244488: step: 1584/526, loss: 0.004653299227356911 2023-01-22 17:33:18.312189: step: 1588/526, loss: 0.005408448167145252 2023-01-22 17:33:19.398930: step: 1592/526, loss: 0.003711160272359848 2023-01-22 17:33:20.494260: step: 1596/526, loss: 0.007804130669683218 2023-01-22 17:33:21.561682: step: 1600/526, loss: 0.00831583607941866 2023-01-22 17:33:22.644707: step: 1604/526, loss: 0.004499559290707111 2023-01-22 17:33:23.728561: step: 1608/526, loss: 0.00016871334810275584 2023-01-22 17:33:24.783889: step: 1612/526, loss: 0.0040127187967300415 2023-01-22 17:33:25.860352: step: 1616/526, loss: 0.009973247535526752 2023-01-22 17:33:26.926538: step: 1620/526, loss: 0.018963787704706192 2023-01-22 17:33:28.004586: step: 1624/526, loss: 0.0055965096689760685 2023-01-22 17:33:29.055804: step: 1628/526, loss: 0.015125522390007973 2023-01-22 17:33:30.143022: step: 1632/526, loss: 0.006289013661444187 2023-01-22 17:33:31.189875: step: 1636/526, loss: 0.016031652688980103 2023-01-22 17:33:32.280325: step: 1640/526, loss: 0.0021265451796352863 2023-01-22 17:33:33.335683: step: 1644/526, loss: 0.00998340267688036 2023-01-22 17:33:34.426740: step: 1648/526, loss: 0.008487081155180931 2023-01-22 17:33:35.497447: step: 1652/526, loss: 0.00037652032915502787 2023-01-22 17:33:36.573334: step: 1656/526, loss: 0.01791190728545189 2023-01-22 17:33:37.634576: step: 1660/526, loss: 0.0 2023-01-22 17:33:38.708167: step: 1664/526, loss: 0.010901509784162045 2023-01-22 17:33:39.780219: step: 1668/526, loss: 0.007096904795616865 2023-01-22 17:33:40.841555: step: 1672/526, loss: 0.003612255910411477 2023-01-22 17:33:41.905252: step: 1676/526, loss: 0.0021530084777623415 2023-01-22 17:33:42.981766: step: 1680/526, loss: 0.0030258512124419212 2023-01-22 17:33:44.050032: step: 1684/526, loss: 0.0048541477881371975 2023-01-22 17:33:45.139552: step: 1688/526, loss: 0.0006590585107915103 2023-01-22 17:33:46.209018: step: 1692/526, loss: 0.009203913621604443 2023-01-22 17:33:47.289425: step: 1696/526, loss: 0.0005242836778052151 2023-01-22 17:33:48.381698: step: 1700/526, loss: 0.00464558694511652 2023-01-22 17:33:49.446276: step: 1704/526, loss: 0.00245933816768229 2023-01-22 17:33:50.511977: step: 
1708/526, loss: 0.0018439812120050192 2023-01-22 17:33:51.588941: step: 1712/526, loss: 0.010666943155229092 2023-01-22 17:33:52.662029: step: 1716/526, loss: 0.00019439266179688275 2023-01-22 17:33:53.728781: step: 1720/526, loss: 0.003129103686660528 2023-01-22 17:33:54.811493: step: 1724/526, loss: 0.0022669986356049776 2023-01-22 17:33:55.871486: step: 1728/526, loss: 0.006754318252205849 2023-01-22 17:33:56.952974: step: 1732/526, loss: 0.003376535139977932 2023-01-22 17:33:58.018266: step: 1736/526, loss: 0.007157266139984131 2023-01-22 17:33:59.105583: step: 1740/526, loss: 0.0134523194283247 2023-01-22 17:34:00.165509: step: 1744/526, loss: 0.0051355487667024136 2023-01-22 17:34:01.248021: step: 1748/526, loss: 0.006010604090988636 2023-01-22 17:34:02.318584: step: 1752/526, loss: 0.0007501115323975682 2023-01-22 17:34:03.388357: step: 1756/526, loss: 0.0002574706741143018 2023-01-22 17:34:04.442052: step: 1760/526, loss: 0.006906383205205202 2023-01-22 17:34:05.520351: step: 1764/526, loss: 0.003060990246012807 2023-01-22 17:34:06.612530: step: 1768/526, loss: 0.00755661353468895 2023-01-22 17:34:07.683249: step: 1772/526, loss: 0.0031891348771750927 2023-01-22 17:34:08.747314: step: 1776/526, loss: 0.008128157816827297 2023-01-22 17:34:09.795690: step: 1780/526, loss: 0.0039753383025527 2023-01-22 17:34:10.867005: step: 1784/526, loss: 0.004681938327848911 2023-01-22 17:34:11.927582: step: 1788/526, loss: 0.00044340832391753793 2023-01-22 17:34:12.999795: step: 1792/526, loss: 0.007938959635794163 2023-01-22 17:34:14.100205: step: 1796/526, loss: 0.010996698401868343 2023-01-22 17:34:15.170933: step: 1800/526, loss: 0.00542069086804986 2023-01-22 17:34:16.251230: step: 1804/526, loss: 0.010516573674976826 2023-01-22 17:34:17.301963: step: 1808/526, loss: 0.001993355806916952 2023-01-22 17:34:18.362161: step: 1812/526, loss: 0.001645339885726571 2023-01-22 17:34:19.429117: step: 1816/526, loss: 0.001747045200318098 2023-01-22 17:34:20.487336: step: 1820/526, loss: 0.0017123748548328876 2023-01-22 17:34:21.553505: step: 1824/526, loss: 0.00014265051868278533 2023-01-22 17:34:22.623975: step: 1828/526, loss: 0.04269849509000778 2023-01-22 17:34:23.687623: step: 1832/526, loss: 0.00025080618797801435 2023-01-22 17:34:24.753859: step: 1836/526, loss: 0.002907001879066229 2023-01-22 17:34:25.828510: step: 1840/526, loss: 0.0049316114746034145 2023-01-22 17:34:26.901800: step: 1844/526, loss: 0.018841296434402466 2023-01-22 17:34:27.994444: step: 1848/526, loss: 0.00803183764219284 2023-01-22 17:34:29.060371: step: 1852/526, loss: 0.004255578387528658 2023-01-22 17:34:30.137210: step: 1856/526, loss: 0.003771889489144087 2023-01-22 17:34:31.204035: step: 1860/526, loss: 0.006448616273701191 2023-01-22 17:34:32.276760: step: 1864/526, loss: 0.011281045153737068 2023-01-22 17:34:33.348984: step: 1868/526, loss: 0.0024426572490483522 2023-01-22 17:34:34.417889: step: 1872/526, loss: 0.000969375076238066 2023-01-22 17:34:35.466687: step: 1876/526, loss: 0.004139092285186052 2023-01-22 17:34:36.530583: step: 1880/526, loss: 0.0036636251024901867 2023-01-22 17:34:37.596871: step: 1884/526, loss: 0.005768220871686935 2023-01-22 17:34:38.658284: step: 1888/526, loss: 0.014645333401858807 2023-01-22 17:34:39.725611: step: 1892/526, loss: 0.0038437529001384974 2023-01-22 17:34:40.792150: step: 1896/526, loss: 0.006669899448752403 2023-01-22 17:34:41.875945: step: 1900/526, loss: 0.0027901632711291313 2023-01-22 17:34:42.940751: step: 1904/526, loss: 0.0050841630436480045 2023-01-22 
17:34:43.998382: step: 1908/526, loss: 0.0026445232797414064 2023-01-22 17:34:45.055849: step: 1912/526, loss: 0.008343411609530449 2023-01-22 17:34:46.118347: step: 1916/526, loss: 0.0038834193255752325 2023-01-22 17:34:47.184547: step: 1920/526, loss: 0.008884825743734837 2023-01-22 17:34:48.251980: step: 1924/526, loss: 3.729144009412266e-05 2023-01-22 17:34:49.325014: step: 1928/526, loss: 0.009907491505146027 2023-01-22 17:34:50.382922: step: 1932/526, loss: 0.009486119262874126 2023-01-22 17:34:51.451787: step: 1936/526, loss: 0.025884343311190605 2023-01-22 17:34:52.515977: step: 1940/526, loss: 0.004642680287361145 2023-01-22 17:34:53.576939: step: 1944/526, loss: 0.003738254541531205 2023-01-22 17:34:54.640884: step: 1948/526, loss: 0.003660036949440837 2023-01-22 17:34:55.699133: step: 1952/526, loss: 0.0011103119468316436 2023-01-22 17:34:56.781132: step: 1956/526, loss: 0.005755078047513962 2023-01-22 17:34:57.857783: step: 1960/526, loss: 0.0012564045609906316 2023-01-22 17:34:58.928621: step: 1964/526, loss: 0.003929882310330868 2023-01-22 17:35:00.003263: step: 1968/526, loss: 0.002068354981020093 2023-01-22 17:35:01.063273: step: 1972/526, loss: 0.013658208772540092 2023-01-22 17:35:02.152500: step: 1976/526, loss: 0.0031996287871152163 2023-01-22 17:35:03.206023: step: 1980/526, loss: 0.0004426951054483652 2023-01-22 17:35:04.267904: step: 1984/526, loss: 0.007202464155852795 2023-01-22 17:35:05.343624: step: 1988/526, loss: 0.0019553338643163443 2023-01-22 17:35:06.400964: step: 1992/526, loss: 0.0005978432018309832 2023-01-22 17:35:07.458053: step: 1996/526, loss: 0.01198502816259861 2023-01-22 17:35:08.531700: step: 2000/526, loss: 0.006707605440169573 2023-01-22 17:35:09.597897: step: 2004/526, loss: 0.0064396606758236885 2023-01-22 17:35:10.666279: step: 2008/526, loss: 0.015926480293273926 2023-01-22 17:35:11.734315: step: 2012/526, loss: 0.00544326938688755 2023-01-22 17:35:12.797278: step: 2016/526, loss: 0.0013194968923926353 2023-01-22 17:35:13.878344: step: 2020/526, loss: 0.007548683788627386 2023-01-22 17:35:14.947367: step: 2024/526, loss: 0.005086565390229225 2023-01-22 17:35:15.993550: step: 2028/526, loss: 0.009567863307893276 2023-01-22 17:35:17.061824: step: 2032/526, loss: 0.004748514387756586 2023-01-22 17:35:18.147813: step: 2036/526, loss: 0.013343237340450287 2023-01-22 17:35:19.210519: step: 2040/526, loss: 0.005300730932503939 2023-01-22 17:35:20.269336: step: 2044/526, loss: 0.005254854913800955 2023-01-22 17:35:21.328204: step: 2048/526, loss: 0.00332982768304646 2023-01-22 17:35:22.403915: step: 2052/526, loss: 0.005135139916092157 2023-01-22 17:35:23.450489: step: 2056/526, loss: 0.0008605180773884058 2023-01-22 17:35:24.528350: step: 2060/526, loss: 0.0008949894108809531 2023-01-22 17:35:25.599491: step: 2064/526, loss: 0.003249643836170435 2023-01-22 17:35:26.678302: step: 2068/526, loss: 0.002316853031516075 2023-01-22 17:35:27.740254: step: 2072/526, loss: 0.0030070352368056774 2023-01-22 17:35:28.802132: step: 2076/526, loss: 0.01448208000510931 2023-01-22 17:35:29.875899: step: 2080/526, loss: 0.003342254087328911 2023-01-22 17:35:30.933015: step: 2084/526, loss: 0.002919140039011836 2023-01-22 17:35:31.978698: step: 2088/526, loss: 0.0007625837461091578 2023-01-22 17:35:33.038807: step: 2092/526, loss: 0.012680702842772007 2023-01-22 17:35:34.107908: step: 2096/526, loss: 0.0038279560394585133 2023-01-22 17:35:35.169626: step: 2100/526, loss: 0.007321455050259829 2023-01-22 17:35:36.241977: step: 2104/526, loss: 9.103171032620594e-05 
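(Editor's note.) The step counter in the lines above advances by 4 per logged loss and runs up to 2104 against a denominator of 526 (2104 / 4), which is consistent with the run's --batch_size 16 and --accumulate_step 4: one loss line per group of 4 accumulated sub-batches (an effective batch of 64), with the numerator counting raw batches and the denominator counting optimizer updates per epoch. The sketch below is a hypothetical reconstruction of that logging pattern in PyTorch, not the project's train.py; every name in it is an assumption. The two learning rates in the command (2e-5 for the XLM-R encoder, 9e-4 for the rest) would typically be realized as separate optimizer parameter groups, though that too is an assumption.

```python
# Hypothetical sketch (assumed names, not the project's train.py): gradient
# accumulation that prints one "step: N/526, loss: ..." line per group of
# accumulate_step batches, as in the log above.
import datetime
import torch
from torch.utils.data import DataLoader, TensorDataset


def train_one_epoch(model, loader, optimizer, accumulate_step=4):
    model.train()
    updates_per_epoch = len(loader) // accumulate_step  # e.g. 2104 / 4 = 526
    for batch_idx, (x, y) in enumerate(loader, start=1):
        loss = torch.nn.functional.cross_entropy(model(x), y)
        (loss / accumulate_step).backward()       # average gradients over the group
        if batch_idx % accumulate_step == 0:      # one optimizer update per group
            optimizer.step()
            optimizer.zero_grad()
            # Numerator counts raw batches, denominator counts updates per epoch,
            # which is why logged steps such as 2104 can exceed the "/526".
            print(f"{datetime.datetime.now()}: step: {batch_idx}/{updates_per_epoch}, "
                  f"loss: {loss.item()}")


if __name__ == "__main__":
    # Toy stand-in data so the sketch runs on its own; the real run uses
    # batch_size 16 with accumulate_step 4 (an effective batch of 64).
    data = TensorDataset(torch.randn(256, 8), torch.randint(0, 3, (256,)))
    model = torch.nn.Linear(8, 3)
    train_one_epoch(model, DataLoader(data, batch_size=16),
                    torch.optim.AdamW(model.parameters(), lr=9e-4))
```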
==================================================
Loss: 0.006
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3457911036036036, 'r': 0.2913306451612903, 'f1': 0.31623326467559215}, 'combined': 0.23301398449780472, 'stategy': 1, 'epoch': 10}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34253342370979806, 'r': 0.23964792647828362, 'f1': 0.28199949969211824}, 'combined': 0.15381790892297356, 'stategy': 1, 'epoch': 10}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3148623511904762, 'r': 0.3345785895003162, 'f1': 0.32442118981907386}, 'combined': 0.23904719249826495, 'stategy': 1, 'epoch': 10}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.342687433973375, 'r': 0.2658727758549149, 'f1': 0.29943219372428853}, 'combined': 0.16332665112233918, 'stategy': 1, 'epoch': 10}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260109657873179, 'r': 0.3303412822209255, 'f1': 0.3281618392656508}, 'combined': 0.24180346051153215, 'stategy': 1, 'epoch': 10}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33955533665404053, 'r': 0.27443513510395057, 'f1': 0.3035418918573998}, 'combined': 0.1655683046494908, 'stategy': 1, 'epoch': 10}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 10}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 10}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.3017241379310345, 'f1': 0.3723404255319149}, 'combined': 0.2482269503546099, 'stategy': 1, 'epoch': 10}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8}
Korean: {'template': {'p': 0.5, 'r':
0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 11 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 17:38:20.618453: step: 4/526, loss: 0.0015527985524386168 2023-01-22 17:38:21.681408: step: 8/526, loss: 0.014374683611094952 2023-01-22 17:38:22.737693: step: 12/526, loss: 0.031547218561172485 2023-01-22 17:38:23.799442: step: 16/526, loss: 0.0018152552656829357 2023-01-22 17:38:24.862414: step: 20/526, loss: 0.005823378451168537 2023-01-22 17:38:25.950268: step: 24/526, loss: 0.0015533366240561008 2023-01-22 17:38:26.994375: step: 28/526, loss: 0.0017239629523828626 2023-01-22 17:38:28.066220: step: 32/526, loss: 0.0015903108287602663 2023-01-22 17:38:29.140275: step: 36/526, loss: 0.02297254465520382 2023-01-22 17:38:30.188750: step: 40/526, loss: 0.006770250853151083 2023-01-22 17:38:31.257949: step: 44/526, loss: 1.668453660386149e-05 2023-01-22 17:38:32.311927: step: 48/526, loss: 0.006846142932772636 2023-01-22 17:38:33.370389: step: 52/526, loss: 0.008534999564290047 2023-01-22 17:38:34.443162: step: 56/526, loss: 0.0011843078536912799 2023-01-22 17:38:35.511316: step: 60/526, loss: 0.0013481620699167252 2023-01-22 17:38:36.579132: step: 64/526, loss: 1.7529895558254793e-05 2023-01-22 17:38:37.649260: step: 68/526, loss: 0.0027651020791381598 2023-01-22 17:38:38.707682: step: 72/526, loss: 0.0037084210198372602 2023-01-22 17:38:39.759270: step: 76/526, loss: 0.004029603209346533 2023-01-22 17:38:40.820549: step: 80/526, loss: 0.0006209263810887933 2023-01-22 17:38:41.877245: step: 84/526, loss: 0.0076514557003974915 2023-01-22 17:38:42.953969: step: 88/526, loss: 0.0038024436216801405 2023-01-22 17:38:44.023001: step: 92/526, loss: 0.00641417084261775 2023-01-22 17:38:45.099196: step: 96/526, loss: 0.00012367230374366045 2023-01-22 17:38:46.164016: step: 100/526, loss: 0.02735537476837635 2023-01-22 17:38:47.237806: step: 104/526, loss: 0.004092162940651178 2023-01-22 17:38:48.315067: step: 108/526, loss: 0.011058001779019833 2023-01-22 17:38:49.400940: step: 112/526, loss: 0.02300676517188549 2023-01-22 17:38:50.469698: step: 116/526, loss: 0.022482266649603844 2023-01-22 17:38:51.546934: step: 120/526, loss: 0.005871300119906664 2023-01-22 17:38:52.614739: step: 124/526, loss: 0.004005140159279108 2023-01-22 17:38:53.679408: step: 128/526, loss: 0.0016097135376185179 2023-01-22 17:38:54.746541: step: 132/526, loss: 0.01079469919204712 2023-01-22 17:38:55.805559: step: 136/526, loss: 0.0074540250934660435 2023-01-22 17:38:56.871615: 
step: 140/526, loss: 0.009236044250428677 2023-01-22 17:38:57.943232: step: 144/526, loss: 0.0033466038294136524 2023-01-22 17:38:59.009188: step: 148/526, loss: 0.01184664387255907 2023-01-22 17:39:00.072625: step: 152/526, loss: 0.007839243859052658 2023-01-22 17:39:01.136580: step: 156/526, loss: 0.0007059240597300231 2023-01-22 17:39:02.210505: step: 160/526, loss: 0.00021463612210936844 2023-01-22 17:39:03.305903: step: 164/526, loss: 0.0018794374773278832 2023-01-22 17:39:04.368323: step: 168/526, loss: 1.3485400529589242e-07 2023-01-22 17:39:05.431845: step: 172/526, loss: 0.008626885712146759 2023-01-22 17:39:06.500908: step: 176/526, loss: 0.0022632277105003595 2023-01-22 17:39:07.570767: step: 180/526, loss: 0.0004742064338643104 2023-01-22 17:39:08.625057: step: 184/526, loss: 0.005966256372630596 2023-01-22 17:39:09.684474: step: 188/526, loss: 0.006731688976287842 2023-01-22 17:39:10.759031: step: 192/526, loss: 0.003024298930540681 2023-01-22 17:39:11.821793: step: 196/526, loss: 0.0009065989870578051 2023-01-22 17:39:12.909018: step: 200/526, loss: 0.005843481048941612 2023-01-22 17:39:13.980035: step: 204/526, loss: 0.007101226598024368 2023-01-22 17:39:15.080135: step: 208/526, loss: 0.0020167578477412462 2023-01-22 17:39:16.132314: step: 212/526, loss: 0.004854851868003607 2023-01-22 17:39:17.194205: step: 216/526, loss: 0.012142536230385303 2023-01-22 17:39:18.262150: step: 220/526, loss: 0.0005539586418308318 2023-01-22 17:39:19.352440: step: 224/526, loss: 0.04283495992422104 2023-01-22 17:39:20.404919: step: 228/526, loss: 0.008202563039958477 2023-01-22 17:39:21.495207: step: 232/526, loss: 7.255576929310337e-05 2023-01-22 17:39:22.552782: step: 236/526, loss: 0.005904634948819876 2023-01-22 17:39:23.623656: step: 240/526, loss: 0.006485240068286657 2023-01-22 17:39:24.691232: step: 244/526, loss: 0.0017279810272157192 2023-01-22 17:39:25.760567: step: 248/526, loss: 0.0019765414763242006 2023-01-22 17:39:26.842281: step: 252/526, loss: 0.011654307134449482 2023-01-22 17:39:27.905102: step: 256/526, loss: 0.0013926109531894326 2023-01-22 17:39:28.980785: step: 260/526, loss: 0.0012710947776213288 2023-01-22 17:39:30.056487: step: 264/526, loss: 0.004178334027528763 2023-01-22 17:39:31.129162: step: 268/526, loss: 0.005153834819793701 2023-01-22 17:39:32.202744: step: 272/526, loss: 0.009852041490375996 2023-01-22 17:39:33.284148: step: 276/526, loss: 0.014089401811361313 2023-01-22 17:39:34.346149: step: 280/526, loss: 0.0109657421708107 2023-01-22 17:39:35.418197: step: 284/526, loss: 0.009788258001208305 2023-01-22 17:39:36.490166: step: 288/526, loss: 0.0015343743143603206 2023-01-22 17:39:37.562009: step: 292/526, loss: 0.004654880613088608 2023-01-22 17:39:38.648250: step: 296/526, loss: 0.003607081947848201 2023-01-22 17:39:39.715078: step: 300/526, loss: 0.0023208078928291798 2023-01-22 17:39:40.788427: step: 304/526, loss: 9.466566552873701e-05 2023-01-22 17:39:41.865198: step: 308/526, loss: 0.0012133034178987145 2023-01-22 17:39:42.926655: step: 312/526, loss: 1.093564696930116e-05 2023-01-22 17:39:44.005031: step: 316/526, loss: 0.0027408935129642487 2023-01-22 17:39:45.062742: step: 320/526, loss: 0.0001672089856583625 2023-01-22 17:39:46.125942: step: 324/526, loss: 0.001920043141581118 2023-01-22 17:39:47.204940: step: 328/526, loss: 0.008366193622350693 2023-01-22 17:39:48.275497: step: 332/526, loss: 0.0013664762955158949 2023-01-22 17:39:49.367115: step: 336/526, loss: 0.0013654837384819984 2023-01-22 17:39:50.452668: step: 340/526, loss: 
0.00759208295494318 2023-01-22 17:39:51.514038: step: 344/526, loss: 0.00845970306545496 2023-01-22 17:39:52.583327: step: 348/526, loss: 0.00380527856759727 2023-01-22 17:39:53.666088: step: 352/526, loss: 0.004360434133559465 2023-01-22 17:39:54.741249: step: 356/526, loss: 0.003152074757963419 2023-01-22 17:39:55.795880: step: 360/526, loss: 0.003814096562564373 2023-01-22 17:39:56.876907: step: 364/526, loss: 0.006370061542838812 2023-01-22 17:39:57.924412: step: 368/526, loss: 0.0013150274753570557 2023-01-22 17:39:58.995087: step: 372/526, loss: 0.004130323883146048 2023-01-22 17:40:00.054923: step: 376/526, loss: 0.003450981108471751 2023-01-22 17:40:01.129642: step: 380/526, loss: 0.009133800864219666 2023-01-22 17:40:02.207757: step: 384/526, loss: 0.0006803752621635795 2023-01-22 17:40:03.282735: step: 388/526, loss: 0.004730725660920143 2023-01-22 17:40:04.350473: step: 392/526, loss: 0.0004839395696762949 2023-01-22 17:40:05.408270: step: 396/526, loss: 0.001510030822828412 2023-01-22 17:40:06.464382: step: 400/526, loss: 0.000566075905226171 2023-01-22 17:40:07.542705: step: 404/526, loss: 0.013921750709414482 2023-01-22 17:40:08.600594: step: 408/526, loss: 0.0026388985570520163 2023-01-22 17:40:09.659810: step: 412/526, loss: 0.0052538057789206505 2023-01-22 17:40:10.710570: step: 416/526, loss: 0.004278465639799833 2023-01-22 17:40:11.783388: step: 420/526, loss: 0.0018223561346530914 2023-01-22 17:40:12.864056: step: 424/526, loss: 0.0027730856090784073 2023-01-22 17:40:13.935301: step: 428/526, loss: 0.0028003661427646875 2023-01-22 17:40:14.993052: step: 432/526, loss: 0.0019438628805801272 2023-01-22 17:40:16.061358: step: 436/526, loss: 0.03799775615334511 2023-01-22 17:40:17.116709: step: 440/526, loss: 0.0010687094181776047 2023-01-22 17:40:18.182025: step: 444/526, loss: 0.003563548671081662 2023-01-22 17:40:19.266089: step: 448/526, loss: 3.423057569307275e-05 2023-01-22 17:40:20.339224: step: 452/526, loss: 0.00042447546729817986 2023-01-22 17:40:21.408424: step: 456/526, loss: 0.003115955973044038 2023-01-22 17:40:22.490110: step: 460/526, loss: 0.0036883896682411432 2023-01-22 17:40:23.546827: step: 464/526, loss: 0.0007377912406809628 2023-01-22 17:40:24.602149: step: 468/526, loss: 0.0027743177488446236 2023-01-22 17:40:25.669735: step: 472/526, loss: 0.0005061248666606843 2023-01-22 17:40:26.748390: step: 476/526, loss: 0.011965678073465824 2023-01-22 17:40:27.811313: step: 480/526, loss: 0.0027339213993400335 2023-01-22 17:40:28.885594: step: 484/526, loss: 0.006434708833694458 2023-01-22 17:40:29.954278: step: 488/526, loss: 0.006143766921013594 2023-01-22 17:40:31.031551: step: 492/526, loss: 0.00266039720736444 2023-01-22 17:40:32.108805: step: 496/526, loss: 0.004466984886676073 2023-01-22 17:40:33.178326: step: 500/526, loss: 0.007771104574203491 2023-01-22 17:40:34.247842: step: 504/526, loss: 0.0009978280868381262 2023-01-22 17:40:35.316147: step: 508/526, loss: 0.003268573898822069 2023-01-22 17:40:36.384623: step: 512/526, loss: 0.026066893711686134 2023-01-22 17:40:37.473081: step: 516/526, loss: 0.001402052235789597 2023-01-22 17:40:38.543450: step: 520/526, loss: 0.0007467272225767374 2023-01-22 17:40:39.624230: step: 524/526, loss: 3.6508750781649724e-05 2023-01-22 17:40:40.688293: step: 528/526, loss: 0.00518309511244297 2023-01-22 17:40:41.759307: step: 532/526, loss: 0.005357992369681597 2023-01-22 17:40:42.848935: step: 536/526, loss: 0.005535739473998547 2023-01-22 17:40:43.888622: step: 540/526, loss: 0.0008894001948647201 2023-01-22 
17:40:44.962842: step: 544/526, loss: 0.011152184568345547 2023-01-22 17:40:46.028347: step: 548/526, loss: 0.0015413216315209866 2023-01-22 17:40:47.085995: step: 552/526, loss: 0.003057967172935605 2023-01-22 17:40:48.161942: step: 556/526, loss: 0.004721699748188257 2023-01-22 17:40:49.234476: step: 560/526, loss: 0.007417446468025446 2023-01-22 17:40:50.315382: step: 564/526, loss: 0.002286262344568968 2023-01-22 17:40:51.377426: step: 568/526, loss: 0.004955897573381662 2023-01-22 17:40:52.443434: step: 572/526, loss: 0.0023692059330642223 2023-01-22 17:40:53.506193: step: 576/526, loss: 0.0017217101994901896 2023-01-22 17:40:54.577597: step: 580/526, loss: 0.0013488983968272805 2023-01-22 17:40:55.631032: step: 584/526, loss: 0.01599848084151745 2023-01-22 17:40:56.707365: step: 588/526, loss: 0.0005247867084108293 2023-01-22 17:40:57.774412: step: 592/526, loss: 0.001362834358587861 2023-01-22 17:40:58.838953: step: 596/526, loss: 0.0014658961445093155 2023-01-22 17:40:59.913923: step: 600/526, loss: 0.006384869571775198 2023-01-22 17:41:00.970709: step: 604/526, loss: 0.0025591112207621336 2023-01-22 17:41:02.046794: step: 608/526, loss: 0.004175042267888784 2023-01-22 17:41:03.107936: step: 612/526, loss: 0.0018523165490478277 2023-01-22 17:41:04.180304: step: 616/526, loss: 0.009955846704542637 2023-01-22 17:41:05.240027: step: 620/526, loss: 0.0005123216542415321 2023-01-22 17:41:06.299526: step: 624/526, loss: 0.004340380430221558 2023-01-22 17:41:07.367496: step: 628/526, loss: 0.003832641988992691 2023-01-22 17:41:08.430506: step: 632/526, loss: 0.005103746894747019 2023-01-22 17:41:09.510862: step: 636/526, loss: 0.0017348246183246374 2023-01-22 17:41:10.564942: step: 640/526, loss: 0.007120194844901562 2023-01-22 17:41:11.632180: step: 644/526, loss: 0.00256592920050025 2023-01-22 17:41:12.713825: step: 648/526, loss: 0.0014505682047456503 2023-01-22 17:41:13.815506: step: 652/526, loss: 0.006125299725681543 2023-01-22 17:41:14.863463: step: 656/526, loss: 0.0025907312519848347 2023-01-22 17:41:15.927158: step: 660/526, loss: 0.00689921248704195 2023-01-22 17:41:16.986153: step: 664/526, loss: 0.0011587593471631408 2023-01-22 17:41:18.053757: step: 668/526, loss: 0.0008978423429653049 2023-01-22 17:41:19.126432: step: 672/526, loss: 0.025185955688357353 2023-01-22 17:41:20.196557: step: 676/526, loss: 0.006375297904014587 2023-01-22 17:41:21.260219: step: 680/526, loss: 0.0036048106849193573 2023-01-22 17:41:22.325907: step: 684/526, loss: 0.004133682232350111 2023-01-22 17:41:23.387610: step: 688/526, loss: 0.003429161850363016 2023-01-22 17:41:24.462296: step: 692/526, loss: 0.0010575143387541175 2023-01-22 17:41:25.506215: step: 696/526, loss: 0.0071308184415102005 2023-01-22 17:41:26.584427: step: 700/526, loss: 0.0032010809518396854 2023-01-22 17:41:27.647409: step: 704/526, loss: 0.0027031844947487116 2023-01-22 17:41:28.718067: step: 708/526, loss: 0.0027517429552972317 2023-01-22 17:41:29.800746: step: 712/526, loss: 0.0039056241512298584 2023-01-22 17:41:30.886276: step: 716/526, loss: 0.00367858842946589 2023-01-22 17:41:31.947485: step: 720/526, loss: 0.005079750902950764 2023-01-22 17:41:33.016486: step: 724/526, loss: 0.014265777543187141 2023-01-22 17:41:34.070046: step: 728/526, loss: 0.004944941960275173 2023-01-22 17:41:35.139663: step: 732/526, loss: 0.007102068513631821 2023-01-22 17:41:36.214810: step: 736/526, loss: 0.004638230428099632 2023-01-22 17:41:37.286158: step: 740/526, loss: 0.00040970550617203116 2023-01-22 17:41:38.354193: step: 744/526, 
loss: 0.00910506397485733 2023-01-22 17:41:39.409724: step: 748/526, loss: 0.005753392819315195 2023-01-22 17:41:40.498049: step: 752/526, loss: 0.0017255906714126468 2023-01-22 17:41:41.564923: step: 756/526, loss: 0.0064187017269432545 2023-01-22 17:41:42.626901: step: 760/526, loss: 5.205323759582825e-06 2023-01-22 17:41:43.684222: step: 764/526, loss: 0.010814828798174858 2023-01-22 17:41:44.751109: step: 768/526, loss: 0.0007021827041171491 2023-01-22 17:41:45.814275: step: 772/526, loss: 0.017233768478035927 2023-01-22 17:41:46.882915: step: 776/526, loss: 0.0018336690263822675 2023-01-22 17:41:47.946981: step: 780/526, loss: 0.017620448023080826 2023-01-22 17:41:49.009075: step: 784/526, loss: 0.0032585207372903824 2023-01-22 17:41:50.067701: step: 788/526, loss: 0.003399219363927841 2023-01-22 17:41:51.140619: step: 792/526, loss: 0.002600764622911811 2023-01-22 17:41:52.205436: step: 796/526, loss: 0.0220213383436203 2023-01-22 17:41:53.270887: step: 800/526, loss: 0.005034157540649176 2023-01-22 17:41:54.330719: step: 804/526, loss: 0.0007320235599763691 2023-01-22 17:41:55.392623: step: 808/526, loss: 0.005935746245086193 2023-01-22 17:41:56.458443: step: 812/526, loss: 0.007137676235288382 2023-01-22 17:41:57.537067: step: 816/526, loss: 0.0001293757522944361 2023-01-22 17:41:58.589708: step: 820/526, loss: 0.0055822706781327724 2023-01-22 17:41:59.651936: step: 824/526, loss: 0.006855186074972153 2023-01-22 17:42:00.720544: step: 828/526, loss: 0.00790692213922739 2023-01-22 17:42:01.787917: step: 832/526, loss: 0.005788835696876049 2023-01-22 17:42:02.844962: step: 836/526, loss: 0.0043522366322577 2023-01-22 17:42:03.900055: step: 840/526, loss: 0.005438192281872034 2023-01-22 17:42:04.959577: step: 844/526, loss: 0.008489994332194328 2023-01-22 17:42:06.035084: step: 848/526, loss: 0.009495867416262627 2023-01-22 17:42:07.100553: step: 852/526, loss: 0.0021484524477273226 2023-01-22 17:42:08.171462: step: 856/526, loss: 0.00551944226026535 2023-01-22 17:42:09.218937: step: 860/526, loss: 0.007676903158426285 2023-01-22 17:42:10.286783: step: 864/526, loss: 0.0011822825763374567 2023-01-22 17:42:11.337470: step: 868/526, loss: 9.398196561960503e-05 2023-01-22 17:42:12.404545: step: 872/526, loss: 0.0012379330582916737 2023-01-22 17:42:13.481190: step: 876/526, loss: 0.0032965163700282574 2023-01-22 17:42:14.544635: step: 880/526, loss: 0.0019311937503516674 2023-01-22 17:42:15.607168: step: 884/526, loss: 0.004509120713919401 2023-01-22 17:42:16.667804: step: 888/526, loss: 7.450580152834618e-10 2023-01-22 17:42:17.742459: step: 892/526, loss: 0.0016446230001747608 2023-01-22 17:42:18.808133: step: 896/526, loss: 0.0030757449567317963 2023-01-22 17:42:19.877020: step: 900/526, loss: 0.0008036759681999683 2023-01-22 17:42:20.954508: step: 904/526, loss: 0.005558198317885399 2023-01-22 17:42:22.030073: step: 908/526, loss: 0.004003367852419615 2023-01-22 17:42:23.104759: step: 912/526, loss: 0.0060005527921020985 2023-01-22 17:42:24.159893: step: 916/526, loss: 0.004042772110551596 2023-01-22 17:42:25.218258: step: 920/526, loss: 0.005234920885413885 2023-01-22 17:42:26.265621: step: 924/526, loss: 0.0028326527681201696 2023-01-22 17:42:27.329519: step: 928/526, loss: 0.0022338866256177425 2023-01-22 17:42:28.398577: step: 932/526, loss: 0.0014650358352810144 2023-01-22 17:42:29.462407: step: 936/526, loss: 0.0013492372818291187 2023-01-22 17:42:30.546684: step: 940/526, loss: 0.0060861967504024506 2023-01-22 17:42:31.607965: step: 944/526, loss: 0.0016624766867607832 
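(Editor's note on the per-language evaluation summary printed at the end of each epoch, such as the epoch-10 block above.) Each entry reports precision/recall/F1 for the 'template' and 'slot' levels plus a 'combined' value, and the logged numbers are consistent with combined = template F1 × slot F1 (for example, 0.7368... × 0.3162... ≈ 0.2330 for Dev Chinese). Below is a small, hypothetical sketch of that composition; the function names are assumptions, only the arithmetic is taken from the log.

```python
# Hypothetical sketch of how the evaluation numbers above compose; the log's
# 'combined' values match template_f1 * slot_f1.
def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall (0 when both are 0)."""
    return 2 * p * r / (p + r) if (p + r) else 0.0


def combined_score(template: dict, slot: dict) -> float:
    """Template-level F1 scaled by slot-level F1."""
    return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])


# Reproduces the epoch-10 "Dev Chinese" line above (combined ~= 0.2330).
print(combined_score({"p": 1.0, "r": 0.5833333333333334},
                     {"p": 0.3457911036036036, "r": 0.2913306451612903}))
```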
2023-01-22 17:42:32.682077: step: 948/526, loss: 0.000540849519893527 2023-01-22 17:42:33.744432: step: 952/526, loss: 0.011057938449084759 2023-01-22 17:42:34.815808: step: 956/526, loss: 0.006618036888539791 2023-01-22 17:42:35.875777: step: 960/526, loss: 0.049181126058101654 2023-01-22 17:42:36.933006: step: 964/526, loss: 0.00348468660376966 2023-01-22 17:42:37.985163: step: 968/526, loss: 0.00021710555301979184 2023-01-22 17:42:39.040458: step: 972/526, loss: 0.004641115665435791 2023-01-22 17:42:40.092969: step: 976/526, loss: 0.0018969716038554907 2023-01-22 17:42:41.170403: step: 980/526, loss: 0.011055637151002884 2023-01-22 17:42:42.245429: step: 984/526, loss: 0.01181644294410944 2023-01-22 17:42:43.321261: step: 988/526, loss: 0.00918593630194664 2023-01-22 17:42:44.370177: step: 992/526, loss: 0.020358357578516006 2023-01-22 17:42:45.447379: step: 996/526, loss: 0.00013514564489014447 2023-01-22 17:42:46.506699: step: 1000/526, loss: 0.0010516230249777436 2023-01-22 17:42:47.581475: step: 1004/526, loss: 0.0015638668555766344 2023-01-22 17:42:48.646500: step: 1008/526, loss: 0.0017984991427510977 2023-01-22 17:42:49.711940: step: 1012/526, loss: 0.026647118851542473 2023-01-22 17:42:50.794762: step: 1016/526, loss: 0.004543284419924021 2023-01-22 17:42:51.854244: step: 1020/526, loss: 3.2825851121742744e-06 2023-01-22 17:42:52.911073: step: 1024/526, loss: 0.0040977406315505505 2023-01-22 17:42:53.994961: step: 1028/526, loss: 0.005318142008036375 2023-01-22 17:42:55.063663: step: 1032/526, loss: 4.423849532031454e-05 2023-01-22 17:42:56.121936: step: 1036/526, loss: 0.00011153249943163246 2023-01-22 17:42:57.185787: step: 1040/526, loss: 0.006767992861568928 2023-01-22 17:42:58.254883: step: 1044/526, loss: 0.004334311932325363 2023-01-22 17:42:59.324723: step: 1048/526, loss: 0.0030721663497388363 2023-01-22 17:43:00.380702: step: 1052/526, loss: 0.0003806806926149875 2023-01-22 17:43:01.440075: step: 1056/526, loss: 0.008242100477218628 2023-01-22 17:43:02.501072: step: 1060/526, loss: 0.0037150925491005182 2023-01-22 17:43:03.560939: step: 1064/526, loss: 0.0025621983222663403 2023-01-22 17:43:04.622339: step: 1068/526, loss: 0.00516100600361824 2023-01-22 17:43:05.694882: step: 1072/526, loss: 0.0015349017921835184 2023-01-22 17:43:06.767305: step: 1076/526, loss: 0.0004905465175397694 2023-01-22 17:43:07.840197: step: 1080/526, loss: 0.005299912765622139 2023-01-22 17:43:08.890194: step: 1084/526, loss: 0.0012664188398048282 2023-01-22 17:43:09.958061: step: 1088/526, loss: 0.00358656351454556 2023-01-22 17:43:11.010299: step: 1092/526, loss: 0.023373868316411972 2023-01-22 17:43:12.081504: step: 1096/526, loss: 0.009309697896242142 2023-01-22 17:43:13.147059: step: 1100/526, loss: 0.004484089557081461 2023-01-22 17:43:14.228972: step: 1104/526, loss: 0.02947205677628517 2023-01-22 17:43:15.293933: step: 1108/526, loss: 0.002234118990600109 2023-01-22 17:43:16.354973: step: 1112/526, loss: 0.0008561440627090633 2023-01-22 17:43:17.424288: step: 1116/526, loss: 0.004330252762883902 2023-01-22 17:43:18.483995: step: 1120/526, loss: 0.0005354993045330048 2023-01-22 17:43:19.547142: step: 1124/526, loss: 0.0032475648913532495 2023-01-22 17:43:20.603069: step: 1128/526, loss: 0.008089636452496052 2023-01-22 17:43:21.658475: step: 1132/526, loss: 0.0055059525184333324 2023-01-22 17:43:22.739785: step: 1136/526, loss: 0.01154759619385004 2023-01-22 17:43:23.803489: step: 1140/526, loss: 6.383806612575427e-05 2023-01-22 17:43:24.845329: step: 1144/526, loss: 
0.010982904583215714 2023-01-22 17:43:25.906488: step: 1148/526, loss: 0.00451228441670537 2023-01-22 17:43:26.974941: step: 1152/526, loss: 7.669332262594253e-06 2023-01-22 17:43:28.070006: step: 1156/526, loss: 0.0006826398312114179 2023-01-22 17:43:29.125721: step: 1160/526, loss: 0.004565628245472908 2023-01-22 17:43:30.195293: step: 1164/526, loss: 0.007722855545580387 2023-01-22 17:43:31.260663: step: 1168/526, loss: 0.0016432913253083825 2023-01-22 17:43:32.316313: step: 1172/526, loss: 2.1040261344751343e-05 2023-01-22 17:43:33.386482: step: 1176/526, loss: 0.004352714866399765 2023-01-22 17:43:34.450406: step: 1180/526, loss: 0.004593819845467806 2023-01-22 17:43:35.507334: step: 1184/526, loss: 1.4244415979192127e-05 2023-01-22 17:43:36.565606: step: 1188/526, loss: 0.002957102144137025 2023-01-22 17:43:37.619312: step: 1192/526, loss: 0.012502138502895832 2023-01-22 17:43:38.692134: step: 1196/526, loss: 0.012206131592392921 2023-01-22 17:43:39.746816: step: 1200/526, loss: 0.005621777847409248 2023-01-22 17:43:40.818058: step: 1204/526, loss: 0.0005674066487699747 2023-01-22 17:43:41.884672: step: 1208/526, loss: 0.0027566973585635424 2023-01-22 17:43:42.942221: step: 1212/526, loss: 0.009235570207238197 2023-01-22 17:43:44.013097: step: 1216/526, loss: 0.00575832137838006 2023-01-22 17:43:45.084710: step: 1220/526, loss: 0.011415249668061733 2023-01-22 17:43:46.179049: step: 1224/526, loss: 0.004225266631692648 2023-01-22 17:43:47.238429: step: 1228/526, loss: 0.007484679110348225 2023-01-22 17:43:48.313944: step: 1232/526, loss: 0.0015892009250819683 2023-01-22 17:43:49.372329: step: 1236/526, loss: 0.005136185325682163 2023-01-22 17:43:50.428070: step: 1240/526, loss: 0.003244815394282341 2023-01-22 17:43:51.480184: step: 1244/526, loss: 0.0006238819914869964 2023-01-22 17:43:52.583961: step: 1248/526, loss: 0.043548181653022766 2023-01-22 17:43:53.635719: step: 1252/526, loss: 0.0016069854609668255 2023-01-22 17:43:54.695067: step: 1256/526, loss: 0.0024691345170140266 2023-01-22 17:43:55.746916: step: 1260/526, loss: 0.00343396607786417 2023-01-22 17:43:56.805420: step: 1264/526, loss: 0.00020056105859111995 2023-01-22 17:43:57.874778: step: 1268/526, loss: 5.516166129382327e-05 2023-01-22 17:43:58.941191: step: 1272/526, loss: 0.00014844803081359714 2023-01-22 17:44:00.005089: step: 1276/526, loss: 0.012826718389987946 2023-01-22 17:44:01.068002: step: 1280/526, loss: 0.001885322853922844 2023-01-22 17:44:02.124363: step: 1284/526, loss: 0.01198611967265606 2023-01-22 17:44:03.204334: step: 1288/526, loss: 0.0013885529479011893 2023-01-22 17:44:04.274609: step: 1292/526, loss: 0.012700226157903671 2023-01-22 17:44:05.333075: step: 1296/526, loss: 6.615779966523405e-06 2023-01-22 17:44:06.385316: step: 1300/526, loss: 0.0020006364211440086 2023-01-22 17:44:07.434051: step: 1304/526, loss: 0.010526680387556553 2023-01-22 17:44:08.496622: step: 1308/526, loss: 0.01219138316810131 2023-01-22 17:44:09.562647: step: 1312/526, loss: 0.0016996189951896667 2023-01-22 17:44:10.629004: step: 1316/526, loss: 0.0021444889716804028 2023-01-22 17:44:11.712127: step: 1320/526, loss: 0.0048052906058728695 2023-01-22 17:44:12.780076: step: 1324/526, loss: 0.00048242032062262297 2023-01-22 17:44:13.850506: step: 1328/526, loss: 0.004876694642007351 2023-01-22 17:44:14.908286: step: 1332/526, loss: 0.0002365312830079347 2023-01-22 17:44:15.992861: step: 1336/526, loss: 0.0013371994718909264 2023-01-22 17:44:17.049261: step: 1340/526, loss: 0.0025196231435984373 2023-01-22 17:44:18.129320: 
step: 1344/526, loss: 0.029093828052282333 2023-01-22 17:44:19.210587: step: 1348/526, loss: 0.00916947703808546 2023-01-22 17:44:20.272848: step: 1352/526, loss: 0.001004858990199864 2023-01-22 17:44:21.351614: step: 1356/526, loss: 0.010506617836654186 2023-01-22 17:44:22.423243: step: 1360/526, loss: 0.001510967849753797 2023-01-22 17:44:23.498191: step: 1364/526, loss: 0.0021811414044350386 2023-01-22 17:44:24.556528: step: 1368/526, loss: 0.007610586006194353 2023-01-22 17:44:25.627934: step: 1372/526, loss: 0.0058801425620913506 2023-01-22 17:44:26.686843: step: 1376/526, loss: 0.008549570105969906 2023-01-22 17:44:27.741250: step: 1380/526, loss: 0.003672333201393485 2023-01-22 17:44:28.804811: step: 1384/526, loss: 0.009058075025677681 2023-01-22 17:44:29.896545: step: 1388/526, loss: 0.006501309107989073 2023-01-22 17:44:30.968539: step: 1392/526, loss: 0.007007825654000044 2023-01-22 17:44:32.035319: step: 1396/526, loss: 0.007669099606573582 2023-01-22 17:44:33.091217: step: 1400/526, loss: 8.381891802855534e-08 2023-01-22 17:44:34.176376: step: 1404/526, loss: 0.005447391886264086 2023-01-22 17:44:35.250320: step: 1408/526, loss: 0.00405966117978096 2023-01-22 17:44:36.313285: step: 1412/526, loss: 0.0013689192710444331 2023-01-22 17:44:37.369477: step: 1416/526, loss: 0.0015988851664587855 2023-01-22 17:44:38.431283: step: 1420/526, loss: 0.015160990878939629 2023-01-22 17:44:39.505981: step: 1424/526, loss: 0.000712490756995976 2023-01-22 17:44:40.596785: step: 1428/526, loss: 0.0020193003583699465 2023-01-22 17:44:41.667883: step: 1432/526, loss: 0.005865667946636677 2023-01-22 17:44:42.738310: step: 1436/526, loss: 0.0017918252851814032 2023-01-22 17:44:43.832501: step: 1440/526, loss: 0.0035313840489834547 2023-01-22 17:44:44.896154: step: 1444/526, loss: 0.0026456057094037533 2023-01-22 17:44:45.957745: step: 1448/526, loss: 0.003913404885679483 2023-01-22 17:44:47.032320: step: 1452/526, loss: 0.00258021242916584 2023-01-22 17:44:48.102147: step: 1456/526, loss: 0.0009279975784011185 2023-01-22 17:44:49.160503: step: 1460/526, loss: 2.5730187189765275e-05 2023-01-22 17:44:50.235352: step: 1464/526, loss: 0.00010479407501406968 2023-01-22 17:44:51.296863: step: 1468/526, loss: 0.0012222465593367815 2023-01-22 17:44:52.358392: step: 1472/526, loss: 0.010760725475847721 2023-01-22 17:44:53.416070: step: 1476/526, loss: 0.0032381017226725817 2023-01-22 17:44:54.476258: step: 1480/526, loss: 0.003977091982960701 2023-01-22 17:44:55.542706: step: 1484/526, loss: 0.006333249621093273 2023-01-22 17:44:56.622893: step: 1488/526, loss: 0.0033643092028796673 2023-01-22 17:44:57.688336: step: 1492/526, loss: 0.0012857952388003469 2023-01-22 17:44:58.759481: step: 1496/526, loss: 0.0041739000007510185 2023-01-22 17:44:59.811987: step: 1500/526, loss: 0.002837817184627056 2023-01-22 17:45:00.892344: step: 1504/526, loss: 0.101455919444561 2023-01-22 17:45:01.972427: step: 1508/526, loss: 0.006391642615199089 2023-01-22 17:45:03.056560: step: 1512/526, loss: 0.011850404553115368 2023-01-22 17:45:04.114680: step: 1516/526, loss: 0.00957333855330944 2023-01-22 17:45:05.165719: step: 1520/526, loss: 0.0016149585135281086 2023-01-22 17:45:06.223188: step: 1524/526, loss: 0.010195809416472912 2023-01-22 17:45:07.312483: step: 1528/526, loss: 0.001148558920249343 2023-01-22 17:45:08.387285: step: 1532/526, loss: 0.007005386985838413 2023-01-22 17:45:09.467454: step: 1536/526, loss: 0.006135713774710894 2023-01-22 17:45:10.530817: step: 1540/526, loss: 0.004064920358359814 2023-01-22 
17:45:11.580158: step: 1544/526, loss: 0.0025080936029553413 2023-01-22 17:45:12.657399: step: 1548/526, loss: 0.008785512298345566 2023-01-22 17:45:13.731961: step: 1552/526, loss: 0.002307741204276681 2023-01-22 17:45:14.805334: step: 1556/526, loss: 0.004109462723135948 2023-01-22 17:45:15.874774: step: 1560/526, loss: 0.006284110248088837 2023-01-22 17:45:16.964714: step: 1564/526, loss: 0.021242180839180946 2023-01-22 17:45:18.044353: step: 1568/526, loss: 0.00031133886659517884 2023-01-22 17:45:19.115789: step: 1572/526, loss: 0.007188607472926378 2023-01-22 17:45:20.186191: step: 1576/526, loss: 0.0045284247025847435 2023-01-22 17:45:21.239923: step: 1580/526, loss: 0.025905510410666466 2023-01-22 17:45:22.314187: step: 1584/526, loss: 0.006384619511663914 2023-01-22 17:45:23.383267: step: 1588/526, loss: 0.002577879000455141 2023-01-22 17:45:24.446773: step: 1592/526, loss: 0.005117006134241819 2023-01-22 17:45:25.516967: step: 1596/526, loss: 0.0030828595627099276 2023-01-22 17:45:26.585747: step: 1600/526, loss: 0.0005668936646543443 2023-01-22 17:45:27.655675: step: 1604/526, loss: 0.008498278446495533 2023-01-22 17:45:28.721474: step: 1608/526, loss: 0.0005774215096607804 2023-01-22 17:45:29.802386: step: 1612/526, loss: 0.004483935888856649 2023-01-22 17:45:30.884015: step: 1616/526, loss: 0.013668697327375412 2023-01-22 17:45:31.942072: step: 1620/526, loss: 0.0028071808628737926 2023-01-22 17:45:33.006429: step: 1624/526, loss: 0.00044962106039747596 2023-01-22 17:45:34.072249: step: 1628/526, loss: 0.0017513372004032135 2023-01-22 17:45:35.149308: step: 1632/526, loss: 0.00021313379693310708 2023-01-22 17:45:36.223898: step: 1636/526, loss: 0.004358714912086725 2023-01-22 17:45:37.286992: step: 1640/526, loss: 0.003260429948568344 2023-01-22 17:45:38.373285: step: 1644/526, loss: 0.010220948606729507 2023-01-22 17:45:39.448772: step: 1648/526, loss: 0.003521028673276305 2023-01-22 17:45:40.508569: step: 1652/526, loss: 0.004618900362402201 2023-01-22 17:45:41.572052: step: 1656/526, loss: 0.006055415607988834 2023-01-22 17:45:42.641120: step: 1660/526, loss: 0.0022699935361742973 2023-01-22 17:45:43.733551: step: 1664/526, loss: 0.0019782076124101877 2023-01-22 17:45:44.800507: step: 1668/526, loss: 0.0008403612300753593 2023-01-22 17:45:45.857664: step: 1672/526, loss: 0.010374226607382298 2023-01-22 17:45:46.920673: step: 1676/526, loss: 0.013295567594468594 2023-01-22 17:45:47.974872: step: 1680/526, loss: 0.0026011704467236996 2023-01-22 17:45:49.054929: step: 1684/526, loss: 0.022898836061358452 2023-01-22 17:45:50.117690: step: 1688/526, loss: 0.0145388413220644 2023-01-22 17:45:51.208171: step: 1692/526, loss: 0.0014042035909369588 2023-01-22 17:45:52.267567: step: 1696/526, loss: 0.0037161032669246197 2023-01-22 17:45:53.332044: step: 1700/526, loss: 0.011581058613955975 2023-01-22 17:45:54.435049: step: 1704/526, loss: 0.008050082251429558 2023-01-22 17:45:55.506730: step: 1708/526, loss: 0.001337134512141347 2023-01-22 17:45:56.578804: step: 1712/526, loss: 0.0012007122859358788 2023-01-22 17:45:57.657708: step: 1716/526, loss: 0.0028656721115112305 2023-01-22 17:45:58.741569: step: 1720/526, loss: 0.0032513882033526897 2023-01-22 17:45:59.814307: step: 1724/526, loss: 0.004646581131964922 2023-01-22 17:46:00.886347: step: 1728/526, loss: 0.001016543130390346 2023-01-22 17:46:01.951053: step: 1732/526, loss: 0.0004443769867066294 2023-01-22 17:46:03.012446: step: 1736/526, loss: 0.003729267744347453 2023-01-22 17:46:04.074566: step: 1740/526, loss: 
0.0011827360140159726 2023-01-22 17:46:05.145165: step: 1744/526, loss: 0.00376526964828372 2023-01-22 17:46:06.218901: step: 1748/526, loss: 0.01286687608808279 2023-01-22 17:46:07.278116: step: 1752/526, loss: 0.0014182575978338718 2023-01-22 17:46:08.339109: step: 1756/526, loss: 0.00057150365319103 2023-01-22 17:46:09.401592: step: 1760/526, loss: 0.011356213130056858 2023-01-22 17:46:10.451231: step: 1764/526, loss: 0.0033282830845564604 2023-01-22 17:46:11.524585: step: 1768/526, loss: 0.0003513133560772985 2023-01-22 17:46:12.596717: step: 1772/526, loss: 3.9954767999006435e-05 2023-01-22 17:46:13.659406: step: 1776/526, loss: 0.0029689716175198555 2023-01-22 17:46:14.725683: step: 1780/526, loss: 0.001050980412401259 2023-01-22 17:46:15.792376: step: 1784/526, loss: 0.0033109993673861027 2023-01-22 17:46:16.863221: step: 1788/526, loss: 0.0029314623679965734 2023-01-22 17:46:17.918902: step: 1792/526, loss: 0.01881171576678753 2023-01-22 17:46:18.975979: step: 1796/526, loss: 0.0016048286342993379 2023-01-22 17:46:20.047025: step: 1800/526, loss: 0.00024121090245898813 2023-01-22 17:46:21.111375: step: 1804/526, loss: 0.010174653492867947 2023-01-22 17:46:22.187676: step: 1808/526, loss: 0.011384622193872929 2023-01-22 17:46:23.251008: step: 1812/526, loss: 0.00020782684441655874 2023-01-22 17:46:24.314673: step: 1816/526, loss: 0.0058898888528347015 2023-01-22 17:46:25.378287: step: 1820/526, loss: 0.0040581803768873215 2023-01-22 17:46:26.442437: step: 1824/526, loss: 0.0041685886681079865 2023-01-22 17:46:27.488372: step: 1828/526, loss: 0.0007716206018812954 2023-01-22 17:46:28.566681: step: 1832/526, loss: 0.00015300473023671657 2023-01-22 17:46:29.640730: step: 1836/526, loss: 0.006592265330255032 2023-01-22 17:46:30.721866: step: 1840/526, loss: 0.011776725761592388 2023-01-22 17:46:31.810399: step: 1844/526, loss: 0.018989983946084976 2023-01-22 17:46:32.871472: step: 1848/526, loss: 0.0064499578438699245 2023-01-22 17:46:33.933672: step: 1852/526, loss: 0.0008649341762065887 2023-01-22 17:46:34.997017: step: 1856/526, loss: 0.0003299264644738287 2023-01-22 17:46:36.057302: step: 1860/526, loss: 7.778425242577214e-06 2023-01-22 17:46:37.113331: step: 1864/526, loss: 1.5760853784740902e-05 2023-01-22 17:46:38.183647: step: 1868/526, loss: 0.01908731460571289 2023-01-22 17:46:39.251548: step: 1872/526, loss: 0.0025268851313740015 2023-01-22 17:46:40.340440: step: 1876/526, loss: 0.0 2023-01-22 17:46:41.405415: step: 1880/526, loss: 0.007457737345248461 2023-01-22 17:46:42.463691: step: 1884/526, loss: 0.009490340948104858 2023-01-22 17:46:43.571257: step: 1888/526, loss: 0.004845378454774618 2023-01-22 17:46:44.627830: step: 1892/526, loss: 0.0028228999581187963 2023-01-22 17:46:45.690151: step: 1896/526, loss: 0.0077581084333360195 2023-01-22 17:46:46.748903: step: 1900/526, loss: 0.003925779834389687 2023-01-22 17:46:47.812695: step: 1904/526, loss: 0.002297257073223591 2023-01-22 17:46:48.878260: step: 1908/526, loss: 0.012945166788995266 2023-01-22 17:46:49.988239: step: 1912/526, loss: 0.03262411803007126 2023-01-22 17:46:51.065224: step: 1916/526, loss: 0.00223451666533947 2023-01-22 17:46:52.128206: step: 1920/526, loss: 0.003882618388161063 2023-01-22 17:46:53.208625: step: 1924/526, loss: 0.0033083721064031124 2023-01-22 17:46:54.287670: step: 1928/526, loss: 0.0044806464575231075 2023-01-22 17:46:55.377358: step: 1932/526, loss: 0.015671787783503532 2023-01-22 17:46:56.446181: step: 1936/526, loss: 0.00082369614392519 2023-01-22 17:46:57.513580: step: 1940/526, 
loss: 0.005413532257080078 2023-01-22 17:46:58.565503: step: 1944/526, loss: 2.8437520086299628e-05 2023-01-22 17:46:59.625134: step: 1948/526, loss: 0.02288207970559597 2023-01-22 17:47:00.694781: step: 1952/526, loss: 0.0006015666294842958 2023-01-22 17:47:01.745632: step: 1956/526, loss: 0.002368087647482753 2023-01-22 17:47:02.825556: step: 1960/526, loss: 0.0009501436143182218 2023-01-22 17:47:03.895971: step: 1964/526, loss: 0.005070342216640711 2023-01-22 17:47:04.952853: step: 1968/526, loss: 0.0077898805029690266 2023-01-22 17:47:06.049274: step: 1972/526, loss: 0.0027656888123601675 2023-01-22 17:47:07.120747: step: 1976/526, loss: 0.0021161020267754793 2023-01-22 17:47:08.194584: step: 1980/526, loss: 0.0005242753541097045 2023-01-22 17:47:09.291984: step: 1984/526, loss: 6.239534968699445e-07 2023-01-22 17:47:10.364082: step: 1988/526, loss: 0.007229828275740147 2023-01-22 17:47:11.440596: step: 1992/526, loss: 0.034025538712739944 2023-01-22 17:47:12.546992: step: 1996/526, loss: 0.019669758155941963 2023-01-22 17:47:13.639445: step: 2000/526, loss: 0.002099134959280491 2023-01-22 17:47:14.706371: step: 2004/526, loss: 0.00038834975566715 2023-01-22 17:47:15.777312: step: 2008/526, loss: 0.002441603457555175 2023-01-22 17:47:16.836690: step: 2012/526, loss: 0.0048280274495482445 2023-01-22 17:47:17.908889: step: 2016/526, loss: 0.013527227565646172 2023-01-22 17:47:18.967450: step: 2020/526, loss: 0.003949955105781555 2023-01-22 17:47:20.033618: step: 2024/526, loss: 4.812977203982882e-05 2023-01-22 17:47:21.086312: step: 2028/526, loss: 0.002293851226568222 2023-01-22 17:47:22.153095: step: 2032/526, loss: 0.003258189419284463 2023-01-22 17:47:23.222142: step: 2036/526, loss: 0.0047264802269637585 2023-01-22 17:47:24.306658: step: 2040/526, loss: 0.00121023238170892 2023-01-22 17:47:25.368585: step: 2044/526, loss: 0.004911414347589016 2023-01-22 17:47:26.421892: step: 2048/526, loss: 0.0007358885486610234 2023-01-22 17:47:27.479937: step: 2052/526, loss: 0.009156769141554832 2023-01-22 17:47:28.556561: step: 2056/526, loss: 0.003198770107701421 2023-01-22 17:47:29.616387: step: 2060/526, loss: 0.02316271699965 2023-01-22 17:47:30.674396: step: 2064/526, loss: 0.0004598215455189347 2023-01-22 17:47:31.726725: step: 2068/526, loss: 0.0014651265228167176 2023-01-22 17:47:32.792577: step: 2072/526, loss: 0.01609884202480316 2023-01-22 17:47:33.865846: step: 2076/526, loss: 0.007467396557331085 2023-01-22 17:47:34.933627: step: 2080/526, loss: 0.008745291270315647 2023-01-22 17:47:35.990921: step: 2084/526, loss: 0.0014709733659401536 2023-01-22 17:47:37.068196: step: 2088/526, loss: 0.0023643779568374157 2023-01-22 17:47:38.128833: step: 2092/526, loss: 0.01048876903951168 2023-01-22 17:47:39.180652: step: 2096/526, loss: 0.0013966734986752272 2023-01-22 17:47:40.244803: step: 2100/526, loss: 0.010592360980808735 2023-01-22 17:47:41.305082: step: 2104/526, loss: 0.014253076165914536 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35311092342342343, 'r': 0.29749762808349145, 'f1': 0.3229273944387229}, 'combined': 0.23794650116537477, 'stategy': 1, 'epoch': 11} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33692824801330623, 'r': 0.23850323783359498, 'f1': 0.2792981616238989}, 'combined': 0.15234445179485395, 'stategy': 1, 'epoch': 11} Dev Korean: {'template': {'p': 
1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.315990890083632, 'r': 0.3345785895003162, 'f1': 0.32501920122887856}, 'combined': 0.23948783248443684, 'stategy': 1, 'epoch': 11} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3390886161136779, 'r': 0.2658727758549149, 'f1': 0.2980501989840226}, 'combined': 0.16257283580946685, 'stategy': 1, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3269472953753329, 'r': 0.3312900488243411, 'f1': 0.3291043463344538}, 'combined': 0.24249793940433437, 'stategy': 1, 'epoch': 11} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33361925634205786, 'r': 0.27329358259253506, 'f1': 0.3004583061634798}, 'combined': 0.1638863488164435, 'stategy': 1, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'stategy': 1, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.3017241379310345, 'f1': 0.3723404255319149}, 'combined': 0.2482269503546099, 'stategy': 1, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 
'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 12 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 17:50:28.728820: step: 4/526, loss: 0.00031437506549991667 2023-01-22 17:50:29.800986: step: 8/526, loss: 0.000721877790056169 2023-01-22 17:50:30.882459: step: 12/526, loss: 0.0002992902009282261 2023-01-22 17:50:31.941183: step: 16/526, loss: 0.001107602845877409 2023-01-22 17:50:33.001227: step: 20/526, loss: 0.005010148044675589 2023-01-22 17:50:34.054227: step: 24/526, loss: 0.0013369632652029395 2023-01-22 17:50:35.125623: step: 28/526, loss: 0.005854390095919371 2023-01-22 17:50:36.188575: step: 32/526, loss: 0.0010378245497122407 2023-01-22 17:50:37.247210: step: 36/526, loss: 0.005830280017107725 2023-01-22 17:50:38.305504: step: 40/526, loss: 0.004884921479970217 2023-01-22 17:50:39.358910: step: 44/526, loss: 0.0001447704853489995 2023-01-22 17:50:40.426536: step: 48/526, loss: 0.0011601501610130072 2023-01-22 17:50:41.492184: step: 52/526, loss: 0.00152221426833421 2023-01-22 17:50:42.558266: step: 56/526, loss: 0.0020156328100711107 2023-01-22 17:50:43.643328: step: 60/526, loss: 1.1011295100615826e-05 2023-01-22 17:50:44.707032: step: 64/526, loss: 0.001265243161469698 2023-01-22 17:50:45.762009: step: 68/526, loss: 0.0 2023-01-22 17:50:46.834589: step: 72/526, loss: 0.0005425070412456989 2023-01-22 17:50:47.902596: step: 76/526, loss: 0.0013437970774248242 2023-01-22 17:50:48.970859: step: 80/526, loss: 0.004624505992978811 2023-01-22 17:50:50.032628: step: 84/526, loss: 0.00032763893250375986 2023-01-22 17:50:51.103162: step: 88/526, loss: 0.0016169139416888356 2023-01-22 17:50:52.173331: step: 92/526, loss: 0.0037228604778647423 2023-01-22 17:50:53.248558: step: 96/526, loss: 0.01544990111142397 2023-01-22 17:50:54.303398: step: 100/526, loss: 0.0019161321688443422 2023-01-22 17:50:55.376060: step: 104/526, loss: 0.007739551831036806 2023-01-22 17:50:56.437902: step: 108/526, loss: 0.0033388000447303057 2023-01-22 17:50:57.501460: step: 112/526, loss: 0.005482289008796215 2023-01-22 17:50:58.559874: step: 116/526, loss: 0.016631007194519043 2023-01-22 17:50:59.642220: step: 120/526, loss: 0.0017945574363693595 2023-01-22 17:51:00.708310: step: 124/526, loss: 0.008565624244511127 2023-01-22 17:51:01.773313: step: 128/526, loss: 0.001546542509458959 2023-01-22 17:51:02.850974: step: 132/526, loss: 0.0070532034151256084 2023-01-22 17:51:03.913437: step: 136/526, loss: 0.004942536354064941 2023-01-22 17:51:04.978836: step: 140/526, loss: 0.00022651706240139902 2023-01-22 17:51:06.066514: step: 144/526, loss: 0.0015404631849378347 2023-01-22 17:51:07.125703: step: 148/526, loss: 0.004854219034314156 2023-01-22 17:51:08.204399: step: 152/526, loss: 0.004633516073226929 2023-01-22 17:51:09.278657: step: 156/526, loss: 0.0013367460342124104 2023-01-22 17:51:10.322762: step: 160/526, loss: 0.0004760772571898997 2023-01-22 17:51:11.380660: step: 164/526, loss: 0.00910631287842989 2023-01-22 17:51:12.450276: step: 168/526, loss: 0.034501541405916214 2023-01-22 17:51:13.545352: step: 172/526, loss: 0.0021336048375815153 2023-01-22 17:51:14.608574: 
step: 176/526, loss: 0.000841932778712362 2023-01-22 17:51:15.669683: step: 180/526, loss: 0.0013022262137383223 2023-01-22 17:51:16.749437: step: 184/526, loss: 0.006308686453849077 2023-01-22 17:51:17.815533: step: 188/526, loss: 0.0015304558910429478 2023-01-22 17:51:18.890098: step: 192/526, loss: 0.0014959670370444655 2023-01-22 17:51:19.947038: step: 196/526, loss: 0.004925600253045559 2023-01-22 17:51:21.025279: step: 200/526, loss: 0.0009841794380918145 2023-01-22 17:51:22.099156: step: 204/526, loss: 0.0014439214719459414 2023-01-22 17:51:23.162433: step: 208/526, loss: 0.002681492827832699 2023-01-22 17:51:24.224102: step: 212/526, loss: 0.0010890221456065774 2023-01-22 17:51:25.292672: step: 216/526, loss: 0.003575438866391778 2023-01-22 17:51:26.373977: step: 220/526, loss: 0.0075643728487193584 2023-01-22 17:51:27.458830: step: 224/526, loss: 0.00011623015598161146 2023-01-22 17:51:28.529943: step: 228/526, loss: 0.007574285846203566 2023-01-22 17:51:29.586696: step: 232/526, loss: 0.002350882161408663 2023-01-22 17:51:30.653204: step: 236/526, loss: 0.006298688240349293 2023-01-22 17:51:31.729317: step: 240/526, loss: 0.001984883565455675 2023-01-22 17:51:32.784667: step: 244/526, loss: 0.0012347263982519507 2023-01-22 17:51:33.857527: step: 248/526, loss: 0.0029861200600862503 2023-01-22 17:51:34.942642: step: 252/526, loss: 0.013707672245800495 2023-01-22 17:51:36.018776: step: 256/526, loss: 0.0027485175523906946 2023-01-22 17:51:37.082849: step: 260/526, loss: 0.007076349575072527 2023-01-22 17:51:38.160669: step: 264/526, loss: 0.0007557463250122964 2023-01-22 17:51:39.237448: step: 268/526, loss: 0.0018420928390696645 2023-01-22 17:51:40.301177: step: 272/526, loss: 0.004124253056943417 2023-01-22 17:51:41.377228: step: 276/526, loss: 0.00149305141530931 2023-01-22 17:51:42.434326: step: 280/526, loss: 0.10160735249519348 2023-01-22 17:51:43.495229: step: 284/526, loss: 0.03166588023304939 2023-01-22 17:51:44.572893: step: 288/526, loss: 0.014715251512825489 2023-01-22 17:51:45.642212: step: 292/526, loss: 0.006889783311635256 2023-01-22 17:51:46.729458: step: 296/526, loss: 0.001305997371673584 2023-01-22 17:51:47.801969: step: 300/526, loss: 0.0012758137891069055 2023-01-22 17:51:48.894195: step: 304/526, loss: 0.0001605924335308373 2023-01-22 17:51:49.973360: step: 308/526, loss: 0.003369224490597844 2023-01-22 17:51:51.046305: step: 312/526, loss: 0.00020925667195115238 2023-01-22 17:51:52.112268: step: 316/526, loss: 0.009680021554231644 2023-01-22 17:51:53.175714: step: 320/526, loss: 0.008730200119316578 2023-01-22 17:51:54.238792: step: 324/526, loss: 0.010040685534477234 2023-01-22 17:51:55.315923: step: 328/526, loss: 0.0019895322620868683 2023-01-22 17:51:56.371494: step: 332/526, loss: 0.0005051528569310904 2023-01-22 17:51:57.442261: step: 336/526, loss: 0.003201629500836134 2023-01-22 17:51:58.509848: step: 340/526, loss: 0.005166755057871342 2023-01-22 17:51:59.570504: step: 344/526, loss: 0.0031377023551613092 2023-01-22 17:52:00.646782: step: 348/526, loss: 7.840106263756752e-05 2023-01-22 17:52:01.724884: step: 352/526, loss: 0.001754283206537366 2023-01-22 17:52:02.793635: step: 356/526, loss: 0.0007279195706360042 2023-01-22 17:52:03.857400: step: 360/526, loss: 0.013939459808170795 2023-01-22 17:52:04.925399: step: 364/526, loss: 0.009014920331537724 2023-01-22 17:52:05.984046: step: 368/526, loss: 0.004237246233969927 2023-01-22 17:52:07.052033: step: 372/526, loss: 0.0013433074345812201 2023-01-22 17:52:08.113758: step: 376/526, loss: 
0.0005890174070373178 2023-01-22 17:52:09.191310: step: 380/526, loss: 0.003365324577316642 2023-01-22 17:52:10.257784: step: 384/526, loss: 0.0010860550682991743 2023-01-22 17:52:11.336235: step: 388/526, loss: 0.009376972913742065 2023-01-22 17:52:12.412669: step: 392/526, loss: 0.004481134936213493 2023-01-22 17:52:13.511607: step: 396/526, loss: 0.002291429089382291 2023-01-22 17:52:14.579382: step: 400/526, loss: 0.0002782550291158259 2023-01-22 17:52:15.675212: step: 404/526, loss: 0.0035371528938412666 2023-01-22 17:52:16.734902: step: 408/526, loss: 0.0007806267822161317 2023-01-22 17:52:17.793439: step: 412/526, loss: 0.0005052492488175631 2023-01-22 17:52:18.873281: step: 416/526, loss: 0.0021530049853026867 2023-01-22 17:52:19.939318: step: 420/526, loss: 0.023135367780923843 2023-01-22 17:52:21.013468: step: 424/526, loss: 0.002787533914670348 2023-01-22 17:52:22.082510: step: 428/526, loss: 0.010927122086286545 2023-01-22 17:52:23.155119: step: 432/526, loss: 0.002425807761028409 2023-01-22 17:52:24.221495: step: 436/526, loss: 0.004753129091113806 2023-01-22 17:52:25.299845: step: 440/526, loss: 5.0614195060916245e-05 2023-01-22 17:52:26.372479: step: 444/526, loss: 0.005468595307320356 2023-01-22 17:52:27.441374: step: 448/526, loss: 0.02075253054499626 2023-01-22 17:52:28.521591: step: 452/526, loss: 0.0013232381315901875 2023-01-22 17:52:29.601055: step: 456/526, loss: 0.004991380963474512 2023-01-22 17:52:30.685499: step: 460/526, loss: 0.01074813213199377 2023-01-22 17:52:31.772118: step: 464/526, loss: 0.0038094213232398033 2023-01-22 17:52:32.855547: step: 468/526, loss: 0.00588549580425024 2023-01-22 17:52:33.930563: step: 472/526, loss: 0.009350107982754707 2023-01-22 17:52:34.995258: step: 476/526, loss: 0.001876753056421876 2023-01-22 17:52:36.061036: step: 480/526, loss: 0.005339787341654301 2023-01-22 17:52:37.150120: step: 484/526, loss: 0.00220359954982996 2023-01-22 17:52:38.213114: step: 488/526, loss: 0.0006181065109558403 2023-01-22 17:52:39.276719: step: 492/526, loss: 0.002066671848297119 2023-01-22 17:52:40.335059: step: 496/526, loss: 0.011612337082624435 2023-01-22 17:52:41.398102: step: 500/526, loss: 8.063924906309694e-05 2023-01-22 17:52:42.469648: step: 504/526, loss: 0.007404741831123829 2023-01-22 17:52:43.560889: step: 508/526, loss: 0.009839549660682678 2023-01-22 17:52:44.605639: step: 512/526, loss: 0.004122963175177574 2023-01-22 17:52:45.690120: step: 516/526, loss: 0.0015515643171966076 2023-01-22 17:52:46.747693: step: 520/526, loss: 0.004361818544566631 2023-01-22 17:52:47.825171: step: 524/526, loss: 8.91040253918618e-05 2023-01-22 17:52:48.901683: step: 528/526, loss: 0.0017529523465782404 2023-01-22 17:52:49.970478: step: 532/526, loss: 0.006028460338711739 2023-01-22 17:52:51.032503: step: 536/526, loss: 0.001939959591254592 2023-01-22 17:52:52.099996: step: 540/526, loss: 0.004281359259039164 2023-01-22 17:52:53.162128: step: 544/526, loss: 0.017347289249300957 2023-01-22 17:52:54.218043: step: 548/526, loss: 0.0016728440532460809 2023-01-22 17:52:55.276387: step: 552/526, loss: 0.0009446667972952127 2023-01-22 17:52:56.334395: step: 556/526, loss: 0.025606101378798485 2023-01-22 17:52:57.400896: step: 560/526, loss: 0.00013952061999589205 2023-01-22 17:52:58.463472: step: 564/526, loss: 0.0009214154561050236 2023-01-22 17:52:59.524601: step: 568/526, loss: 0.0031211338937282562 2023-01-22 17:53:00.585929: step: 572/526, loss: 0.0005275773000903428 2023-01-22 17:53:01.643285: step: 576/526, loss: 0.004554534796625376 2023-01-22 
17:53:02.711342: step: 580/526, loss: 0.004487162921577692 2023-01-22 17:53:03.781314: step: 584/526, loss: 0.013655285350978374 2023-01-22 17:53:04.863282: step: 588/526, loss: 0.005994296167045832 2023-01-22 17:53:05.941195: step: 592/526, loss: 0.0005603848258033395 2023-01-22 17:53:07.021684: step: 596/526, loss: 0.0005197998252697289 2023-01-22 17:53:08.116807: step: 600/526, loss: 0.002649688394740224 2023-01-22 17:53:09.184238: step: 604/526, loss: 0.0013523525558412075 2023-01-22 17:53:10.240619: step: 608/526, loss: 0.004355909768491983 2023-01-22 17:53:11.309640: step: 612/526, loss: 0.0036479176487773657 2023-01-22 17:53:12.365531: step: 616/526, loss: 0.00855250284075737 2023-01-22 17:53:13.440590: step: 620/526, loss: 0.010242723859846592 2023-01-22 17:53:14.497565: step: 624/526, loss: 0.006170874927192926 2023-01-22 17:53:15.549232: step: 628/526, loss: 0.001439704792574048 2023-01-22 17:53:16.616069: step: 632/526, loss: 0.002179022179916501 2023-01-22 17:53:17.699701: step: 636/526, loss: 0.004114803392440081 2023-01-22 17:53:18.771216: step: 640/526, loss: 0.009873596020042896 2023-01-22 17:53:19.833109: step: 644/526, loss: 0.0020701757166534662 2023-01-22 17:53:20.895059: step: 648/526, loss: 0.007152346428483725 2023-01-22 17:53:21.974227: step: 652/526, loss: 0.002764208009466529 2023-01-22 17:53:23.051423: step: 656/526, loss: 0.0021466331090778112 2023-01-22 17:53:24.124918: step: 660/526, loss: 0.0025273330975323915 2023-01-22 17:53:25.196519: step: 664/526, loss: 0.0004839191387873143 2023-01-22 17:53:26.243228: step: 668/526, loss: 0.0001491934817750007 2023-01-22 17:53:27.307228: step: 672/526, loss: 5.0661230488913134e-05 2023-01-22 17:53:28.381975: step: 676/526, loss: 2.291400232934393e-05 2023-01-22 17:53:29.442883: step: 680/526, loss: 0.018560536205768585 2023-01-22 17:53:30.499525: step: 684/526, loss: 0.004976227879524231 2023-01-22 17:53:31.554888: step: 688/526, loss: 0.0005298475152812898 2023-01-22 17:53:32.626757: step: 692/526, loss: 0.005328733008354902 2023-01-22 17:53:33.679712: step: 696/526, loss: 0.0026134364306926727 2023-01-22 17:53:34.728545: step: 700/526, loss: 0.008869764395058155 2023-01-22 17:53:35.802431: step: 704/526, loss: 0.005638923030346632 2023-01-22 17:53:36.868863: step: 708/526, loss: 0.003231939859688282 2023-01-22 17:53:37.934859: step: 712/526, loss: 0.0004281184228602797 2023-01-22 17:53:39.010516: step: 716/526, loss: 0.0039603570476174355 2023-01-22 17:53:40.079785: step: 720/526, loss: 0.005882779601961374 2023-01-22 17:53:41.138237: step: 724/526, loss: 0.00305167306214571 2023-01-22 17:53:42.194816: step: 728/526, loss: 0.0016637013759464025 2023-01-22 17:53:43.265935: step: 732/526, loss: 0.003287063678726554 2023-01-22 17:53:44.330157: step: 736/526, loss: 0.005487238988280296 2023-01-22 17:53:45.391017: step: 740/526, loss: 0.033265359699726105 2023-01-22 17:53:46.459393: step: 744/526, loss: 0.00586258340626955 2023-01-22 17:53:47.526195: step: 748/526, loss: 0.0032196512911468744 2023-01-22 17:53:48.576127: step: 752/526, loss: 0.010227379389107227 2023-01-22 17:53:49.647237: step: 756/526, loss: 0.0020482593681663275 2023-01-22 17:53:50.704642: step: 760/526, loss: 0.003393246093764901 2023-01-22 17:53:51.762483: step: 764/526, loss: 0.0035101633984595537 2023-01-22 17:53:52.830681: step: 768/526, loss: 0.006472909357398748 2023-01-22 17:53:53.896904: step: 772/526, loss: 0.0054479134269058704 2023-01-22 17:53:54.961308: step: 776/526, loss: 2.027765958700911e-06 2023-01-22 17:53:56.013549: step: 780/526, 
loss: 0.00970985647290945 2023-01-22 17:53:57.082719: step: 784/526, loss: 0.0011454956838861108 2023-01-22 17:53:58.132785: step: 788/526, loss: 0.0006247623823583126 2023-01-22 17:53:59.199935: step: 792/526, loss: 0.0010254670633003116 2023-01-22 17:54:00.275020: step: 796/526, loss: 0.02520628832280636 2023-01-22 17:54:01.341679: step: 800/526, loss: 0.0014865443808957934 2023-01-22 17:54:02.423152: step: 804/526, loss: 0.0014674272388219833 2023-01-22 17:54:03.482477: step: 808/526, loss: 0.007118089124560356 2023-01-22 17:54:04.540605: step: 812/526, loss: 0.00023000827059149742 2023-01-22 17:54:05.601999: step: 816/526, loss: 0.007735084742307663 2023-01-22 17:54:06.668500: step: 820/526, loss: 0.0023086194414645433 2023-01-22 17:54:07.720594: step: 824/526, loss: 0.004207658115774393 2023-01-22 17:54:08.786332: step: 828/526, loss: 0.002537284279242158 2023-01-22 17:54:09.869597: step: 832/526, loss: 0.002568707102909684 2023-01-22 17:54:10.933838: step: 836/526, loss: 0.005868341773748398 2023-01-22 17:54:11.998518: step: 840/526, loss: 0.0009080182062461972 2023-01-22 17:54:13.070133: step: 844/526, loss: 0.008289956487715244 2023-01-22 17:54:14.142379: step: 848/526, loss: 0.0021764319390058517 2023-01-22 17:54:15.219462: step: 852/526, loss: 0.007497809827327728 2023-01-22 17:54:16.273104: step: 856/526, loss: 0.0034732576459646225 2023-01-22 17:54:17.337673: step: 860/526, loss: 0.00010057733015855774 2023-01-22 17:54:18.405582: step: 864/526, loss: 0.0005223654443398118 2023-01-22 17:54:19.489124: step: 868/526, loss: 0.0010158532531931996 2023-01-22 17:54:20.560181: step: 872/526, loss: 0.0003555673756636679 2023-01-22 17:54:21.613126: step: 876/526, loss: 0.01709245890378952 2023-01-22 17:54:22.694310: step: 880/526, loss: 0.0021394919604063034 2023-01-22 17:54:23.756402: step: 884/526, loss: 0.007290750741958618 2023-01-22 17:54:24.813495: step: 888/526, loss: 0.001117272418923676 2023-01-22 17:54:25.869420: step: 892/526, loss: 0.005706317722797394 2023-01-22 17:54:26.929422: step: 896/526, loss: 0.0005349827115423977 2023-01-22 17:54:28.012190: step: 900/526, loss: 0.0011702262563630939 2023-01-22 17:54:29.092188: step: 904/526, loss: 0.008477872237563133 2023-01-22 17:54:30.156033: step: 908/526, loss: 0.0004924589302390814 2023-01-22 17:54:31.228356: step: 912/526, loss: 0.003951834514737129 2023-01-22 17:54:32.283174: step: 916/526, loss: 0.009175797924399376 2023-01-22 17:54:33.349985: step: 920/526, loss: 0.0007077965419739485 2023-01-22 17:54:34.394760: step: 924/526, loss: 0.0014777312753722072 2023-01-22 17:54:35.449145: step: 928/526, loss: 0.040600694715976715 2023-01-22 17:54:36.511649: step: 932/526, loss: 0.003321695839986205 2023-01-22 17:54:37.571365: step: 936/526, loss: 0.0034956419840455055 2023-01-22 17:54:38.647707: step: 940/526, loss: 0.001856361050158739 2023-01-22 17:54:39.707085: step: 944/526, loss: 0.003936657682061195 2023-01-22 17:54:40.770356: step: 948/526, loss: 0.00014782967627979815 2023-01-22 17:54:41.830390: step: 952/526, loss: 0.0032827730756253004 2023-01-22 17:54:42.905272: step: 956/526, loss: 0.02049161121249199 2023-01-22 17:54:43.976206: step: 960/526, loss: 0.006885509938001633 2023-01-22 17:54:45.047436: step: 964/526, loss: 0.0013700233539566398 2023-01-22 17:54:46.109239: step: 968/526, loss: 0.0029252131935209036 2023-01-22 17:54:47.162622: step: 972/526, loss: 0.00019866864022333175 2023-01-22 17:54:48.218831: step: 976/526, loss: 0.002738167531788349 2023-01-22 17:54:49.285343: step: 980/526, loss: 0.00046391243813559413 
2023-01-22 17:54:50.361128: step: 984/526, loss: 0.004509535152465105 2023-01-22 17:54:51.430976: step: 988/526, loss: 0.008534682914614677 2023-01-22 17:54:52.497400: step: 992/526, loss: 0.0025360353756695986 2023-01-22 17:54:53.572472: step: 996/526, loss: 0.011647382751107216 2023-01-22 17:54:54.624594: step: 1000/526, loss: 0.003562483936548233 2023-01-22 17:54:55.678977: step: 1004/526, loss: 2.0158728148089722e-05 2023-01-22 17:54:56.726422: step: 1008/526, loss: 0.0031182165257632732 2023-01-22 17:54:57.781695: step: 1012/526, loss: 0.0 2023-01-22 17:54:58.848288: step: 1016/526, loss: 0.0003311119507998228 2023-01-22 17:54:59.917860: step: 1020/526, loss: 0.0023517049849033356 2023-01-22 17:55:00.973986: step: 1024/526, loss: 0.0025499663315713406 2023-01-22 17:55:02.035196: step: 1028/526, loss: 0.0022722072899341583 2023-01-22 17:55:03.118075: step: 1032/526, loss: 0.0007208083407022059 2023-01-22 17:55:04.185740: step: 1036/526, loss: 0.005427314434200525 2023-01-22 17:55:05.254478: step: 1040/526, loss: 0.0038968222215771675 2023-01-22 17:55:06.314935: step: 1044/526, loss: 0.005707144737243652 2023-01-22 17:55:07.377396: step: 1048/526, loss: 0.0029660374857485294 2023-01-22 17:55:08.453049: step: 1052/526, loss: 0.004226453136652708 2023-01-22 17:55:09.515113: step: 1056/526, loss: 0.019874300807714462 2023-01-22 17:55:10.581264: step: 1060/526, loss: 0.006841110065579414 2023-01-22 17:55:11.659815: step: 1064/526, loss: 0.007175834383815527 2023-01-22 17:55:12.718171: step: 1068/526, loss: 0.0005810825387015939 2023-01-22 17:55:13.801244: step: 1072/526, loss: 0.01439677644520998 2023-01-22 17:55:14.859469: step: 1076/526, loss: 0.006567763164639473 2023-01-22 17:55:15.913407: step: 1080/526, loss: 0.0012794440845027566 2023-01-22 17:55:16.971226: step: 1084/526, loss: 0.0029513114131987095 2023-01-22 17:55:18.047321: step: 1088/526, loss: 0.00069697096478194 2023-01-22 17:55:19.117698: step: 1092/526, loss: 0.006819003727287054 2023-01-22 17:55:20.169768: step: 1096/526, loss: 0.005154205486178398 2023-01-22 17:55:21.240451: step: 1100/526, loss: 0.013068868778645992 2023-01-22 17:55:22.317164: step: 1104/526, loss: 0.0005962604773230851 2023-01-22 17:55:23.379777: step: 1108/526, loss: 0.002312930766493082 2023-01-22 17:55:24.452790: step: 1112/526, loss: 0.0012140960898250341 2023-01-22 17:55:25.514751: step: 1116/526, loss: 0.002851397730410099 2023-01-22 17:55:26.592389: step: 1120/526, loss: 0.0029857249464839697 2023-01-22 17:55:27.660309: step: 1124/526, loss: 0.005991691257804632 2023-01-22 17:55:28.729588: step: 1128/526, loss: 0.003597614821046591 2023-01-22 17:55:29.791016: step: 1132/526, loss: 0.005559731740504503 2023-01-22 17:55:30.883101: step: 1136/526, loss: 0.005038572940975428 2023-01-22 17:55:31.982357: step: 1140/526, loss: 0.011019838973879814 2023-01-22 17:55:33.045629: step: 1144/526, loss: 0.012714000418782234 2023-01-22 17:55:34.106139: step: 1148/526, loss: 6.798392860218883e-05 2023-01-22 17:55:35.166553: step: 1152/526, loss: 0.0046712374314665794 2023-01-22 17:55:36.250081: step: 1156/526, loss: 0.004524369724094868 2023-01-22 17:55:37.311233: step: 1160/526, loss: 0.006142141297459602 2023-01-22 17:55:38.376752: step: 1164/526, loss: 0.0015335364732891321 2023-01-22 17:55:39.420677: step: 1168/526, loss: 0.002124929800629616 2023-01-22 17:55:40.479564: step: 1172/526, loss: 0.003984525799751282 2023-01-22 17:55:41.534035: step: 1176/526, loss: 0.0003349226899445057 2023-01-22 17:55:42.591236: step: 1180/526, loss: 0.0021272897720336914 
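Note: the per-step lines above follow a fixed pattern: an ISO timestamp, a running step counter that advances by 4, and the batch loss. The counter ending each epoch at 2104 = 4 x 526 against the displayed total of 526 is consistent with the --accumulate_step 4 setting (four forward/backward passes per optimizer update). A minimal sketch of a loop that would emit lines in this format, assuming a PyTorch-style model and optimizer; names and structure here are illustrative assumptions, not the actual train.py code:

import datetime

def train_epoch(model, optimizer, loader, accumulate_step=4):
    # Hypothetical sketch only: assumes model(batch) returns a scalar loss tensor.
    total = len(loader) // accumulate_step      # displayed denominator, e.g. 526
    optimizer.zero_grad()
    for i, batch in enumerate(loader, start=1):
        loss = model(batch)
        (loss / accumulate_step).backward()     # scale so accumulated grads average out
        if i % accumulate_step == 0:
            optimizer.step()
            optimizer.zero_grad()
            # Produces lines like "2023-01-22 17:55:43.662454: step: 1184/526, loss: 0.0013..."
            print(f"{datetime.datetime.now()}: step: {i}/{total}, loss: {loss.item()}")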
2023-01-22 17:55:43.662454: step: 1184/526, loss: 0.0013589432928711176 2023-01-22 17:55:44.731199: step: 1188/526, loss: 0.0028363335877656937 2023-01-22 17:55:45.798747: step: 1192/526, loss: 0.0016845815116539598 2023-01-22 17:55:46.866135: step: 1196/526, loss: 0.003293329617008567 2023-01-22 17:55:47.928557: step: 1200/526, loss: 0.0057005551643669605 2023-01-22 17:55:48.972748: step: 1204/526, loss: 0.005940551403909922 2023-01-22 17:55:50.040261: step: 1208/526, loss: 0.0027162879705429077 2023-01-22 17:55:51.113079: step: 1212/526, loss: 0.0034054783172905445 2023-01-22 17:55:52.167836: step: 1216/526, loss: 0.00657767616212368 2023-01-22 17:55:53.231089: step: 1220/526, loss: 0.009493658319115639 2023-01-22 17:55:54.300551: step: 1224/526, loss: 2.3338565370067954e-05 2023-01-22 17:55:55.355981: step: 1228/526, loss: 0.0018641414353623986 2023-01-22 17:55:56.423071: step: 1232/526, loss: 0.0017801745561882854 2023-01-22 17:55:57.495644: step: 1236/526, loss: 0.004656145349144936 2023-01-22 17:55:58.563092: step: 1240/526, loss: 0.004090503789484501 2023-01-22 17:55:59.623932: step: 1244/526, loss: 0.009272011928260326 2023-01-22 17:56:00.705478: step: 1248/526, loss: 0.00012689131835941225 2023-01-22 17:56:01.769708: step: 1252/526, loss: 0.0055953143164515495 2023-01-22 17:56:02.834714: step: 1256/526, loss: 0.004648896865546703 2023-01-22 17:56:03.888047: step: 1260/526, loss: 0.002918428275734186 2023-01-22 17:56:04.939840: step: 1264/526, loss: 0.00188479945063591 2023-01-22 17:56:06.016352: step: 1268/526, loss: 0.0023304992355406284 2023-01-22 17:56:07.084052: step: 1272/526, loss: 0.003591935383155942 2023-01-22 17:56:08.153877: step: 1276/526, loss: 0.0075240060687065125 2023-01-22 17:56:09.213419: step: 1280/526, loss: 0.004160855431109667 2023-01-22 17:56:10.290700: step: 1284/526, loss: 0.0010498282499611378 2023-01-22 17:56:11.355259: step: 1288/526, loss: 0.013328592292964458 2023-01-22 17:56:12.421285: step: 1292/526, loss: 0.00090277154231444 2023-01-22 17:56:13.504988: step: 1296/526, loss: 0.004142351448535919 2023-01-22 17:56:14.577064: step: 1300/526, loss: 0.0008531195344403386 2023-01-22 17:56:15.632554: step: 1304/526, loss: 0.00687104556709528 2023-01-22 17:56:16.696913: step: 1308/526, loss: 0.0004095988115295768 2023-01-22 17:56:17.761395: step: 1312/526, loss: 0.010410458780825138 2023-01-22 17:56:18.849940: step: 1316/526, loss: 0.01988687925040722 2023-01-22 17:56:19.911985: step: 1320/526, loss: 0.0024965452030301094 2023-01-22 17:56:20.983971: step: 1324/526, loss: 0.003737257793545723 2023-01-22 17:56:22.050017: step: 1328/526, loss: 0.0007387580117210746 2023-01-22 17:56:23.117115: step: 1332/526, loss: 0.0034012352116405964 2023-01-22 17:56:24.190671: step: 1336/526, loss: 0.005119304172694683 2023-01-22 17:56:25.238215: step: 1340/526, loss: 0.000796099950093776 2023-01-22 17:56:26.318733: step: 1344/526, loss: 0.004002515226602554 2023-01-22 17:56:27.375210: step: 1348/526, loss: 0.002700971905142069 2023-01-22 17:56:28.445447: step: 1352/526, loss: 7.12940382072702e-05 2023-01-22 17:56:29.505525: step: 1356/526, loss: 0.001824081758968532 2023-01-22 17:56:30.573229: step: 1360/526, loss: 0.0008810501312837005 2023-01-22 17:56:31.633976: step: 1364/526, loss: 0.003603171557188034 2023-01-22 17:56:32.692849: step: 1368/526, loss: 0.006394225172698498 2023-01-22 17:56:33.776485: step: 1372/526, loss: 0.007410027086734772 2023-01-22 17:56:34.851372: step: 1376/526, loss: 0.005779553670436144 2023-01-22 17:56:35.930527: step: 1380/526, loss: 
0.007174394093453884 2023-01-22 17:56:36.996282: step: 1384/526, loss: 0.03878473863005638 2023-01-22 17:56:38.077862: step: 1388/526, loss: 0.07310990989208221 2023-01-22 17:56:39.149771: step: 1392/526, loss: 0.02617347612977028 2023-01-22 17:56:40.224746: step: 1396/526, loss: 0.05329953506588936 2023-01-22 17:56:41.292835: step: 1400/526, loss: 0.0 2023-01-22 17:56:42.355463: step: 1404/526, loss: 0.017215203493833542 2023-01-22 17:56:43.415444: step: 1408/526, loss: 0.0026311581023037434 2023-01-22 17:56:44.470992: step: 1412/526, loss: 0.0010681774001568556 2023-01-22 17:56:45.533741: step: 1416/526, loss: 0.0020601856522262096 2023-01-22 17:56:46.593818: step: 1420/526, loss: 0.002179694129154086 2023-01-22 17:56:47.663497: step: 1424/526, loss: 0.0013555905316025019 2023-01-22 17:56:48.720504: step: 1428/526, loss: 0.006631419062614441 2023-01-22 17:56:49.780602: step: 1432/526, loss: 0.012579905800521374 2023-01-22 17:56:50.848535: step: 1436/526, loss: 0.004499421454966068 2023-01-22 17:56:51.932822: step: 1440/526, loss: 0.04087740555405617 2023-01-22 17:56:52.998576: step: 1444/526, loss: 0.00010615806968417019 2023-01-22 17:56:54.066051: step: 1448/526, loss: 0.006730419117957354 2023-01-22 17:56:55.133472: step: 1452/526, loss: 0.00636819563806057 2023-01-22 17:56:56.205652: step: 1456/526, loss: 0.01905321143567562 2023-01-22 17:56:57.288833: step: 1460/526, loss: 0.0031310312915593386 2023-01-22 17:56:58.340965: step: 1464/526, loss: 0.0027232805732637644 2023-01-22 17:56:59.426063: step: 1468/526, loss: 0.010189528577029705 2023-01-22 17:57:00.499166: step: 1472/526, loss: 0.004538217093795538 2023-01-22 17:57:01.549999: step: 1476/526, loss: 0.0019517021719366312 2023-01-22 17:57:02.603938: step: 1480/526, loss: 0.0003497126745060086 2023-01-22 17:57:03.673525: step: 1484/526, loss: 6.269663572311401e-05 2023-01-22 17:57:04.737939: step: 1488/526, loss: 5.916481313761324e-05 2023-01-22 17:57:05.803725: step: 1492/526, loss: 0.004511381033807993 2023-01-22 17:57:06.877481: step: 1496/526, loss: 2.935009979410097e-05 2023-01-22 17:57:07.926250: step: 1500/526, loss: 0.00831483956426382 2023-01-22 17:57:09.010960: step: 1504/526, loss: 0.006276101339608431 2023-01-22 17:57:10.091043: step: 1508/526, loss: 0.001494137686677277 2023-01-22 17:57:11.165094: step: 1512/526, loss: 0.009022546000778675 2023-01-22 17:57:12.229257: step: 1516/526, loss: 0.02004232071340084 2023-01-22 17:57:13.315889: step: 1520/526, loss: 0.060813140124082565 2023-01-22 17:57:14.374925: step: 1524/526, loss: 0.00411297706887126 2023-01-22 17:57:15.443898: step: 1528/526, loss: 0.016256025061011314 2023-01-22 17:57:16.525099: step: 1532/526, loss: 0.02163807675242424 2023-01-22 17:57:17.590574: step: 1536/526, loss: 0.000977646792307496 2023-01-22 17:57:18.652563: step: 1540/526, loss: 0.00652306480333209 2023-01-22 17:57:19.717922: step: 1544/526, loss: 0.02499307133257389 2023-01-22 17:57:20.772732: step: 1548/526, loss: 0.0010517260525375605 2023-01-22 17:57:21.850074: step: 1552/526, loss: 0.00844988226890564 2023-01-22 17:57:22.940736: step: 1556/526, loss: 0.006858066190034151 2023-01-22 17:57:23.995874: step: 1560/526, loss: 0.0036917785182595253 2023-01-22 17:57:25.040550: step: 1564/526, loss: 0.0032497786451131105 2023-01-22 17:57:26.114890: step: 1568/526, loss: 0.0035531248431652784 2023-01-22 17:57:27.170487: step: 1572/526, loss: 0.0020319416653364897 2023-01-22 17:57:28.239225: step: 1576/526, loss: 0.00012026914191665128 2023-01-22 17:57:29.320326: step: 1580/526, loss: 
0.0006924492190591991 2023-01-22 17:57:30.381883: step: 1584/526, loss: 0.012939331121742725 2023-01-22 17:57:31.455833: step: 1588/526, loss: 0.0003669472935143858 2023-01-22 17:57:32.505206: step: 1592/526, loss: 0.004529666155576706 2023-01-22 17:57:33.576696: step: 1596/526, loss: 0.008283115923404694 2023-01-22 17:57:34.635779: step: 1600/526, loss: 0.004352471325546503 2023-01-22 17:57:35.700818: step: 1604/526, loss: 0.018357954919338226 2023-01-22 17:57:36.781028: step: 1608/526, loss: 0.007702977396547794 2023-01-22 17:57:37.874913: step: 1612/526, loss: 0.005042489618062973 2023-01-22 17:57:38.952753: step: 1616/526, loss: 0.00972728244960308 2023-01-22 17:57:40.015905: step: 1620/526, loss: 0.009273105300962925 2023-01-22 17:57:41.101975: step: 1624/526, loss: 7.472003198927268e-05 2023-01-22 17:57:42.197909: step: 1628/526, loss: 0.011308124288916588 2023-01-22 17:57:43.262595: step: 1632/526, loss: 0.002210986101999879 2023-01-22 17:57:44.343272: step: 1636/526, loss: 0.0015535252168774605 2023-01-22 17:57:45.407716: step: 1640/526, loss: 0.0019940512720495462 2023-01-22 17:57:46.471680: step: 1644/526, loss: 4.983371400157921e-05 2023-01-22 17:57:47.542540: step: 1648/526, loss: 0.0005068308091722429 2023-01-22 17:57:48.617102: step: 1652/526, loss: 0.008029392920434475 2023-01-22 17:57:49.695944: step: 1656/526, loss: 0.03286871686577797 2023-01-22 17:57:50.772377: step: 1660/526, loss: 0.0011309122201055288 2023-01-22 17:57:51.833075: step: 1664/526, loss: 0.0019505774835124612 2023-01-22 17:57:52.917549: step: 1668/526, loss: 0.0023209021892398596 2023-01-22 17:57:53.995524: step: 1672/526, loss: 0.0003546491207089275 2023-01-22 17:57:55.076247: step: 1676/526, loss: 0.004372374154627323 2023-01-22 17:57:56.146187: step: 1680/526, loss: 0.0036947571206837893 2023-01-22 17:57:57.208257: step: 1684/526, loss: 0.026176095008850098 2023-01-22 17:57:58.270637: step: 1688/526, loss: 7.028792197161238e-07 2023-01-22 17:57:59.320629: step: 1692/526, loss: 0.006586136296391487 2023-01-22 17:58:00.380353: step: 1696/526, loss: 0.01703745685517788 2023-01-22 17:58:01.470264: step: 1700/526, loss: 0.009997772052884102 2023-01-22 17:58:02.547051: step: 1704/526, loss: 0.0013539936626330018 2023-01-22 17:58:03.630178: step: 1708/526, loss: 0.00545844342559576 2023-01-22 17:58:04.692246: step: 1712/526, loss: 0.003199740080162883 2023-01-22 17:58:05.765441: step: 1716/526, loss: 0.004632893949747086 2023-01-22 17:58:06.819588: step: 1720/526, loss: 0.0006263629184104502 2023-01-22 17:58:07.898251: step: 1724/526, loss: 0.01591830886900425 2023-01-22 17:58:08.965618: step: 1728/526, loss: 0.007647314108908176 2023-01-22 17:58:10.036870: step: 1732/526, loss: 0.003030173946171999 2023-01-22 17:58:11.097856: step: 1736/526, loss: 0.007665179669857025 2023-01-22 17:58:12.159764: step: 1740/526, loss: 0.007942522875964642 2023-01-22 17:58:13.224543: step: 1744/526, loss: 0.0168094951659441 2023-01-22 17:58:14.294191: step: 1748/526, loss: 0.0019358332501724362 2023-01-22 17:58:15.364571: step: 1752/526, loss: 5.112489088787697e-05 2023-01-22 17:58:16.445975: step: 1756/526, loss: 0.001044497941620648 2023-01-22 17:58:17.516257: step: 1760/526, loss: 0.011358664371073246 2023-01-22 17:58:18.606711: step: 1764/526, loss: 0.0041775889694690704 2023-01-22 17:58:19.684949: step: 1768/526, loss: 0.0010591248283162713 2023-01-22 17:58:20.746153: step: 1772/526, loss: 0.00029247647034935653 2023-01-22 17:58:21.817658: step: 1776/526, loss: 0.006607255432754755 2023-01-22 17:58:22.880998: step: 
1780/526, loss: 0.001824350212700665 2023-01-22 17:58:23.949127: step: 1784/526, loss: 0.0005624218611046672 2023-01-22 17:58:25.016852: step: 1788/526, loss: 0.01835312321782112 2023-01-22 17:58:26.080411: step: 1792/526, loss: 0.0026089251041412354 2023-01-22 17:58:27.144489: step: 1796/526, loss: 0.005027491599321365 2023-01-22 17:58:28.211086: step: 1800/526, loss: 0.0036666837986558676 2023-01-22 17:58:29.275469: step: 1804/526, loss: 0.004081855993717909 2023-01-22 17:58:30.348281: step: 1808/526, loss: 0.004595533944666386 2023-01-22 17:58:31.392431: step: 1812/526, loss: 0.008726425468921661 2023-01-22 17:58:32.445816: step: 1816/526, loss: 0.00014430210285354406 2023-01-22 17:58:33.506515: step: 1820/526, loss: 0.02427584119141102 2023-01-22 17:58:34.588788: step: 1824/526, loss: 0.024298807606101036 2023-01-22 17:58:35.654948: step: 1828/526, loss: 0.017614293843507767 2023-01-22 17:58:36.723546: step: 1832/526, loss: 0.004321054555475712 2023-01-22 17:58:37.790939: step: 1836/526, loss: 0.001116780680604279 2023-01-22 17:58:38.842293: step: 1840/526, loss: 0.004009234253317118 2023-01-22 17:58:39.910168: step: 1844/526, loss: 0.004184509627521038 2023-01-22 17:58:40.975279: step: 1848/526, loss: 0.0007857540622353554 2023-01-22 17:58:42.029520: step: 1852/526, loss: 0.002389674074947834 2023-01-22 17:58:43.099681: step: 1856/526, loss: 0.005993325263261795 2023-01-22 17:58:44.174847: step: 1860/526, loss: 0.011724431067705154 2023-01-22 17:58:45.235578: step: 1864/526, loss: 0.0001669973280513659 2023-01-22 17:58:46.317021: step: 1868/526, loss: 0.004174278117716312 2023-01-22 17:58:47.386780: step: 1872/526, loss: 0.0048838225193321705 2023-01-22 17:58:48.460035: step: 1876/526, loss: 0.00153544161003083 2023-01-22 17:58:49.511586: step: 1880/526, loss: 0.0016118037747219205 2023-01-22 17:58:50.589871: step: 1884/526, loss: 0.0014590248465538025 2023-01-22 17:58:51.663213: step: 1888/526, loss: 0.034674499183893204 2023-01-22 17:58:52.722235: step: 1892/526, loss: 0.00033258445910178125 2023-01-22 17:58:53.786705: step: 1896/526, loss: 0.006668785586953163 2023-01-22 17:58:54.859859: step: 1900/526, loss: 0.006076838355511427 2023-01-22 17:58:55.935952: step: 1904/526, loss: 0.010348460637032986 2023-01-22 17:58:57.028196: step: 1908/526, loss: 0.0012399296974763274 2023-01-22 17:58:58.098328: step: 1912/526, loss: 0.0014980159467086196 2023-01-22 17:58:59.176357: step: 1916/526, loss: 0.005179837811738253 2023-01-22 17:59:00.233930: step: 1920/526, loss: 0.0004589584714267403 2023-01-22 17:59:01.334309: step: 1924/526, loss: 0.002740239491686225 2023-01-22 17:59:02.422677: step: 1928/526, loss: 0.0029836641624569893 2023-01-22 17:59:03.500614: step: 1932/526, loss: 0.0030564088374376297 2023-01-22 17:59:04.567893: step: 1936/526, loss: 0.0006544237257912755 2023-01-22 17:59:05.635219: step: 1940/526, loss: 0.0024999219458550215 2023-01-22 17:59:06.705376: step: 1944/526, loss: 0.007214280776679516 2023-01-22 17:59:07.786212: step: 1948/526, loss: 0.0008418355137109756 2023-01-22 17:59:08.844992: step: 1952/526, loss: 8.560383139410987e-05 2023-01-22 17:59:09.909083: step: 1956/526, loss: 0.004812711384147406 2023-01-22 17:59:10.991927: step: 1960/526, loss: 0.014564109966158867 2023-01-22 17:59:12.088927: step: 1964/526, loss: 0.02157345600426197 2023-01-22 17:59:13.189276: step: 1968/526, loss: 0.005345316603779793 2023-01-22 17:59:14.269970: step: 1972/526, loss: 0.008475751616060734 2023-01-22 17:59:15.341828: step: 1976/526, loss: 0.004904601722955704 2023-01-22 
17:59:16.404737: step: 1980/526, loss: 0.03099743276834488 2023-01-22 17:59:17.483601: step: 1984/526, loss: 0.0004093674651812762 2023-01-22 17:59:18.549803: step: 1988/526, loss: 0.016514822840690613 2023-01-22 17:59:19.608651: step: 1992/526, loss: 0.0014487385051324964 2023-01-22 17:59:20.694681: step: 1996/526, loss: 0.0004042002547066659 2023-01-22 17:59:21.755312: step: 2000/526, loss: 0.0007758474675938487 2023-01-22 17:59:22.827290: step: 2004/526, loss: 0.0043033999390900135 2023-01-22 17:59:23.894260: step: 2008/526, loss: 0.0067427936010062695 2023-01-22 17:59:24.958388: step: 2012/526, loss: 0.0038654550444334745 2023-01-22 17:59:26.022353: step: 2016/526, loss: 0.0068710013292729855 2023-01-22 17:59:27.079566: step: 2020/526, loss: 0.008543653413653374 2023-01-22 17:59:28.137703: step: 2024/526, loss: 0.0040159812197089195 2023-01-22 17:59:29.198108: step: 2028/526, loss: 0.007723723538219929 2023-01-22 17:59:30.256184: step: 2032/526, loss: 0.003561505815014243 2023-01-22 17:59:31.311514: step: 2036/526, loss: 8.433743641944602e-05 2023-01-22 17:59:32.353856: step: 2040/526, loss: 0.0007240792619995773 2023-01-22 17:59:33.443570: step: 2044/526, loss: 0.004231943283230066 2023-01-22 17:59:34.532485: step: 2048/526, loss: 0.0024330653250217438 2023-01-22 17:59:35.588628: step: 2052/526, loss: 0.00860643945634365 2023-01-22 17:59:36.669095: step: 2056/526, loss: 0.0004952047020196915 2023-01-22 17:59:37.744655: step: 2060/526, loss: 0.00782067608088255 2023-01-22 17:59:38.816208: step: 2064/526, loss: 0.0006878709536977112 2023-01-22 17:59:39.884268: step: 2068/526, loss: 4.943471139995381e-05 2023-01-22 17:59:40.971175: step: 2072/526, loss: 0.0070662242360413074 2023-01-22 17:59:42.042775: step: 2076/526, loss: 0.0008313669823110104 2023-01-22 17:59:43.117905: step: 2080/526, loss: 0.005298133939504623 2023-01-22 17:59:44.204232: step: 2084/526, loss: 0.001045431476086378 2023-01-22 17:59:45.285036: step: 2088/526, loss: 0.0005098225083202124 2023-01-22 17:59:46.354384: step: 2092/526, loss: 0.012814799323678017 2023-01-22 17:59:47.429600: step: 2096/526, loss: 0.0020076052751392126 2023-01-22 17:59:48.509768: step: 2100/526, loss: 0.004314163234084845 2023-01-22 17:59:49.581660: step: 2104/526, loss: 0.006428925786167383
==================================================
Loss: 0.006
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34056843891402716, 'r': 0.28563804554079697, 'f1': 0.31069401444788447}, 'combined': 0.22893243169844119, 'stategy': 1, 'epoch': 12}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34009835379464287, 'r': 0.23919005102040816, 'f1': 0.28085541474654374}, 'combined': 0.15319386258902384, 'stategy': 1, 'epoch': 12}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3127422480620155, 'r': 0.33173228969006957, 'f1': 0.321957489257213}, 'combined': 0.23723183418952537, 'stategy': 1, 'epoch': 12}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3403609158988575, 'r': 0.26655896066735774, 'f1': 0.29897274911177224}, 'combined': 0.16307604497005757, 'stategy': 1, 'epoch': 12}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3242787560494902, 'r': 0.32858606400460677, 'f1': 0.32641820118836523}, 'combined': 0.24051867455984804, 'stategy': 1, 'epoch': 12}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33427376588894275, 'r': 0.2726086510856857, 'f1': 0.30030832287608233}, 'combined': 0.16380453975059034, 'stategy': 1, 'epoch': 12}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3252032520325203, 'r': 0.38095238095238093, 'f1': 0.3508771929824561}, 'combined': 0.23391812865497075, 'stategy': 1, 'epoch': 12}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3557692307692308, 'r': 0.40217391304347827, 'f1': 0.37755102040816324}, 'combined': 0.18877551020408162, 'stategy': 1, 'epoch': 12}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 12}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1}
******************************
Epoch: 13
command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 18:02:33.407106: step: 4/526, loss: 0.0038160455878823996 2023-01-22 18:02:34.468906: step: 8/526,
loss: 0.002674256917089224 2023-01-22 18:02:35.538100: step: 12/526, loss: 0.004621006548404694 2023-01-22 18:02:36.596074: step: 16/526, loss: 0.003732952056452632 2023-01-22 18:02:37.657402: step: 20/526, loss: 0.006812302861362696 2023-01-22 18:02:38.714798: step: 24/526, loss: 0.00046529434621334076 2023-01-22 18:02:39.804135: step: 28/526, loss: 0.0023023944813758135 2023-01-22 18:02:40.859698: step: 32/526, loss: 0.0070501365698874 2023-01-22 18:02:41.911338: step: 36/526, loss: 0.0027976122219115496 2023-01-22 18:02:42.985372: step: 40/526, loss: 0.0023237173445522785 2023-01-22 18:02:44.050130: step: 44/526, loss: 0.0016857532318681479 2023-01-22 18:02:45.108199: step: 48/526, loss: 0.003736741142347455 2023-01-22 18:02:46.181520: step: 52/526, loss: 0.0010771984234452248 2023-01-22 18:02:47.237256: step: 56/526, loss: 0.0005143777816556394 2023-01-22 18:02:48.291581: step: 60/526, loss: 0.004283219110220671 2023-01-22 18:02:49.371717: step: 64/526, loss: 0.00323097943328321 2023-01-22 18:02:50.432333: step: 68/526, loss: 0.0270400233566761 2023-01-22 18:02:51.493172: step: 72/526, loss: 0.0008317366009578109 2023-01-22 18:02:52.562204: step: 76/526, loss: 0.0031352487858384848 2023-01-22 18:02:53.634610: step: 80/526, loss: 0.0028990048449486494 2023-01-22 18:02:54.716507: step: 84/526, loss: 0.00010778255091281608 2023-01-22 18:02:55.767810: step: 88/526, loss: 0.007569632027298212 2023-01-22 18:02:56.821815: step: 92/526, loss: 0.003725457936525345 2023-01-22 18:02:57.896700: step: 96/526, loss: 0.005097818095237017 2023-01-22 18:02:58.946601: step: 100/526, loss: 0.005310761742293835 2023-01-22 18:03:00.005477: step: 104/526, loss: 0.0038388094399124384 2023-01-22 18:03:01.056503: step: 108/526, loss: 0.02295560948550701 2023-01-22 18:03:02.140421: step: 112/526, loss: 0.0023131135385483503 2023-01-22 18:03:03.204704: step: 116/526, loss: 0.005850787740200758 2023-01-22 18:03:04.253994: step: 120/526, loss: 0.0006953251431696117 2023-01-22 18:03:05.328989: step: 124/526, loss: 0.005086628720164299 2023-01-22 18:03:06.399500: step: 128/526, loss: 0.0022583589889109135 2023-01-22 18:03:07.471364: step: 132/526, loss: 0.004099803976714611 2023-01-22 18:03:08.539003: step: 136/526, loss: 0.001496132928878069 2023-01-22 18:03:09.600341: step: 140/526, loss: 3.492146424832754e-05 2023-01-22 18:03:10.667099: step: 144/526, loss: 0.0012209846172481775 2023-01-22 18:03:11.726352: step: 148/526, loss: 0.004968051798641682 2023-01-22 18:03:12.843202: step: 152/526, loss: 0.003387398086488247 2023-01-22 18:03:13.905807: step: 156/526, loss: 0.00079245341476053 2023-01-22 18:03:14.984724: step: 160/526, loss: 0.004783318378031254 2023-01-22 18:03:16.052647: step: 164/526, loss: 0.00048400519881397486 2023-01-22 18:03:17.115911: step: 168/526, loss: 0.0016160618979483843 2023-01-22 18:03:18.191014: step: 172/526, loss: 0.00713609391823411 2023-01-22 18:03:19.252599: step: 176/526, loss: 0.004505706951022148 2023-01-22 18:03:20.330569: step: 180/526, loss: 0.013733535073697567 2023-01-22 18:03:21.404719: step: 184/526, loss: 0.00499225128442049 2023-01-22 18:03:22.466807: step: 188/526, loss: 0.00606751162558794 2023-01-22 18:03:23.534622: step: 192/526, loss: 0.002098146826028824 2023-01-22 18:03:24.586256: step: 196/526, loss: 0.002904574852436781 2023-01-22 18:03:25.647673: step: 200/526, loss: 0.0004220707342028618 2023-01-22 18:03:26.712409: step: 204/526, loss: 0.002314547775313258 2023-01-22 18:03:27.785030: step: 208/526, loss: 0.009258140809834003 2023-01-22 18:03:28.858650: step: 
212/526, loss: 0.0006761526456102729 2023-01-22 18:03:29.930732: step: 216/526, loss: 0.004619353450834751 2023-01-22 18:03:30.988428: step: 220/526, loss: 0.0006481197196990252 2023-01-22 18:03:32.067756: step: 224/526, loss: 0.008110636845231056 2023-01-22 18:03:33.138519: step: 228/526, loss: 0.004194497596472502 2023-01-22 18:03:34.207182: step: 232/526, loss: 0.006563953123986721 2023-01-22 18:03:35.275315: step: 236/526, loss: 0.004174908623099327 2023-01-22 18:03:36.348422: step: 240/526, loss: 0.003269864246249199 2023-01-22 18:03:37.416335: step: 244/526, loss: 0.0012675767065957189 2023-01-22 18:03:38.482453: step: 248/526, loss: 0.0012934055412188172 2023-01-22 18:03:39.559203: step: 252/526, loss: 0.02725783921778202 2023-01-22 18:03:40.624516: step: 256/526, loss: 0.001841770252212882 2023-01-22 18:03:41.694339: step: 260/526, loss: 0.005191021133214235 2023-01-22 18:03:42.765098: step: 264/526, loss: 0.00185909285210073 2023-01-22 18:03:43.845852: step: 268/526, loss: 0.0045059965923428535 2023-01-22 18:03:44.919897: step: 272/526, loss: 0.0065894341096282005 2023-01-22 18:03:45.987184: step: 276/526, loss: 0.004639733117073774 2023-01-22 18:03:47.040639: step: 280/526, loss: 0.0016907708486542106 2023-01-22 18:03:48.102992: step: 284/526, loss: 0.0005969437770545483 2023-01-22 18:03:49.165619: step: 288/526, loss: 0.004094295669347048 2023-01-22 18:03:50.234771: step: 292/526, loss: 0.0058018798008561134 2023-01-22 18:03:51.304665: step: 296/526, loss: 0.00014649178774561733 2023-01-22 18:03:52.369446: step: 300/526, loss: 0.0004003380599897355 2023-01-22 18:03:53.439432: step: 304/526, loss: 0.006760130170732737 2023-01-22 18:03:54.500631: step: 308/526, loss: 0.00048514080117456615 2023-01-22 18:03:55.587046: step: 312/526, loss: 0.00806464534252882 2023-01-22 18:03:56.647671: step: 316/526, loss: 0.0018172974232584238 2023-01-22 18:03:57.724506: step: 320/526, loss: 0.006813384592533112 2023-01-22 18:03:58.784611: step: 324/526, loss: 1.2232379049237352e-05 2023-01-22 18:03:59.862393: step: 328/526, loss: 0.00028781688888557255 2023-01-22 18:04:00.929802: step: 332/526, loss: 0.0031622473616153 2023-01-22 18:04:01.991681: step: 336/526, loss: 0.0003086850920226425 2023-01-22 18:04:03.097974: step: 340/526, loss: 0.0005392441526055336 2023-01-22 18:04:04.183513: step: 344/526, loss: 0.005243216175585985 2023-01-22 18:04:05.253274: step: 348/526, loss: 0.0031118332408368587 2023-01-22 18:04:06.324496: step: 352/526, loss: 0.0036169998347759247 2023-01-22 18:04:07.390382: step: 356/526, loss: 0.00370042328722775 2023-01-22 18:04:08.468777: step: 360/526, loss: 0.010754608549177647 2023-01-22 18:04:09.549401: step: 364/526, loss: 0.003144514514133334 2023-01-22 18:04:10.619217: step: 368/526, loss: 0.002588221337646246 2023-01-22 18:04:11.680816: step: 372/526, loss: 0.006095807068049908 2023-01-22 18:04:12.762067: step: 376/526, loss: 0.0034547345712780952 2023-01-22 18:04:13.844390: step: 380/526, loss: 0.0033636896405369043 2023-01-22 18:04:14.935496: step: 384/526, loss: 0.0008751609711907804 2023-01-22 18:04:15.988682: step: 388/526, loss: 0.0005274321883916855 2023-01-22 18:04:17.054385: step: 392/526, loss: 0.045020926743745804 2023-01-22 18:04:18.135576: step: 396/526, loss: 0.004994106013327837 2023-01-22 18:04:19.199445: step: 400/526, loss: 0.0030030838679522276 2023-01-22 18:04:20.271649: step: 404/526, loss: 0.002722801873460412 2023-01-22 18:04:21.327237: step: 408/526, loss: 0.00035113509511575103 2023-01-22 18:04:22.417170: step: 412/526, loss: 
0.002638966543599963 2023-01-22 18:04:23.500020: step: 416/526, loss: 0.0010901845525950193 2023-01-22 18:04:24.584235: step: 420/526, loss: 0.006364356726408005 2023-01-22 18:04:25.669962: step: 424/526, loss: 0.006897682324051857 2023-01-22 18:04:26.744529: step: 428/526, loss: 0.007479271851480007 2023-01-22 18:04:27.817131: step: 432/526, loss: 0.01680966094136238 2023-01-22 18:04:28.905236: step: 436/526, loss: 0.007896722294390202 2023-01-22 18:04:29.967309: step: 440/526, loss: 0.0063851564191281796 2023-01-22 18:04:31.046587: step: 444/526, loss: 0.008131866343319416 2023-01-22 18:04:32.118577: step: 448/526, loss: 0.0006371065974235535 2023-01-22 18:04:33.198676: step: 452/526, loss: 0.005970512051135302 2023-01-22 18:04:34.262381: step: 456/526, loss: 0.00030315155163407326 2023-01-22 18:04:35.334039: step: 460/526, loss: 0.005403812509030104 2023-01-22 18:04:36.426446: step: 464/526, loss: 0.0019965041428804398 2023-01-22 18:04:37.487837: step: 468/526, loss: 0.002764735836535692 2023-01-22 18:04:38.558449: step: 472/526, loss: 0.0028944150544703007 2023-01-22 18:04:39.628794: step: 476/526, loss: 0.006938498001545668 2023-01-22 18:04:40.694841: step: 480/526, loss: 0.004693842492997646 2023-01-22 18:04:41.760845: step: 484/526, loss: 0.0007596335490234196 2023-01-22 18:04:42.858532: step: 488/526, loss: 0.0001611992047401145 2023-01-22 18:04:43.913431: step: 492/526, loss: 0.0010644515277817845 2023-01-22 18:04:44.991607: step: 496/526, loss: 0.003196912119165063 2023-01-22 18:04:46.052803: step: 500/526, loss: 0.003928244113922119 2023-01-22 18:04:47.137859: step: 504/526, loss: 0.0023313816636800766 2023-01-22 18:04:48.204213: step: 508/526, loss: 0.0010442739585414529 2023-01-22 18:04:49.296673: step: 512/526, loss: 0.005013664253056049 2023-01-22 18:04:50.357767: step: 516/526, loss: 0.005086920224130154 2023-01-22 18:04:51.416262: step: 520/526, loss: 0.00214071455411613 2023-01-22 18:04:52.485448: step: 524/526, loss: 0.0009304194245487452 2023-01-22 18:04:53.556963: step: 528/526, loss: 0.003397251944988966 2023-01-22 18:04:54.628347: step: 532/526, loss: 0.001379756722599268 2023-01-22 18:04:55.691671: step: 536/526, loss: 0.0003635635948739946 2023-01-22 18:04:56.766614: step: 540/526, loss: 2.497303648851812e-05 2023-01-22 18:04:57.832816: step: 544/526, loss: 0.007742059882730246 2023-01-22 18:04:58.898423: step: 548/526, loss: 0.00014858537178952247 2023-01-22 18:04:59.974722: step: 552/526, loss: 0.002425577025860548 2023-01-22 18:05:01.046621: step: 556/526, loss: 0.00551773514598608 2023-01-22 18:05:02.122119: step: 560/526, loss: 0.0014679240994155407 2023-01-22 18:05:03.194836: step: 564/526, loss: 0.010785788297653198 2023-01-22 18:05:04.271182: step: 568/526, loss: 0.0007370549137704074 2023-01-22 18:05:05.350279: step: 572/526, loss: 0.00351623073220253 2023-01-22 18:05:06.431424: step: 576/526, loss: 0.0031941228080540895 2023-01-22 18:05:07.507052: step: 580/526, loss: 0.0018449927447363734 2023-01-22 18:05:08.571513: step: 584/526, loss: 0.0013085560640320182 2023-01-22 18:05:09.660257: step: 588/526, loss: 0.012037588283419609 2023-01-22 18:05:10.735746: step: 592/526, loss: 0.00040585047099739313 2023-01-22 18:05:11.799237: step: 596/526, loss: 0.00393644068390131 2023-01-22 18:05:12.903854: step: 600/526, loss: 0.0020843103993684053 2023-01-22 18:05:13.960009: step: 604/526, loss: 0.0017401882214471698 2023-01-22 18:05:15.010751: step: 608/526, loss: 0.00011535193334566429 2023-01-22 18:05:16.078159: step: 612/526, loss: 0.0033245920203626156 
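Note: in the epoch-12 summary above, each logged 'combined' value appears to be the product of the corresponding template F1 and slot F1 (this is consistent with every Dev/Test/Sample entry shown). A quick check against the Dev Chinese numbers, assuming that relationship:

# Assumption (consistent with the logged values above): combined = template_f1 * slot_f1
template_f1 = 0.7368421052631579   # Dev Chinese template f1, epoch 12
slot_f1 = 0.31069401444788447      # Dev Chinese slot f1, epoch 12
print(template_f1 * slot_f1)       # ~0.228932..., matching the logged 'combined': 0.22893243169844119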
2023-01-22 18:05:17.151847: step: 616/526, loss: 0.00035558539093472064 2023-01-22 18:05:18.234241: step: 620/526, loss: 0.023518720641732216 2023-01-22 18:05:19.313591: step: 624/526, loss: 0.007926519960165024 2023-01-22 18:05:20.395142: step: 628/526, loss: 0.002083064988255501 2023-01-22 18:05:21.464643: step: 632/526, loss: 0.0016286555910483003 2023-01-22 18:05:22.551044: step: 636/526, loss: 0.0015901544829830527 2023-01-22 18:05:23.631104: step: 640/526, loss: 0.0018651778809726238 2023-01-22 18:05:24.692631: step: 644/526, loss: 0.004405217245221138 2023-01-22 18:05:25.745130: step: 648/526, loss: 0.005960434675216675 2023-01-22 18:05:26.801643: step: 652/526, loss: 4.190753770672018e-06 2023-01-22 18:05:27.865401: step: 656/526, loss: 0.0030449663754552603 2023-01-22 18:05:28.932503: step: 660/526, loss: 0.0040357387624681 2023-01-22 18:05:29.990792: step: 664/526, loss: 0.0010376276914030313 2023-01-22 18:05:31.091761: step: 668/526, loss: 0.00599429989233613 2023-01-22 18:05:32.164329: step: 672/526, loss: 9.253184543922544e-05 2023-01-22 18:05:33.224683: step: 676/526, loss: 0.00021377208759076893 2023-01-22 18:05:34.289279: step: 680/526, loss: 0.0012489090440794826 2023-01-22 18:05:35.369646: step: 684/526, loss: 0.0052191708236932755 2023-01-22 18:05:36.469347: step: 688/526, loss: 0.0028376926202327013 2023-01-22 18:05:37.537016: step: 692/526, loss: 0.009810381568968296 2023-01-22 18:05:38.601341: step: 696/526, loss: 0.006933249533176422 2023-01-22 18:05:39.665383: step: 700/526, loss: 0.0074748084880411625 2023-01-22 18:05:40.753712: step: 704/526, loss: 0.008868585340678692 2023-01-22 18:05:41.847436: step: 708/526, loss: 0.060324057936668396 2023-01-22 18:05:42.908911: step: 712/526, loss: 0.0018125202041119337 2023-01-22 18:05:43.989560: step: 716/526, loss: 9.370889893034473e-05 2023-01-22 18:05:45.060327: step: 720/526, loss: 0.0038213878870010376 2023-01-22 18:05:46.128906: step: 724/526, loss: 0.01805827207863331 2023-01-22 18:05:47.203191: step: 728/526, loss: 0.0323360413312912 2023-01-22 18:05:48.263417: step: 732/526, loss: 0.010594063438475132 2023-01-22 18:05:49.345960: step: 736/526, loss: 0.002987007610499859 2023-01-22 18:05:50.418846: step: 740/526, loss: 0.0011591152288019657 2023-01-22 18:05:51.485612: step: 744/526, loss: 0.006188101135194302 2023-01-22 18:05:52.579851: step: 748/526, loss: 0.0031672571785748005 2023-01-22 18:05:53.643031: step: 752/526, loss: 0.0032132300548255444 2023-01-22 18:05:54.698387: step: 756/526, loss: 3.894660494552227e-06 2023-01-22 18:05:55.772670: step: 760/526, loss: 0.007211936637759209 2023-01-22 18:05:56.871495: step: 764/526, loss: 0.012980573810636997 2023-01-22 18:05:57.939930: step: 768/526, loss: 0.0025059001054614782 2023-01-22 18:05:59.003541: step: 772/526, loss: 0.002924935193732381 2023-01-22 18:06:00.082678: step: 776/526, loss: 0.008592157624661922 2023-01-22 18:06:01.152176: step: 780/526, loss: 0.00019979050557594746 2023-01-22 18:06:02.247370: step: 784/526, loss: 0.0001128509029513225 2023-01-22 18:06:03.301331: step: 788/526, loss: 0.0 2023-01-22 18:06:04.379734: step: 792/526, loss: 0.0019905706867575645 2023-01-22 18:06:05.456911: step: 796/526, loss: 0.003986770287156105 2023-01-22 18:06:06.540490: step: 800/526, loss: 0.0027946890331804752 2023-01-22 18:06:07.607537: step: 804/526, loss: 0.019000818952918053 2023-01-22 18:06:08.672993: step: 808/526, loss: 0.008973361924290657 2023-01-22 18:06:09.749821: step: 812/526, loss: 0.0011635440168902278 2023-01-22 18:06:10.826880: step: 816/526, loss: 
6.616743485210463e-05 2023-01-22 18:06:11.892914: step: 820/526, loss: 0.0033114461693912745 2023-01-22 18:06:12.982643: step: 824/526, loss: 0.001900022616609931 2023-01-22 18:06:14.053571: step: 828/526, loss: 0.002874800469726324 2023-01-22 18:06:15.119424: step: 832/526, loss: 0.0013039191253483295 2023-01-22 18:06:16.185969: step: 836/526, loss: 0.0022603338584303856 2023-01-22 18:06:17.250533: step: 840/526, loss: 0.008574477396905422 2023-01-22 18:06:18.324193: step: 844/526, loss: 2.190962368331384e-05 2023-01-22 18:06:19.392835: step: 848/526, loss: 0.004295805934816599 2023-01-22 18:06:20.469822: step: 852/526, loss: 0.003289527492597699 2023-01-22 18:06:21.544339: step: 856/526, loss: 0.005523450206965208 2023-01-22 18:06:22.631261: step: 860/526, loss: 0.0016545332036912441 2023-01-22 18:06:23.711022: step: 864/526, loss: 0.0038719524163752794 2023-01-22 18:06:24.788819: step: 868/526, loss: 0.008013435639441013 2023-01-22 18:06:25.866809: step: 872/526, loss: 0.000556713028345257 2023-01-22 18:06:26.931164: step: 876/526, loss: 0.003172683995217085 2023-01-22 18:06:27.995104: step: 880/526, loss: 0.00021134625421836972 2023-01-22 18:06:29.054652: step: 884/526, loss: 0.0016915379092097282 2023-01-22 18:06:30.114063: step: 888/526, loss: 0.0017804628005251288 2023-01-22 18:06:31.192258: step: 892/526, loss: 0.0026804746594280005 2023-01-22 18:06:32.254370: step: 896/526, loss: 0.014465970918536186 2023-01-22 18:06:33.345243: step: 900/526, loss: 0.007293372415006161 2023-01-22 18:06:34.438549: step: 904/526, loss: 0.006600086577236652 2023-01-22 18:06:35.492618: step: 908/526, loss: 0.0007659685797989368 2023-01-22 18:06:36.560285: step: 912/526, loss: 0.00018975512648466974 2023-01-22 18:06:37.653748: step: 916/526, loss: 0.005219451151788235 2023-01-22 18:06:38.734002: step: 920/526, loss: 0.001895737717859447 2023-01-22 18:06:39.805577: step: 924/526, loss: 0.000740026356652379 2023-01-22 18:06:40.874585: step: 928/526, loss: 0.006662923377007246 2023-01-22 18:06:41.961582: step: 932/526, loss: 0.0013474173611029983 2023-01-22 18:06:43.049375: step: 936/526, loss: 0.002233668463304639 2023-01-22 18:06:44.113606: step: 940/526, loss: 0.0038880868814885616 2023-01-22 18:06:45.183332: step: 944/526, loss: 0.011396192014217377 2023-01-22 18:06:46.255375: step: 948/526, loss: 0.0037230944726616144 2023-01-22 18:06:47.331577: step: 952/526, loss: 0.0034529289696365595 2023-01-22 18:06:48.394776: step: 956/526, loss: 0.004115849733352661 2023-01-22 18:06:49.462732: step: 960/526, loss: 0.0022732792422175407 2023-01-22 18:06:50.561646: step: 964/526, loss: 0.007227274589240551 2023-01-22 18:06:51.641275: step: 968/526, loss: 0.0020248712971806526 2023-01-22 18:06:52.708612: step: 972/526, loss: 0.005124710500240326 2023-01-22 18:06:53.784662: step: 976/526, loss: 0.00014067483425606042 2023-01-22 18:06:54.865919: step: 980/526, loss: 0.021593360230326653 2023-01-22 18:06:55.931934: step: 984/526, loss: 0.005690298974514008 2023-01-22 18:06:57.016962: step: 988/526, loss: 0.009877980686724186 2023-01-22 18:06:58.076639: step: 992/526, loss: 0.004007709678262472 2023-01-22 18:06:59.146578: step: 996/526, loss: 0.000538425229024142 2023-01-22 18:07:00.200820: step: 1000/526, loss: 0.002025959547609091 2023-01-22 18:07:01.271703: step: 1004/526, loss: 0.005184090230613947 2023-01-22 18:07:02.333265: step: 1008/526, loss: 0.005766283720731735 2023-01-22 18:07:03.396517: step: 1012/526, loss: 0.0011326140956953168 2023-01-22 18:07:04.480192: step: 1016/526, loss: 0.0030917273834347725 
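Note: the "Current best result" block above keeps a separate best epoch per language (Chinese: epoch 3, Korean: epoch 8, Russian: epoch 1), and in each case the stored dev 'combined' score exceeds the epoch-12 dev score, so none of the bests were replaced. A sketch of bookkeeping that would behave this way, assuming selection is by the dev 'combined' score; the actual criterion used by train.py is not visible in the log:

# Hypothetical helper; the names and dict layout are illustrative assumptions.
best = {}  # language -> {'epoch': int, 'dev': dict, 'test': dict}

def update_best(lang, epoch, dev_scores, test_scores):
    # Keep, per language, the epoch with the highest dev 'combined' score seen so far.
    if lang not in best or dev_scores['combined'] > best[lang]['dev']['combined']:
        best[lang] = {'epoch': epoch, 'dev': dev_scores, 'test': test_scores}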
2023-01-22 18:07:05.527241: step: 1020/526, loss: 0.006913172546774149 2023-01-22 18:07:06.603856: step: 1024/526, loss: 0.002029015449807048 2023-01-22 18:07:07.662038: step: 1028/526, loss: 0.004745482467114925 2023-01-22 18:07:08.730030: step: 1032/526, loss: 0.0005182833410799503 2023-01-22 18:07:09.778578: step: 1036/526, loss: 0.0001842290657805279 2023-01-22 18:07:10.854536: step: 1040/526, loss: 0.0011114665539935231 2023-01-22 18:07:11.919462: step: 1044/526, loss: 0.0016391983954235911 2023-01-22 18:07:12.975334: step: 1048/526, loss: 0.0013662866549566388 2023-01-22 18:07:14.042711: step: 1052/526, loss: 0.0003473217075224966 2023-01-22 18:07:15.118127: step: 1056/526, loss: 0.0001806815853342414 2023-01-22 18:07:16.195983: step: 1060/526, loss: 4.835849176743068e-05 2023-01-22 18:07:17.262526: step: 1064/526, loss: 0.013094817288219929 2023-01-22 18:07:18.328297: step: 1068/526, loss: 0.00040008482756093144 2023-01-22 18:07:19.401143: step: 1072/526, loss: 0.019123949110507965 2023-01-22 18:07:20.462527: step: 1076/526, loss: 0.006047382019460201 2023-01-22 18:07:21.535088: step: 1080/526, loss: 0.0006867832271382213 2023-01-22 18:07:22.610845: step: 1084/526, loss: 0.0026648559141904116 2023-01-22 18:07:23.680041: step: 1088/526, loss: 0.0013683350989595056 2023-01-22 18:07:24.736778: step: 1092/526, loss: 2.264326212753076e-06 2023-01-22 18:07:25.823452: step: 1096/526, loss: 0.03855287283658981 2023-01-22 18:07:26.884440: step: 1100/526, loss: 0.0012021881993860006 2023-01-22 18:07:27.962816: step: 1104/526, loss: 0.003373167011886835 2023-01-22 18:07:29.018155: step: 1108/526, loss: 0.00037629471626132727 2023-01-22 18:07:30.079809: step: 1112/526, loss: 0.0031781475991010666 2023-01-22 18:07:31.146906: step: 1116/526, loss: 0.003785996697843075 2023-01-22 18:07:32.209264: step: 1120/526, loss: 0.008271785452961922 2023-01-22 18:07:33.302373: step: 1124/526, loss: 0.004597133491188288 2023-01-22 18:07:34.363048: step: 1128/526, loss: 0.00392197398468852 2023-01-22 18:07:35.418065: step: 1132/526, loss: 0.0005994778475724161 2023-01-22 18:07:36.485914: step: 1136/526, loss: 0.003938647452741861 2023-01-22 18:07:37.583741: step: 1140/526, loss: 0.0038212265353649855 2023-01-22 18:07:38.649738: step: 1144/526, loss: 0.00294835539534688 2023-01-22 18:07:39.735573: step: 1148/526, loss: 0.007164319511502981 2023-01-22 18:07:40.811000: step: 1152/526, loss: 2.6576759410090744e-05 2023-01-22 18:07:41.882730: step: 1156/526, loss: 0.0032929328735917807 2023-01-22 18:07:42.943527: step: 1160/526, loss: 0.00012819006224162877 2023-01-22 18:07:44.013454: step: 1164/526, loss: 0.003070864826440811 2023-01-22 18:07:45.084908: step: 1168/526, loss: 0.0036052772775292397 2023-01-22 18:07:46.145297: step: 1172/526, loss: 0.0032888096757233143 2023-01-22 18:07:47.206804: step: 1176/526, loss: 0.004941609688103199 2023-01-22 18:07:48.273064: step: 1180/526, loss: 0.00017654371913522482 2023-01-22 18:07:49.336540: step: 1184/526, loss: 0.00016994915495160967 2023-01-22 18:07:50.409325: step: 1188/526, loss: 0.0024686739780008793 2023-01-22 18:07:51.475368: step: 1192/526, loss: 0.002231811173260212 2023-01-22 18:07:52.536068: step: 1196/526, loss: 0.0019481753697618842 2023-01-22 18:07:53.594853: step: 1200/526, loss: 0.005375716369599104 2023-01-22 18:07:54.665793: step: 1204/526, loss: 0.011711164377629757 2023-01-22 18:07:55.745528: step: 1208/526, loss: 0.0010047671385109425 2023-01-22 18:07:56.830742: step: 1212/526, loss: 0.00487243477255106 2023-01-22 18:07:57.896248: step: 1216/526, 
loss: 0.007283544633537531 2023-01-22 18:07:58.969808: step: 1220/526, loss: 0.002689747139811516 2023-01-22 18:08:00.035736: step: 1224/526, loss: 0.012705979868769646 2023-01-22 18:08:01.109061: step: 1228/526, loss: 0.0022503305226564407 2023-01-22 18:08:02.168233: step: 1232/526, loss: 0.0039559220895171165 2023-01-22 18:08:03.242010: step: 1236/526, loss: 0.002737303264439106 2023-01-22 18:08:04.319098: step: 1240/526, loss: 0.006115739233791828 2023-01-22 18:08:05.401330: step: 1244/526, loss: 0.0063429721631109715 2023-01-22 18:08:06.474851: step: 1248/526, loss: 0.004716824274510145 2023-01-22 18:08:07.543460: step: 1252/526, loss: 0.0030517836567014456 2023-01-22 18:08:08.611125: step: 1256/526, loss: 0.01928587630391121 2023-01-22 18:08:09.678701: step: 1260/526, loss: 0.0038072813767939806 2023-01-22 18:08:10.748885: step: 1264/526, loss: 0.009622993879020214 2023-01-22 18:08:11.809259: step: 1268/526, loss: 0.002055876422673464 2023-01-22 18:08:12.903772: step: 1272/526, loss: 0.0011493697529658675 2023-01-22 18:08:13.977588: step: 1276/526, loss: 0.002703531412407756 2023-01-22 18:08:15.073134: step: 1280/526, loss: 0.015853796154260635 2023-01-22 18:08:16.157444: step: 1284/526, loss: 0.0015725308330729604 2023-01-22 18:08:17.227266: step: 1288/526, loss: 4.404744231578661e-06 2023-01-22 18:08:18.297208: step: 1292/526, loss: 0.002780213486403227 2023-01-22 18:08:19.368715: step: 1296/526, loss: 0.001070625614374876 2023-01-22 18:08:20.437801: step: 1300/526, loss: 0.0006446315092034638 2023-01-22 18:08:21.498498: step: 1304/526, loss: 3.962144910474308e-05 2023-01-22 18:08:22.564491: step: 1308/526, loss: 0.0008306821109727025 2023-01-22 18:08:23.638431: step: 1312/526, loss: 0.004246818833053112 2023-01-22 18:08:24.707687: step: 1316/526, loss: 0.0076587446965277195 2023-01-22 18:08:25.779623: step: 1320/526, loss: 0.006820672657340765 2023-01-22 18:08:26.851165: step: 1324/526, loss: 0.0016523490194231272 2023-01-22 18:08:27.902902: step: 1328/526, loss: 0.0062357583083212376 2023-01-22 18:08:28.997190: step: 1332/526, loss: 0.0062552401795983315 2023-01-22 18:08:30.060195: step: 1336/526, loss: 0.00016951176803559065 2023-01-22 18:08:31.126176: step: 1340/526, loss: 0.0019224941497668624 2023-01-22 18:08:32.188655: step: 1344/526, loss: 0.014512458816170692 2023-01-22 18:08:33.255480: step: 1348/526, loss: 0.0014699813909828663 2023-01-22 18:08:34.314550: step: 1352/526, loss: 2.0005210899398662e-05 2023-01-22 18:08:35.374663: step: 1356/526, loss: 0.00202702428214252 2023-01-22 18:08:36.432613: step: 1360/526, loss: 0.011726485565304756 2023-01-22 18:08:37.508363: step: 1364/526, loss: 0.00832261424511671 2023-01-22 18:08:38.575939: step: 1368/526, loss: 9.265230801247526e-06 2023-01-22 18:08:39.653616: step: 1372/526, loss: 0.003955105319619179 2023-01-22 18:08:40.718790: step: 1376/526, loss: 0.0012653687736019492 2023-01-22 18:08:41.787766: step: 1380/526, loss: 0.0023829981219023466 2023-01-22 18:08:42.885156: step: 1384/526, loss: 0.0024809204041957855 2023-01-22 18:08:43.950809: step: 1388/526, loss: 0.0013029719702899456 2023-01-22 18:08:45.038077: step: 1392/526, loss: 0.009118972346186638 2023-01-22 18:08:46.092622: step: 1396/526, loss: 0.010650471784174442 2023-01-22 18:08:47.146143: step: 1400/526, loss: 0.000396323564928025 2023-01-22 18:08:48.200088: step: 1404/526, loss: 0.00012168128159828484 2023-01-22 18:08:49.256286: step: 1408/526, loss: 0.016102680936455727 2023-01-22 18:08:50.317817: step: 1412/526, loss: 0.013401266187429428 2023-01-22 
18:08:51.392795: step: 1416/526, loss: 0.003793393261730671 2023-01-22 18:08:52.463687: step: 1420/526, loss: 0.000843375688418746 2023-01-22 18:08:53.550807: step: 1424/526, loss: 0.008080673404037952 2023-01-22 18:08:54.619345: step: 1428/526, loss: 0.005300865508615971 2023-01-22 18:08:55.687962: step: 1432/526, loss: 0.004407604690641165 2023-01-22 18:08:56.781261: step: 1436/526, loss: 8.405968401348218e-05 2023-01-22 18:08:57.833042: step: 1440/526, loss: 0.00010387034126324579 2023-01-22 18:08:58.901226: step: 1444/526, loss: 0.008012795820832253 2023-01-22 18:08:59.980110: step: 1448/526, loss: 0.00514595489948988 2023-01-22 18:09:01.057376: step: 1452/526, loss: 0.000801900343503803 2023-01-22 18:09:02.125269: step: 1456/526, loss: 0.00414240313693881 2023-01-22 18:09:03.204055: step: 1460/526, loss: 0.02435186132788658 2023-01-22 18:09:04.269494: step: 1464/526, loss: 0.0 2023-01-22 18:09:05.329538: step: 1468/526, loss: 0.004740624222904444 2023-01-22 18:09:06.392454: step: 1472/526, loss: 0.019496286287903786 2023-01-22 18:09:07.457518: step: 1476/526, loss: 0.0034376918338239193 2023-01-22 18:09:08.515510: step: 1480/526, loss: 1.0285552889399696e-05 2023-01-22 18:09:09.589950: step: 1484/526, loss: 0.0026216916739940643 2023-01-22 18:09:10.647383: step: 1488/526, loss: 0.0010655343066900969 2023-01-22 18:09:11.715858: step: 1492/526, loss: 0.00011161769361933693 2023-01-22 18:09:12.784318: step: 1496/526, loss: 0.00897468626499176 2023-01-22 18:09:13.859717: step: 1500/526, loss: 0.003293408080935478 2023-01-22 18:09:14.931196: step: 1504/526, loss: 0.007067199796438217 2023-01-22 18:09:16.003546: step: 1508/526, loss: 0.004014688543975353 2023-01-22 18:09:17.071439: step: 1512/526, loss: 0.00434343796223402 2023-01-22 18:09:18.153532: step: 1516/526, loss: 0.011981457471847534 2023-01-22 18:09:19.225562: step: 1520/526, loss: 0.00016639447130728513 2023-01-22 18:09:20.295624: step: 1524/526, loss: 0.013790886849164963 2023-01-22 18:09:21.350029: step: 1528/526, loss: 0.00020824189414270222 2023-01-22 18:09:22.415585: step: 1532/526, loss: 0.0012086728820577264 2023-01-22 18:09:23.490192: step: 1536/526, loss: 0.04319644719362259 2023-01-22 18:09:24.558891: step: 1540/526, loss: 0.011203614994883537 2023-01-22 18:09:25.633127: step: 1544/526, loss: 0.005573430098593235 2023-01-22 18:09:26.702412: step: 1548/526, loss: 0.045756854116916656 2023-01-22 18:09:27.778103: step: 1552/526, loss: 0.011881670914590359 2023-01-22 18:09:28.843441: step: 1556/526, loss: 0.011266571469604969 2023-01-22 18:09:29.905814: step: 1560/526, loss: 0.0001165743960882537 2023-01-22 18:09:30.964671: step: 1564/526, loss: 0.006261349655687809 2023-01-22 18:09:32.031981: step: 1568/526, loss: 0.0 2023-01-22 18:09:33.112343: step: 1572/526, loss: 0.004248825833201408 2023-01-22 18:09:34.178116: step: 1576/526, loss: 0.0038028049748390913 2023-01-22 18:09:35.258031: step: 1580/526, loss: 0.0007728872587904334 2023-01-22 18:09:36.333591: step: 1584/526, loss: 0.015161864459514618 2023-01-22 18:09:37.413028: step: 1588/526, loss: 0.004029589239507914 2023-01-22 18:09:38.479172: step: 1592/526, loss: 0.0016217725351452827 2023-01-22 18:09:39.553861: step: 1596/526, loss: 0.02138603664934635 2023-01-22 18:09:40.620063: step: 1600/526, loss: 8.453485497739166e-05 2023-01-22 18:09:41.670709: step: 1604/526, loss: 0.03369440510869026 2023-01-22 18:09:42.774959: step: 1608/526, loss: 0.014535359106957912 2023-01-22 18:09:43.871347: step: 1612/526, loss: 0.0036891575437039137 2023-01-22 18:09:44.943107: step: 
1616/526, loss: 0.010762116871774197 2023-01-22 18:09:46.023509: step: 1620/526, loss: 2.4214376281861405e-08 2023-01-22 18:09:47.106233: step: 1624/526, loss: 0.004704159218817949 2023-01-22 18:09:48.178438: step: 1628/526, loss: 0.008366054855287075 2023-01-22 18:09:49.244323: step: 1632/526, loss: 0.007280722260475159 2023-01-22 18:09:50.306082: step: 1636/526, loss: 0.0005960150738246739 2023-01-22 18:09:51.373220: step: 1640/526, loss: 0.009195374324917793 2023-01-22 18:09:52.441770: step: 1644/526, loss: 0.0010494155576452613 2023-01-22 18:09:53.508957: step: 1648/526, loss: 0.0012640261556953192 2023-01-22 18:09:54.580783: step: 1652/526, loss: 0.0015899322461336851 2023-01-22 18:09:55.646108: step: 1656/526, loss: 0.0003950555110350251 2023-01-22 18:09:56.727501: step: 1660/526, loss: 0.011772912926971912 2023-01-22 18:09:57.809287: step: 1664/526, loss: 0.0038584391586482525 2023-01-22 18:09:58.885217: step: 1668/526, loss: 0.004730330314487219 2023-01-22 18:09:59.953901: step: 1672/526, loss: 0.001573633635416627 2023-01-22 18:10:01.032922: step: 1676/526, loss: 0.003443900728598237 2023-01-22 18:10:02.110950: step: 1680/526, loss: 0.0012091778917238116 2023-01-22 18:10:03.174137: step: 1684/526, loss: 0.00231906003318727 2023-01-22 18:10:04.267161: step: 1688/526, loss: 0.005402205046266317 2023-01-22 18:10:05.333833: step: 1692/526, loss: 0.02763788402080536 2023-01-22 18:10:06.428854: step: 1696/526, loss: 0.012245790101587772 2023-01-22 18:10:07.517043: step: 1700/526, loss: 0.002371357986703515 2023-01-22 18:10:08.584738: step: 1704/526, loss: 0.0001006985257845372 2023-01-22 18:10:09.655504: step: 1708/526, loss: 0.00019306459580548108 2023-01-22 18:10:10.734969: step: 1712/526, loss: 0.0002702659403439611 2023-01-22 18:10:11.812340: step: 1716/526, loss: 0.00045841519022360444 2023-01-22 18:10:12.904416: step: 1720/526, loss: 0.009159048087894917 2023-01-22 18:10:13.984528: step: 1724/526, loss: 0.0026574034709483385 2023-01-22 18:10:15.054230: step: 1728/526, loss: 0.0026909809093922377 2023-01-22 18:10:16.119951: step: 1732/526, loss: 0.0038029202260077 2023-01-22 18:10:17.201039: step: 1736/526, loss: 0.0006001431029289961 2023-01-22 18:10:18.281654: step: 1740/526, loss: 0.02261241339147091 2023-01-22 18:10:19.353315: step: 1744/526, loss: 0.004342994187027216 2023-01-22 18:10:20.429002: step: 1748/526, loss: 0.0018650016281753778 2023-01-22 18:10:21.485003: step: 1752/526, loss: 0.0003598167095333338 2023-01-22 18:10:22.548991: step: 1756/526, loss: 0.0018435847014188766 2023-01-22 18:10:23.631034: step: 1760/526, loss: 0.000660406774841249 2023-01-22 18:10:24.714948: step: 1764/526, loss: 0.007831739261746407 2023-01-22 18:10:25.800949: step: 1768/526, loss: 0.009272853843867779 2023-01-22 18:10:26.879618: step: 1772/526, loss: 0.0011731403646990657 2023-01-22 18:10:27.939831: step: 1776/526, loss: 0.0036302516236901283 2023-01-22 18:10:29.025333: step: 1780/526, loss: 0.0040840343572199345 2023-01-22 18:10:30.112094: step: 1784/526, loss: 0.00019168520520906895 2023-01-22 18:10:31.180975: step: 1788/526, loss: 0.003376036649569869 2023-01-22 18:10:32.246519: step: 1792/526, loss: 0.0020819026976823807 2023-01-22 18:10:33.304821: step: 1796/526, loss: 0.011754350736737251 2023-01-22 18:10:34.377296: step: 1800/526, loss: 0.005021013785153627 2023-01-22 18:10:35.450985: step: 1804/526, loss: 0.0022896593436598778 2023-01-22 18:10:36.548247: step: 1808/526, loss: 0.00749589316546917 2023-01-22 18:10:37.619364: step: 1812/526, loss: 0.00020558516553137451 2023-01-22 
18:10:38.696232: step: 1816/526, loss: 2.550763383624144e-05 2023-01-22 18:10:39.782972: step: 1820/526, loss: 0.002272322541102767 2023-01-22 18:10:40.861859: step: 1824/526, loss: 0.004736368544399738 2023-01-22 18:10:41.934354: step: 1828/526, loss: 0.0037193482276052237 2023-01-22 18:10:42.998925: step: 1832/526, loss: 0.000213428313145414 2023-01-22 18:10:44.082292: step: 1836/526, loss: 0.004171588458120823 2023-01-22 18:10:45.160832: step: 1840/526, loss: 0.002222105860710144 2023-01-22 18:10:46.235235: step: 1844/526, loss: 0.007533005438745022 2023-01-22 18:10:47.312703: step: 1848/526, loss: 0.0026535443030297756 2023-01-22 18:10:48.397196: step: 1852/526, loss: 0.001969150034710765 2023-01-22 18:10:49.463287: step: 1856/526, loss: 6.986632070038468e-05 2023-01-22 18:10:50.512043: step: 1860/526, loss: 0.000547143688891083 2023-01-22 18:10:51.585795: step: 1864/526, loss: 0.01142438966780901 2023-01-22 18:10:52.664820: step: 1868/526, loss: 0.00744243897497654 2023-01-22 18:10:53.741668: step: 1872/526, loss: 0.008655395358800888 2023-01-22 18:10:54.841186: step: 1876/526, loss: 0.004293091129511595 2023-01-22 18:10:55.910632: step: 1880/526, loss: 0.0027628173120319843 2023-01-22 18:10:56.979139: step: 1884/526, loss: 3.396796455490403e-05 2023-01-22 18:10:58.057441: step: 1888/526, loss: 0.0010185787687078118 2023-01-22 18:10:59.150931: step: 1892/526, loss: 0.0015422259457409382 2023-01-22 18:11:00.230414: step: 1896/526, loss: 0.007528111804276705 2023-01-22 18:11:01.299056: step: 1900/526, loss: 0.0017608848866075277 2023-01-22 18:11:02.368292: step: 1904/526, loss: 0.00040609354618936777 2023-01-22 18:11:03.459770: step: 1908/526, loss: 0.0008595864637754858 2023-01-22 18:11:04.542331: step: 1912/526, loss: 0.0008734037401154637 2023-01-22 18:11:05.617959: step: 1916/526, loss: 0.004083402454853058 2023-01-22 18:11:06.668677: step: 1920/526, loss: 0.014334799721837044 2023-01-22 18:11:07.748259: step: 1924/526, loss: 0.0008618760039098561 2023-01-22 18:11:08.820148: step: 1928/526, loss: 0.004487842321395874 2023-01-22 18:11:09.869809: step: 1932/526, loss: 0.0019823003094643354 2023-01-22 18:11:10.938566: step: 1936/526, loss: 0.011330187320709229 2023-01-22 18:11:12.009392: step: 1940/526, loss: 0.004352687858045101 2023-01-22 18:11:13.075737: step: 1944/526, loss: 0.00017388183914590627 2023-01-22 18:11:14.162298: step: 1948/526, loss: 0.007207114715129137 2023-01-22 18:11:15.230190: step: 1952/526, loss: 0.005646920762956142 2023-01-22 18:11:16.316517: step: 1956/526, loss: 0.0038300473242998123 2023-01-22 18:11:17.394467: step: 1960/526, loss: 0.010747403837740421 2023-01-22 18:11:18.458604: step: 1964/526, loss: 0.0013486654497683048 2023-01-22 18:11:19.544701: step: 1968/526, loss: 0.010565017350018024 2023-01-22 18:11:20.620721: step: 1972/526, loss: 0.005794962402433157 2023-01-22 18:11:21.704456: step: 1976/526, loss: 0.009057620540261269 2023-01-22 18:11:22.771313: step: 1980/526, loss: 0.029522715136408806 2023-01-22 18:11:23.853037: step: 1984/526, loss: 0.006884191185235977 2023-01-22 18:11:24.930761: step: 1988/526, loss: 0.004918457008898258 2023-01-22 18:11:26.006871: step: 1992/526, loss: 0.02453145571053028 2023-01-22 18:11:27.101753: step: 1996/526, loss: 0.004031417425721884 2023-01-22 18:11:28.175687: step: 2000/526, loss: 0.0009891398949548602 2023-01-22 18:11:29.249575: step: 2004/526, loss: 0.00017858234059531242 2023-01-22 18:11:30.333583: step: 2008/526, loss: 0.0069486224092543125 2023-01-22 18:11:31.387422: step: 2012/526, loss: 
0.0012803251156583428 2023-01-22 18:11:32.476450: step: 2016/526, loss: 0.005079634487628937 2023-01-22 18:11:33.543547: step: 2020/526, loss: 0.0021159108728170395 2023-01-22 18:11:34.613636: step: 2024/526, loss: 0.005618637893348932 2023-01-22 18:11:35.696465: step: 2028/526, loss: 0.00835944339632988 2023-01-22 18:11:36.766976: step: 2032/526, loss: 0.0010824339697137475 2023-01-22 18:11:37.825745: step: 2036/526, loss: 0.0034036103170365095 2023-01-22 18:11:38.893448: step: 2040/526, loss: 1.948093449755106e-05 2023-01-22 18:11:39.990950: step: 2044/526, loss: 0.001862821402028203 2023-01-22 18:11:41.066779: step: 2048/526, loss: 0.000306715868646279 2023-01-22 18:11:42.145719: step: 2052/526, loss: 0.004162499215453863 2023-01-22 18:11:43.205071: step: 2056/526, loss: 0.005048396531492472 2023-01-22 18:11:44.284687: step: 2060/526, loss: 0.001531820627860725 2023-01-22 18:11:45.353017: step: 2064/526, loss: 0.007688785437494516 2023-01-22 18:11:46.416390: step: 2068/526, loss: 0.0029728268273174763 2023-01-22 18:11:47.489766: step: 2072/526, loss: 0.03896621987223625 2023-01-22 18:11:48.567490: step: 2076/526, loss: 0.0004868621763307601 2023-01-22 18:11:49.638446: step: 2080/526, loss: 0.0005071642808616161 2023-01-22 18:11:50.709416: step: 2084/526, loss: 0.004734321031719446 2023-01-22 18:11:51.759595: step: 2088/526, loss: 0.000133141249534674 2023-01-22 18:11:52.826134: step: 2092/526, loss: 0.0053694709204137325 2023-01-22 18:11:53.904875: step: 2096/526, loss: 0.005157975479960442 2023-01-22 18:11:54.972399: step: 2100/526, loss: 0.0001623660937184468 2023-01-22 18:11:56.020287: step: 2104/526, loss: 0.0005135132814757526 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34574316628701596, 'r': 0.2880099620493359, 'f1': 0.3142468944099379}, 'combined': 0.23155034324942791, 'stategy': 1, 'epoch': 13} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.344416713535808, 'r': 0.2403347396650968, 'f1': 0.283112767760826}, 'combined': 0.15442514605135962, 'stategy': 1, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.315990890083632, 'r': 0.3345785895003162, 'f1': 0.32501920122887856}, 'combined': 0.23948783248443684, 'stategy': 1, 'epoch': 13} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3430920236238748, 'r': 0.2658727758549149, 'f1': 0.299586540215899}, 'combined': 0.16341084011776308, 'stategy': 1, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32847142590646744, 'r': 0.3303412822209255, 'f1': 0.3294037005306107}, 'combined': 0.24271851618044996, 'stategy': 1, 'epoch': 13} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3366437972127133, 'r': 0.2714670985742702, 'f1': 0.300562662223282}, 'combined': 0.16394327030360833, 'stategy': 1, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 
0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 14 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 18:14:39.896850: step: 4/526, loss: 0.002307166112586856 2023-01-22 18:14:40.966310: step: 8/526, loss: 0.00016390436212532222 2023-01-22 18:14:42.011339: step: 12/526, loss: 0.003355986438691616 2023-01-22 18:14:43.071792: step: 16/526, loss: 0.0032070777378976345 2023-01-22 18:14:44.120901: step: 20/526, loss: 0.000874431396368891 2023-01-22 18:14:45.190033: step: 24/526, loss: 0.0017243623733520508 2023-01-22 18:14:46.233146: step: 28/526, loss: 0.0002141358272638172 2023-01-22 18:14:47.306594: step: 32/526, loss: 0.004313035402446985 2023-01-22 18:14:48.352399: step: 36/526, loss: 0.009496822021901608 2023-01-22 18:14:49.398761: step: 40/526, loss: 0.015115546062588692 2023-01-22 18:14:50.463613: step: 44/526, loss: 
0.0006571476114913821 2023-01-22 18:14:51.532595: step: 48/526, loss: 0.0005053265485912561 2023-01-22 18:14:52.593842: step: 52/526, loss: 0.007660075090825558 2023-01-22 18:14:53.661154: step: 56/526, loss: 0.002688624430447817 2023-01-22 18:14:54.724122: step: 60/526, loss: 0.009465493261814117 2023-01-22 18:14:55.795449: step: 64/526, loss: 0.003573128255084157 2023-01-22 18:14:56.853149: step: 68/526, loss: 0.0008480855613015592 2023-01-22 18:14:57.901174: step: 72/526, loss: 0.010601562447845936 2023-01-22 18:14:58.978334: step: 76/526, loss: 0.004917262587696314 2023-01-22 18:15:00.038725: step: 80/526, loss: 0.012845807708799839 2023-01-22 18:15:01.094416: step: 84/526, loss: 0.001812226721085608 2023-01-22 18:15:02.163157: step: 88/526, loss: 0.002933220937848091 2023-01-22 18:15:03.224866: step: 92/526, loss: 0.002123152371495962 2023-01-22 18:15:04.279103: step: 96/526, loss: 2.8769592972821556e-05 2023-01-22 18:15:05.353242: step: 100/526, loss: 0.007094432599842548 2023-01-22 18:15:06.414362: step: 104/526, loss: 0.0007247141329571605 2023-01-22 18:15:07.468723: step: 108/526, loss: 4.0705032006371766e-05 2023-01-22 18:15:08.522602: step: 112/526, loss: 0.004695121664553881 2023-01-22 18:15:09.589858: step: 116/526, loss: 7.974659820320085e-05 2023-01-22 18:15:10.641512: step: 120/526, loss: 0.0008255448192358017 2023-01-22 18:15:11.708099: step: 124/526, loss: 0.0070709241554141045 2023-01-22 18:15:12.780526: step: 128/526, loss: 0.0016856011934578419 2023-01-22 18:15:13.848032: step: 132/526, loss: 1.3539904102799483e-05 2023-01-22 18:15:14.921280: step: 136/526, loss: 0.007703426294028759 2023-01-22 18:15:15.983167: step: 140/526, loss: 0.0006940584280528128 2023-01-22 18:15:17.051434: step: 144/526, loss: 0.007444867864251137 2023-01-22 18:15:18.139716: step: 148/526, loss: 0.006417728029191494 2023-01-22 18:15:19.199952: step: 152/526, loss: 0.00017218659922946244 2023-01-22 18:15:20.274015: step: 156/526, loss: 0.0003996709128841758 2023-01-22 18:15:21.328830: step: 160/526, loss: 0.00028717415989376605 2023-01-22 18:15:22.390599: step: 164/526, loss: 0.0036907135508954525 2023-01-22 18:15:23.458597: step: 168/526, loss: 0.0024098502472043037 2023-01-22 18:15:24.530657: step: 172/526, loss: 0.005466167815029621 2023-01-22 18:15:25.597987: step: 176/526, loss: 0.0004699561686720699 2023-01-22 18:15:26.650383: step: 180/526, loss: 0.000760965165682137 2023-01-22 18:15:27.731131: step: 184/526, loss: 0.00011297981109237298 2023-01-22 18:15:28.799498: step: 188/526, loss: 0.008172375150024891 2023-01-22 18:15:29.865556: step: 192/526, loss: 0.0004477706679608673 2023-01-22 18:15:30.933843: step: 196/526, loss: 0.0038734604604542255 2023-01-22 18:15:32.025172: step: 200/526, loss: 0.014641694724559784 2023-01-22 18:15:33.085896: step: 204/526, loss: 0.0037262861151248217 2023-01-22 18:15:34.160454: step: 208/526, loss: 0.00356103852391243 2023-01-22 18:15:35.219191: step: 212/526, loss: 0.001980154076591134 2023-01-22 18:15:36.296539: step: 216/526, loss: 0.0034438883885741234 2023-01-22 18:15:37.369143: step: 220/526, loss: 0.004583067260682583 2023-01-22 18:15:38.452354: step: 224/526, loss: 0.002736735623329878 2023-01-22 18:15:39.524081: step: 228/526, loss: 0.013606034219264984 2023-01-22 18:15:40.602781: step: 232/526, loss: 0.004091767128556967 2023-01-22 18:15:41.666441: step: 236/526, loss: 0.0037918041925877333 2023-01-22 18:15:42.727883: step: 240/526, loss: 5.625628909911029e-05 2023-01-22 18:15:43.818604: step: 244/526, loss: 0.0 2023-01-22 18:15:44.896993: step: 
248/526, loss: 0.00024778343504294753 2023-01-22 18:15:45.966338: step: 252/526, loss: 3.603561708587222e-05 2023-01-22 18:15:47.009683: step: 256/526, loss: 5.9348498325562105e-05 2023-01-22 18:15:48.098242: step: 260/526, loss: 0.0035893144086003304 2023-01-22 18:15:49.164579: step: 264/526, loss: 0.006451513152569532 2023-01-22 18:15:50.237324: step: 268/526, loss: 0.021374007686972618 2023-01-22 18:15:51.325256: step: 272/526, loss: 0.0026988228783011436 2023-01-22 18:15:52.391479: step: 276/526, loss: 0.0035164428409188986 2023-01-22 18:15:53.459377: step: 280/526, loss: 0.0035909716971218586 2023-01-22 18:15:54.520562: step: 284/526, loss: 0.005486360285431147 2023-01-22 18:15:55.586332: step: 288/526, loss: 0.0019414079142734408 2023-01-22 18:15:56.674820: step: 292/526, loss: 0.007578476332128048 2023-01-22 18:15:57.735056: step: 296/526, loss: 6.2344761317945085e-06 2023-01-22 18:15:58.824146: step: 300/526, loss: 0.0033236127346754074 2023-01-22 18:15:59.876213: step: 304/526, loss: 0.010147430002689362 2023-01-22 18:16:00.939722: step: 308/526, loss: 0.002167791360989213 2023-01-22 18:16:02.013029: step: 312/526, loss: 0.00039145632763393223 2023-01-22 18:16:03.069364: step: 316/526, loss: 0.02178303897380829 2023-01-22 18:16:04.132268: step: 320/526, loss: 0.004287473391741514 2023-01-22 18:16:05.202201: step: 324/526, loss: 0.006246659904718399 2023-01-22 18:16:06.269847: step: 328/526, loss: 0.002571484539657831 2023-01-22 18:16:07.333615: step: 332/526, loss: 0.0004141709068790078 2023-01-22 18:16:08.405918: step: 336/526, loss: 7.810800161678344e-05 2023-01-22 18:16:09.486461: step: 340/526, loss: 0.005919633898884058 2023-01-22 18:16:10.574744: step: 344/526, loss: 0.0003430758079048246 2023-01-22 18:16:11.631960: step: 348/526, loss: 0.0013421847252175212 2023-01-22 18:16:12.685594: step: 352/526, loss: 0.0011762031354010105 2023-01-22 18:16:13.764545: step: 356/526, loss: 0.00042755200411193073 2023-01-22 18:16:14.812880: step: 360/526, loss: 1.2852225950155116e-07 2023-01-22 18:16:15.891714: step: 364/526, loss: 0.0021133606787770987 2023-01-22 18:16:16.954043: step: 368/526, loss: 0.0013215214712545276 2023-01-22 18:16:18.024052: step: 372/526, loss: 0.002105488209053874 2023-01-22 18:16:19.098502: step: 376/526, loss: 0.005996872205287218 2023-01-22 18:16:20.171855: step: 380/526, loss: 0.0012232906883582473 2023-01-22 18:16:21.237992: step: 384/526, loss: 0.00020889371808152646 2023-01-22 18:16:22.328092: step: 388/526, loss: 0.0007443691720254719 2023-01-22 18:16:23.421017: step: 392/526, loss: 0.017321476712822914 2023-01-22 18:16:24.506866: step: 396/526, loss: 0.0005412495229393244 2023-01-22 18:16:25.576579: step: 400/526, loss: 0.00012738963414449245 2023-01-22 18:16:26.639383: step: 404/526, loss: 0.006571083329617977 2023-01-22 18:16:27.701185: step: 408/526, loss: 0.0068222819827497005 2023-01-22 18:16:28.778901: step: 412/526, loss: 0.0012285879347473383 2023-01-22 18:16:29.860158: step: 416/526, loss: 1.7125488511737785e-06 2023-01-22 18:16:30.949407: step: 420/526, loss: 0.0077162547968328 2023-01-22 18:16:32.028688: step: 424/526, loss: 0.021344834938645363 2023-01-22 18:16:33.108608: step: 428/526, loss: 0.0022521985229104757 2023-01-22 18:16:34.181657: step: 432/526, loss: 0.0032798433676362038 2023-01-22 18:16:35.252843: step: 436/526, loss: 0.006046767346560955 2023-01-22 18:16:36.332639: step: 440/526, loss: 0.017067667096853256 2023-01-22 18:16:37.382992: step: 444/526, loss: 0.0008532463689334691 2023-01-22 18:16:38.451298: step: 448/526, loss: 
0.0032414861489087343 2023-01-22 18:16:39.541684: step: 452/526, loss: 0.004999093245714903 2023-01-22 18:16:40.609679: step: 456/526, loss: 0.0040327440947294235 2023-01-22 18:16:41.691287: step: 460/526, loss: 0.0020243411418050528 2023-01-22 18:16:42.762578: step: 464/526, loss: 0.0022450743708759546 2023-01-22 18:16:43.826119: step: 468/526, loss: 3.589477637433447e-05 2023-01-22 18:16:44.891714: step: 472/526, loss: 0.0010907717514783144 2023-01-22 18:16:45.956235: step: 476/526, loss: 0.0004973181057721376 2023-01-22 18:16:47.033963: step: 480/526, loss: 0.010154790244996548 2023-01-22 18:16:48.111182: step: 484/526, loss: 0.0004196219961158931 2023-01-22 18:16:49.175627: step: 488/526, loss: 0.005656337831169367 2023-01-22 18:16:50.250290: step: 492/526, loss: 0.004369535017758608 2023-01-22 18:16:51.325643: step: 496/526, loss: 0.0001062605733750388 2023-01-22 18:16:52.402904: step: 500/526, loss: 0.004484541714191437 2023-01-22 18:16:53.469855: step: 504/526, loss: 0.00677845673635602 2023-01-22 18:16:54.540678: step: 508/526, loss: 0.0002809167781379074 2023-01-22 18:16:55.616529: step: 512/526, loss: 0.0029430161230266094 2023-01-22 18:16:56.680887: step: 516/526, loss: 0.0005283295176923275 2023-01-22 18:16:57.759694: step: 520/526, loss: 0.006898556370288134 2023-01-22 18:16:58.829586: step: 524/526, loss: 0.008260136470198631 2023-01-22 18:16:59.902513: step: 528/526, loss: 0.008318664506077766 2023-01-22 18:17:00.954318: step: 532/526, loss: 5.350386709324084e-05 2023-01-22 18:17:02.010527: step: 536/526, loss: 0.0066989148035645485 2023-01-22 18:17:03.066428: step: 540/526, loss: 0.0002137723204214126 2023-01-22 18:17:04.143076: step: 544/526, loss: 0.011100714094936848 2023-01-22 18:17:05.230659: step: 548/526, loss: 0.0029905603732913733 2023-01-22 18:17:06.290794: step: 552/526, loss: 4.41963056800887e-05 2023-01-22 18:17:07.362678: step: 556/526, loss: 0.019678698852658272 2023-01-22 18:17:08.429114: step: 560/526, loss: 0.0003933538682758808 2023-01-22 18:17:09.503422: step: 564/526, loss: 0.004244382027536631 2023-01-22 18:17:10.572366: step: 568/526, loss: 0.0026927590370178223 2023-01-22 18:17:11.641098: step: 572/526, loss: 0.0013517928309738636 2023-01-22 18:17:12.733214: step: 576/526, loss: 0.0028755960520356894 2023-01-22 18:17:13.815174: step: 580/526, loss: 0.0009696271736174822 2023-01-22 18:17:14.887286: step: 584/526, loss: 0.002408436266705394 2023-01-22 18:17:15.952741: step: 588/526, loss: 0.005607598926872015 2023-01-22 18:17:17.017152: step: 592/526, loss: 0.0017189460340887308 2023-01-22 18:17:18.083492: step: 596/526, loss: 0.00016286822210531682 2023-01-22 18:17:19.147554: step: 600/526, loss: 0.00045663630589842796 2023-01-22 18:17:20.212446: step: 604/526, loss: 0.007644603028893471 2023-01-22 18:17:21.286892: step: 608/526, loss: 0.003453565528616309 2023-01-22 18:17:22.398226: step: 612/526, loss: 0.0024117999710142612 2023-01-22 18:17:23.462631: step: 616/526, loss: 0.0017960545374080539 2023-01-22 18:17:24.519006: step: 620/526, loss: 0.0016979104839265347 2023-01-22 18:17:25.590344: step: 624/526, loss: 0.0016665789298713207 2023-01-22 18:17:26.653633: step: 628/526, loss: 0.009512514807283878 2023-01-22 18:17:27.741137: step: 632/526, loss: 0.0005530774360522628 2023-01-22 18:17:28.809804: step: 636/526, loss: 0.0023438313510268927 2023-01-22 18:17:29.870649: step: 640/526, loss: 0.0009590488043613732 2023-01-22 18:17:30.925836: step: 644/526, loss: 0.004253941588103771 2023-01-22 18:17:32.018123: step: 648/526, loss: 0.0003688369761221111 
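The per-epoch evaluation blocks above (for example the epoch 13 block ending in "Current best result") print template and slot precision/recall/F1 plus a per-language 'combined' score. The printed numbers are consistent with the usual F1 = 2pr/(p+r) and with combined = template F1 x slot F1. A minimal sketch in Python, reusing the Dev Chinese values from epoch 13; this is a reconstruction from the printed scores, not the actual train.py code:

def f1(p, r):
    # standard harmonic mean of precision and recall
    return 2 * p * r / (p + r) if p + r else 0.0

# Dev Chinese, epoch 13, values copied from the log above
template_f1 = f1(1.0, 0.5833333333333334)              # ~0.7368421052631579
slot_f1 = f1(0.34574316628701596, 0.2880099620493359)  # ~0.3142468944099379
combined = template_f1 * slot_f1                       # ~0.23155034, matching 'combined' above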
2023-01-22 18:17:33.081653: step: 652/526, loss: 0.0001511715236119926 2023-01-22 18:17:34.140689: step: 656/526, loss: 7.127510457394237e-07 2023-01-22 18:17:35.222387: step: 660/526, loss: 0.0003850888169836253 2023-01-22 18:17:36.284114: step: 664/526, loss: 0.0011288495734333992 2023-01-22 18:17:37.377909: step: 668/526, loss: 0.004587870091199875 2023-01-22 18:17:38.460219: step: 672/526, loss: 0.019386034458875656 2023-01-22 18:17:39.518254: step: 676/526, loss: 0.021321089938282967 2023-01-22 18:17:40.579671: step: 680/526, loss: 0.0010789288207888603 2023-01-22 18:17:41.657508: step: 684/526, loss: 0.0037535966839641333 2023-01-22 18:17:42.740726: step: 688/526, loss: 0.0023384590167552233 2023-01-22 18:17:43.804914: step: 692/526, loss: 0.0070309690199792385 2023-01-22 18:17:44.869102: step: 696/526, loss: 0.00038893689634278417 2023-01-22 18:17:45.948200: step: 700/526, loss: 4.2838233639486134e-05 2023-01-22 18:17:47.014222: step: 704/526, loss: 0.009444604627788067 2023-01-22 18:17:48.076756: step: 708/526, loss: 0.010152694769203663 2023-01-22 18:17:49.156995: step: 712/526, loss: 0.0028974285814911127 2023-01-22 18:17:50.243203: step: 716/526, loss: 0.0032435518223792315 2023-01-22 18:17:51.312266: step: 720/526, loss: 0.0010638185776770115 2023-01-22 18:17:52.382736: step: 724/526, loss: 0.0033046293538063765 2023-01-22 18:17:53.451064: step: 728/526, loss: 0.013791908510029316 2023-01-22 18:17:54.518253: step: 732/526, loss: 2.527087417547591e-06 2023-01-22 18:17:55.580853: step: 736/526, loss: 0.00329029094427824 2023-01-22 18:17:56.654804: step: 740/526, loss: 0.0009505374473519623 2023-01-22 18:17:57.724501: step: 744/526, loss: 5.103231046632573e-07 2023-01-22 18:17:58.785163: step: 748/526, loss: 0.0021410330664366484 2023-01-22 18:17:59.853501: step: 752/526, loss: 0.0019387867068871856 2023-01-22 18:18:00.936071: step: 756/526, loss: 0.005651320796459913 2023-01-22 18:18:02.017289: step: 760/526, loss: 0.008545869030058384 2023-01-22 18:18:03.099726: step: 764/526, loss: 0.0011596831027418375 2023-01-22 18:18:04.160583: step: 768/526, loss: 0.0032350532710552216 2023-01-22 18:18:05.224697: step: 772/526, loss: 0.012262407690286636 2023-01-22 18:18:06.295759: step: 776/526, loss: 0.0034392091911286116 2023-01-22 18:18:07.363239: step: 780/526, loss: 0.02677333354949951 2023-01-22 18:18:08.431925: step: 784/526, loss: 8.11841255199397e-06 2023-01-22 18:18:09.482296: step: 788/526, loss: 1.0902474059548695e-05 2023-01-22 18:18:10.563295: step: 792/526, loss: 0.005818450823426247 2023-01-22 18:18:11.625143: step: 796/526, loss: 0.002057104604318738 2023-01-22 18:18:12.707831: step: 800/526, loss: 0.004009247291833162 2023-01-22 18:18:13.786781: step: 804/526, loss: 0.006768904626369476 2023-01-22 18:18:14.857192: step: 808/526, loss: 0.01550119835883379 2023-01-22 18:18:15.932064: step: 812/526, loss: 0.002734885783866048 2023-01-22 18:18:17.006402: step: 816/526, loss: 0.0004063333908561617 2023-01-22 18:18:18.076898: step: 820/526, loss: 0.005199453327804804 2023-01-22 18:18:19.145251: step: 824/526, loss: 0.00032127118902280927 2023-01-22 18:18:20.223924: step: 828/526, loss: 0.0037883908953517675 2023-01-22 18:18:21.303640: step: 832/526, loss: 0.0018259456846863031 2023-01-22 18:18:22.379884: step: 836/526, loss: 0.012628414668142796 2023-01-22 18:18:23.454578: step: 840/526, loss: 0.0022392510436475277 2023-01-22 18:18:24.511836: step: 844/526, loss: 0.0010122767416760325 2023-01-22 18:18:25.597016: step: 848/526, loss: 0.003944905940443277 2023-01-22 
18:18:26.681583: step: 852/526, loss: 0.00800758134573698 2023-01-22 18:18:27.752349: step: 856/526, loss: 0.009272287599742413 2023-01-22 18:18:28.840282: step: 860/526, loss: 0.007457132916897535 2023-01-22 18:18:29.918505: step: 864/526, loss: 0.008085169829428196 2023-01-22 18:18:30.983820: step: 868/526, loss: 0.018574386835098267 2023-01-22 18:18:32.047835: step: 872/526, loss: 0.0013659705873578787 2023-01-22 18:18:33.117653: step: 876/526, loss: 0.0022928675170987844 2023-01-22 18:18:34.185274: step: 880/526, loss: 2.8176586056360975e-05 2023-01-22 18:18:35.277436: step: 884/526, loss: 0.0028159264475107193 2023-01-22 18:18:36.348871: step: 888/526, loss: 0.002168482169508934 2023-01-22 18:18:37.436530: step: 892/526, loss: 0.002853300189599395 2023-01-22 18:18:38.509880: step: 896/526, loss: 0.00027688051341101527 2023-01-22 18:18:39.577968: step: 900/526, loss: 0.0013850006507709622 2023-01-22 18:18:40.649442: step: 904/526, loss: 0.001309840939939022 2023-01-22 18:18:41.705789: step: 908/526, loss: 7.723381713731214e-05 2023-01-22 18:18:42.793212: step: 912/526, loss: 0.0020622939337044954 2023-01-22 18:18:43.871775: step: 916/526, loss: 0.0009144251816906035 2023-01-22 18:18:44.935615: step: 920/526, loss: 0.0067610410042107105 2023-01-22 18:18:45.993667: step: 924/526, loss: 0.005464465357363224 2023-01-22 18:18:47.054675: step: 928/526, loss: 0.00013922154903411865 2023-01-22 18:18:48.108662: step: 932/526, loss: 0.0022986449766904116 2023-01-22 18:18:49.192138: step: 936/526, loss: 0.004184935707598925 2023-01-22 18:18:50.262404: step: 940/526, loss: 0.0007891824934631586 2023-01-22 18:18:51.344952: step: 944/526, loss: 0.004375447519123554 2023-01-22 18:18:52.418696: step: 948/526, loss: 0.0005012222100049257 2023-01-22 18:18:53.475675: step: 952/526, loss: 0.014180784113705158 2023-01-22 18:18:54.552794: step: 956/526, loss: 0.005079489666968584 2023-01-22 18:18:55.630673: step: 960/526, loss: 0.0028964467346668243 2023-01-22 18:18:56.697613: step: 964/526, loss: 0.0024710383731871843 2023-01-22 18:18:57.778496: step: 968/526, loss: 0.026737647131085396 2023-01-22 18:18:58.853918: step: 972/526, loss: 0.017084207385778427 2023-01-22 18:18:59.915775: step: 976/526, loss: 0.011491220444440842 2023-01-22 18:19:00.984460: step: 980/526, loss: 0.0007972380262799561 2023-01-22 18:19:02.053275: step: 984/526, loss: 0.0014202527236193419 2023-01-22 18:19:03.156249: step: 988/526, loss: 0.0015676483744755387 2023-01-22 18:19:04.229251: step: 992/526, loss: 0.0031679163221269846 2023-01-22 18:19:05.288059: step: 996/526, loss: 0.0033847338054329157 2023-01-22 18:19:06.345491: step: 1000/526, loss: 0.002245976123958826 2023-01-22 18:19:07.406364: step: 1004/526, loss: 0.0019594307523220778 2023-01-22 18:19:08.470830: step: 1008/526, loss: 0.0007398559246212244 2023-01-22 18:19:09.546559: step: 1012/526, loss: 0.0002759054768830538 2023-01-22 18:19:10.610977: step: 1016/526, loss: 0.0007809091475792229 2023-01-22 18:19:11.689527: step: 1020/526, loss: 7.455585728166625e-05 2023-01-22 18:19:12.750621: step: 1024/526, loss: 0.006111177150160074 2023-01-22 18:19:13.822314: step: 1028/526, loss: 0.0006516418652608991 2023-01-22 18:19:14.897787: step: 1032/526, loss: 0.0012602662900462747 2023-01-22 18:19:15.957855: step: 1036/526, loss: 0.00102605065330863 2023-01-22 18:19:17.041531: step: 1040/526, loss: 0.0005091484053991735 2023-01-22 18:19:18.109137: step: 1044/526, loss: 0.029566867277026176 2023-01-22 18:19:19.171198: step: 1048/526, loss: 0.00036553433164954185 2023-01-22 
18:19:20.239751: step: 1052/526, loss: 0.0033097942359745502 2023-01-22 18:19:21.306705: step: 1056/526, loss: 0.001217966666445136 2023-01-22 18:19:22.384014: step: 1060/526, loss: 0.0014171070652082562 2023-01-22 18:19:23.445542: step: 1064/526, loss: 0.023390233516693115 2023-01-22 18:19:24.510965: step: 1068/526, loss: 0.0004296435508877039 2023-01-22 18:19:25.577972: step: 1072/526, loss: 0.0016299390699714422 2023-01-22 18:19:26.643910: step: 1076/526, loss: 0.0017074395436793566 2023-01-22 18:19:27.704627: step: 1080/526, loss: 0.00136851635761559 2023-01-22 18:19:28.781556: step: 1084/526, loss: 0.0011942698620259762 2023-01-22 18:19:29.868182: step: 1088/526, loss: 0.006335163488984108 2023-01-22 18:19:30.922886: step: 1092/526, loss: 1.2097493709006812e-05 2023-01-22 18:19:31.992155: step: 1096/526, loss: 0.006443346384912729 2023-01-22 18:19:33.052806: step: 1100/526, loss: 0.002692027948796749 2023-01-22 18:19:34.125478: step: 1104/526, loss: 0.0007058934425003827 2023-01-22 18:19:35.211567: step: 1108/526, loss: 0.010884067043662071 2023-01-22 18:19:36.304156: step: 1112/526, loss: 0.011060385033488274 2023-01-22 18:19:37.379619: step: 1116/526, loss: 0.0005565279861912131 2023-01-22 18:19:38.443812: step: 1120/526, loss: 0.0027569830417633057 2023-01-22 18:19:39.511119: step: 1124/526, loss: 0.007407648488879204 2023-01-22 18:19:40.589629: step: 1128/526, loss: 0.0076655857264995575 2023-01-22 18:19:41.656437: step: 1132/526, loss: 0.0008564227027818561 2023-01-22 18:19:42.724612: step: 1136/526, loss: 0.002139363205060363 2023-01-22 18:19:43.804397: step: 1140/526, loss: 0.00011179518332937732 2023-01-22 18:19:44.865926: step: 1144/526, loss: 0.00026275331038050354 2023-01-22 18:19:45.955437: step: 1148/526, loss: 0.017415888607501984 2023-01-22 18:19:47.037517: step: 1152/526, loss: 0.0012456434778869152 2023-01-22 18:19:48.103677: step: 1156/526, loss: 0.00034680109820328653 2023-01-22 18:19:49.149286: step: 1160/526, loss: 0.0007098432979546487 2023-01-22 18:19:50.239723: step: 1164/526, loss: 3.0032533686608076e-05 2023-01-22 18:19:51.319629: step: 1168/526, loss: 0.0028228273149579763 2023-01-22 18:19:52.405367: step: 1172/526, loss: 0.003709799377247691 2023-01-22 18:19:53.478673: step: 1176/526, loss: 0.02493387646973133 2023-01-22 18:19:54.546154: step: 1180/526, loss: 0.01921447180211544 2023-01-22 18:19:55.611016: step: 1184/526, loss: 1.3776319974567741e-05 2023-01-22 18:19:56.674827: step: 1188/526, loss: 0.0022829556837677956 2023-01-22 18:19:57.741943: step: 1192/526, loss: 0.00014040459063835442 2023-01-22 18:19:58.814768: step: 1196/526, loss: 0.002130881417542696 2023-01-22 18:19:59.873753: step: 1200/526, loss: 0.00022746642935089767 2023-01-22 18:20:00.956167: step: 1204/526, loss: 0.0020031214226037264 2023-01-22 18:20:02.017144: step: 1208/526, loss: 0.000355652766302228 2023-01-22 18:20:03.079876: step: 1212/526, loss: 0.0020097908563911915 2023-01-22 18:20:04.155125: step: 1216/526, loss: 0.0001625945296837017 2023-01-22 18:20:05.244648: step: 1220/526, loss: 0.006345596630126238 2023-01-22 18:20:06.314044: step: 1224/526, loss: 0.00043828244088217616 2023-01-22 18:20:07.370624: step: 1228/526, loss: 0.0003940975875593722 2023-01-22 18:20:08.443546: step: 1232/526, loss: 0.0013128803111612797 2023-01-22 18:20:09.508537: step: 1236/526, loss: 0.004025301430374384 2023-01-22 18:20:10.581244: step: 1240/526, loss: 0.009535791352391243 2023-01-22 18:20:11.655203: step: 1244/526, loss: 0.0067873080261051655 2023-01-22 18:20:12.726417: step: 1248/526, loss: 
0.0031826540362089872 2023-01-22 18:20:13.811439: step: 1252/526, loss: 0.0003247931017540395 2023-01-22 18:20:14.905291: step: 1256/526, loss: 0.0019273733487352729 2023-01-22 18:20:15.968047: step: 1260/526, loss: 0.0014707774389535189 2023-01-22 18:20:17.024530: step: 1264/526, loss: 0.004232752602547407 2023-01-22 18:20:18.089037: step: 1268/526, loss: 0.000627980858553201 2023-01-22 18:20:19.153089: step: 1272/526, loss: 0.011762239970266819 2023-01-22 18:20:20.227055: step: 1276/526, loss: 0.00855042040348053 2023-01-22 18:20:21.296632: step: 1280/526, loss: 0.0014893842162564397 2023-01-22 18:20:22.366340: step: 1284/526, loss: 0.00023896177299320698 2023-01-22 18:20:23.451250: step: 1288/526, loss: 0.0037234509363770485 2023-01-22 18:20:24.523054: step: 1292/526, loss: 0.0015185344964265823 2023-01-22 18:20:25.594592: step: 1296/526, loss: 0.009883145801723003 2023-01-22 18:20:26.680386: step: 1300/526, loss: 0.006715449504554272 2023-01-22 18:20:27.763785: step: 1304/526, loss: 0.010385330766439438 2023-01-22 18:20:28.825262: step: 1308/526, loss: 0.016307534649968147 2023-01-22 18:20:29.908525: step: 1312/526, loss: 0.00171584018971771 2023-01-22 18:20:30.968822: step: 1316/526, loss: 0.004585589747875929 2023-01-22 18:20:32.015435: step: 1320/526, loss: 0.0017021321691572666 2023-01-22 18:20:33.099218: step: 1324/526, loss: 0.0023780791088938713 2023-01-22 18:20:34.158847: step: 1328/526, loss: 0.0005325642414391041 2023-01-22 18:20:35.241141: step: 1332/526, loss: 0.004799291025847197 2023-01-22 18:20:36.296783: step: 1336/526, loss: 8.75878322403878e-05 2023-01-22 18:20:37.381698: step: 1340/526, loss: 0.013781948946416378 2023-01-22 18:20:38.454830: step: 1344/526, loss: 0.0018188146641477942 2023-01-22 18:20:39.530211: step: 1348/526, loss: 0.0011055388022214174 2023-01-22 18:20:40.593861: step: 1352/526, loss: 0.0021018851548433304 2023-01-22 18:20:41.648249: step: 1356/526, loss: 0.0001546824350953102 2023-01-22 18:20:42.706431: step: 1360/526, loss: 0.0022100184578448534 2023-01-22 18:20:43.781141: step: 1364/526, loss: 0.002128913765773177 2023-01-22 18:20:44.850426: step: 1368/526, loss: 0.0008413918549194932 2023-01-22 18:20:45.919071: step: 1372/526, loss: 0.00020194821991026402 2023-01-22 18:20:46.989736: step: 1376/526, loss: 0.01098920963704586 2023-01-22 18:20:48.066659: step: 1380/526, loss: 0.002626180648803711 2023-01-22 18:20:49.137866: step: 1384/526, loss: 8.364782843273133e-06 2023-01-22 18:20:50.226621: step: 1388/526, loss: 0.00019631536270026118 2023-01-22 18:20:51.311535: step: 1392/526, loss: 0.024831082671880722 2023-01-22 18:20:52.397666: step: 1396/526, loss: 0.0033678554464131594 2023-01-22 18:20:53.467231: step: 1400/526, loss: 0.005119773093611002 2023-01-22 18:20:54.522448: step: 1404/526, loss: 0.0038946541026234627 2023-01-22 18:20:55.583905: step: 1408/526, loss: 5.344546298147179e-05 2023-01-22 18:20:56.663277: step: 1412/526, loss: 0.002227138727903366 2023-01-22 18:20:57.753171: step: 1416/526, loss: 0.0013618605444207788 2023-01-22 18:20:58.839494: step: 1420/526, loss: 0.0016318444395437837 2023-01-22 18:20:59.897810: step: 1424/526, loss: 0.003463602624833584 2023-01-22 18:21:00.963081: step: 1428/526, loss: 0.0008891662582755089 2023-01-22 18:21:02.050024: step: 1432/526, loss: 0.001142619177699089 2023-01-22 18:21:03.120379: step: 1436/526, loss: 7.592178735649213e-05 2023-01-22 18:21:04.211185: step: 1440/526, loss: 0.00045594715629704297 2023-01-22 18:21:05.274993: step: 1444/526, loss: 0.004796857479959726 2023-01-22 
18:21:06.335455: step: 1448/526, loss: 0.00995884370058775 2023-01-22 18:21:07.397893: step: 1452/526, loss: 0.0018776168581098318 2023-01-22 18:21:08.486020: step: 1456/526, loss: 0.002360767684876919 2023-01-22 18:21:09.557484: step: 1460/526, loss: 0.004143232014030218 2023-01-22 18:21:10.613373: step: 1464/526, loss: 0.00925966165959835 2023-01-22 18:21:11.680231: step: 1468/526, loss: 0.0022747707553207874 2023-01-22 18:21:12.741044: step: 1472/526, loss: 0.0030110818333923817 2023-01-22 18:21:13.807216: step: 1476/526, loss: 0.000270563003141433 2023-01-22 18:21:14.866012: step: 1480/526, loss: 0.00017031615425366908 2023-01-22 18:21:15.927972: step: 1484/526, loss: 0.0049527850933372974 2023-01-22 18:21:17.002638: step: 1488/526, loss: 0.0016987328417599201 2023-01-22 18:21:18.074627: step: 1492/526, loss: 0.005898380186408758 2023-01-22 18:21:19.135200: step: 1496/526, loss: 0.004284883849322796 2023-01-22 18:21:20.217712: step: 1500/526, loss: 0.025537550449371338 2023-01-22 18:21:21.290356: step: 1504/526, loss: 0.004737743642181158 2023-01-22 18:21:22.354735: step: 1508/526, loss: 0.00871719978749752 2023-01-22 18:21:23.414018: step: 1512/526, loss: 0.00036977906711399555 2023-01-22 18:21:24.488372: step: 1516/526, loss: 0.0009836264653131366 2023-01-22 18:21:25.556045: step: 1520/526, loss: 0.00043516108416952193 2023-01-22 18:21:26.616736: step: 1524/526, loss: 0.05500594899058342 2023-01-22 18:21:27.701995: step: 1528/526, loss: 0.00010957517224596813 2023-01-22 18:21:28.787902: step: 1532/526, loss: 0.003940837923437357 2023-01-22 18:21:29.855723: step: 1536/526, loss: 0.00016839402087498456 2023-01-22 18:21:30.926244: step: 1540/526, loss: 0.00221430417150259 2023-01-22 18:21:31.987774: step: 1544/526, loss: 0.002151322551071644 2023-01-22 18:21:33.041768: step: 1548/526, loss: 0.002618568716570735 2023-01-22 18:21:34.104007: step: 1552/526, loss: 0.0002220207534264773 2023-01-22 18:21:35.165422: step: 1556/526, loss: 0.0 2023-01-22 18:21:36.233534: step: 1560/526, loss: 0.0033717863261699677 2023-01-22 18:21:37.304481: step: 1564/526, loss: 1.969084951269906e-05 2023-01-22 18:21:38.374779: step: 1568/526, loss: 0.001348881283774972 2023-01-22 18:21:39.438992: step: 1572/526, loss: 0.003764993976801634 2023-01-22 18:21:40.513130: step: 1576/526, loss: 0.0006136494339443743 2023-01-22 18:21:41.592321: step: 1580/526, loss: 0.0022987457923591137 2023-01-22 18:21:42.657054: step: 1584/526, loss: 0.014885510317981243 2023-01-22 18:21:43.736002: step: 1588/526, loss: 0.006264332681894302 2023-01-22 18:21:44.841417: step: 1592/526, loss: 1.3827666407451034e-05 2023-01-22 18:21:45.899205: step: 1596/526, loss: 0.013956621289253235 2023-01-22 18:21:46.964653: step: 1600/526, loss: 0.0021332809701561928 2023-01-22 18:21:48.016862: step: 1604/526, loss: 0.00043565905070863664 2023-01-22 18:21:49.087122: step: 1608/526, loss: 0.00025349942734465003 2023-01-22 18:21:50.167643: step: 1612/526, loss: 0.0009876987896859646 2023-01-22 18:21:51.249178: step: 1616/526, loss: 0.0023905502166599035 2023-01-22 18:21:52.334206: step: 1620/526, loss: 0.0028823192697018385 2023-01-22 18:21:53.393443: step: 1624/526, loss: 0.008391663432121277 2023-01-22 18:21:54.461745: step: 1628/526, loss: 0.01414255890995264 2023-01-22 18:21:55.536782: step: 1632/526, loss: 0.0006167968967929482 2023-01-22 18:21:56.611461: step: 1636/526, loss: 0.0017983202124014497 2023-01-22 18:21:57.681611: step: 1640/526, loss: 0.00035625157761387527 2023-01-22 18:21:58.737436: step: 1644/526, loss: 0.001247352221980691 
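One detail of the step lines that is easy to misread: the step counter runs past its own denominator (e.g. "step: 2104/526" just before each epoch summary). This is consistent with the --accumulate_step 4 flag in the command: the left-hand number advances by 4 per logged batch, while 526 appears to be the number of optimizer updates per epoch. A small arithmetic check under that assumption (illustrative names only, not from train.py):

accumulate_step = 4        # from the command line
last_logged_step = 2104    # highest "step" value printed within an epoch above
updates_per_epoch = last_logged_step // accumulate_step
assert updates_per_epoch == 526   # the fixed denominator in "step: N/526"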
2023-01-22 18:21:59.814497: step: 1648/526, loss: 0.00615895027294755 2023-01-22 18:22:00.891091: step: 1652/526, loss: 0.00038407949614338577 2023-01-22 18:22:01.960406: step: 1656/526, loss: 0.0007547169225290418 2023-01-22 18:22:03.043711: step: 1660/526, loss: 0.0030446143355220556 2023-01-22 18:22:04.108678: step: 1664/526, loss: 0.00211614603176713 2023-01-22 18:22:05.170980: step: 1668/526, loss: 1.129423708334798e-05 2023-01-22 18:22:06.230225: step: 1672/526, loss: 6.556462750495484e-08 2023-01-22 18:22:07.299382: step: 1676/526, loss: 0.0011379700154066086 2023-01-22 18:22:08.364993: step: 1680/526, loss: 0.0006366141024045646 2023-01-22 18:22:09.420352: step: 1684/526, loss: 0.0068187415599823 2023-01-22 18:22:10.487481: step: 1688/526, loss: 0.005435027182102203 2023-01-22 18:22:11.562457: step: 1692/526, loss: 0.006031684577465057 2023-01-22 18:22:12.639796: step: 1696/526, loss: 0.004352853633463383 2023-01-22 18:22:13.711081: step: 1700/526, loss: 0.0036465846933424473 2023-01-22 18:22:14.784288: step: 1704/526, loss: 0.0008612232632003725 2023-01-22 18:22:15.838904: step: 1708/526, loss: 0.002266576746478677 2023-01-22 18:22:16.913454: step: 1712/526, loss: 1.0622950867400505e-05 2023-01-22 18:22:17.973891: step: 1716/526, loss: 0.002228684024885297 2023-01-22 18:22:19.033161: step: 1720/526, loss: 0.00016539690841455013 2023-01-22 18:22:20.097317: step: 1724/526, loss: 0.004582252353429794 2023-01-22 18:22:21.174914: step: 1728/526, loss: 0.0024971964303404093 2023-01-22 18:22:22.236317: step: 1732/526, loss: 0.000973179005086422 2023-01-22 18:22:23.307073: step: 1736/526, loss: 0.0020890154410153627 2023-01-22 18:22:24.388955: step: 1740/526, loss: 0.0013916163006797433 2023-01-22 18:22:25.464261: step: 1744/526, loss: 0.001978084212169051 2023-01-22 18:22:26.563013: step: 1748/526, loss: 0.0027675561141222715 2023-01-22 18:22:27.629641: step: 1752/526, loss: 0.0013488027034327388 2023-01-22 18:22:28.710234: step: 1756/526, loss: 0.005854002665728331 2023-01-22 18:22:29.800306: step: 1760/526, loss: 0.007177778985351324 2023-01-22 18:22:30.892135: step: 1764/526, loss: 0.0071561397053301334 2023-01-22 18:22:31.972621: step: 1768/526, loss: 0.011648480780422688 2023-01-22 18:22:33.060453: step: 1772/526, loss: 0.005798365455120802 2023-01-22 18:22:34.125968: step: 1776/526, loss: 0.0035777639131993055 2023-01-22 18:22:35.210681: step: 1780/526, loss: 0.002151547698304057 2023-01-22 18:22:36.296417: step: 1784/526, loss: 0.0028241644613444805 2023-01-22 18:22:37.361876: step: 1788/526, loss: 0.002040019491687417 2023-01-22 18:22:38.438794: step: 1792/526, loss: 0.002346805762499571 2023-01-22 18:22:39.505117: step: 1796/526, loss: 0.0003907376085408032 2023-01-22 18:22:40.567801: step: 1800/526, loss: 0.007825076580047607 2023-01-22 18:22:41.643682: step: 1804/526, loss: 0.000653119059279561 2023-01-22 18:22:42.718493: step: 1808/526, loss: 0.005234704352915287 2023-01-22 18:22:43.798612: step: 1812/526, loss: 0.0002433751942589879 2023-01-22 18:22:44.852813: step: 1816/526, loss: 3.3592212275834754e-05 2023-01-22 18:22:45.925017: step: 1820/526, loss: 0.0006443400634452701 2023-01-22 18:22:47.006192: step: 1824/526, loss: 0.005711184814572334 2023-01-22 18:22:48.073026: step: 1828/526, loss: 0.005810832604765892 2023-01-22 18:22:49.145073: step: 1832/526, loss: 0.012061625719070435 2023-01-22 18:22:50.216881: step: 1836/526, loss: 0.027010422199964523 2023-01-22 18:22:51.288871: step: 1840/526, loss: 0.002008101437240839 2023-01-22 18:22:52.372968: step: 1844/526, loss: 
0.0037057814188301563 2023-01-22 18:22:53.436956: step: 1848/526, loss: 0.002416230970993638 2023-01-22 18:22:54.506863: step: 1852/526, loss: 6.940684397704899e-05 2023-01-22 18:22:55.559453: step: 1856/526, loss: 0.003448176896199584 2023-01-22 18:22:56.633349: step: 1860/526, loss: 0.020395131781697273 2023-01-22 18:22:57.698220: step: 1864/526, loss: 0.0014182326849550009 2023-01-22 18:22:58.768043: step: 1868/526, loss: 0.002631265437230468 2023-01-22 18:22:59.862878: step: 1872/526, loss: 0.004197416361421347 2023-01-22 18:23:00.936886: step: 1876/526, loss: 0.0028786426410079002 2023-01-22 18:23:02.024964: step: 1880/526, loss: 0.005287399515509605 2023-01-22 18:23:03.086447: step: 1884/526, loss: 0.00788356363773346 2023-01-22 18:23:04.189565: step: 1888/526, loss: 0.007930414751172066 2023-01-22 18:23:05.257812: step: 1892/526, loss: 0.0024912424851208925 2023-01-22 18:23:06.331484: step: 1896/526, loss: 0.0017342055216431618 2023-01-22 18:23:07.401001: step: 1900/526, loss: 0.004057242069393396 2023-01-22 18:23:08.469889: step: 1904/526, loss: 0.0010492629371583462 2023-01-22 18:23:09.527695: step: 1908/526, loss: 0.0007356642163358629 2023-01-22 18:23:10.606557: step: 1912/526, loss: 0.0019300456624478102 2023-01-22 18:23:11.662888: step: 1916/526, loss: 0.00762151600793004 2023-01-22 18:23:12.723220: step: 1920/526, loss: 0.012163740582764149 2023-01-22 18:23:13.809041: step: 1924/526, loss: 0.0008196496637538075 2023-01-22 18:23:14.882261: step: 1928/526, loss: 0.009409359656274319 2023-01-22 18:23:15.953205: step: 1932/526, loss: 0.0034837662242352962 2023-01-22 18:23:17.016194: step: 1936/526, loss: 0.0024080027360469103 2023-01-22 18:23:18.062270: step: 1940/526, loss: 0.005544229876250029 2023-01-22 18:23:19.144215: step: 1944/526, loss: 2.0633342501241714e-06 2023-01-22 18:23:20.201134: step: 1948/526, loss: 0.0016030978877097368 2023-01-22 18:23:21.262114: step: 1952/526, loss: 0.001350992708466947 2023-01-22 18:23:22.327352: step: 1956/526, loss: 0.004187397193163633 2023-01-22 18:23:23.407211: step: 1960/526, loss: 0.0009545682114548981 2023-01-22 18:23:24.459063: step: 1964/526, loss: 0.0011566587490960956 2023-01-22 18:23:25.519205: step: 1968/526, loss: 0.0005285569932311773 2023-01-22 18:23:26.599780: step: 1972/526, loss: 0.008675693534314632 2023-01-22 18:23:27.673465: step: 1976/526, loss: 0.006488036829978228 2023-01-22 18:23:28.764151: step: 1980/526, loss: 0.001760836923494935 2023-01-22 18:23:29.837348: step: 1984/526, loss: 0.0003029238432645798 2023-01-22 18:23:30.904108: step: 1988/526, loss: 0.002028706483542919 2023-01-22 18:23:31.972652: step: 1992/526, loss: 0.00293004815466702 2023-01-22 18:23:33.032801: step: 1996/526, loss: 0.003305745078250766 2023-01-22 18:23:34.106408: step: 2000/526, loss: 0.002126662991940975 2023-01-22 18:23:35.161388: step: 2004/526, loss: 0.0017672862159088254 2023-01-22 18:23:36.247327: step: 2008/526, loss: 0.004866019822657108 2023-01-22 18:23:37.318819: step: 2012/526, loss: 0.009832726791501045 2023-01-22 18:23:38.376708: step: 2016/526, loss: 0.0006234565516933799 2023-01-22 18:23:39.463095: step: 2020/526, loss: 0.023399246856570244 2023-01-22 18:23:40.543017: step: 2024/526, loss: 0.0049022892490029335 2023-01-22 18:23:41.619423: step: 2028/526, loss: 0.003801533719524741 2023-01-22 18:23:42.689975: step: 2032/526, loss: 0.0014570566127076745 2023-01-22 18:23:43.748954: step: 2036/526, loss: 0.0033299934584647417 2023-01-22 18:23:44.814517: step: 2040/526, loss: 0.0007682955474592745 2023-01-22 18:23:45.870658: 
step: 2044/526, loss: 0.007122586015611887 2023-01-22 18:23:46.930695: step: 2048/526, loss: 0.001749532762914896 2023-01-22 18:23:48.007794: step: 2052/526, loss: 4.950724814989371e-06 2023-01-22 18:23:49.068144: step: 2056/526, loss: 0.00013669295003637671 2023-01-22 18:23:50.156174: step: 2060/526, loss: 0.0047966125421226025 2023-01-22 18:23:51.218797: step: 2064/526, loss: 0.006963830441236496 2023-01-22 18:23:52.280597: step: 2068/526, loss: 0.00017712145927362144 2023-01-22 18:23:53.341989: step: 2072/526, loss: 0.003607033286243677 2023-01-22 18:23:54.397098: step: 2076/526, loss: 0.0011266947258263826 2023-01-22 18:23:55.462205: step: 2080/526, loss: 0.0041417754255235195 2023-01-22 18:23:56.531163: step: 2084/526, loss: 1.3134545042703394e-05 2023-01-22 18:23:57.600884: step: 2088/526, loss: 6.524119271489326e-06 2023-01-22 18:23:58.677733: step: 2092/526, loss: 0.0020126684103161097 2023-01-22 18:23:59.743490: step: 2096/526, loss: 0.003926699049770832 2023-01-22 18:24:00.814995: step: 2100/526, loss: 0.02189287543296814 2023-01-22 18:24:01.877076: step: 2104/526, loss: 0.005942977499216795 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3475765306122449, 'r': 0.2908562618595825, 'f1': 0.3166967975206612}, 'combined': 0.23335553501522405, 'stategy': 1, 'epoch': 14} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34175887021475254, 'r': 0.2394189887493459, 'f1': 0.2815783906454342}, 'combined': 0.15358821307932774, 'stategy': 1, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3154256112104949, 'r': 0.3345785895003162, 'f1': 0.32471992019643947}, 'combined': 0.23926730961842907, 'stategy': 1, 'epoch': 14} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3450749636687245, 'r': 0.269303699917129, 'f1': 0.3025169003180082}, 'combined': 0.16500921835527718, 'stategy': 1, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.326440406271992, 'r': 0.3289181323158022, 'f1': 0.32767458550175377}, 'combined': 0.24144443142234487, 'stategy': 1, 'epoch': 14} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33804342318151903, 'r': 0.27352189309481817, 'f1': 0.30237907414318616}, 'combined': 0.1649340404417379, 'stategy': 1, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'stategy': 1, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.3017241379310345, 'f1': 0.3723404255319149}, 'combined': 0.2482269503546099, 'stategy': 1, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 
'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 15 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 18:26:45.560609: step: 4/526, loss: 0.0030837920494377613 2023-01-22 18:26:46.611388: step: 8/526, loss: 5.392525054048747e-06 2023-01-22 18:26:47.675636: step: 12/526, loss: 3.723362533492036e-05 2023-01-22 18:26:48.713313: step: 16/526, loss: 3.468860813882202e-05 2023-01-22 18:26:49.761932: step: 20/526, loss: 1.832795959444411e-07 2023-01-22 18:26:50.823586: step: 24/526, loss: 0.001636696164496243 2023-01-22 18:26:51.890880: step: 28/526, loss: 0.01823437213897705 2023-01-22 18:26:52.953213: step: 32/526, loss: 0.002591481665149331 2023-01-22 18:26:54.008836: step: 36/526, loss: 3.17294652631972e-05 2023-01-22 18:26:55.088053: step: 40/526, loss: 0.005457828287035227 2023-01-22 18:26:56.162156: step: 44/526, loss: 0.008973308838903904 2023-01-22 18:26:57.231550: step: 48/526, loss: 0.002074422314763069 2023-01-22 18:26:58.289504: step: 52/526, loss: 0.002159965457394719 2023-01-22 18:26:59.368317: step: 56/526, loss: 0.004904224071651697 2023-01-22 18:27:00.421482: step: 60/526, loss: 0.0021082209423184395 2023-01-22 18:27:01.506272: step: 64/526, loss: 0.004400089383125305 2023-01-22 18:27:02.564289: step: 68/526, loss: 0.007214154116809368 2023-01-22 18:27:03.636316: step: 72/526, loss: 0.0073178550228476524 2023-01-22 18:27:04.697418: step: 76/526, loss: 
0.002440760377794504 2023-01-22 18:27:05.763227: step: 80/526, loss: 0.004771697334945202 2023-01-22 18:27:06.832136: step: 84/526, loss: 2.9408399768726667e-06 2023-01-22 18:27:07.902199: step: 88/526, loss: 0.055071085691452026 2023-01-22 18:27:08.955177: step: 92/526, loss: 0.0007967138080857694 2023-01-22 18:27:10.023104: step: 96/526, loss: 0.0013606281718239188 2023-01-22 18:27:11.124057: step: 100/526, loss: 0.004530522506684065 2023-01-22 18:27:12.188744: step: 104/526, loss: 0.00034838594729080796 2023-01-22 18:27:13.250301: step: 108/526, loss: 0.00045359120122157037 2023-01-22 18:27:14.318276: step: 112/526, loss: 0.0021417131647467613 2023-01-22 18:27:15.390012: step: 116/526, loss: 0.004992205183953047 2023-01-22 18:27:16.458086: step: 120/526, loss: 0.00042990536894649267 2023-01-22 18:27:17.526204: step: 124/526, loss: 0.002021200256422162 2023-01-22 18:27:18.595746: step: 128/526, loss: 6.429754284908995e-05 2023-01-22 18:27:19.659293: step: 132/526, loss: 0.0036365953274071217 2023-01-22 18:27:20.733390: step: 136/526, loss: 0.008111150935292244 2023-01-22 18:27:21.804454: step: 140/526, loss: 0.0005944063304923475 2023-01-22 18:27:22.863609: step: 144/526, loss: 0.001067745964974165 2023-01-22 18:27:23.933520: step: 148/526, loss: 0.000510052777826786 2023-01-22 18:27:25.000798: step: 152/526, loss: 0.005446300841867924 2023-01-22 18:27:26.065763: step: 156/526, loss: 5.827929999213666e-05 2023-01-22 18:27:27.140273: step: 160/526, loss: 0.0017400509677827358 2023-01-22 18:27:28.228898: step: 164/526, loss: 0.003879789263010025 2023-01-22 18:27:29.289022: step: 168/526, loss: 0.002057411940768361 2023-01-22 18:27:30.357594: step: 172/526, loss: 0.006376966834068298 2023-01-22 18:27:31.423708: step: 176/526, loss: 0.006155486684292555 2023-01-22 18:27:32.517995: step: 180/526, loss: 0.0008909195312298834 2023-01-22 18:27:33.589432: step: 184/526, loss: 0.0017011663876473904 2023-01-22 18:27:34.648856: step: 188/526, loss: 6.545661108248169e-06 2023-01-22 18:27:35.717659: step: 192/526, loss: 0.0014572531217709184 2023-01-22 18:27:36.782639: step: 196/526, loss: 0.0029842257499694824 2023-01-22 18:27:37.867566: step: 200/526, loss: 0.00021754551562480628 2023-01-22 18:27:38.929816: step: 204/526, loss: 0.0018615300068631768 2023-01-22 18:27:40.006014: step: 208/526, loss: 0.0027310634031891823 2023-01-22 18:27:41.095969: step: 212/526, loss: 0.0007247552275657654 2023-01-22 18:27:42.162531: step: 216/526, loss: 0.005324308294802904 2023-01-22 18:27:43.238234: step: 220/526, loss: 0.001008346094749868 2023-01-22 18:27:44.328105: step: 224/526, loss: 0.02025071531534195 2023-01-22 18:27:45.397739: step: 228/526, loss: 0.002746953396126628 2023-01-22 18:27:46.456557: step: 232/526, loss: 5.369909922592342e-05 2023-01-22 18:27:47.558689: step: 236/526, loss: 0.0011727057863026857 2023-01-22 18:27:48.635945: step: 240/526, loss: 0.0015861911233514547 2023-01-22 18:27:49.704376: step: 244/526, loss: 0.0011751428246498108 2023-01-22 18:27:50.796783: step: 248/526, loss: 0.005034309811890125 2023-01-22 18:27:51.865274: step: 252/526, loss: 1.7892503819894046e-05 2023-01-22 18:27:52.936472: step: 256/526, loss: 0.005751111079007387 2023-01-22 18:27:53.993278: step: 260/526, loss: 0.005098654888570309 2023-01-22 18:27:55.075220: step: 264/526, loss: 0.0010037494357675314 2023-01-22 18:27:56.143789: step: 268/526, loss: 0.002472367836162448 2023-01-22 18:27:57.227254: step: 272/526, loss: 0.003121607471257448 2023-01-22 18:27:58.312410: step: 276/526, loss: 0.00014994754747021943 
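In both "Current best result" blocks above, each language keeps its own best epoch (Chinese from epoch 3, Korean from epoch 8, Russian from epoch 1), and neither epoch 13 nor epoch 14 replaces them because their dev 'combined' scores are lower. A plausible sketch of that bookkeeping, assuming selection on the dev 'combined' value (hypothetical helper, not the actual training code):

best = {}  # language -> best result dict seen so far

def update_best(language, dev_result):
    # dev_result is one of the printed dicts, e.g. {..., 'combined': 0.2333..., 'epoch': 14}
    if language not in best or dev_result['combined'] > best[language]['combined']:
        best[language] = dev_result

# With the values above, Chinese stays at epoch 3 (0.23846 > 0.23336),
# Korean at epoch 8 (0.23971 > 0.23927), and Russian at epoch 1 (0.24322 > 0.24144).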
2023-01-22 18:27:59.396952: step: 280/526, loss: 0.003918241709470749 2023-01-22 18:28:00.466038: step: 284/526, loss: 9.827670874074101e-05 2023-01-22 18:28:01.527086: step: 288/526, loss: 0.0021978854201734066 2023-01-22 18:28:02.597382: step: 292/526, loss: 1.741137748467736e-05 2023-01-22 18:28:03.665393: step: 296/526, loss: 0.0014372613513842225 2023-01-22 18:28:04.750141: step: 300/526, loss: 0.0003253793402109295 2023-01-22 18:28:05.839567: step: 304/526, loss: 0.020002910867333412 2023-01-22 18:28:06.913475: step: 308/526, loss: 0.002194400876760483 2023-01-22 18:28:07.984915: step: 312/526, loss: 0.003964131698012352 2023-01-22 18:28:09.057692: step: 316/526, loss: 0.001673134509474039 2023-01-22 18:28:10.126644: step: 320/526, loss: 0.00022924515360500664 2023-01-22 18:28:11.203093: step: 324/526, loss: 0.0007709195488132536 2023-01-22 18:28:12.278298: step: 328/526, loss: 0.001254845643416047 2023-01-22 18:28:13.343412: step: 332/526, loss: 0.002715372946113348 2023-01-22 18:28:14.415978: step: 336/526, loss: 0.0002852242032531649 2023-01-22 18:28:15.491338: step: 340/526, loss: 0.004312833771109581 2023-01-22 18:28:16.562368: step: 344/526, loss: 0.007030846551060677 2023-01-22 18:28:17.639969: step: 348/526, loss: 0.0018822341226041317 2023-01-22 18:28:18.709369: step: 352/526, loss: 0.004254063591361046 2023-01-22 18:28:19.776279: step: 356/526, loss: 0.002120479941368103 2023-01-22 18:28:20.846429: step: 360/526, loss: 0.007172430399805307 2023-01-22 18:28:21.923830: step: 364/526, loss: 0.006202549207955599 2023-01-22 18:28:22.989609: step: 368/526, loss: 0.00013633868366014212 2023-01-22 18:28:24.062653: step: 372/526, loss: 0.0009908577194437385 2023-01-22 18:28:25.122958: step: 376/526, loss: 0.0046517071314156055 2023-01-22 18:28:26.190349: step: 380/526, loss: 0.0016412843251600862 2023-01-22 18:28:27.267737: step: 384/526, loss: 0.008400763384997845 2023-01-22 18:28:28.343104: step: 388/526, loss: 0.0003771914925891906 2023-01-22 18:28:29.412636: step: 392/526, loss: 0.007985120639204979 2023-01-22 18:28:30.473504: step: 396/526, loss: 0.00022815779084339738 2023-01-22 18:28:31.554306: step: 400/526, loss: 0.0019783317111432552 2023-01-22 18:28:32.638473: step: 404/526, loss: 0.002509333658963442 2023-01-22 18:28:33.704327: step: 408/526, loss: 0.002491546329110861 2023-01-22 18:28:34.761768: step: 412/526, loss: 0.0038743168115615845 2023-01-22 18:28:35.833814: step: 416/526, loss: 0.003474264871329069 2023-01-22 18:28:36.888683: step: 420/526, loss: 1.7693611880531535e-05 2023-01-22 18:28:37.943348: step: 424/526, loss: 9.059806325240061e-05 2023-01-22 18:28:38.997402: step: 428/526, loss: 0.0005716729792766273 2023-01-22 18:28:40.062621: step: 432/526, loss: 0.007911231368780136 2023-01-22 18:28:41.134139: step: 436/526, loss: 0.0011817221529781818 2023-01-22 18:28:42.205318: step: 440/526, loss: 7.003147038631141e-05 2023-01-22 18:28:43.265436: step: 444/526, loss: 0.024542130529880524 2023-01-22 18:28:44.345399: step: 448/526, loss: 0.0009809286566451192 2023-01-22 18:28:45.409416: step: 452/526, loss: 2.513647632440552e-05 2023-01-22 18:28:46.455475: step: 456/526, loss: 0.003996597602963448 2023-01-22 18:28:47.515928: step: 460/526, loss: 0.0010373241966590285 2023-01-22 18:28:48.603396: step: 464/526, loss: 0.00015869905473664403 2023-01-22 18:28:49.676341: step: 468/526, loss: 0.001618224661797285 2023-01-22 18:28:50.760494: step: 472/526, loss: 0.0003693256003316492 2023-01-22 18:28:51.841990: step: 476/526, loss: 0.004332988988608122 2023-01-22 
18:28:52.918290: step: 480/526, loss: 0.00193475850392133 2023-01-22 18:28:53.994706: step: 484/526, loss: 0.009533879347145557 2023-01-22 18:28:55.055262: step: 488/526, loss: 0.005407858639955521 2023-01-22 18:28:56.119695: step: 492/526, loss: 0.0011100302217528224 2023-01-22 18:28:57.182609: step: 496/526, loss: 0.007173910737037659 2023-01-22 18:28:58.242445: step: 500/526, loss: 9.391533967573196e-05 2023-01-22 18:28:59.306983: step: 504/526, loss: 5.143625458003953e-05 2023-01-22 18:29:00.379266: step: 508/526, loss: 0.00042028838652186096 2023-01-22 18:29:01.441731: step: 512/526, loss: 0.0005628418875858188 2023-01-22 18:29:02.525271: step: 516/526, loss: 0.004128247033804655 2023-01-22 18:29:03.599917: step: 520/526, loss: 0.013559453189373016 2023-01-22 18:29:04.658417: step: 524/526, loss: 3.6681096389656886e-05 2023-01-22 18:29:05.711895: step: 528/526, loss: 0.003175022779032588 2023-01-22 18:29:06.773815: step: 532/526, loss: 0.0035885819233953953 2023-01-22 18:29:07.834507: step: 536/526, loss: 0.0002623856416903436 2023-01-22 18:29:08.893339: step: 540/526, loss: 3.9572631067130715e-05 2023-01-22 18:29:09.968324: step: 544/526, loss: 0.004054659511893988 2023-01-22 18:29:11.028152: step: 548/526, loss: 0.00040347164031118155 2023-01-22 18:29:12.093584: step: 552/526, loss: 5.39237807970494e-05 2023-01-22 18:29:13.156003: step: 556/526, loss: 3.1650237360736355e-05 2023-01-22 18:29:14.222952: step: 560/526, loss: 0.005598296876996756 2023-01-22 18:29:15.288135: step: 564/526, loss: 0.0028897884767502546 2023-01-22 18:29:16.343933: step: 568/526, loss: 0.0011353478766977787 2023-01-22 18:29:17.405043: step: 572/526, loss: 0.0019426702056080103 2023-01-22 18:29:18.466939: step: 576/526, loss: 0.0028505504596978426 2023-01-22 18:29:19.529330: step: 580/526, loss: 0.026631765067577362 2023-01-22 18:29:20.603696: step: 584/526, loss: 0.00011829569848487154 2023-01-22 18:29:21.665566: step: 588/526, loss: 0.001947137643583119 2023-01-22 18:29:22.710851: step: 592/526, loss: 0.002710395958274603 2023-01-22 18:29:23.769095: step: 596/526, loss: 0.0033332386519759893 2023-01-22 18:29:24.824654: step: 600/526, loss: 0.010657456703484058 2023-01-22 18:29:25.891651: step: 604/526, loss: 0.0006016839761286974 2023-01-22 18:29:26.989166: step: 608/526, loss: 0.008573072031140327 2023-01-22 18:29:28.073877: step: 612/526, loss: 0.0037041762843728065 2023-01-22 18:29:29.142359: step: 616/526, loss: 0.001809286535717547 2023-01-22 18:29:30.212582: step: 620/526, loss: 0.003464053850620985 2023-01-22 18:29:31.292786: step: 624/526, loss: 0.03140509873628616 2023-01-22 18:29:32.383428: step: 628/526, loss: 3.086018477915786e-05 2023-01-22 18:29:33.445059: step: 632/526, loss: 0.0005039675161242485 2023-01-22 18:29:34.519593: step: 636/526, loss: 0.006325926166027784 2023-01-22 18:29:35.595930: step: 640/526, loss: 0.021041058003902435 2023-01-22 18:29:36.654290: step: 644/526, loss: 0.00012388094910420477 2023-01-22 18:29:37.713314: step: 648/526, loss: 0.0009035734110511839 2023-01-22 18:29:38.762160: step: 652/526, loss: 0.0025565605610609055 2023-01-22 18:29:39.836943: step: 656/526, loss: 0.00212524994276464 2023-01-22 18:29:40.907720: step: 660/526, loss: 3.650477447081357e-05 2023-01-22 18:29:41.963653: step: 664/526, loss: 0.0030039974953979254 2023-01-22 18:29:43.040068: step: 668/526, loss: 0.0033427078742533922 2023-01-22 18:29:44.102803: step: 672/526, loss: 0.00217223446816206 2023-01-22 18:29:45.183013: step: 676/526, loss: 0.003401148598641157 2023-01-22 18:29:46.282246: step: 
680/526, loss: 0.0015600905753672123 2023-01-22 18:29:47.346504: step: 684/526, loss: 0.0010989258298650384 2023-01-22 18:29:48.425260: step: 688/526, loss: 0.007191374897956848 2023-01-22 18:29:49.496490: step: 692/526, loss: 0.0015263402601704001 2023-01-22 18:29:50.576682: step: 696/526, loss: 2.1196392481215298e-05 2023-01-22 18:29:51.640065: step: 700/526, loss: 8.010312740225345e-05 2023-01-22 18:29:52.718465: step: 704/526, loss: 0.010291696526110172 2023-01-22 18:29:53.785397: step: 708/526, loss: 0.00112661044113338 2023-01-22 18:29:54.831992: step: 712/526, loss: 0.0002964239101856947 2023-01-22 18:29:55.898938: step: 716/526, loss: 0.011497760191559792 2023-01-22 18:29:56.968770: step: 720/526, loss: 0.006778793875128031 2023-01-22 18:29:58.038586: step: 724/526, loss: 0.005778077989816666 2023-01-22 18:29:59.099822: step: 728/526, loss: 0.004655781202018261 2023-01-22 18:30:00.167645: step: 732/526, loss: 0.015510223805904388 2023-01-22 18:30:01.240694: step: 736/526, loss: 0.0029409381095319986 2023-01-22 18:30:02.315082: step: 740/526, loss: 0.0007851281552575529 2023-01-22 18:30:03.396558: step: 744/526, loss: 0.0039327433332800865 2023-01-22 18:30:04.467228: step: 748/526, loss: 0.008739260025322437 2023-01-22 18:30:05.516010: step: 752/526, loss: 0.00786527618765831 2023-01-22 18:30:06.585821: step: 756/526, loss: 0.00018130269018001854 2023-01-22 18:30:07.635880: step: 760/526, loss: 0.0013243837747722864 2023-01-22 18:30:08.708777: step: 764/526, loss: 0.001044142059981823 2023-01-22 18:30:09.759792: step: 768/526, loss: 0.005148190073668957 2023-01-22 18:30:10.827931: step: 772/526, loss: 0.002936548786237836 2023-01-22 18:30:11.898821: step: 776/526, loss: 0.0024159452877938747 2023-01-22 18:30:12.965442: step: 780/526, loss: 0.0020789632108062506 2023-01-22 18:30:14.034626: step: 784/526, loss: 0.00671969261020422 2023-01-22 18:30:15.099866: step: 788/526, loss: 0.0045554968528449535 2023-01-22 18:30:16.168315: step: 792/526, loss: 0.0027457494288682938 2023-01-22 18:30:17.238111: step: 796/526, loss: 0.0029943548142910004 2023-01-22 18:30:18.300136: step: 800/526, loss: 2.6185055048699724e-06 2023-01-22 18:30:19.363105: step: 804/526, loss: 0.0009109939564950764 2023-01-22 18:30:20.445262: step: 808/526, loss: 0.0033608537632972 2023-01-22 18:30:21.515261: step: 812/526, loss: 0.0009890490910038352 2023-01-22 18:30:22.582574: step: 816/526, loss: 0.008628185838460922 2023-01-22 18:30:23.640505: step: 820/526, loss: 0.0002820239751599729 2023-01-22 18:30:24.689470: step: 824/526, loss: 0.001793151954188943 2023-01-22 18:30:25.763844: step: 828/526, loss: 0.00478060357272625 2023-01-22 18:30:26.829035: step: 832/526, loss: 0.008563145995140076 2023-01-22 18:30:27.905924: step: 836/526, loss: 7.561895927210571e-07 2023-01-22 18:30:28.967573: step: 840/526, loss: 0.0048646871000528336 2023-01-22 18:30:30.039090: step: 844/526, loss: 0.005125503521412611 2023-01-22 18:30:31.094332: step: 848/526, loss: 0.005483269691467285 2023-01-22 18:30:32.166192: step: 852/526, loss: 0.004141530022025108 2023-01-22 18:30:33.225774: step: 856/526, loss: 0.005066207610070705 2023-01-22 18:30:34.284249: step: 860/526, loss: 0.004884377587586641 2023-01-22 18:30:35.365386: step: 864/526, loss: 0.005633790977299213 2023-01-22 18:30:36.442576: step: 868/526, loss: 0.003129212185740471 2023-01-22 18:30:37.512122: step: 872/526, loss: 0.010895761661231518 2023-01-22 18:30:38.562340: step: 876/526, loss: 6.200697680469602e-05 2023-01-22 18:30:39.626388: step: 880/526, loss: 
0.002143584191799164 2023-01-22 18:30:40.696687: step: 884/526, loss: 0.0023682815954089165 2023-01-22 18:30:41.753149: step: 888/526, loss: 0.006069981958717108 2023-01-22 18:30:42.840969: step: 892/526, loss: 0.0014427766436710954 2023-01-22 18:30:43.899184: step: 896/526, loss: 0.01585986278951168 2023-01-22 18:30:44.969513: step: 900/526, loss: 2.9660292057087645e-05 2023-01-22 18:30:46.040147: step: 904/526, loss: 4.7156729124253616e-05 2023-01-22 18:30:47.107164: step: 908/526, loss: 1.3617322110803798e-05 2023-01-22 18:30:48.164035: step: 912/526, loss: 8.883810551196802e-06 2023-01-22 18:30:49.229884: step: 916/526, loss: 0.0001118779182434082 2023-01-22 18:30:50.293423: step: 920/526, loss: 0.005256551317870617 2023-01-22 18:30:51.366940: step: 924/526, loss: 0.009016133844852448 2023-01-22 18:30:52.445834: step: 928/526, loss: 0.00021188198297750205 2023-01-22 18:30:53.517279: step: 932/526, loss: 0.003637598594650626 2023-01-22 18:30:54.601047: step: 936/526, loss: 0.016615239903330803 2023-01-22 18:30:55.671482: step: 940/526, loss: 0.00040934860589914024 2023-01-22 18:30:56.734777: step: 944/526, loss: 0.0044012474827468395 2023-01-22 18:30:57.797546: step: 948/526, loss: 0.006855659186840057 2023-01-22 18:30:58.860831: step: 952/526, loss: 0.0003996416344307363 2023-01-22 18:30:59.926207: step: 956/526, loss: 7.552288298029453e-05 2023-01-22 18:31:00.999607: step: 960/526, loss: 0.0003318695235066116 2023-01-22 18:31:02.092659: step: 964/526, loss: 0.0017283064080402255 2023-01-22 18:31:03.184232: step: 968/526, loss: 0.0026691537350416183 2023-01-22 18:31:04.260519: step: 972/526, loss: 0.019624311476945877 2023-01-22 18:31:05.354783: step: 976/526, loss: 0.00014126319729257375 2023-01-22 18:31:06.442441: step: 980/526, loss: 0.0018047704361379147 2023-01-22 18:31:07.514745: step: 984/526, loss: 0.005832708440721035 2023-01-22 18:31:08.577942: step: 988/526, loss: 0.0017403271049261093 2023-01-22 18:31:09.656583: step: 992/526, loss: 0.0018242273945361376 2023-01-22 18:31:10.722289: step: 996/526, loss: 0.0009334094938822091 2023-01-22 18:31:11.786167: step: 1000/526, loss: 1.8027827763944515e-06 2023-01-22 18:31:12.858914: step: 1004/526, loss: 0.00012063900067005306 2023-01-22 18:31:13.922373: step: 1008/526, loss: 0.0023849811404943466 2023-01-22 18:31:15.007372: step: 1012/526, loss: 0.0011543261352926493 2023-01-22 18:31:16.073211: step: 1016/526, loss: 0.006599085405468941 2023-01-22 18:31:17.137898: step: 1020/526, loss: 0.0015714691253378987 2023-01-22 18:31:18.208491: step: 1024/526, loss: 0.0004263747250661254 2023-01-22 18:31:19.279888: step: 1028/526, loss: 0.0019194228807464242 2023-01-22 18:31:20.339039: step: 1032/526, loss: 0.008408263325691223 2023-01-22 18:31:21.420319: step: 1036/526, loss: 0.008526553399860859 2023-01-22 18:31:22.475852: step: 1040/526, loss: 0.005731299519538879 2023-01-22 18:31:23.539973: step: 1044/526, loss: 0.0020719878375530243 2023-01-22 18:31:24.603416: step: 1048/526, loss: 0.000633670948445797 2023-01-22 18:31:25.683686: step: 1052/526, loss: 0.0077095492742955685 2023-01-22 18:31:26.738351: step: 1056/526, loss: 0.0008598942076787353 2023-01-22 18:31:27.792663: step: 1060/526, loss: 0.004499847535043955 2023-01-22 18:31:28.858733: step: 1064/526, loss: 0.0008381953230127692 2023-01-22 18:31:29.932319: step: 1068/526, loss: 1.901035284390673e-05 2023-01-22 18:31:31.006515: step: 1072/526, loss: 0.005073018372058868 2023-01-22 18:31:32.079830: step: 1076/526, loss: 0.0025398065336048603 2023-01-22 18:31:33.157487: step: 1080/526, 
loss: 0.00629441486671567 2023-01-22 18:31:34.217493: step: 1084/526, loss: 0.00017982145072892308 2023-01-22 18:31:35.317734: step: 1088/526, loss: 0.0069209313951432705 2023-01-22 18:31:36.400546: step: 1092/526, loss: 0.0008697143639437854 2023-01-22 18:31:37.473687: step: 1096/526, loss: 0.0031767701730132103 2023-01-22 18:31:38.549594: step: 1100/526, loss: 0.0003001618024427444 2023-01-22 18:31:39.608416: step: 1104/526, loss: 0.0014380216598510742 2023-01-22 18:31:40.689241: step: 1108/526, loss: 0.0025857852306216955 2023-01-22 18:31:41.762170: step: 1112/526, loss: 0.000907846842892468 2023-01-22 18:31:42.850927: step: 1116/526, loss: 0.0008653496624901891 2023-01-22 18:31:43.903591: step: 1120/526, loss: 0.004827171564102173 2023-01-22 18:31:44.981522: step: 1124/526, loss: 0.026355817914009094 2023-01-22 18:31:46.043589: step: 1128/526, loss: 0.0024555332493036985 2023-01-22 18:31:47.106230: step: 1132/526, loss: 0.0009359808755107224 2023-01-22 18:31:48.165706: step: 1136/526, loss: 0.0004610107862390578 2023-01-22 18:31:49.258468: step: 1140/526, loss: 9.134774882113561e-05 2023-01-22 18:31:50.343387: step: 1144/526, loss: 0.00035617267712950706 2023-01-22 18:31:51.409336: step: 1148/526, loss: 4.709354470833205e-05 2023-01-22 18:31:52.498110: step: 1152/526, loss: 0.002250316087156534 2023-01-22 18:31:53.568438: step: 1156/526, loss: 3.417308835196309e-05 2023-01-22 18:31:54.642536: step: 1160/526, loss: 0.002382897771894932 2023-01-22 18:31:55.716260: step: 1164/526, loss: 0.04007905349135399 2023-01-22 18:31:56.784294: step: 1168/526, loss: 0.00044090134906582534 2023-01-22 18:31:57.871254: step: 1172/526, loss: 0.0035805406514555216 2023-01-22 18:31:58.937906: step: 1176/526, loss: 0.0007598121301271021 2023-01-22 18:32:00.005481: step: 1180/526, loss: 0.0035539648961275816 2023-01-22 18:32:01.093748: step: 1184/526, loss: 0.006274072453379631 2023-01-22 18:32:02.167095: step: 1188/526, loss: 0.0008535367669537663 2023-01-22 18:32:03.232159: step: 1192/526, loss: 9.034009053721093e-06 2023-01-22 18:32:04.303700: step: 1196/526, loss: 0.0010479734046384692 2023-01-22 18:32:05.358446: step: 1200/526, loss: 8.021449320949614e-05 2023-01-22 18:32:06.431613: step: 1204/526, loss: 0.012833714485168457 2023-01-22 18:32:07.511723: step: 1208/526, loss: 0.0031448130030184984 2023-01-22 18:32:08.592435: step: 1212/526, loss: 0.0006451279041357338 2023-01-22 18:32:09.644451: step: 1216/526, loss: 4.499461283558048e-06 2023-01-22 18:32:10.710244: step: 1220/526, loss: 0.0018942963797599077 2023-01-22 18:32:11.778939: step: 1224/526, loss: 0.0002706024970393628 2023-01-22 18:32:12.857674: step: 1228/526, loss: 0.007170474156737328 2023-01-22 18:32:13.961889: step: 1232/526, loss: 0.002073476789519191 2023-01-22 18:32:15.036157: step: 1236/526, loss: 0.0014980868436396122 2023-01-22 18:32:16.108334: step: 1240/526, loss: 0.0036375101190060377 2023-01-22 18:32:17.172415: step: 1244/526, loss: 0.006721377372741699 2023-01-22 18:32:18.262440: step: 1248/526, loss: 0.002317317295819521 2023-01-22 18:32:19.328860: step: 1252/526, loss: 0.0017704438650980592 2023-01-22 18:32:20.401357: step: 1256/526, loss: 0.009525641798973083 2023-01-22 18:32:21.509383: step: 1260/526, loss: 0.000281882646959275 2023-01-22 18:32:22.576429: step: 1264/526, loss: 0.0019992971792817116 2023-01-22 18:32:23.627283: step: 1268/526, loss: 0.0002666060463525355 2023-01-22 18:32:24.703107: step: 1272/526, loss: 0.0005354165914468467 2023-01-22 18:32:25.786458: step: 1276/526, loss: 1.6846135622472502e-05 2023-01-22 
18:32:26.868979: step: 1280/526, loss: 0.0006203344091773033 2023-01-22 18:32:27.922717: step: 1284/526, loss: 0.0017842404777184129 2023-01-22 18:32:29.007157: step: 1288/526, loss: 0.0007508570561185479 2023-01-22 18:32:30.063795: step: 1292/526, loss: 0.004485786892473698 2023-01-22 18:32:31.131001: step: 1296/526, loss: 0.00012149167741881683 2023-01-22 18:32:32.200493: step: 1300/526, loss: 6.777839007554576e-05 2023-01-22 18:32:33.264907: step: 1304/526, loss: 0.010077118873596191 2023-01-22 18:32:34.343527: step: 1308/526, loss: 0.0033351690508425236 2023-01-22 18:32:35.412807: step: 1312/526, loss: 0.0044865114614367485 2023-01-22 18:32:36.489056: step: 1316/526, loss: 0.00027660667547024786 2023-01-22 18:32:37.558930: step: 1320/526, loss: 0.0003761245170608163 2023-01-22 18:32:38.641449: step: 1324/526, loss: 0.0065785860642790794 2023-01-22 18:32:39.688596: step: 1328/526, loss: 1.4990439467510441e-06 2023-01-22 18:32:40.754371: step: 1332/526, loss: 0.0018392038764432073 2023-01-22 18:32:41.826005: step: 1336/526, loss: 9.461479749006685e-06 2023-01-22 18:32:42.908592: step: 1340/526, loss: 0.0026516823563724756 2023-01-22 18:32:43.988528: step: 1344/526, loss: 0.0034164125099778175 2023-01-22 18:32:45.050092: step: 1348/526, loss: 0.006494275294244289 2023-01-22 18:32:46.129647: step: 1352/526, loss: 6.096492143115029e-05 2023-01-22 18:32:47.205003: step: 1356/526, loss: 0.001117043779231608 2023-01-22 18:32:48.293950: step: 1360/526, loss: 0.004773973952978849 2023-01-22 18:32:49.349283: step: 1364/526, loss: 0.00043638001079671085 2023-01-22 18:32:50.413493: step: 1368/526, loss: 0.0005487690214067698 2023-01-22 18:32:51.497311: step: 1372/526, loss: 7.673940126551315e-05 2023-01-22 18:32:52.565668: step: 1376/526, loss: 0.010712930001318455 2023-01-22 18:32:53.649237: step: 1380/526, loss: 0.004162668716162443 2023-01-22 18:32:54.721583: step: 1384/526, loss: 0.0001251902140211314 2023-01-22 18:32:55.793217: step: 1388/526, loss: 0.003523906460031867 2023-01-22 18:32:56.866367: step: 1392/526, loss: 7.750542863504961e-05 2023-01-22 18:32:57.933154: step: 1396/526, loss: 0.013332800939679146 2023-01-22 18:32:59.016816: step: 1400/526, loss: 0.000300068553769961 2023-01-22 18:33:00.091193: step: 1404/526, loss: 0.000286574853817001 2023-01-22 18:33:01.158475: step: 1408/526, loss: 0.0067119356244802475 2023-01-22 18:33:02.231965: step: 1412/526, loss: 0.008381916210055351 2023-01-22 18:33:03.310224: step: 1416/526, loss: 0.021354010328650475 2023-01-22 18:33:04.366028: step: 1420/526, loss: 0.0021013005170971155 2023-01-22 18:33:05.465395: step: 1424/526, loss: 0.007751236204057932 2023-01-22 18:33:06.541754: step: 1428/526, loss: 0.0023494577035307884 2023-01-22 18:33:07.609154: step: 1432/526, loss: 1.2432211406121496e-05 2023-01-22 18:33:08.689605: step: 1436/526, loss: 0.0022359767463058233 2023-01-22 18:33:09.759263: step: 1440/526, loss: 0.01749587617814541 2023-01-22 18:33:10.849002: step: 1444/526, loss: 0.003885730169713497 2023-01-22 18:33:11.918737: step: 1448/526, loss: 0.003991383593529463 2023-01-22 18:33:12.998167: step: 1452/526, loss: 0.002184422453865409 2023-01-22 18:33:14.085493: step: 1456/526, loss: 0.001274257549084723 2023-01-22 18:33:15.169862: step: 1460/526, loss: 3.861587174469605e-05 2023-01-22 18:33:16.235540: step: 1464/526, loss: 0.001832600450143218 2023-01-22 18:33:17.312229: step: 1468/526, loss: 1.347813395113917e-05 2023-01-22 18:33:18.381354: step: 1472/526, loss: 0.001567143015563488 2023-01-22 18:33:19.461213: step: 1476/526, loss: 
0.002534782513976097 2023-01-22 18:33:20.533716: step: 1480/526, loss: 0.0012802876299247146 2023-01-22 18:33:21.617718: step: 1484/526, loss: 0.0027030291967093945 2023-01-22 18:33:22.695749: step: 1488/526, loss: 0.00013787155330646783 2023-01-22 18:33:23.774263: step: 1492/526, loss: 0.0011151605285704136 2023-01-22 18:33:24.846914: step: 1496/526, loss: 0.0018621307099238038 2023-01-22 18:33:25.909049: step: 1500/526, loss: 1.7261953644265304e-06 2023-01-22 18:33:26.975907: step: 1504/526, loss: 5.487791349878535e-05 2023-01-22 18:33:28.037086: step: 1508/526, loss: 0.0016611238243058324 2023-01-22 18:33:29.104992: step: 1512/526, loss: 0.004168468527495861 2023-01-22 18:33:30.183232: step: 1516/526, loss: 6.243514235393377e-06 2023-01-22 18:33:31.267407: step: 1520/526, loss: 0.005801422521471977 2023-01-22 18:33:32.332387: step: 1524/526, loss: 2.4645178200444207e-05 2023-01-22 18:33:33.416368: step: 1528/526, loss: 0.022216124460101128 2023-01-22 18:33:34.501200: step: 1532/526, loss: 0.0007919517811387777 2023-01-22 18:33:35.571007: step: 1536/526, loss: 0.00010203020792687312 2023-01-22 18:33:36.647547: step: 1540/526, loss: 0.0037814879324287176 2023-01-22 18:33:37.725185: step: 1544/526, loss: 0.0026631627697497606 2023-01-22 18:33:38.790397: step: 1548/526, loss: 0.007588594686239958 2023-01-22 18:33:39.852403: step: 1552/526, loss: 0.0005021351389586926 2023-01-22 18:33:40.938736: step: 1556/526, loss: 0.004490867257118225 2023-01-22 18:33:42.019282: step: 1560/526, loss: 0.006299326196312904 2023-01-22 18:33:43.091594: step: 1564/526, loss: 8.10993806226179e-05 2023-01-22 18:33:44.150612: step: 1568/526, loss: 3.5936860513174906e-05 2023-01-22 18:33:45.223188: step: 1572/526, loss: 0.005455671343952417 2023-01-22 18:33:46.288376: step: 1576/526, loss: 0.0022343171294778585 2023-01-22 18:33:47.342985: step: 1580/526, loss: 0.00565881934016943 2023-01-22 18:33:48.420700: step: 1584/526, loss: 0.002394011477008462 2023-01-22 18:33:49.500383: step: 1588/526, loss: 0.0005993566010147333 2023-01-22 18:33:50.570938: step: 1592/526, loss: 0.0018220811616629362 2023-01-22 18:33:51.661089: step: 1596/526, loss: 0.0029813917353749275 2023-01-22 18:33:52.732008: step: 1600/526, loss: 0.011426822282373905 2023-01-22 18:33:53.784978: step: 1604/526, loss: 0.001398384920321405 2023-01-22 18:33:54.862474: step: 1608/526, loss: 0.013480094261467457 2023-01-22 18:33:55.924258: step: 1612/526, loss: 0.004890612792223692 2023-01-22 18:33:56.986623: step: 1616/526, loss: 0.0 2023-01-22 18:33:58.061632: step: 1620/526, loss: 0.004262028727680445 2023-01-22 18:33:59.134388: step: 1624/526, loss: 0.001888250932097435 2023-01-22 18:34:00.207943: step: 1628/526, loss: 0.0030150809325277805 2023-01-22 18:34:01.303420: step: 1632/526, loss: 0.004785026889294386 2023-01-22 18:34:02.375073: step: 1636/526, loss: 0.006957195699214935 2023-01-22 18:34:03.456870: step: 1640/526, loss: 0.0035553639754652977 2023-01-22 18:34:04.532485: step: 1644/526, loss: 0.01161589939147234 2023-01-22 18:34:05.622681: step: 1648/526, loss: 0.008723870851099491 2023-01-22 18:34:06.685319: step: 1652/526, loss: 8.930452167987823e-05 2023-01-22 18:34:07.759594: step: 1656/526, loss: 0.013251790776848793 2023-01-22 18:34:08.824372: step: 1660/526, loss: 0.00013941475481260568 2023-01-22 18:34:09.904652: step: 1664/526, loss: 0.007746046409010887 2023-01-22 18:34:10.973103: step: 1668/526, loss: 0.004453939851373434 2023-01-22 18:34:12.044242: step: 1672/526, loss: 0.005910741165280342 2023-01-22 18:34:13.131051: step: 1676/526, 
loss: 0.007134700194001198 2023-01-22 18:34:14.187475: step: 1680/526, loss: 0.001658489927649498 2023-01-22 18:34:15.261866: step: 1684/526, loss: 0.00494175311177969 2023-01-22 18:34:16.321819: step: 1688/526, loss: 0.009418007917702198 2023-01-22 18:34:17.404408: step: 1692/526, loss: 0.009221172891557217 2023-01-22 18:34:18.477682: step: 1696/526, loss: 0.0004322432796470821 2023-01-22 18:34:19.548259: step: 1700/526, loss: 0.004004043061286211 2023-01-22 18:34:20.606601: step: 1704/526, loss: 0.002552700461819768 2023-01-22 18:34:21.672184: step: 1708/526, loss: 0.004919564817100763 2023-01-22 18:34:22.737852: step: 1712/526, loss: 0.006907130591571331 2023-01-22 18:34:23.801614: step: 1716/526, loss: 0.0006193004664964974 2023-01-22 18:34:24.874474: step: 1720/526, loss: 0.0037172953598201275 2023-01-22 18:34:25.939235: step: 1724/526, loss: 0.0005401995731517673 2023-01-22 18:34:26.997608: step: 1728/526, loss: 0.0053809951059520245 2023-01-22 18:34:28.059376: step: 1732/526, loss: 0.0023989479523152113 2023-01-22 18:34:29.130439: step: 1736/526, loss: 0.0012443233281373978 2023-01-22 18:34:30.194069: step: 1740/526, loss: 0.000889366667252034 2023-01-22 18:34:31.263668: step: 1744/526, loss: 0.0032394747249782085 2023-01-22 18:34:32.333162: step: 1748/526, loss: 0.003769340692088008 2023-01-22 18:34:33.407574: step: 1752/526, loss: 0.0005162744782865047 2023-01-22 18:34:34.485616: step: 1756/526, loss: 0.0006097472505643964 2023-01-22 18:34:35.564722: step: 1760/526, loss: 0.0001414238940924406 2023-01-22 18:34:36.630152: step: 1764/526, loss: 0.003278970718383789 2023-01-22 18:34:37.691083: step: 1768/526, loss: 0.000331067421939224 2023-01-22 18:34:38.766085: step: 1772/526, loss: 0.006036052014678717 2023-01-22 18:34:39.836238: step: 1776/526, loss: 0.0033444338478147984 2023-01-22 18:34:40.902177: step: 1780/526, loss: 0.0003828260232694447 2023-01-22 18:34:41.965474: step: 1784/526, loss: 0.004634466953575611 2023-01-22 18:34:43.058719: step: 1788/526, loss: 0.006191542837768793 2023-01-22 18:34:44.138503: step: 1792/526, loss: 0.010699182748794556 2023-01-22 18:34:45.215404: step: 1796/526, loss: 0.0032722302712500095 2023-01-22 18:34:46.307198: step: 1800/526, loss: 0.0 2023-01-22 18:34:47.387361: step: 1804/526, loss: 0.006117125041782856 2023-01-22 18:34:48.437042: step: 1808/526, loss: 0.007697529159486294 2023-01-22 18:34:49.502508: step: 1812/526, loss: 0.00014335779997054487 2023-01-22 18:34:50.564583: step: 1816/526, loss: 0.00042543807649053633 2023-01-22 18:34:51.634353: step: 1820/526, loss: 7.046247674225015e-07 2023-01-22 18:34:52.695040: step: 1824/526, loss: 0.001535007613711059 2023-01-22 18:34:53.770590: step: 1828/526, loss: 0.011807135306298733 2023-01-22 18:34:54.837361: step: 1832/526, loss: 0.0045645572245121 2023-01-22 18:34:55.896514: step: 1836/526, loss: 2.718126961553935e-05 2023-01-22 18:34:56.960138: step: 1840/526, loss: 0.00169331522192806 2023-01-22 18:34:58.019162: step: 1844/526, loss: 0.005931252613663673 2023-01-22 18:34:59.088973: step: 1848/526, loss: 0.002233896404504776 2023-01-22 18:35:00.159315: step: 1852/526, loss: 0.005738517735153437 2023-01-22 18:35:01.222547: step: 1856/526, loss: 0.006837034597992897 2023-01-22 18:35:02.294396: step: 1860/526, loss: 0.007836534641683102 2023-01-22 18:35:03.354662: step: 1864/526, loss: 0.006747606676071882 2023-01-22 18:35:04.396950: step: 1868/526, loss: 0.0005719589535146952 2023-01-22 18:35:05.475307: step: 1872/526, loss: 0.0006534755229949951 2023-01-22 18:35:06.522858: step: 1876/526, 
loss: 0.00026500006788410246 2023-01-22 18:35:07.578548: step: 1880/526, loss: 0.004114976618438959 2023-01-22 18:35:08.638583: step: 1884/526, loss: 0.0024707880802452564 2023-01-22 18:35:09.694870: step: 1888/526, loss: 0.0055861808359622955 2023-01-22 18:35:10.756134: step: 1892/526, loss: 0.004871733486652374 2023-01-22 18:35:11.795969: step: 1896/526, loss: 3.1851061521592783e-07 2023-01-22 18:35:12.863275: step: 1900/526, loss: 0.0063774725422263145 2023-01-22 18:35:13.920717: step: 1904/526, loss: 0.000972148438449949 2023-01-22 18:35:14.973368: step: 1908/526, loss: 0.0008065954898484051 2023-01-22 18:35:16.048857: step: 1912/526, loss: 0.0013497774489223957 2023-01-22 18:35:17.124631: step: 1916/526, loss: 0.0036005214788019657 2023-01-22 18:35:18.189612: step: 1920/526, loss: 8.062608685577288e-05 2023-01-22 18:35:19.252074: step: 1924/526, loss: 0.006431287154555321 2023-01-22 18:35:20.310406: step: 1928/526, loss: 0.006485434714704752 2023-01-22 18:35:21.350136: step: 1932/526, loss: 0.0005947311292402446 2023-01-22 18:35:22.406014: step: 1936/526, loss: 0.003580398391932249 2023-01-22 18:35:23.462428: step: 1940/526, loss: 0.0007175241480581462 2023-01-22 18:35:24.525405: step: 1944/526, loss: 0.002594831632450223 2023-01-22 18:35:25.589827: step: 1948/526, loss: 0.004493865184485912 2023-01-22 18:35:26.650794: step: 1952/526, loss: 6.127478991402313e-05 2023-01-22 18:35:27.717385: step: 1956/526, loss: 0.002369788009673357 2023-01-22 18:35:28.774884: step: 1960/526, loss: 0.002066218527033925 2023-01-22 18:35:29.836315: step: 1964/526, loss: 0.001789343194104731 2023-01-22 18:35:30.896698: step: 1968/526, loss: 0.0037907143123447895 2023-01-22 18:35:31.957042: step: 1972/526, loss: 8.078163955360651e-05 2023-01-22 18:35:33.007891: step: 1976/526, loss: 0.018211212009191513 2023-01-22 18:35:34.072142: step: 1980/526, loss: 0.007960271090269089 2023-01-22 18:35:35.127047: step: 1984/526, loss: 0.000857973878737539 2023-01-22 18:35:36.195789: step: 1988/526, loss: 0.00034316728124395013 2023-01-22 18:35:37.263444: step: 1992/526, loss: 0.001411373377777636 2023-01-22 18:35:38.320457: step: 1996/526, loss: 0.003415762446820736 2023-01-22 18:35:39.389577: step: 2000/526, loss: 0.011371809989213943 2023-01-22 18:35:40.453854: step: 2004/526, loss: 0.005508440546691418 2023-01-22 18:35:41.525139: step: 2008/526, loss: 0.00264719408005476 2023-01-22 18:35:42.566428: step: 2012/526, loss: 1.6927418982959352e-05 2023-01-22 18:35:43.655581: step: 2016/526, loss: 0.000632552313618362 2023-01-22 18:35:44.704539: step: 2020/526, loss: 0.0013850086834281683 2023-01-22 18:35:45.779827: step: 2024/526, loss: 0.0017230726080015302 2023-01-22 18:35:46.856557: step: 2028/526, loss: 0.0006493153050541878 2023-01-22 18:35:47.907388: step: 2032/526, loss: 0.0014450449962168932 2023-01-22 18:35:48.983821: step: 2036/526, loss: 0.004113477189093828 2023-01-22 18:35:50.040344: step: 2040/526, loss: 0.0014669264201074839 2023-01-22 18:35:51.135630: step: 2044/526, loss: 0.010625802911818027 2023-01-22 18:35:52.202478: step: 2048/526, loss: 0.001426542759872973 2023-01-22 18:35:53.267526: step: 2052/526, loss: 0.0011626218911260366 2023-01-22 18:35:54.318799: step: 2056/526, loss: 0.0018838917603716254 2023-01-22 18:35:55.397566: step: 2060/526, loss: 0.0001352133695036173 2023-01-22 18:35:56.454870: step: 2064/526, loss: 0.014209941029548645 2023-01-22 18:35:57.517875: step: 2068/526, loss: 0.005324861500412226 2023-01-22 18:35:58.582071: step: 2072/526, loss: 0.0004976170603185892 2023-01-22 
18:35:59.637401: step: 2076/526, loss: 0.001420331303961575 2023-01-22 18:36:00.700054: step: 2080/526, loss: 0.006688391324132681 2023-01-22 18:36:01.761692: step: 2084/526, loss: 0.0013125926489010453 2023-01-22 18:36:02.842907: step: 2088/526, loss: 0.0029741707257926464 2023-01-22 18:36:03.904314: step: 2092/526, loss: 0.003950865939259529 2023-01-22 18:36:04.960520: step: 2096/526, loss: 0.00022132458980195224 2023-01-22 18:36:06.040064: step: 2100/526, loss: 0.0014117838582023978 2023-01-22 18:36:07.107244: step: 2104/526, loss: 0.002955192234367132
==================================================
Loss: 0.004
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3465716704288939, 'r': 0.2913306451612903, 'f1': 0.3165592783505155}, 'combined': 0.23325420510037984, 'stategy': 1, 'epoch': 15}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3395883975317876, 'r': 0.23758748691784407, 'f1': 0.279574930726601}, 'combined': 0.15249541675996417, 'stategy': 1, 'epoch': 15}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3141987753882915, 'r': 0.3326810562934851, 'f1': 0.3231758832565284}, 'combined': 0.2381295981890209, 'stategy': 1, 'epoch': 15}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3426043943772558, 'r': 0.2670621628631491, 'f1': 0.30015315579375007}, 'combined': 0.1637199031602273, 'stategy': 1, 'epoch': 15}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3246064714052954, 'r': 0.3289181323158022, 'f1': 0.3267480786624463}, 'combined': 0.24076174217232885, 'stategy': 1, 'epoch': 15}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33804342318151903, 'r': 0.27352189309481817, 'f1': 0.30237907414318616}, 'combined': 0.1649340404417379, 'stategy': 1, 'epoch': 15}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 15}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3557692307692308, 'r': 0.40217391304347827, 'f1': 0.37755102040816324}, 'combined': 0.18877551020408162, 'stategy': 1, 'epoch': 15}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 15}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1}
******************************
Epoch: 16
command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 18:38:47.307616: step: 4/526, loss: 4.068960151926149e-06 2023-01-22 18:38:48.361597: step: 8/526, loss: 0.0010283082956448197 2023-01-22 18:38:49.408474: step: 12/526, loss: 0.0008490660111419857 2023-01-22 18:38:50.447738: step: 16/526, loss: 0.005104940850287676 2023-01-22 18:38:51.493343: step: 20/526, loss: 0.0044412012211978436 2023-01-22 18:38:52.549717: step: 24/526, loss: 0.0003217730263713747 2023-01-22 18:38:53.605190: step: 28/526, loss: 0.0014519531978294253 2023-01-22 18:38:54.681588: step: 32/526, loss: 0.0076652467250823975 2023-01-22 18:38:55.750576: step: 36/526, loss: 9.561112528899685e-05 2023-01-22 18:38:56.814039: step: 40/526, loss: 0.003580830991268158 2023-01-22 18:38:57.877122: step: 44/526, loss: 4.567169526126236e-05 2023-01-22 18:38:58.961546: step: 48/526, loss: 0.004925690125674009 2023-01-22 18:39:00.028653: step: 52/526, loss: 0.006543352734297514 2023-01-22 18:39:01.069454: step: 56/526, loss: 4.94741543661803e-05 2023-01-22 18:39:02.122756: step: 60/526, loss: 0.0013182016555219889 2023-01-22 18:39:03.181043: step: 64/526, loss: 0.0053622061386704445 2023-01-22 18:39:04.233795: step: 68/526, loss: 0.002632154617458582 2023-01-22 18:39:05.291211: step: 72/526, loss: 0.0018756737699732184 2023-01-22 18:39:06.345793: step: 76/526, loss: 4.548280230665114e-06 2023-01-22 18:39:07.397891: step: 80/526, loss: 0.00036563625326380134 2023-01-22 18:39:08.460345: step: 84/526, loss: 0.000709105865098536 2023-01-22 18:39:09.512601: step: 88/526, loss: 0.00803337711840868 2023-01-22 18:39:10.588026: step: 92/526, loss: 0.00015364907449111342 2023-01-22 18:39:11.657196: step: 96/526, loss: 0.011357865296304226 2023-01-22 18:39:12.708895: step: 100/526, loss: 0.0007185018621385098 2023-01-22 18:39:13.788449: step: 104/526, loss: 0.0031617230270057917 2023-01-22 
18:39:14.840855: step: 108/526, loss: 0.002540634712204337 2023-01-22 18:39:15.882916: step: 112/526, loss: 0.0017137971008196473 2023-01-22 18:39:16.947514: step: 116/526, loss: 0.009323597885668278 2023-01-22 18:39:18.008875: step: 120/526, loss: 0.005170644260942936 2023-01-22 18:39:19.072279: step: 124/526, loss: 0.0014280106406658888 2023-01-22 18:39:20.131245: step: 128/526, loss: 0.002427392639219761 2023-01-22 18:39:21.186041: step: 132/526, loss: 0.003096052911132574 2023-01-22 18:39:22.255615: step: 136/526, loss: 0.0019009527750313282 2023-01-22 18:39:23.337897: step: 140/526, loss: 0.0002588734496384859 2023-01-22 18:39:24.401665: step: 144/526, loss: 0.002704356098547578 2023-01-22 18:39:25.449206: step: 148/526, loss: 0.0046740686520934105 2023-01-22 18:39:26.502588: step: 152/526, loss: 0.000814518250990659 2023-01-22 18:39:27.572322: step: 156/526, loss: 0.0011716143926605582 2023-01-22 18:39:28.627427: step: 160/526, loss: 0.003976736683398485 2023-01-22 18:39:29.701169: step: 164/526, loss: 1.899892225765143e-07 2023-01-22 18:39:30.751351: step: 168/526, loss: 0.004484981298446655 2023-01-22 18:39:31.805613: step: 172/526, loss: 0.006474488414824009 2023-01-22 18:39:32.870842: step: 176/526, loss: 0.0006338073872029781 2023-01-22 18:39:33.929325: step: 180/526, loss: 0.0008972170180641115 2023-01-22 18:39:35.004211: step: 184/526, loss: 0.0003931898099835962 2023-01-22 18:39:36.050053: step: 188/526, loss: 0.006468059495091438 2023-01-22 18:39:37.112445: step: 192/526, loss: 0.003356741974130273 2023-01-22 18:39:38.175707: step: 196/526, loss: 0.00032153548090718687 2023-01-22 18:39:39.257736: step: 200/526, loss: 0.0027515965048223734 2023-01-22 18:39:40.322902: step: 204/526, loss: 0.0005426167626865208 2023-01-22 18:39:41.385172: step: 208/526, loss: 0.0001539260701974854 2023-01-22 18:39:42.446532: step: 212/526, loss: 0.004132452886551619 2023-01-22 18:39:43.504476: step: 216/526, loss: 0.0036066637840121984 2023-01-22 18:39:44.565149: step: 220/526, loss: 0.010818486101925373 2023-01-22 18:39:45.621809: step: 224/526, loss: 7.07808940205723e-05 2023-01-22 18:39:46.690265: step: 228/526, loss: 0.027018606662750244 2023-01-22 18:39:47.760027: step: 232/526, loss: 0.008826402947306633 2023-01-22 18:39:48.834279: step: 236/526, loss: 0.00012490537483245134 2023-01-22 18:39:49.875086: step: 240/526, loss: 9.547896479489282e-05 2023-01-22 18:39:50.948563: step: 244/526, loss: 0.0031370255164802074 2023-01-22 18:39:52.015145: step: 248/526, loss: 0.00010075035970658064 2023-01-22 18:39:53.078605: step: 252/526, loss: 0.002512074541300535 2023-01-22 18:39:54.150235: step: 256/526, loss: 0.001070154830813408 2023-01-22 18:39:55.203461: step: 260/526, loss: 0.0002965281018987298 2023-01-22 18:39:56.270422: step: 264/526, loss: 0.0008890108438208699 2023-01-22 18:39:57.332605: step: 268/526, loss: 0.0033979739528149366 2023-01-22 18:39:58.410415: step: 272/526, loss: 0.0011074502253904939 2023-01-22 18:39:59.479330: step: 276/526, loss: 0.00023713205882813781 2023-01-22 18:40:00.537902: step: 280/526, loss: 0.010953271761536598 2023-01-22 18:40:01.599984: step: 284/526, loss: 0.00011625825572991744 2023-01-22 18:40:02.688527: step: 288/526, loss: 0.0014055744977667928 2023-01-22 18:40:03.763502: step: 292/526, loss: 0.005265504587441683 2023-01-22 18:40:04.855662: step: 296/526, loss: 0.0021303279791027308 2023-01-22 18:40:05.917517: step: 300/526, loss: 0.0002604361798148602 2023-01-22 18:40:06.979612: step: 304/526, loss: 0.0006046337075531483 2023-01-22 18:40:08.035599: 
step: 308/526, loss: 0.0003276804636698216 2023-01-22 18:40:09.106412: step: 312/526, loss: 0.0008781430660746992 2023-01-22 18:40:10.166942: step: 316/526, loss: 3.695489795063622e-05 2023-01-22 18:40:11.242942: step: 320/526, loss: 0.0010540714720264077 2023-01-22 18:40:12.299515: step: 324/526, loss: 0.0006770803011022508 2023-01-22 18:40:13.371094: step: 328/526, loss: 0.0005231563118286431 2023-01-22 18:40:14.433883: step: 332/526, loss: 0.006028560921549797 2023-01-22 18:40:15.496585: step: 336/526, loss: 0.0022658423986285925 2023-01-22 18:40:16.565941: step: 340/526, loss: 0.0016470607370138168 2023-01-22 18:40:17.636041: step: 344/526, loss: 0.0007941769436001778 2023-01-22 18:40:18.688596: step: 348/526, loss: 0.0001967969146789983 2023-01-22 18:40:19.766357: step: 352/526, loss: 0.0034742760471999645 2023-01-22 18:40:20.841224: step: 356/526, loss: 0.00588691420853138 2023-01-22 18:40:21.904865: step: 360/526, loss: 0.002327021909877658 2023-01-22 18:40:22.955049: step: 364/526, loss: 0.0023656049743294716 2023-01-22 18:40:24.018015: step: 368/526, loss: 0.0026068270672112703 2023-01-22 18:40:25.066476: step: 372/526, loss: 0.0004402359772939235 2023-01-22 18:40:26.125068: step: 376/526, loss: 0.0018315461929887533 2023-01-22 18:40:27.187117: step: 380/526, loss: 0.0009081062162294984 2023-01-22 18:40:28.248613: step: 384/526, loss: 0.00020659201254602522 2023-01-22 18:40:29.305735: step: 388/526, loss: 0.00024463256704621017 2023-01-22 18:40:30.389530: step: 392/526, loss: 0.04892623797059059 2023-01-22 18:40:31.436814: step: 396/526, loss: 0.0076407152228057384 2023-01-22 18:40:32.522137: step: 400/526, loss: 0.0008287741802632809 2023-01-22 18:40:33.589958: step: 404/526, loss: 0.0003356395463924855 2023-01-22 18:40:34.652946: step: 408/526, loss: 0.01065562292933464 2023-01-22 18:40:35.706911: step: 412/526, loss: 5.333791341399774e-05 2023-01-22 18:40:36.777351: step: 416/526, loss: 6.649323040619493e-05 2023-01-22 18:40:37.837766: step: 420/526, loss: 0.00020237365970388055 2023-01-22 18:40:38.891390: step: 424/526, loss: 2.79204632533947e-05 2023-01-22 18:40:39.961995: step: 428/526, loss: 0.0009523549815639853 2023-01-22 18:40:41.007936: step: 432/526, loss: 0.00656931521371007 2023-01-22 18:40:42.069555: step: 436/526, loss: 1.2138661986682564e-06 2023-01-22 18:40:43.142479: step: 440/526, loss: 2.51901747105876e-05 2023-01-22 18:40:44.214066: step: 444/526, loss: 0.002826628740876913 2023-01-22 18:40:45.270852: step: 448/526, loss: 6.302967813098803e-05 2023-01-22 18:40:46.340503: step: 452/526, loss: 0.006691085174679756 2023-01-22 18:40:47.401349: step: 456/526, loss: 0.0016829074593260884 2023-01-22 18:40:48.465708: step: 460/526, loss: 0.004967859480530024 2023-01-22 18:40:49.536783: step: 464/526, loss: 0.001895446446724236 2023-01-22 18:40:50.593156: step: 468/526, loss: 0.006243078038096428 2023-01-22 18:40:51.687828: step: 472/526, loss: 0.002942392136901617 2023-01-22 18:40:52.736971: step: 476/526, loss: 0.0003913755645044148 2023-01-22 18:40:53.810998: step: 480/526, loss: 0.0001730632793623954 2023-01-22 18:40:54.869428: step: 484/526, loss: 0.0012408597394824028 2023-01-22 18:40:55.937751: step: 488/526, loss: 2.2972491933614947e-05 2023-01-22 18:40:57.004100: step: 492/526, loss: 0.0018391635967418551 2023-01-22 18:40:58.090136: step: 496/526, loss: 0.0035132095217704773 2023-01-22 18:40:59.155444: step: 500/526, loss: 0.0032427296973764896 2023-01-22 18:41:00.208256: step: 504/526, loss: 0.0003710891760420054 2023-01-22 18:41:01.279126: step: 508/526, 
loss: 0.00019869758398272097 2023-01-22 18:41:02.336117: step: 512/526, loss: 0.012902242131531239 2023-01-22 18:41:03.405056: step: 516/526, loss: 0.006148544140160084 2023-01-22 18:41:04.464083: step: 520/526, loss: 0.0013278629630804062 2023-01-22 18:41:05.512850: step: 524/526, loss: 0.00012468411296140403 2023-01-22 18:41:06.565764: step: 528/526, loss: 0.0007348512881435454 2023-01-22 18:41:07.643356: step: 532/526, loss: 0.0028557819314301014 2023-01-22 18:41:08.696329: step: 536/526, loss: 0.0013069960987195373 2023-01-22 18:41:09.755368: step: 540/526, loss: 5.154605560164782e-07 2023-01-22 18:41:10.814100: step: 544/526, loss: 0.005783005617558956 2023-01-22 18:41:11.868245: step: 548/526, loss: 0.004283764399588108 2023-01-22 18:41:12.945322: step: 552/526, loss: 0.0016775608528405428 2023-01-22 18:41:14.002869: step: 556/526, loss: 0.002984053920954466 2023-01-22 18:41:15.079024: step: 560/526, loss: 0.005226318724453449 2023-01-22 18:41:16.155896: step: 564/526, loss: 0.004898657090961933 2023-01-22 18:41:17.223749: step: 568/526, loss: 0.000245068920776248 2023-01-22 18:41:18.323590: step: 572/526, loss: 0.0023428683634847403 2023-01-22 18:41:19.383287: step: 576/526, loss: 0.0027577108703553677 2023-01-22 18:41:20.440943: step: 580/526, loss: 0.008508670143783092 2023-01-22 18:41:21.492754: step: 584/526, loss: 0.00961547251790762 2023-01-22 18:41:22.545013: step: 588/526, loss: 6.287618361966452e-06 2023-01-22 18:41:23.618668: step: 592/526, loss: 0.00040863556205295026 2023-01-22 18:41:24.674821: step: 596/526, loss: 0.001620003255084157 2023-01-22 18:41:25.729288: step: 600/526, loss: 0.0004178356612101197 2023-01-22 18:41:26.776103: step: 604/526, loss: 0.0023813536390662193 2023-01-22 18:41:27.830841: step: 608/526, loss: 0.003520034020766616 2023-01-22 18:41:28.909022: step: 612/526, loss: 0.004535804968327284 2023-01-22 18:41:29.973546: step: 616/526, loss: 0.005251354072242975 2023-01-22 18:41:31.045549: step: 620/526, loss: 0.006738132797181606 2023-01-22 18:41:32.114082: step: 624/526, loss: 0.009503346867859364 2023-01-22 18:41:33.183697: step: 628/526, loss: 0.0016803494654595852 2023-01-22 18:41:34.234685: step: 632/526, loss: 0.0001733922545099631 2023-01-22 18:41:35.276220: step: 636/526, loss: 0.0021864569280296564 2023-01-22 18:41:36.332326: step: 640/526, loss: 0.0009065971826203167 2023-01-22 18:41:37.402910: step: 644/526, loss: 0.004248241428285837 2023-01-22 18:41:38.481492: step: 648/526, loss: 0.003038237104192376 2023-01-22 18:41:39.524905: step: 652/526, loss: 0.0012156427837908268 2023-01-22 18:41:40.580651: step: 656/526, loss: 0.0007117817876860499 2023-01-22 18:41:41.642565: step: 660/526, loss: 0.0007347794016823173 2023-01-22 18:41:42.690636: step: 664/526, loss: 0.000734483590349555 2023-01-22 18:41:43.755249: step: 668/526, loss: 0.003017725655809045 2023-01-22 18:41:44.815414: step: 672/526, loss: 3.981875124736689e-06 2023-01-22 18:41:45.903150: step: 676/526, loss: 0.0028258981183171272 2023-01-22 18:41:46.978410: step: 680/526, loss: 7.842243940103799e-05 2023-01-22 18:41:48.041398: step: 684/526, loss: 0.003698616288602352 2023-01-22 18:41:49.106926: step: 688/526, loss: 0.012323994189500809 2023-01-22 18:41:50.155979: step: 692/526, loss: 0.00047964302939362824 2023-01-22 18:41:51.216376: step: 696/526, loss: 0.008856515400111675 2023-01-22 18:41:52.260548: step: 700/526, loss: 0.0016728986520320177 2023-01-22 18:41:53.338190: step: 704/526, loss: 0.003224977059289813 2023-01-22 18:41:54.405613: step: 708/526, loss: 
0.0005297587485983968 2023-01-22 18:41:55.482747: step: 712/526, loss: 0.0014179447898641229 2023-01-22 18:41:56.544659: step: 716/526, loss: 0.0033910893835127354 2023-01-22 18:41:57.619968: step: 720/526, loss: 0.0012026974000036716 2023-01-22 18:41:58.674716: step: 724/526, loss: 0.0008877201471477747 2023-01-22 18:41:59.739398: step: 728/526, loss: 0.0003373197396285832 2023-01-22 18:42:00.796638: step: 732/526, loss: 0.003152693621814251 2023-01-22 18:42:01.846428: step: 736/526, loss: 0.0013204979477450252 2023-01-22 18:42:02.908241: step: 740/526, loss: 0.0005635274574160576 2023-01-22 18:42:03.974579: step: 744/526, loss: 3.672230013762601e-05 2023-01-22 18:42:05.030321: step: 748/526, loss: 0.00015696999616920948 2023-01-22 18:42:06.080055: step: 752/526, loss: 2.9896282285335474e-05 2023-01-22 18:42:07.136694: step: 756/526, loss: 0.00040561743662692606 2023-01-22 18:42:08.208689: step: 760/526, loss: 0.014711439609527588 2023-01-22 18:42:09.279342: step: 764/526, loss: 0.00485612777993083 2023-01-22 18:42:10.345165: step: 768/526, loss: 0.0010174484923481941 2023-01-22 18:42:11.393287: step: 772/526, loss: 0.0018834865186363459 2023-01-22 18:42:12.459487: step: 776/526, loss: 0.0003905899648088962 2023-01-22 18:42:13.551435: step: 780/526, loss: 0.0012355458457022905 2023-01-22 18:42:14.605553: step: 784/526, loss: 0.0020234622061252594 2023-01-22 18:42:15.671708: step: 788/526, loss: 3.3573032851563767e-05 2023-01-22 18:42:16.754248: step: 792/526, loss: 0.0018341653048992157 2023-01-22 18:42:17.842352: step: 796/526, loss: 0.012312685139477253 2023-01-22 18:42:18.895624: step: 800/526, loss: 0.0008794953464530408 2023-01-22 18:42:19.947801: step: 804/526, loss: 0.0002312470314791426 2023-01-22 18:42:21.012735: step: 808/526, loss: 0.00383273814804852 2023-01-22 18:42:22.060460: step: 812/526, loss: 0.0037538649048656225 2023-01-22 18:42:23.127378: step: 816/526, loss: 0.005142300855368376 2023-01-22 18:42:24.183391: step: 820/526, loss: 0.0004101977974642068 2023-01-22 18:42:25.247878: step: 824/526, loss: 0.0037264302372932434 2023-01-22 18:42:26.304864: step: 828/526, loss: 0.0015933893155306578 2023-01-22 18:42:27.363749: step: 832/526, loss: 3.902550361090107e-06 2023-01-22 18:42:28.422141: step: 836/526, loss: 0.0007026093080639839 2023-01-22 18:42:29.481133: step: 840/526, loss: 6.361864961945685e-06 2023-01-22 18:42:30.560689: step: 844/526, loss: 0.005074107553809881 2023-01-22 18:42:31.636326: step: 848/526, loss: 0.0006282091489993036 2023-01-22 18:42:32.691489: step: 852/526, loss: 0.025801418349146843 2023-01-22 18:42:33.748902: step: 856/526, loss: 0.00027012216742150486 2023-01-22 18:42:34.820600: step: 860/526, loss: 0.0008300377521663904 2023-01-22 18:42:35.873453: step: 864/526, loss: 0.0003779538383241743 2023-01-22 18:42:36.919487: step: 868/526, loss: 0.0005150707438588142 2023-01-22 18:42:37.973891: step: 872/526, loss: 0.0019969423301517963 2023-01-22 18:42:39.053587: step: 876/526, loss: 0.003915760666131973 2023-01-22 18:42:40.115227: step: 880/526, loss: 0.0008733583963476121 2023-01-22 18:42:41.177473: step: 884/526, loss: 0.000988204381428659 2023-01-22 18:42:42.226572: step: 888/526, loss: 0.00015588136739097536 2023-01-22 18:42:43.338812: step: 892/526, loss: 0.004775822162628174 2023-01-22 18:42:44.386950: step: 896/526, loss: 0.00043138235923834145 2023-01-22 18:42:45.457420: step: 900/526, loss: 0.0018807523883879185 2023-01-22 18:42:46.531484: step: 904/526, loss: 0.002841358305886388 2023-01-22 18:42:47.593928: step: 908/526, loss: 
0.008554196916520596 2023-01-22 18:42:48.659900: step: 912/526, loss: 0.0013046994572505355 2023-01-22 18:42:49.725999: step: 916/526, loss: 0.002964446786791086 2023-01-22 18:42:50.791339: step: 920/526, loss: 0.007046420592814684 2023-01-22 18:42:51.846713: step: 924/526, loss: 0.00029620452551171184 2023-01-22 18:42:52.908576: step: 928/526, loss: 0.0010005889926105738 2023-01-22 18:42:53.972778: step: 932/526, loss: 0.005172127857804298 2023-01-22 18:42:55.013540: step: 936/526, loss: 1.3173620573070366e-05 2023-01-22 18:42:56.075016: step: 940/526, loss: 0.0014675639104098082 2023-01-22 18:42:57.143889: step: 944/526, loss: 0.0006840009591542184 2023-01-22 18:42:58.199705: step: 948/526, loss: 0.0015970367239788175 2023-01-22 18:42:59.268173: step: 952/526, loss: 0.00048429524758830667 2023-01-22 18:43:00.336744: step: 956/526, loss: 0.00012359561515040696 2023-01-22 18:43:01.388587: step: 960/526, loss: 0.0005046874866820872 2023-01-22 18:43:02.453990: step: 964/526, loss: 0.005257238168269396 2023-01-22 18:43:03.525384: step: 968/526, loss: 0.00025499783805571496 2023-01-22 18:43:04.589124: step: 972/526, loss: 0.0010451560374349356 2023-01-22 18:43:05.664553: step: 976/526, loss: 0.005054119508713484 2023-01-22 18:43:06.742600: step: 980/526, loss: 0.002518031280487776 2023-01-22 18:43:07.802040: step: 984/526, loss: 0.002625023713335395 2023-01-22 18:43:08.869713: step: 988/526, loss: 0.0029690733645111322 2023-01-22 18:43:09.923316: step: 992/526, loss: 0.0016516625182703137 2023-01-22 18:43:10.992292: step: 996/526, loss: 0.004330814816057682 2023-01-22 18:43:12.064356: step: 1000/526, loss: 0.0007549662259407341 2023-01-22 18:43:13.134382: step: 1004/526, loss: 0.001201360602863133 2023-01-22 18:43:14.200792: step: 1008/526, loss: 0.025760134682059288 2023-01-22 18:43:15.292053: step: 1012/526, loss: 0.0006959103047847748 2023-01-22 18:43:16.352812: step: 1016/526, loss: 0.0007833081763237715 2023-01-22 18:43:17.423035: step: 1020/526, loss: 0.00019327751942910254 2023-01-22 18:43:18.487277: step: 1024/526, loss: 0.005619648844003677 2023-01-22 18:43:19.543234: step: 1028/526, loss: 0.0009776627412065864 2023-01-22 18:43:20.601040: step: 1032/526, loss: 6.949681119294837e-05 2023-01-22 18:43:21.662631: step: 1036/526, loss: 0.019426757469773293 2023-01-22 18:43:22.735778: step: 1040/526, loss: 0.007642331998795271 2023-01-22 18:43:23.826631: step: 1044/526, loss: 0.00037573548615910113 2023-01-22 18:43:24.893247: step: 1048/526, loss: 0.0027573942206799984 2023-01-22 18:43:25.959865: step: 1052/526, loss: 0.006561039015650749 2023-01-22 18:43:27.025066: step: 1056/526, loss: 0.000256403029197827 2023-01-22 18:43:28.081578: step: 1060/526, loss: 0.026282310485839844 2023-01-22 18:43:29.138587: step: 1064/526, loss: 0.0025156671181321144 2023-01-22 18:43:30.199716: step: 1068/526, loss: 0.002596191829070449 2023-01-22 18:43:31.252339: step: 1072/526, loss: 0.0012271407758817077 2023-01-22 18:43:32.320501: step: 1076/526, loss: 0.0027946566697210073 2023-01-22 18:43:33.377712: step: 1080/526, loss: 0.008756760507822037 2023-01-22 18:43:34.449397: step: 1084/526, loss: 0.009264402091503143 2023-01-22 18:43:35.513615: step: 1088/526, loss: 0.0018663645023480058 2023-01-22 18:43:36.588441: step: 1092/526, loss: 0.004904875531792641 2023-01-22 18:43:37.649106: step: 1096/526, loss: 0.0001582323748152703 2023-01-22 18:43:38.695910: step: 1100/526, loss: 0.000568255374673754 2023-01-22 18:43:39.751045: step: 1104/526, loss: 0.004775824025273323 2023-01-22 18:43:40.812375: step: 
1108/526, loss: 0.0055280146189033985 2023-01-22 18:43:41.869907: step: 1112/526, loss: 0.003691577585414052 2023-01-22 18:43:42.922359: step: 1116/526, loss: 0.00018885928147938102 2023-01-22 18:43:43.983466: step: 1120/526, loss: 0.0007241407874971628 2023-01-22 18:43:45.058556: step: 1124/526, loss: 0.00014857054338790476 2023-01-22 18:43:46.120293: step: 1128/526, loss: 9.757585939951241e-05 2023-01-22 18:43:47.187172: step: 1132/526, loss: 0.0001084349351003766 2023-01-22 18:43:48.242510: step: 1136/526, loss: 0.00022744714806322008 2023-01-22 18:43:49.298199: step: 1140/526, loss: 0.0033771421294659376 2023-01-22 18:43:50.353310: step: 1144/526, loss: 0.008672861382365227 2023-01-22 18:43:51.406604: step: 1148/526, loss: 0.0002721585042309016 2023-01-22 18:43:52.456319: step: 1152/526, loss: 0.0007387588266283274 2023-01-22 18:43:53.516915: step: 1156/526, loss: 0.0036961787845939398 2023-01-22 18:43:54.587692: step: 1160/526, loss: 0.007462798152118921 2023-01-22 18:43:55.638006: step: 1164/526, loss: 4.965122116118437e-06 2023-01-22 18:43:56.713922: step: 1168/526, loss: 0.0025989911518990993 2023-01-22 18:43:57.792948: step: 1172/526, loss: 9.702862371341325e-06 2023-01-22 18:43:58.849508: step: 1176/526, loss: 0.007660785689949989 2023-01-22 18:43:59.914191: step: 1180/526, loss: 0.0233598779886961 2023-01-22 18:44:00.993485: step: 1184/526, loss: 0.00031129305716603994 2023-01-22 18:44:02.071170: step: 1188/526, loss: 0.0014312692219391465 2023-01-22 18:44:03.141514: step: 1192/526, loss: 0.006747620180249214 2023-01-22 18:44:04.217791: step: 1196/526, loss: 0.0053878226317465305 2023-01-22 18:44:05.296055: step: 1200/526, loss: 0.00036053103394806385 2023-01-22 18:44:06.360221: step: 1204/526, loss: 0.004016116727143526 2023-01-22 18:44:07.447781: step: 1208/526, loss: 0.0005234128329902887 2023-01-22 18:44:08.513685: step: 1212/526, loss: 0.0008772523142397404 2023-01-22 18:44:09.576525: step: 1216/526, loss: 0.0006490530795417726 2023-01-22 18:44:10.637520: step: 1220/526, loss: 0.002764162141829729 2023-01-22 18:44:11.695105: step: 1224/526, loss: 0.00039265770465135574 2023-01-22 18:44:12.744882: step: 1228/526, loss: 0.0022180587984621525 2023-01-22 18:44:13.835024: step: 1232/526, loss: 0.002280870685353875 2023-01-22 18:44:14.912524: step: 1236/526, loss: 0.0023603339213877916 2023-01-22 18:44:15.975487: step: 1240/526, loss: 0.0003964714123867452 2023-01-22 18:44:17.045480: step: 1244/526, loss: 0.022700568661093712 2023-01-22 18:44:18.115592: step: 1248/526, loss: 0.0120023088529706 2023-01-22 18:44:19.179200: step: 1252/526, loss: 0.001121222274377942 2023-01-22 18:44:20.262716: step: 1256/526, loss: 0.0008426845888607204 2023-01-22 18:44:21.305429: step: 1260/526, loss: 0.005639289505779743 2023-01-22 18:44:22.379135: step: 1264/526, loss: 0.05096236243844032 2023-01-22 18:44:23.455971: step: 1268/526, loss: 0.00015530727978330106 2023-01-22 18:44:24.522782: step: 1272/526, loss: 0.004302356857806444 2023-01-22 18:44:25.590266: step: 1276/526, loss: 4.115326373721473e-05 2023-01-22 18:44:26.652642: step: 1280/526, loss: 0.002614434575662017 2023-01-22 18:44:27.716867: step: 1284/526, loss: 0.004619142971932888 2023-01-22 18:44:28.771228: step: 1288/526, loss: 0.001732018543407321 2023-01-22 18:44:29.819260: step: 1292/526, loss: 0.01094040460884571 2023-01-22 18:44:30.882213: step: 1296/526, loss: 0.009468117728829384 2023-01-22 18:44:31.942322: step: 1300/526, loss: 0.00018498997087590396 2023-01-22 18:44:33.019930: step: 1304/526, loss: 0.004117588046938181 
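Note on the step counter in these lines: it runs past the printed total (each epoch ends at step: 2104/526, as at the end of Epoch 15 above). This is consistent with the counter advancing by --accumulate_step (4) per logged batch while the denominator is the number of batches per epoch, since 4 x 526 = 2104; that is one plausible reading of the format, not something the log itself states. A small runnable check of the arithmetic and of the line format, with an illustrative loss value:

from datetime import datetime

accumulate_step = 4       # --accumulate_step from the command line above
batches_per_epoch = 526   # the denominator printed in every step line

# The last counter value seen in an epoch (2104) equals accumulate_step * batches_per_epoch.
assert accumulate_step * batches_per_epoch == 2104

# Reproducing the line format under this reading (loss value is illustrative):
batch_idx, loss = 1, 4.068960151926149e-06
print(f"{datetime.now()}: step: {batch_idx * accumulate_step}/{batches_per_epoch}, loss: {loss}")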
2023-01-22 18:44:34.073456: step: 1308/526, loss: 0.0008519261027686298 2023-01-22 18:44:35.162631: step: 1312/526, loss: 0.0042347111739218235 2023-01-22 18:44:36.231920: step: 1316/526, loss: 0.002486675977706909 2023-01-22 18:44:37.289298: step: 1320/526, loss: 0.004362615756690502 2023-01-22 18:44:38.357287: step: 1324/526, loss: 0.004353542346507311 2023-01-22 18:44:39.428778: step: 1328/526, loss: 0.0025297722313553095 2023-01-22 18:44:40.477408: step: 1332/526, loss: 0.0013502462534233928 2023-01-22 18:44:41.538449: step: 1336/526, loss: 0.004053368698805571 2023-01-22 18:44:42.615300: step: 1340/526, loss: 0.0014518317766487598 2023-01-22 18:44:43.664948: step: 1344/526, loss: 0.000652880291454494 2023-01-22 18:44:44.733374: step: 1348/526, loss: 7.175199425546452e-05 2023-01-22 18:44:45.793340: step: 1352/526, loss: 0.00521273585036397 2023-01-22 18:44:46.856873: step: 1356/526, loss: 0.0006639304338023067 2023-01-22 18:44:47.937845: step: 1360/526, loss: 0.009443363174796104 2023-01-22 18:44:49.008222: step: 1364/526, loss: 0.021721703931689262 2023-01-22 18:44:50.074623: step: 1368/526, loss: 0.009586770087480545 2023-01-22 18:44:51.155626: step: 1372/526, loss: 0.0026133153587579727 2023-01-22 18:44:52.205002: step: 1376/526, loss: 0.004981752950698137 2023-01-22 18:44:53.253852: step: 1380/526, loss: 0.0008841459057293832 2023-01-22 18:44:54.311242: step: 1384/526, loss: 0.005524763371795416 2023-01-22 18:44:55.378748: step: 1388/526, loss: 0.004551276098936796 2023-01-22 18:44:56.441140: step: 1392/526, loss: 0.0033916854299604893 2023-01-22 18:44:57.507627: step: 1396/526, loss: 0.0003417876723688096 2023-01-22 18:44:58.568488: step: 1400/526, loss: 0.005211398005485535 2023-01-22 18:44:59.613859: step: 1404/526, loss: 0.0066888281144201756 2023-01-22 18:45:00.691481: step: 1408/526, loss: 0.001584374695084989 2023-01-22 18:45:01.777702: step: 1412/526, loss: 0.01504538208246231 2023-01-22 18:45:02.849587: step: 1416/526, loss: 0.006462868768721819 2023-01-22 18:45:03.930496: step: 1420/526, loss: 2.6769657779368572e-05 2023-01-22 18:45:04.991018: step: 1424/526, loss: 0.0009967457735911012 2023-01-22 18:45:06.040160: step: 1428/526, loss: 0.000379564386093989 2023-01-22 18:45:07.087604: step: 1432/526, loss: 1.5280096704373136e-05 2023-01-22 18:45:08.143213: step: 1436/526, loss: 0.0013429338578134775 2023-01-22 18:45:09.206373: step: 1440/526, loss: 0.0020745915826410055 2023-01-22 18:45:10.273555: step: 1444/526, loss: 6.939359445823357e-05 2023-01-22 18:45:11.339574: step: 1448/526, loss: 0.007030988112092018 2023-01-22 18:45:12.411860: step: 1452/526, loss: 0.0007565665291622281 2023-01-22 18:45:13.473704: step: 1456/526, loss: 0.0010497402399778366 2023-01-22 18:45:14.547368: step: 1460/526, loss: 0.0021779565140604973 2023-01-22 18:45:15.601207: step: 1464/526, loss: 1.1618034477578476e-05 2023-01-22 18:45:16.681888: step: 1468/526, loss: 0.0005448565352708101 2023-01-22 18:45:17.761096: step: 1472/526, loss: 0.0010734691750258207 2023-01-22 18:45:18.809089: step: 1476/526, loss: 0.00302223046310246 2023-01-22 18:45:19.868427: step: 1480/526, loss: 0.0034733391366899014 2023-01-22 18:45:20.935480: step: 1484/526, loss: 5.210693871049443e-06 2023-01-22 18:45:21.993802: step: 1488/526, loss: 0.009244834072887897 2023-01-22 18:45:23.058668: step: 1492/526, loss: 0.0016329266363754869 2023-01-22 18:45:24.117071: step: 1496/526, loss: 0.007443442940711975 2023-01-22 18:45:25.181892: step: 1500/526, loss: 0.005738737992942333 2023-01-22 18:45:26.249160: step: 1504/526, 
loss: 0.009049099870026112 2023-01-22 18:45:27.305173: step: 1508/526, loss: 0.0006189775886014104 2023-01-22 18:45:28.372499: step: 1512/526, loss: 0.0015118439914658666 2023-01-22 18:45:29.428384: step: 1516/526, loss: 1.087980763259111e-06 2023-01-22 18:45:30.477611: step: 1520/526, loss: 0.005591883324086666 2023-01-22 18:45:31.539414: step: 1524/526, loss: 0.0005771254654973745 2023-01-22 18:45:32.612753: step: 1528/526, loss: 0.005772868171334267 2023-01-22 18:45:33.675750: step: 1532/526, loss: 0.0005950903287157416 2023-01-22 18:45:34.727431: step: 1536/526, loss: 0.0033837007358670235 2023-01-22 18:45:35.783289: step: 1540/526, loss: 0.0016145651461556554 2023-01-22 18:45:36.838105: step: 1544/526, loss: 0.002009027637541294 2023-01-22 18:45:37.888230: step: 1548/526, loss: 0.004241458605974913 2023-01-22 18:45:38.961627: step: 1552/526, loss: 0.03162920102477074 2023-01-22 18:45:40.032030: step: 1556/526, loss: 0.04032851383090019 2023-01-22 18:45:41.089508: step: 1560/526, loss: 4.8521524149691686e-05 2023-01-22 18:45:42.156774: step: 1564/526, loss: 0.002941735554486513 2023-01-22 18:45:43.220636: step: 1568/526, loss: 0.0013211008626967669 2023-01-22 18:45:44.283689: step: 1572/526, loss: 0.00104530097451061 2023-01-22 18:45:45.331283: step: 1576/526, loss: 0.00015663108206354082 2023-01-22 18:45:46.408043: step: 1580/526, loss: 0.00042911607306450605 2023-01-22 18:45:47.463479: step: 1584/526, loss: 0.0017179639544337988 2023-01-22 18:45:48.518777: step: 1588/526, loss: 0.0005434823106043041 2023-01-22 18:45:49.595326: step: 1592/526, loss: 0.007555896881967783 2023-01-22 18:45:50.664365: step: 1596/526, loss: 0.0017910299357026815 2023-01-22 18:45:51.725243: step: 1600/526, loss: 0.011661226861178875 2023-01-22 18:45:52.797591: step: 1604/526, loss: 0.008104612119495869 2023-01-22 18:45:53.864448: step: 1608/526, loss: 0.01066520158201456 2023-01-22 18:45:54.934804: step: 1612/526, loss: 0.00431863171979785 2023-01-22 18:45:55.993924: step: 1616/526, loss: 0.0006449631182476878 2023-01-22 18:45:57.052880: step: 1620/526, loss: 0.0013828417286276817 2023-01-22 18:45:58.120049: step: 1624/526, loss: 0.000535124447196722 2023-01-22 18:45:59.201138: step: 1628/526, loss: 0.0016050381818786263 2023-01-22 18:46:00.249696: step: 1632/526, loss: 0.0070837028324604034 2023-01-22 18:46:01.316804: step: 1636/526, loss: 0.005527130328118801 2023-01-22 18:46:02.404680: step: 1640/526, loss: 0.010753164999186993 2023-01-22 18:46:03.454463: step: 1644/526, loss: 0.002849144861102104 2023-01-22 18:46:04.517001: step: 1648/526, loss: 0.00012709471047855914 2023-01-22 18:46:05.577677: step: 1652/526, loss: 0.00023370598501060158 2023-01-22 18:46:06.654236: step: 1656/526, loss: 0.00445404089987278 2023-01-22 18:46:07.708916: step: 1660/526, loss: 0.023180246353149414 2023-01-22 18:46:08.780853: step: 1664/526, loss: 0.0011947044404223561 2023-01-22 18:46:09.848753: step: 1668/526, loss: 0.004260911140590906 2023-01-22 18:46:10.922090: step: 1672/526, loss: 8.963334403233603e-05 2023-01-22 18:46:12.012289: step: 1676/526, loss: 0.0007914318703114986 2023-01-22 18:46:13.080103: step: 1680/526, loss: 0.0015796126099303365 2023-01-22 18:46:14.153043: step: 1684/526, loss: 0.0013691557105630636 2023-01-22 18:46:15.210949: step: 1688/526, loss: 0.0003086199576500803 2023-01-22 18:46:16.289634: step: 1692/526, loss: 0.008661982603371143 2023-01-22 18:46:17.358839: step: 1696/526, loss: 0.00021431955974549055 2023-01-22 18:46:18.418099: step: 1700/526, loss: 0.001694253645837307 2023-01-22 
18:46:19.473105: step: 1704/526, loss: 0.0001425338996341452 2023-01-22 18:46:20.519392: step: 1708/526, loss: 0.002138415351510048 2023-01-22 18:46:21.580995: step: 1712/526, loss: 0.00791004579514265 2023-01-22 18:46:22.643671: step: 1716/526, loss: 0.00325650442391634 2023-01-22 18:46:23.710795: step: 1720/526, loss: 0.007972813211381435 2023-01-22 18:46:24.770538: step: 1724/526, loss: 0.001294921850785613 2023-01-22 18:46:25.865401: step: 1728/526, loss: 0.0015757974470034242 2023-01-22 18:46:26.941855: step: 1732/526, loss: 0.007017717696726322 2023-01-22 18:46:28.011426: step: 1736/526, loss: 0.0016986187547445297 2023-01-22 18:46:29.072595: step: 1740/526, loss: 0.0077350446954369545 2023-01-22 18:46:30.137506: step: 1744/526, loss: 0.0003149181429762393 2023-01-22 18:46:31.200541: step: 1748/526, loss: 0.0009582445491105318 2023-01-22 18:46:32.266461: step: 1752/526, loss: 0.001306909485720098 2023-01-22 18:46:33.315218: step: 1756/526, loss: 0.00040419885772280395 2023-01-22 18:46:34.373712: step: 1760/526, loss: 0.0005854007904417813 2023-01-22 18:46:35.439788: step: 1764/526, loss: 0.018983155488967896 2023-01-22 18:46:36.500782: step: 1768/526, loss: 0.0009746649884618819 2023-01-22 18:46:37.563450: step: 1772/526, loss: 0.0016093113226816058 2023-01-22 18:46:38.642832: step: 1776/526, loss: 0.00048363127280026674 2023-01-22 18:46:39.711386: step: 1780/526, loss: 0.004746370483189821 2023-01-22 18:46:40.778364: step: 1784/526, loss: 0.003191588679328561 2023-01-22 18:46:41.833635: step: 1788/526, loss: 0.004765322897583246 2023-01-22 18:46:42.902412: step: 1792/526, loss: 0.002008237410336733 2023-01-22 18:46:43.973623: step: 1796/526, loss: 0.0025999981444329023 2023-01-22 18:46:45.023157: step: 1800/526, loss: 0.0008397491765208542 2023-01-22 18:46:46.109362: step: 1804/526, loss: 0.04514950513839722 2023-01-22 18:46:47.182951: step: 1808/526, loss: 0.005623677745461464 2023-01-22 18:46:48.238342: step: 1812/526, loss: 0.0019293545046821237 2023-01-22 18:46:49.320150: step: 1816/526, loss: 0.005144578870385885 2023-01-22 18:46:50.374585: step: 1820/526, loss: 0.0005856275092810392 2023-01-22 18:46:51.438339: step: 1824/526, loss: 0.0028202657122164965 2023-01-22 18:46:52.493364: step: 1828/526, loss: 0.0016322726150974631 2023-01-22 18:46:53.561564: step: 1832/526, loss: 0.024153169244527817 2023-01-22 18:46:54.613887: step: 1836/526, loss: 0.03205622360110283 2023-01-22 18:46:55.691691: step: 1840/526, loss: 0.013268989510834217 2023-01-22 18:46:56.767359: step: 1844/526, loss: 0.0019674438517540693 2023-01-22 18:46:57.828461: step: 1848/526, loss: 8.317752508446574e-05 2023-01-22 18:46:58.897467: step: 1852/526, loss: 2.348231828364078e-05 2023-01-22 18:46:59.959191: step: 1856/526, loss: 0.006586694624274969 2023-01-22 18:47:01.031208: step: 1860/526, loss: 4.206393350614235e-05 2023-01-22 18:47:02.103116: step: 1864/526, loss: 1.93113082787022e-05 2023-01-22 18:47:03.157648: step: 1868/526, loss: 0.00041411424172110856 2023-01-22 18:47:04.221115: step: 1872/526, loss: 0.0025553200393915176 2023-01-22 18:47:05.286515: step: 1876/526, loss: 0.015814675018191338 2023-01-22 18:47:06.363382: step: 1880/526, loss: 0.0005905671860091388 2023-01-22 18:47:07.439320: step: 1884/526, loss: 0.003998665139079094 2023-01-22 18:47:08.493010: step: 1888/526, loss: 1.3777264939562883e-05 2023-01-22 18:47:09.550884: step: 1892/526, loss: 0.000997500610537827 2023-01-22 18:47:10.611826: step: 1896/526, loss: 0.004966071341186762 2023-01-22 18:47:11.695140: step: 1900/526, loss: 
0.006368839647620916 2023-01-22 18:47:12.744099: step: 1904/526, loss: 1.996709897866822e-06 2023-01-22 18:47:13.825259: step: 1908/526, loss: 0.009706745855510235 2023-01-22 18:47:14.875846: step: 1912/526, loss: 0.0 2023-01-22 18:47:15.942436: step: 1916/526, loss: 0.000681404781062156 2023-01-22 18:47:16.999305: step: 1920/526, loss: 0.00039162480970844626 2023-01-22 18:47:18.081700: step: 1924/526, loss: 0.004568703938275576 2023-01-22 18:47:19.174742: step: 1928/526, loss: 0.006932826712727547 2023-01-22 18:47:20.237153: step: 1932/526, loss: 0.010500838980078697 2023-01-22 18:47:21.296968: step: 1936/526, loss: 0.0037788108456879854 2023-01-22 18:47:22.358402: step: 1940/526, loss: 0.0022299950942397118 2023-01-22 18:47:23.426327: step: 1944/526, loss: 0.0011111543281003833 2023-01-22 18:47:24.486785: step: 1948/526, loss: 0.000446643796749413 2023-01-22 18:47:25.563534: step: 1952/526, loss: 0.01724560745060444 2023-01-22 18:47:26.635832: step: 1956/526, loss: 0.002889660419896245 2023-01-22 18:47:27.722697: step: 1960/526, loss: 0.005162473302334547 2023-01-22 18:47:28.786442: step: 1964/526, loss: 0.0034572849981486797 2023-01-22 18:47:29.876404: step: 1968/526, loss: 0.0018098610453307629 2023-01-22 18:47:30.938286: step: 1972/526, loss: 0.0025778994895517826 2023-01-22 18:47:31.999470: step: 1976/526, loss: 0.00012220801727380604 2023-01-22 18:47:33.067531: step: 1980/526, loss: 0.005837772972881794 2023-01-22 18:47:34.125309: step: 1984/526, loss: 0.00015310892194975168 2023-01-22 18:47:35.186224: step: 1988/526, loss: 0.006770716048777103 2023-01-22 18:47:36.251973: step: 1992/526, loss: 0.002294846111908555 2023-01-22 18:47:37.315914: step: 1996/526, loss: 0.006976480595767498 2023-01-22 18:47:38.396063: step: 2000/526, loss: 0.0006915026460774243 2023-01-22 18:47:39.471532: step: 2004/526, loss: 0.0003553621645551175 2023-01-22 18:47:40.547045: step: 2008/526, loss: 0.0068934205919504166 2023-01-22 18:47:41.628960: step: 2012/526, loss: 0.0031260449904948473 2023-01-22 18:47:42.710898: step: 2016/526, loss: 0.0005862055695615709 2023-01-22 18:47:43.783555: step: 2020/526, loss: 0.012843873351812363 2023-01-22 18:47:44.842139: step: 2024/526, loss: 1.1201146662642714e-05 2023-01-22 18:47:45.909193: step: 2028/526, loss: 0.0007346518686972558 2023-01-22 18:47:46.999372: step: 2032/526, loss: 0.0020734791178256273 2023-01-22 18:47:48.074766: step: 2036/526, loss: 0.0020234466064721346 2023-01-22 18:47:49.144383: step: 2040/526, loss: 0.0004323887114878744 2023-01-22 18:47:50.218440: step: 2044/526, loss: 0.0011445109266787767 2023-01-22 18:47:51.287850: step: 2048/526, loss: 5.2066792704863474e-05 2023-01-22 18:47:52.369796: step: 2052/526, loss: 0.0017658856231719255 2023-01-22 18:47:53.459046: step: 2056/526, loss: 0.011797196231782436 2023-01-22 18:47:54.542969: step: 2060/526, loss: 0.0002819258952513337 2023-01-22 18:47:55.629204: step: 2064/526, loss: 0.00212306366302073 2023-01-22 18:47:56.688514: step: 2068/526, loss: 0.00039372092578560114 2023-01-22 18:47:57.754294: step: 2072/526, loss: 0.0016727183246985078 2023-01-22 18:47:58.843051: step: 2076/526, loss: 0.0034725950099527836 2023-01-22 18:47:59.927397: step: 2080/526, loss: 0.0016738607082515955 2023-01-22 18:48:00.975622: step: 2084/526, loss: 1.2970345778740011e-05 2023-01-22 18:48:02.039342: step: 2088/526, loss: 0.007320513017475605 2023-01-22 18:48:03.119394: step: 2092/526, loss: 1.9310269635752775e-05 2023-01-22 18:48:04.195285: step: 2096/526, loss: 0.007351463660597801 2023-01-22 18:48:05.286346: step: 
2100/526, loss: 0.0007869754335843027 2023-01-22 18:48:06.354695: step: 2104/526, loss: 0.0004477521579246968 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3442404708520179, 'r': 0.2913306451612903, 'f1': 0.31558324768756424}, 'combined': 0.23253502461188943, 'stategy': 1, 'epoch': 16} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33900589819803084, 'r': 0.2387321755625327, 'f1': 0.2801671528364167}, 'combined': 0.15281844700168182, 'stategy': 1, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31196248517200476, 'r': 0.3326810562934851, 'f1': 0.32198882767064585}, 'combined': 0.23725492565205483, 'stategy': 1, 'epoch': 16} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3389202157203071, 'r': 0.26450040623002924, 'f1': 0.2971212168647708}, 'combined': 0.16206611828987497, 'stategy': 1, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3246064714052954, 'r': 0.3289181323158022, 'f1': 0.3267480786624463}, 'combined': 0.24076174217232885, 'stategy': 1, 'epoch': 16} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33774631376695824, 'r': 0.27420682460166745, 'f1': 0.3026778961076874}, 'combined': 0.16509703424055677, 'stategy': 1, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'stategy': 1, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.3017241379310345, 'f1': 0.3723404255319149}, 'combined': 0.2482269503546099, 'stategy': 1, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 
'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 17 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 18:50:48.281365: step: 4/526, loss: 0.0463980957865715 2023-01-22 18:50:49.325951: step: 8/526, loss: 0.00010991937597282231 2023-01-22 18:50:50.367391: step: 12/526, loss: 0.004510130267590284 2023-01-22 18:50:51.419518: step: 16/526, loss: 1.4702322005177848e-05 2023-01-22 18:50:52.489778: step: 20/526, loss: 0.0006512802792713046 2023-01-22 18:50:53.552007: step: 24/526, loss: 0.0010251847561448812 2023-01-22 18:50:54.585152: step: 28/526, loss: 1.1938166608160827e-05 2023-01-22 18:50:55.648491: step: 32/526, loss: 0.00642089918255806 2023-01-22 18:50:56.704991: step: 36/526, loss: 1.5204660485323984e-05 2023-01-22 18:50:57.766621: step: 40/526, loss: 0.0020667295902967453 2023-01-22 18:50:58.819255: step: 44/526, loss: 0.004025040194392204 2023-01-22 18:50:59.878579: step: 48/526, loss: 0.0002887505106627941 2023-01-22 18:51:00.949774: step: 52/526, loss: 0.0001808819069992751 2023-01-22 18:51:02.026207: step: 56/526, loss: 0.0009427643963135779 2023-01-22 18:51:03.102789: step: 60/526, loss: 0.0016344982432201505 2023-01-22 18:51:04.151283: step: 64/526, loss: 4.090594302397221e-06 2023-01-22 18:51:05.210279: step: 68/526, loss: 0.0003312677436042577 2023-01-22 18:51:06.262031: step: 72/526, loss: 0.00603075185790658 2023-01-22 18:51:07.331378: step: 76/526, loss: 0.003071579383686185 2023-01-22 18:51:08.386426: step: 80/526, loss: 0.0028638802468776703 2023-01-22 18:51:09.435559: step: 84/526, loss: 0.001981923822313547 2023-01-22 18:51:10.482823: step: 88/526, loss: 0.0013352293753996491 2023-01-22 18:51:11.571807: step: 92/526, loss: 0.0016963854432106018 2023-01-22 18:51:12.624101: step: 96/526, loss: 0.0008963853470049798 2023-01-22 18:51:13.698325: step: 100/526, loss: 0.0018659612396731973 2023-01-22 18:51:14.757927: step: 104/526, loss: 0.0013818941079080105 2023-01-22 18:51:15.820368: step: 108/526, loss: 0.00042988662607967854 2023-01-22 18:51:16.876903: step: 112/526, loss: 0.0006805207231082022 2023-01-22 18:51:17.923432: step: 116/526, loss: 0.0001503357052570209 2023-01-22 18:51:18.972710: step: 120/526, loss: 0.0036784945987164974 2023-01-22 18:51:20.011509: step: 124/526, loss: 0.0030546069610863924 2023-01-22 18:51:21.070281: step: 128/526, loss: 0.003129085060209036 2023-01-22 18:51:22.119637: step: 132/526, loss: 
6.463538738898933e-05 2023-01-22 18:51:23.178027: step: 136/526, loss: 0.00036310305586084723 2023-01-22 18:51:24.214368: step: 140/526, loss: 0.0016909617697820067 2023-01-22 18:51:25.278417: step: 144/526, loss: 0.010252445936203003 2023-01-22 18:51:26.330321: step: 148/526, loss: 0.0054732272401452065 2023-01-22 18:51:27.368183: step: 152/526, loss: 0.0015789339086040854 2023-01-22 18:51:28.445657: step: 156/526, loss: 0.0012789018219336867 2023-01-22 18:51:29.531108: step: 160/526, loss: 7.850086694816127e-05 2023-01-22 18:51:30.578809: step: 164/526, loss: 0.0014245674246922135 2023-01-22 18:51:31.657103: step: 168/526, loss: 0.0036292877048254013 2023-01-22 18:51:32.724652: step: 172/526, loss: 0.0018398505635559559 2023-01-22 18:51:33.782162: step: 176/526, loss: 0.0002395951742073521 2023-01-22 18:51:34.840861: step: 180/526, loss: 0.0033870332408696413 2023-01-22 18:51:35.898544: step: 184/526, loss: 0.010737528093159199 2023-01-22 18:51:36.975866: step: 188/526, loss: 0.0025251656770706177 2023-01-22 18:51:38.049900: step: 192/526, loss: 0.0008991694776341319 2023-01-22 18:51:39.129523: step: 196/526, loss: 0.0049573746509850025 2023-01-22 18:51:40.185736: step: 200/526, loss: 0.01414923369884491 2023-01-22 18:51:41.236333: step: 204/526, loss: 0.002367520472034812 2023-01-22 18:51:42.294768: step: 208/526, loss: 0.004535994958132505 2023-01-22 18:51:43.356177: step: 212/526, loss: 0.00047451950376853347 2023-01-22 18:51:44.399338: step: 216/526, loss: 0.006600670516490936 2023-01-22 18:51:45.443984: step: 220/526, loss: 0.00016484559455420822 2023-01-22 18:51:46.511696: step: 224/526, loss: 0.0022476345766335726 2023-01-22 18:51:47.580829: step: 228/526, loss: 0.0013701578136533499 2023-01-22 18:51:48.630865: step: 232/526, loss: 0.00036878802347928286 2023-01-22 18:51:49.691319: step: 236/526, loss: 0.002993670292198658 2023-01-22 18:51:50.769426: step: 240/526, loss: 0.001210243790410459 2023-01-22 18:51:51.833619: step: 244/526, loss: 0.003514036536216736 2023-01-22 18:51:52.900265: step: 248/526, loss: 0.001499002450145781 2023-01-22 18:51:53.956420: step: 252/526, loss: 0.003715614089742303 2023-01-22 18:51:55.025915: step: 256/526, loss: 0.034819819033145905 2023-01-22 18:51:56.085632: step: 260/526, loss: 0.0037892204709351063 2023-01-22 18:51:57.153697: step: 264/526, loss: 0.0003726025461219251 2023-01-22 18:51:58.209935: step: 268/526, loss: 0.005182519089430571 2023-01-22 18:51:59.284383: step: 272/526, loss: 0.007129094563424587 2023-01-22 18:52:00.337507: step: 276/526, loss: 0.0014610864454880357 2023-01-22 18:52:01.408109: step: 280/526, loss: 0.00016787397908046842 2023-01-22 18:52:02.476145: step: 284/526, loss: 0.0033606623765081167 2023-01-22 18:52:03.536709: step: 288/526, loss: 0.0008211909444071352 2023-01-22 18:52:04.590497: step: 292/526, loss: 0.0011222274042665958 2023-01-22 18:52:05.652098: step: 296/526, loss: 1.1055620916522457e-06 2023-01-22 18:52:06.711122: step: 300/526, loss: 0.00038951190072111785 2023-01-22 18:52:07.791518: step: 304/526, loss: 0.0004283290181774646 2023-01-22 18:52:08.849463: step: 308/526, loss: 0.001642512739636004 2023-01-22 18:52:09.911638: step: 312/526, loss: 0.005180171225219965 2023-01-22 18:52:10.990140: step: 316/526, loss: 0.01095285452902317 2023-01-22 18:52:12.042625: step: 320/526, loss: 0.00023335735022556037 2023-01-22 18:52:13.089814: step: 324/526, loss: 0.00012285835691727698 2023-01-22 18:52:14.147818: step: 328/526, loss: 0.0018274281173944473 2023-01-22 18:52:15.211312: step: 332/526, loss: 
0.0010510620195418596 2023-01-22 18:52:16.268352: step: 336/526, loss: 0.0028317952528595924 2023-01-22 18:52:17.317679: step: 340/526, loss: 0.004292685072869062 2023-01-22 18:52:18.370660: step: 344/526, loss: 0.003843134269118309 2023-01-22 18:52:19.421908: step: 348/526, loss: 0.0006477513234131038 2023-01-22 18:52:20.489909: step: 352/526, loss: 0.0001832250418374315 2023-01-22 18:52:21.551661: step: 356/526, loss: 0.00012352815247140825 2023-01-22 18:52:22.614992: step: 360/526, loss: 7.168961246861727e-08 2023-01-22 18:52:23.673093: step: 364/526, loss: 0.0075658452697098255 2023-01-22 18:52:24.745560: step: 368/526, loss: 0.004586181603372097 2023-01-22 18:52:25.809513: step: 372/526, loss: 0.000249495729804039 2023-01-22 18:52:26.867064: step: 376/526, loss: 0.0002550693752709776 2023-01-22 18:52:27.930694: step: 380/526, loss: 0.003682951908558607 2023-01-22 18:52:28.978552: step: 384/526, loss: 0.0015401209238916636 2023-01-22 18:52:30.047713: step: 388/526, loss: 0.011026284657418728 2023-01-22 18:52:31.128377: step: 392/526, loss: 0.001119575696066022 2023-01-22 18:52:32.183907: step: 396/526, loss: 0.00017400254728272557 2023-01-22 18:52:33.242201: step: 400/526, loss: 0.004120440222322941 2023-01-22 18:52:34.300528: step: 404/526, loss: 0.003093558130785823 2023-01-22 18:52:35.360723: step: 408/526, loss: 0.0018006149912253022 2023-01-22 18:52:36.418992: step: 412/526, loss: 0.00022988353157415986 2023-01-22 18:52:37.463749: step: 416/526, loss: 0.00015818413521628827 2023-01-22 18:52:38.541676: step: 420/526, loss: 0.0016527051338925958 2023-01-22 18:52:39.620505: step: 424/526, loss: 0.001137578277848661 2023-01-22 18:52:40.673969: step: 428/526, loss: 7.243343134177849e-05 2023-01-22 18:52:41.732369: step: 432/526, loss: 0.0004274975508451462 2023-01-22 18:52:42.842103: step: 436/526, loss: 0.003246559528633952 2023-01-22 18:52:43.896529: step: 440/526, loss: 0.0019939234480261803 2023-01-22 18:52:44.958522: step: 444/526, loss: 0.0015268611023202538 2023-01-22 18:52:46.018601: step: 448/526, loss: 0.01211488526314497 2023-01-22 18:52:47.079864: step: 452/526, loss: 0.0031942531932145357 2023-01-22 18:52:48.144677: step: 456/526, loss: 0.00773973111063242 2023-01-22 18:52:49.209924: step: 460/526, loss: 6.892277451697737e-05 2023-01-22 18:52:50.271331: step: 464/526, loss: 0.0005133071099407971 2023-01-22 18:52:51.336906: step: 468/526, loss: 0.002507448196411133 2023-01-22 18:52:52.418412: step: 472/526, loss: 0.0012764750281348825 2023-01-22 18:52:53.488979: step: 476/526, loss: 0.0010258274851366878 2023-01-22 18:52:54.545624: step: 480/526, loss: 0.00020125247829128057 2023-01-22 18:52:55.618036: step: 484/526, loss: 0.0011133721563965082 2023-01-22 18:52:56.681217: step: 488/526, loss: 0.0011401246301829815 2023-01-22 18:52:57.736407: step: 492/526, loss: 0.0016982285305857658 2023-01-22 18:52:58.794061: step: 496/526, loss: 0.00862390547990799 2023-01-22 18:52:59.847276: step: 500/526, loss: 0.0004218018730171025 2023-01-22 18:53:00.912604: step: 504/526, loss: 0.007057816721498966 2023-01-22 18:53:01.964773: step: 508/526, loss: 0.007039402611553669 2023-01-22 18:53:03.015794: step: 512/526, loss: 0.005749070085585117 2023-01-22 18:53:04.078989: step: 516/526, loss: 0.0037090883124619722 2023-01-22 18:53:05.150394: step: 520/526, loss: 4.112521310162265e-07 2023-01-22 18:53:06.218148: step: 524/526, loss: 0.0020158865954726934 2023-01-22 18:53:07.274568: step: 528/526, loss: 2.7897622203454375e-05 2023-01-22 18:53:08.341906: step: 532/526, loss: 
0.0008194040274247527 2023-01-22 18:53:09.408779: step: 536/526, loss: 0.005127327982336283 2023-01-22 18:53:10.466027: step: 540/526, loss: 4.3473715777508914e-05 2023-01-22 18:53:11.520523: step: 544/526, loss: 0.003732114564627409 2023-01-22 18:53:12.587869: step: 548/526, loss: 0.008060650900006294 2023-01-22 18:53:13.640207: step: 552/526, loss: 0.004624656867235899 2023-01-22 18:53:14.728902: step: 556/526, loss: 0.0032527304720133543 2023-01-22 18:53:15.792224: step: 560/526, loss: 0.00018322185496799648 2023-01-22 18:53:16.852838: step: 564/526, loss: 0.0021202941425144672 2023-01-22 18:53:17.909648: step: 568/526, loss: 0.0020952620543539524 2023-01-22 18:53:18.999206: step: 572/526, loss: 0.0027186234947293997 2023-01-22 18:53:20.059999: step: 576/526, loss: 0.005280424375087023 2023-01-22 18:53:21.121220: step: 580/526, loss: 0.0011867227731272578 2023-01-22 18:53:22.178207: step: 584/526, loss: 5.937131936661899e-05 2023-01-22 18:53:23.243206: step: 588/526, loss: 0.0008186287013813853 2023-01-22 18:53:24.317075: step: 592/526, loss: 0.002644090913236141 2023-01-22 18:53:25.377824: step: 596/526, loss: 2.1075493350508623e-05 2023-01-22 18:53:26.429263: step: 600/526, loss: 5.671819508279441e-06 2023-01-22 18:53:27.489714: step: 604/526, loss: 3.3184303902089596e-05 2023-01-22 18:53:28.561873: step: 608/526, loss: 0.010636086575686932 2023-01-22 18:53:29.626903: step: 612/526, loss: 0.0005038361414335668 2023-01-22 18:53:30.703190: step: 616/526, loss: 0.003825089894235134 2023-01-22 18:53:31.782605: step: 620/526, loss: 0.00010419063619337976 2023-01-22 18:53:32.853931: step: 624/526, loss: 0.005193542223423719 2023-01-22 18:53:33.916902: step: 628/526, loss: 0.005949428305029869 2023-01-22 18:53:34.982245: step: 632/526, loss: 8.242437615990639e-06 2023-01-22 18:53:36.037723: step: 636/526, loss: 0.01410337258130312 2023-01-22 18:53:37.096188: step: 640/526, loss: 0.0002600831212475896 2023-01-22 18:53:38.162898: step: 644/526, loss: 0.0010079031344503164 2023-01-22 18:53:39.226777: step: 648/526, loss: 0.00011994480155408382 2023-01-22 18:53:40.295693: step: 652/526, loss: 0.00839836336672306 2023-01-22 18:53:41.351491: step: 656/526, loss: 8.093093128991313e-06 2023-01-22 18:53:42.419327: step: 660/526, loss: 0.012778832577168941 2023-01-22 18:53:43.483638: step: 664/526, loss: 0.007677122019231319 2023-01-22 18:53:44.531020: step: 668/526, loss: 4.567364157992415e-05 2023-01-22 18:53:45.586412: step: 672/526, loss: 0.00015888040070421994 2023-01-22 18:53:46.646985: step: 676/526, loss: 0.0023467233404517174 2023-01-22 18:53:47.721513: step: 680/526, loss: 0.002413022331893444 2023-01-22 18:53:48.785623: step: 684/526, loss: 0.003936802037060261 2023-01-22 18:53:49.840780: step: 688/526, loss: 0.00031596774351783097 2023-01-22 18:53:50.896744: step: 692/526, loss: 0.0037119807675480843 2023-01-22 18:53:51.962975: step: 696/526, loss: 0.0020477024372667074 2023-01-22 18:53:53.029803: step: 700/526, loss: 0.006950508803129196 2023-01-22 18:53:54.089242: step: 704/526, loss: 0.0001317803980782628 2023-01-22 18:53:55.154963: step: 708/526, loss: 0.006935785990208387 2023-01-22 18:53:56.223079: step: 712/526, loss: 1.296458958677249e-05 2023-01-22 18:53:57.281631: step: 716/526, loss: 0.0006262968527153134 2023-01-22 18:53:58.354057: step: 720/526, loss: 0.0008764247177168727 2023-01-22 18:53:59.427235: step: 724/526, loss: 0.002296761143952608 2023-01-22 18:54:00.512994: step: 728/526, loss: 0.0046785855665802956 2023-01-22 18:54:01.585720: step: 732/526, loss: 
0.0030133621767163277 2023-01-22 18:54:02.644875: step: 736/526, loss: 0.0013013080460950732 2023-01-22 18:54:03.735584: step: 740/526, loss: 0.019722046330571175 2023-01-22 18:54:04.809687: step: 744/526, loss: 0.00013010535622015595 2023-01-22 18:54:05.877498: step: 748/526, loss: 0.004622706212103367 2023-01-22 18:54:06.952307: step: 752/526, loss: 0.0040210518054664135 2023-01-22 18:54:08.022853: step: 756/526, loss: 0.0013683774741366506 2023-01-22 18:54:09.086480: step: 760/526, loss: 0.000799557426944375 2023-01-22 18:54:10.140889: step: 764/526, loss: 6.303599366219714e-05 2023-01-22 18:54:11.195969: step: 768/526, loss: 0.0005105281597934663 2023-01-22 18:54:12.265654: step: 772/526, loss: 2.7423491701483727e-05 2023-01-22 18:54:13.332878: step: 776/526, loss: 0.0011220870073884726 2023-01-22 18:54:14.395815: step: 780/526, loss: 0.00508394418284297 2023-01-22 18:54:15.468273: step: 784/526, loss: 0.00108869350515306 2023-01-22 18:54:16.556106: step: 788/526, loss: 0.005428141448646784 2023-01-22 18:54:17.615869: step: 792/526, loss: 0.0010028373217210174 2023-01-22 18:54:18.679717: step: 796/526, loss: 6.29554824627121e-06 2023-01-22 18:54:19.740456: step: 800/526, loss: 0.0014613389503210783 2023-01-22 18:54:20.805020: step: 804/526, loss: 0.0006679213256575167 2023-01-22 18:54:21.864715: step: 808/526, loss: 0.002016930840909481 2023-01-22 18:54:22.931894: step: 812/526, loss: 0.0024851940106600523 2023-01-22 18:54:24.002964: step: 816/526, loss: 0.0006577189778909087 2023-01-22 18:54:25.071229: step: 820/526, loss: 0.0016194492345675826 2023-01-22 18:54:26.128975: step: 824/526, loss: 0.004470439162105322 2023-01-22 18:54:27.203922: step: 828/526, loss: 0.0027023714501410723 2023-01-22 18:54:28.282815: step: 832/526, loss: 4.603931665769778e-05 2023-01-22 18:54:29.351796: step: 836/526, loss: 5.3113872127141804e-05 2023-01-22 18:54:30.445418: step: 840/526, loss: 0.0031346650794148445 2023-01-22 18:54:31.500088: step: 844/526, loss: 3.0452334613073617e-05 2023-01-22 18:54:32.569852: step: 848/526, loss: 0.0023021860979497433 2023-01-22 18:54:33.636711: step: 852/526, loss: 0.00036915394593961537 2023-01-22 18:54:34.694133: step: 856/526, loss: 0.0008140862337313592 2023-01-22 18:54:35.742195: step: 860/526, loss: 0.0006362684653140604 2023-01-22 18:54:36.801921: step: 864/526, loss: 9.940393647411838e-05 2023-01-22 18:54:37.885855: step: 868/526, loss: 0.008510332554578781 2023-01-22 18:54:38.948202: step: 872/526, loss: 0.0002390950539847836 2023-01-22 18:54:40.008355: step: 876/526, loss: 0.0013669779291376472 2023-01-22 18:54:41.067382: step: 880/526, loss: 0.003962170332670212 2023-01-22 18:54:42.140375: step: 884/526, loss: 0.004155547358095646 2023-01-22 18:54:43.195942: step: 888/526, loss: 0.0016149826114997268 2023-01-22 18:54:44.255559: step: 892/526, loss: 0.017426956444978714 2023-01-22 18:54:45.315753: step: 896/526, loss: 7.972426828928292e-05 2023-01-22 18:54:46.380104: step: 900/526, loss: 0.019362036138772964 2023-01-22 18:54:47.456048: step: 904/526, loss: 0.0078095560893416405 2023-01-22 18:54:48.534016: step: 908/526, loss: 0.0005308479885570705 2023-01-22 18:54:49.615538: step: 912/526, loss: 2.2195943529368378e-05 2023-01-22 18:54:50.673004: step: 916/526, loss: 4.1376155422767624e-05 2023-01-22 18:54:51.762996: step: 920/526, loss: 0.0024681328795850277 2023-01-22 18:54:52.820092: step: 924/526, loss: 0.01001826487481594 2023-01-22 18:54:53.905043: step: 928/526, loss: 0.01671735569834709 2023-01-22 18:54:54.974414: step: 932/526, loss: 
0.006348755676299334 2023-01-22 18:54:56.042949: step: 936/526, loss: 0.0004991499008610845 2023-01-22 18:54:57.100912: step: 940/526, loss: 6.73061003908515e-06 2023-01-22 18:54:58.164783: step: 944/526, loss: 0.0015974065754562616 2023-01-22 18:54:59.222628: step: 948/526, loss: 0.0007682112045586109 2023-01-22 18:55:00.265084: step: 952/526, loss: 3.084801937802695e-05 2023-01-22 18:55:01.333807: step: 956/526, loss: 0.002170176012441516 2023-01-22 18:55:02.407538: step: 960/526, loss: 3.2930158340604976e-05 2023-01-22 18:55:03.472808: step: 964/526, loss: 0.0003970520047005266 2023-01-22 18:55:04.543796: step: 968/526, loss: 0.0002952840004581958 2023-01-22 18:55:05.613175: step: 972/526, loss: 7.388223821180873e-06 2023-01-22 18:55:06.673742: step: 976/526, loss: 7.909903797553852e-05 2023-01-22 18:55:07.724692: step: 980/526, loss: 5.600322037935257e-05 2023-01-22 18:55:08.792053: step: 984/526, loss: 0.0005911869811825454 2023-01-22 18:55:09.883293: step: 988/526, loss: 0.0035353952553123236 2023-01-22 18:55:10.956568: step: 992/526, loss: 0.00419087428599596 2023-01-22 18:55:12.026889: step: 996/526, loss: 9.73637816059636e-06 2023-01-22 18:55:13.087825: step: 1000/526, loss: 0.0007429146207869053 2023-01-22 18:55:14.157633: step: 1004/526, loss: 0.0023746758233755827 2023-01-22 18:55:15.223421: step: 1008/526, loss: 0.0007079019560478628 2023-01-22 18:55:16.307892: step: 1012/526, loss: 0.015971630811691284 2023-01-22 18:55:17.376664: step: 1016/526, loss: 0.0018676278414204717 2023-01-22 18:55:18.441012: step: 1020/526, loss: 0.0007657906389795244 2023-01-22 18:55:19.509235: step: 1024/526, loss: 0.006397362798452377 2023-01-22 18:55:20.571470: step: 1028/526, loss: 9.910325752571225e-05 2023-01-22 18:55:21.633236: step: 1032/526, loss: 0.005097044166177511 2023-01-22 18:55:22.705959: step: 1036/526, loss: 0.0003145902301184833 2023-01-22 18:55:23.760503: step: 1040/526, loss: 0.0015700054354965687 2023-01-22 18:55:24.823675: step: 1044/526, loss: 0.0014154105447232723 2023-01-22 18:55:25.868122: step: 1048/526, loss: 9.224838140653446e-05 2023-01-22 18:55:26.917741: step: 1052/526, loss: 0.0006847636541351676 2023-01-22 18:55:27.991020: step: 1056/526, loss: 0.0030639139004051685 2023-01-22 18:55:29.072605: step: 1060/526, loss: 0.00014613453822676092 2023-01-22 18:55:30.127901: step: 1064/526, loss: 0.004744696896523237 2023-01-22 18:55:31.201068: step: 1068/526, loss: 3.0585622880607843e-05 2023-01-22 18:55:32.250223: step: 1072/526, loss: 0.00046359343104995787 2023-01-22 18:55:33.331069: step: 1076/526, loss: 0.0016133025055751204 2023-01-22 18:55:34.386631: step: 1080/526, loss: 6.0761421991628595e-06 2023-01-22 18:55:35.480084: step: 1084/526, loss: 0.0034017544239759445 2023-01-22 18:55:36.560786: step: 1088/526, loss: 5.141642031958327e-05 2023-01-22 18:55:37.609871: step: 1092/526, loss: 0.0027938869316130877 2023-01-22 18:55:38.684389: step: 1096/526, loss: 4.976510808774037e-06 2023-01-22 18:55:39.767005: step: 1100/526, loss: 0.02569619193673134 2023-01-22 18:55:40.817972: step: 1104/526, loss: 0.0012464254396036267 2023-01-22 18:55:41.895831: step: 1108/526, loss: 0.0017776619642972946 2023-01-22 18:55:42.957864: step: 1112/526, loss: 0.0008599167922511697 2023-01-22 18:55:44.023939: step: 1116/526, loss: 0.0005587437190115452 2023-01-22 18:55:45.080087: step: 1120/526, loss: 0.002503796713426709 2023-01-22 18:55:46.144975: step: 1124/526, loss: 0.008139527402818203 2023-01-22 18:55:47.205969: step: 1128/526, loss: 8.482647535856813e-05 2023-01-22 18:55:48.281577: 
step: 1132/526, loss: 0.001659313915297389 2023-01-22 18:55:49.349892: step: 1136/526, loss: 1.154838447092743e-08 2023-01-22 18:55:50.416351: step: 1140/526, loss: 0.0034066070802509785 2023-01-22 18:55:51.478820: step: 1144/526, loss: 0.00011975752568105236 2023-01-22 18:55:52.538078: step: 1148/526, loss: 0.028846673667430878 2023-01-22 18:55:53.610241: step: 1152/526, loss: 0.0016402938636019826 2023-01-22 18:55:54.673512: step: 1156/526, loss: 0.0002949299232568592 2023-01-22 18:55:55.735220: step: 1160/526, loss: 1.1845213521155529e-05 2023-01-22 18:55:56.804331: step: 1164/526, loss: 0.00739887123927474 2023-01-22 18:55:57.874489: step: 1168/526, loss: 0.0024101652670651674 2023-01-22 18:55:58.932408: step: 1172/526, loss: 0.003392466576769948 2023-01-22 18:56:00.030524: step: 1176/526, loss: 0.0017701422329992056 2023-01-22 18:56:01.090161: step: 1180/526, loss: 0.0008206646889448166 2023-01-22 18:56:02.149240: step: 1184/526, loss: 0.00033309072023257613 2023-01-22 18:56:03.222498: step: 1188/526, loss: 0.0029173051007092 2023-01-22 18:56:04.286317: step: 1192/526, loss: 0.00566058186814189 2023-01-22 18:56:05.359024: step: 1196/526, loss: 0.0035131387412548065 2023-01-22 18:56:06.451287: step: 1200/526, loss: 0.00038095717900432646 2023-01-22 18:56:07.520493: step: 1204/526, loss: 0.0006500615272670984 2023-01-22 18:56:08.575454: step: 1208/526, loss: 0.00023163575679063797 2023-01-22 18:56:09.667606: step: 1212/526, loss: 0.005997935775667429 2023-01-22 18:56:10.736773: step: 1216/526, loss: 0.00024209878756664693 2023-01-22 18:56:11.792970: step: 1220/526, loss: 0.004172225948423147 2023-01-22 18:56:12.860447: step: 1224/526, loss: 0.0001175685683847405 2023-01-22 18:56:13.894069: step: 1228/526, loss: 0.0002994227106682956 2023-01-22 18:56:14.958600: step: 1232/526, loss: 0.0001685644529061392 2023-01-22 18:56:16.021464: step: 1236/526, loss: 0.0025238997768610716 2023-01-22 18:56:17.100600: step: 1240/526, loss: 0.006070937030017376 2023-01-22 18:56:18.162104: step: 1244/526, loss: 0.0007993319886736572 2023-01-22 18:56:19.252722: step: 1248/526, loss: 0.0009237503982149065 2023-01-22 18:56:20.313808: step: 1252/526, loss: 0.011913645081222057 2023-01-22 18:56:21.378775: step: 1256/526, loss: 0.0013019415782764554 2023-01-22 18:56:22.446337: step: 1260/526, loss: 1.474797954870155e-06 2023-01-22 18:56:23.519233: step: 1264/526, loss: 0.026881493628025055 2023-01-22 18:56:24.580982: step: 1268/526, loss: 0.00030063901795074344 2023-01-22 18:56:25.635278: step: 1272/526, loss: 0.005757459439337254 2023-01-22 18:56:26.710094: step: 1276/526, loss: 0.0024113182444125414 2023-01-22 18:56:27.770854: step: 1280/526, loss: 0.002858600579202175 2023-01-22 18:56:28.834821: step: 1284/526, loss: 0.0003360543632879853 2023-01-22 18:56:29.894203: step: 1288/526, loss: 0.00010401618055766448 2023-01-22 18:56:30.978754: step: 1292/526, loss: 0.002575970022007823 2023-01-22 18:56:32.046117: step: 1296/526, loss: 0.00018918524438049644 2023-01-22 18:56:33.089715: step: 1300/526, loss: 5.509396578418091e-05 2023-01-22 18:56:34.170059: step: 1304/526, loss: 0.00040850392542779446 2023-01-22 18:56:35.242654: step: 1308/526, loss: 0.0033422140404582024 2023-01-22 18:56:36.312689: step: 1312/526, loss: 0.00016894582950044423 2023-01-22 18:56:37.375364: step: 1316/526, loss: 0.004275804851204157 2023-01-22 18:56:38.445524: step: 1320/526, loss: 0.0005342953372746706 2023-01-22 18:56:39.504967: step: 1324/526, loss: 0.00015412109496537596 2023-01-22 18:56:40.586163: step: 1328/526, loss: 
0.003829848486930132 2023-01-22 18:56:41.655899: step: 1332/526, loss: 0.00039161162567324936 2023-01-22 18:56:42.720129: step: 1336/526, loss: 0.00024154286074917763 2023-01-22 18:56:43.811321: step: 1340/526, loss: 0.0016671280609443784 2023-01-22 18:56:44.866894: step: 1344/526, loss: 0.00041484637768007815 2023-01-22 18:56:45.930782: step: 1348/526, loss: 0.001319577102549374 2023-01-22 18:56:47.002290: step: 1352/526, loss: 0.0048068612813949585 2023-01-22 18:56:48.056183: step: 1356/526, loss: 0.005619472824037075 2023-01-22 18:56:49.109758: step: 1360/526, loss: 1.8526162648413447e-06 2023-01-22 18:56:50.179831: step: 1364/526, loss: 0.01796802692115307 2023-01-22 18:56:51.248249: step: 1368/526, loss: 0.0016684645088389516 2023-01-22 18:56:52.306099: step: 1372/526, loss: 0.006749644875526428 2023-01-22 18:56:53.366736: step: 1376/526, loss: 0.00036148566869087517 2023-01-22 18:56:54.434317: step: 1380/526, loss: 0.0059757172130048275 2023-01-22 18:56:55.500889: step: 1384/526, loss: 0.002105912659317255 2023-01-22 18:56:56.575903: step: 1388/526, loss: 0.00016594017506577075 2023-01-22 18:56:57.648962: step: 1392/526, loss: 0.0063743372447788715 2023-01-22 18:56:58.720892: step: 1396/526, loss: 0.006596965715289116 2023-01-22 18:56:59.788921: step: 1400/526, loss: 0.0002547740878071636 2023-01-22 18:57:00.864572: step: 1404/526, loss: 0.009166822768747807 2023-01-22 18:57:01.936870: step: 1408/526, loss: 0.005374606233090162 2023-01-22 18:57:02.996908: step: 1412/526, loss: 0.0013521861983463168 2023-01-22 18:57:04.084734: step: 1416/526, loss: 0.006498397793620825 2023-01-22 18:57:05.151311: step: 1420/526, loss: 0.0008393136085942388 2023-01-22 18:57:06.216139: step: 1424/526, loss: 0.00023064140987116843 2023-01-22 18:57:07.268065: step: 1428/526, loss: 3.39574853569502e-06 2023-01-22 18:57:08.334523: step: 1432/526, loss: 0.00016327248886227608 2023-01-22 18:57:09.403139: step: 1436/526, loss: 0.0004489348502829671 2023-01-22 18:57:10.461598: step: 1440/526, loss: 1.8964508853969164e-05 2023-01-22 18:57:11.556116: step: 1444/526, loss: 0.0023021011147648096 2023-01-22 18:57:12.623912: step: 1448/526, loss: 0.007020849268883467 2023-01-22 18:57:13.677425: step: 1452/526, loss: 6.2382678152062e-05 2023-01-22 18:57:14.757067: step: 1456/526, loss: 0.001106567564420402 2023-01-22 18:57:15.825328: step: 1460/526, loss: 0.002262725029140711 2023-01-22 18:57:16.906268: step: 1464/526, loss: 0.01350381225347519 2023-01-22 18:57:17.949149: step: 1468/526, loss: 0.00018576462753117085 2023-01-22 18:57:19.019224: step: 1472/526, loss: 0.0015261798398569226 2023-01-22 18:57:20.088406: step: 1476/526, loss: 4.380718746688217e-05 2023-01-22 18:57:21.142836: step: 1480/526, loss: 8.293452992802486e-05 2023-01-22 18:57:22.205842: step: 1484/526, loss: 6.634479359490797e-05 2023-01-22 18:57:23.268292: step: 1488/526, loss: 0.0038234770763665438 2023-01-22 18:57:24.341672: step: 1492/526, loss: 0.0013298607664182782 2023-01-22 18:57:25.401984: step: 1496/526, loss: 4.532401726464741e-05 2023-01-22 18:57:26.464408: step: 1500/526, loss: 4.7750781959621236e-05 2023-01-22 18:57:27.519716: step: 1504/526, loss: 0.00211342447437346 2023-01-22 18:57:28.585560: step: 1508/526, loss: 0.008225289173424244 2023-01-22 18:57:29.656591: step: 1512/526, loss: 0.002450736705213785 2023-01-22 18:57:30.715363: step: 1516/526, loss: 0.0007001644698902965 2023-01-22 18:57:31.775108: step: 1520/526, loss: 0.007758734747767448 2023-01-22 18:57:32.848755: step: 1524/526, loss: 0.016332169994711876 2023-01-22 
18:57:33.914653: step: 1528/526, loss: 0.000947068736422807 2023-01-22 18:57:34.978512: step: 1532/526, loss: 0.002255253028124571 2023-01-22 18:57:36.047231: step: 1536/526, loss: 0.002192385494709015 2023-01-22 18:57:37.111801: step: 1540/526, loss: 0.010639390908181667 2023-01-22 18:57:38.184814: step: 1544/526, loss: 0.005155371502041817 2023-01-22 18:57:39.244625: step: 1548/526, loss: 0.008084229193627834 2023-01-22 18:57:40.322076: step: 1552/526, loss: 0.0002110498317051679 2023-01-22 18:57:41.390114: step: 1556/526, loss: 0.01814371533691883 2023-01-22 18:57:42.443187: step: 1560/526, loss: 2.07447854450038e-07 2023-01-22 18:57:43.524488: step: 1564/526, loss: 0.0015109218657016754 2023-01-22 18:57:44.587969: step: 1568/526, loss: 0.0008219032315537333 2023-01-22 18:57:45.642176: step: 1572/526, loss: 0.0027636263985186815 2023-01-22 18:57:46.714995: step: 1576/526, loss: 0.003368040546774864 2023-01-22 18:57:47.804226: step: 1580/526, loss: 0.001999292988330126 2023-01-22 18:57:48.865306: step: 1584/526, loss: 0.0016826161881908774 2023-01-22 18:57:49.936617: step: 1588/526, loss: 0.00857156328856945 2023-01-22 18:57:51.020847: step: 1592/526, loss: 2.837777356035076e-05 2023-01-22 18:57:52.085067: step: 1596/526, loss: 0.0005658446461893618 2023-01-22 18:57:53.157939: step: 1600/526, loss: 0.0022893897257745266 2023-01-22 18:57:54.270359: step: 1604/526, loss: 0.003975036554038525 2023-01-22 18:57:55.332751: step: 1608/526, loss: 0.0005028306040912867 2023-01-22 18:57:56.399167: step: 1612/526, loss: 0.0007159552769735456 2023-01-22 18:57:57.455001: step: 1616/526, loss: 0.006780128926038742 2023-01-22 18:57:58.537923: step: 1620/526, loss: 0.003525175154209137 2023-01-22 18:57:59.595507: step: 1624/526, loss: 0.00011376501061022282 2023-01-22 18:58:00.684263: step: 1628/526, loss: 0.003667653538286686 2023-01-22 18:58:01.752401: step: 1632/526, loss: 0.003684388706460595 2023-01-22 18:58:02.806680: step: 1636/526, loss: 0.0022180743981152773 2023-01-22 18:58:03.878240: step: 1640/526, loss: 0.001281987875699997 2023-01-22 18:58:04.931303: step: 1644/526, loss: 0.00011861836537718773 2023-01-22 18:58:05.988946: step: 1648/526, loss: 0.0014403087552636862 2023-01-22 18:58:07.059213: step: 1652/526, loss: 5.618975046672858e-05 2023-01-22 18:58:08.139963: step: 1656/526, loss: 0.004613439552485943 2023-01-22 18:58:09.218719: step: 1660/526, loss: 0.0007005402585491538 2023-01-22 18:58:10.275385: step: 1664/526, loss: 0.0003630566643550992 2023-01-22 18:58:11.349354: step: 1668/526, loss: 0.007170497439801693 2023-01-22 18:58:12.417745: step: 1672/526, loss: 0.004329638555645943 2023-01-22 18:58:13.491762: step: 1676/526, loss: 0.0053426772356033325 2023-01-22 18:58:14.558286: step: 1680/526, loss: 0.00219931872561574 2023-01-22 18:58:15.614902: step: 1684/526, loss: 0.002501806477084756 2023-01-22 18:58:16.694684: step: 1688/526, loss: 0.0022931781131774187 2023-01-22 18:58:17.762329: step: 1692/526, loss: 0.006508628372102976 2023-01-22 18:58:18.826330: step: 1696/526, loss: 0.001422066823579371 2023-01-22 18:58:19.888213: step: 1700/526, loss: 1.9098439224762842e-05 2023-01-22 18:58:20.940906: step: 1704/526, loss: 0.0021764866542071104 2023-01-22 18:58:22.001603: step: 1708/526, loss: 2.0567573301377706e-05 2023-01-22 18:58:23.063942: step: 1712/526, loss: 0.0009687712299637496 2023-01-22 18:58:24.131072: step: 1716/526, loss: 0.03214814513921738 2023-01-22 18:58:25.193443: step: 1720/526, loss: 0.00015621911734342575 2023-01-22 18:58:26.262655: step: 1724/526, loss: 
0.0026943183038383722 2023-01-22 18:58:27.335436: step: 1728/526, loss: 0.004763968288898468 2023-01-22 18:58:28.420004: step: 1732/526, loss: 0.005509181879460812 2023-01-22 18:58:29.478765: step: 1736/526, loss: 0.006067181006073952 2023-01-22 18:58:30.540558: step: 1740/526, loss: 7.264299028975074e-08 2023-01-22 18:58:31.605652: step: 1744/526, loss: 0.0003242361999582499 2023-01-22 18:58:32.669823: step: 1748/526, loss: 0.0012354010250419378 2023-01-22 18:58:33.729167: step: 1752/526, loss: 0.007575752679258585 2023-01-22 18:58:34.806424: step: 1756/526, loss: 0.020506858825683594 2023-01-22 18:58:35.861033: step: 1760/526, loss: 0.00012024528405163437 2023-01-22 18:58:36.946324: step: 1764/526, loss: 0.0060985335148870945 2023-01-22 18:58:37.996059: step: 1768/526, loss: 0.00016203972336370498 2023-01-22 18:58:39.057365: step: 1772/526, loss: 0.0017784223891794682 2023-01-22 18:58:40.122614: step: 1776/526, loss: 6.240505172172561e-05 2023-01-22 18:58:41.179668: step: 1780/526, loss: 0.007599841337651014 2023-01-22 18:58:42.238219: step: 1784/526, loss: 0.0002489571343176067 2023-01-22 18:58:43.326320: step: 1788/526, loss: 0.0031920955516397953 2023-01-22 18:58:44.393404: step: 1792/526, loss: 0.000125853403005749 2023-01-22 18:58:45.447092: step: 1796/526, loss: 0.003233651164919138 2023-01-22 18:58:46.506552: step: 1800/526, loss: 0.004029898438602686 2023-01-22 18:58:47.574271: step: 1804/526, loss: 0.003109175246208906 2023-01-22 18:58:48.643695: step: 1808/526, loss: 0.0013230028562247753 2023-01-22 18:58:49.708664: step: 1812/526, loss: 0.011759774759411812 2023-01-22 18:58:50.784302: step: 1816/526, loss: 0.001905420096591115 2023-01-22 18:58:51.843413: step: 1820/526, loss: 0.000376980664441362 2023-01-22 18:58:52.901223: step: 1824/526, loss: 0.003011233638972044 2023-01-22 18:58:53.957423: step: 1828/526, loss: 0.0036070875357836485 2023-01-22 18:58:55.007783: step: 1832/526, loss: 1.220539274982002e-06 2023-01-22 18:58:56.084111: step: 1836/526, loss: 0.005619187839329243 2023-01-22 18:58:57.131592: step: 1840/526, loss: 0.00010750297224149108 2023-01-22 18:58:58.192522: step: 1844/526, loss: 9.586124360794201e-05 2023-01-22 18:58:59.262732: step: 1848/526, loss: 0.0029806678649038076 2023-01-22 18:59:00.310873: step: 1852/526, loss: 0.00026063984842039645 2023-01-22 18:59:01.380468: step: 1856/526, loss: 0.00395971117541194 2023-01-22 18:59:02.445329: step: 1860/526, loss: 0.0036354425828903913 2023-01-22 18:59:03.501289: step: 1864/526, loss: 0.00698345759883523 2023-01-22 18:59:04.574765: step: 1868/526, loss: 0.002495624590665102 2023-01-22 18:59:05.644798: step: 1872/526, loss: 0.0001881919160950929 2023-01-22 18:59:06.696837: step: 1876/526, loss: 0.0022773980163037777 2023-01-22 18:59:07.755451: step: 1880/526, loss: 0.01914193667471409 2023-01-22 18:59:08.822734: step: 1884/526, loss: 0.00043189391726627946 2023-01-22 18:59:09.877431: step: 1888/526, loss: 0.002126776846125722 2023-01-22 18:59:10.946098: step: 1892/526, loss: 0.007313101086765528 2023-01-22 18:59:12.005334: step: 1896/526, loss: 0.0006145249353721738 2023-01-22 18:59:13.073135: step: 1900/526, loss: 4.086378339707153e-06 2023-01-22 18:59:14.147935: step: 1904/526, loss: 0.002802101895213127 2023-01-22 18:59:15.231814: step: 1908/526, loss: 0.0017569992924109101 2023-01-22 18:59:16.287095: step: 1912/526, loss: 3.8908549271354786e-08 2023-01-22 18:59:17.367632: step: 1916/526, loss: 0.0006196315516717732 2023-01-22 18:59:18.423293: step: 1920/526, loss: 0.00011819563223980367 2023-01-22 
18:59:19.508270: step: 1924/526, loss: 0.0014133146032691002 2023-01-22 18:59:20.588374: step: 1928/526, loss: 0.002454676665365696 2023-01-22 18:59:21.666313: step: 1932/526, loss: 0.0020834344904869795 2023-01-22 18:59:22.742824: step: 1936/526, loss: 0.004841333720833063 2023-01-22 18:59:23.798382: step: 1940/526, loss: 0.002398829907178879 2023-01-22 18:59:24.849485: step: 1944/526, loss: 0.022020474076271057 2023-01-22 18:59:25.918744: step: 1948/526, loss: 0.0002267273812321946 2023-01-22 18:59:26.971276: step: 1952/526, loss: 0.005362228490412235 2023-01-22 18:59:28.054367: step: 1956/526, loss: 0.004876091610640287 2023-01-22 18:59:29.123138: step: 1960/526, loss: 0.000742474861908704 2023-01-22 18:59:30.190380: step: 1964/526, loss: 0.0007090331055223942 2023-01-22 18:59:31.253639: step: 1968/526, loss: 0.005307865794748068 2023-01-22 18:59:32.336225: step: 1972/526, loss: 0.00046011366066522896 2023-01-22 18:59:33.402840: step: 1976/526, loss: 2.7777618015534244e-05 2023-01-22 18:59:34.488346: step: 1980/526, loss: 0.0022148333955556154 2023-01-22 18:59:35.544859: step: 1984/526, loss: 0.00011739695037249476 2023-01-22 18:59:36.616517: step: 1988/526, loss: 0.0007252011564560235 2023-01-22 18:59:37.702445: step: 1992/526, loss: 0.005862390622496605 2023-01-22 18:59:38.767398: step: 1996/526, loss: 0.0009581076446920633 2023-01-22 18:59:39.855483: step: 2000/526, loss: 0.0003459296131040901 2023-01-22 18:59:40.913464: step: 2004/526, loss: 0.0059761786833405495 2023-01-22 18:59:41.984926: step: 2008/526, loss: 1.6038326066336595e-05 2023-01-22 18:59:43.065041: step: 2012/526, loss: 0.0003017112903762609 2023-01-22 18:59:44.131091: step: 2016/526, loss: 0.004875024780631065 2023-01-22 18:59:45.189566: step: 2020/526, loss: 0.0007622579578310251 2023-01-22 18:59:46.240787: step: 2024/526, loss: 0.0005461559630930424 2023-01-22 18:59:47.313325: step: 2028/526, loss: 0.004356930032372475 2023-01-22 18:59:48.387594: step: 2032/526, loss: 0.01109517365694046 2023-01-22 18:59:49.447684: step: 2036/526, loss: 0.0016498526092618704 2023-01-22 18:59:50.510467: step: 2040/526, loss: 0.002063177293166518 2023-01-22 18:59:51.564967: step: 2044/526, loss: 0.004093632567673922 2023-01-22 18:59:52.614060: step: 2048/526, loss: 0.0005458946106955409 2023-01-22 18:59:53.696298: step: 2052/526, loss: 0.0023855725303292274 2023-01-22 18:59:54.767331: step: 2056/526, loss: 0.002443872159346938 2023-01-22 18:59:55.821701: step: 2060/526, loss: 0.0003304611600469798 2023-01-22 18:59:56.878227: step: 2064/526, loss: 0.00010898959590122104 2023-01-22 18:59:57.937887: step: 2068/526, loss: 1.2314034393057227e-05 2023-01-22 18:59:59.013359: step: 2072/526, loss: 0.00027076713740825653 2023-01-22 19:00:00.088265: step: 2076/526, loss: 0.017586981877684593 2023-01-22 19:00:01.178473: step: 2080/526, loss: 3.6498055123956874e-05 2023-01-22 19:00:02.257911: step: 2084/526, loss: 0.0009478374267928302 2023-01-22 19:00:03.338157: step: 2088/526, loss: 0.003462414722889662 2023-01-22 19:00:04.419541: step: 2092/526, loss: 0.0009158753091469407 2023-01-22 19:00:05.489795: step: 2096/526, loss: 0.0028194477781653404 2023-01-22 19:00:06.555140: step: 2100/526, loss: 0.000534126244019717 2023-01-22 19:00:07.627436: step: 2104/526, loss: 3.6577486753230914e-05 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3478472222222222, 'r': 0.29702324478178366, 'f1': 0.3204324462640737}, 
'combined': 0.23610811829984377, 'stategy': 1, 'epoch': 17} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33986433209647493, 'r': 0.2394286694549732, 'f1': 0.28093992025151443}, 'combined': 0.15323995650082603, 'stategy': 1, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.313636702444842, 'r': 0.3326810562934851, 'f1': 0.3228782995702885}, 'combined': 0.23791032599915996, 'stategy': 1, 'epoch': 17} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3416361918238854, 'r': 0.26724514547980055, 'f1': 0.29989624641624435}, 'combined': 0.1635797707724969, 'stategy': 1, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3258688892157528, 'r': 0.3308156655226333, 'f1': 0.32832364544336673}, 'combined': 0.24192268611616494, 'stategy': 1, 'epoch': 17} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3376623144744373, 'r': 0.27352189309481817, 'f1': 0.30222651154271024}, 'combined': 0.16485082447784194, 'stategy': 1, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'stategy': 1, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.3017241379310345, 'f1': 0.3723404255319149}, 'combined': 0.2482269503546099, 'stategy': 1, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165581986834231, 'r': 0.3345785895003162, 'f1': 0.3253190344403444}, 'combined': 0.23970876221920112, 'stategy': 1, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.340623754728092, 'r': 0.26427167795921497, 'f1': 0.29762899949451}, 'combined': 0.16234309063336907, 'stategy': 1, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'stategy': 1, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 
'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 18 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 19:02:48.237277: step: 4/526, loss: 0.0012552232947200537 2023-01-22 19:02:49.287080: step: 8/526, loss: 0.0015432165237143636 2023-01-22 19:02:50.351263: step: 12/526, loss: 0.00029508236912079155 2023-01-22 19:02:51.426000: step: 16/526, loss: 0.0018553943373262882 2023-01-22 19:02:52.494090: step: 20/526, loss: 0.002844705479219556 2023-01-22 19:02:53.550807: step: 24/526, loss: 0.005759004037827253 2023-01-22 19:02:54.621400: step: 28/526, loss: 0.007542275823652744 2023-01-22 19:02:55.666056: step: 32/526, loss: 5.0567694415804e-05 2023-01-22 19:02:56.710225: step: 36/526, loss: 0.0005824349937029183 2023-01-22 19:02:57.776368: step: 40/526, loss: 5.674270505551249e-05 2023-01-22 19:02:58.846049: step: 44/526, loss: 0.005207403562963009 2023-01-22 19:02:59.912246: step: 48/526, loss: 0.004842313472181559 2023-01-22 19:03:00.989309: step: 52/526, loss: 0.016914453357458115 2023-01-22 19:03:02.046061: step: 56/526, loss: 0.0007114142063073814 2023-01-22 19:03:03.108784: step: 60/526, loss: 0.0053308820351958275 2023-01-22 19:03:04.172448: step: 64/526, loss: 0.0013062546495348215 2023-01-22 19:03:05.239221: step: 68/526, loss: 0.00212805881164968 2023-01-22 19:03:06.311787: step: 72/526, loss: 0.003834059927612543 2023-01-22 19:03:07.375482: step: 76/526, loss: 1.2718445759674069e-05 2023-01-22 19:03:08.427447: step: 80/526, loss: 4.7502679080935195e-05 2023-01-22 19:03:09.493921: step: 84/526, loss: 0.00018531570094637573 2023-01-22 19:03:10.553391: step: 88/526, loss: 0.0014669826487079263 2023-01-22 19:03:11.606662: step: 92/526, loss: 0.0002054189535556361 2023-01-22 19:03:12.668265: step: 96/526, loss: 0.0005185614572837949 2023-01-22 19:03:13.736991: step: 100/526, loss: 0.0020779587794095278 2023-01-22 19:03:14.798107: step: 104/526, loss: 0.00024974357802420855 2023-01-22 19:03:15.876511: step: 108/526, loss: 0.00022171359159983695 2023-01-22 19:03:16.936563: step: 112/526, loss: 1.4006927528953383e-07 2023-01-22 19:03:18.000819: step: 116/526, loss: 0.0016064938390627503 2023-01-22 19:03:19.078538: step: 120/526, loss: 0.000878556165844202 2023-01-22 19:03:20.149469: step: 124/526, loss: 0.0001981613168027252 2023-01-22 19:03:21.223076: step: 128/526, loss: 0.0005566237377934158 2023-01-22 19:03:22.278897: step: 132/526, loss: 0.0008138316916301847 2023-01-22 19:03:23.339587: step: 136/526, loss: 1.825946856115479e-05 2023-01-22 19:03:24.424057: step: 140/526, loss: 0.001798274926841259 2023-01-22 19:03:25.482662: step: 144/526, loss: 0.00013317097909748554 2023-01-22 19:03:26.550921: step: 148/526, loss: 0.0001498697092756629 2023-01-22 19:03:27.617430: step: 152/526, loss: 0.006235620938241482 
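
Note (editorial, inferred from the logged numbers rather than from train.py itself): in every evaluation dictionary above, the 'combined' value equals the template-level F1 multiplied by the slot-level F1; for example, Sample Chinese at epoch 17 gives 0.6666666666666666 * 0.35555555555555557 = 0.23703703703703705. A minimal sketch of that reading, with hypothetical function names:

def f1(p, r):
    # Standard F1: harmonic mean of precision and recall (0 when both are 0).
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template, slot):
    # 'combined' as observed in the log: product of template F1 and slot F1.
    return template['f1'] * slot['f1']

# Example with the Sample Chinese numbers from epoch 17:
template = {'p': 1.0, 'r': 0.5, 'f1': f1(1.0, 0.5)}                        # f1 = 0.666...
slot = {'p': 0.3333333333333333, 'r': 0.38095238095238093,
        'f1': f1(0.3333333333333333, 0.38095238095238093)}                 # f1 = 0.3555...
print(combined_score(template, slot))                                      # ~0.23703703703703705
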
2023-01-22 19:03:28.676471: step: 156/526, loss: 4.8580281145405024e-05 2023-01-22 19:03:29.733085: step: 160/526, loss: 3.3379259548382834e-05 2023-01-22 19:03:30.813007: step: 164/526, loss: 0.00551857752725482 2023-01-22 19:03:31.872062: step: 168/526, loss: 0.015086750499904156 2023-01-22 19:03:32.935295: step: 172/526, loss: 0.01825663261115551 2023-01-22 19:03:34.006112: step: 176/526, loss: 0.0008697194280102849 2023-01-22 19:03:35.062831: step: 180/526, loss: 0.00011589085625018924 2023-01-22 19:03:36.128773: step: 184/526, loss: 0.0015482259914278984 2023-01-22 19:03:37.188115: step: 188/526, loss: 0.00011017138604074717 2023-01-22 19:03:38.272462: step: 192/526, loss: 5.817196870339103e-05 2023-01-22 19:03:39.337506: step: 196/526, loss: 0.0013396100839599967 2023-01-22 19:03:40.413194: step: 200/526, loss: 5.4391639423556626e-05 2023-01-22 19:03:41.480000: step: 204/526, loss: 0.009423713199794292 2023-01-22 19:03:42.559678: step: 208/526, loss: 0.002839298453181982 2023-01-22 19:03:43.627219: step: 212/526, loss: 0.02412470243871212 2023-01-22 19:03:44.695396: step: 216/526, loss: 0.003705165581777692 2023-01-22 19:03:45.752546: step: 220/526, loss: 5.2387463256309275e-06 2023-01-22 19:03:46.808290: step: 224/526, loss: 5.684604911948554e-05 2023-01-22 19:03:47.890778: step: 228/526, loss: 0.00020536499505396932 2023-01-22 19:03:48.959542: step: 232/526, loss: 0.0006745310383848846 2023-01-22 19:03:50.033340: step: 236/526, loss: 0.000883669825270772 2023-01-22 19:03:51.098282: step: 240/526, loss: 0.0013289484195411205 2023-01-22 19:03:52.155783: step: 244/526, loss: 7.763106987113133e-05 2023-01-22 19:03:53.214440: step: 248/526, loss: 0.006935635581612587 2023-01-22 19:03:54.296351: step: 252/526, loss: 0.01863492652773857 2023-01-22 19:03:55.374344: step: 256/526, loss: 0.00023739633616060019 2023-01-22 19:03:56.455509: step: 260/526, loss: 0.002382552018389106 2023-01-22 19:03:57.517105: step: 264/526, loss: 0.0004798165464308113 2023-01-22 19:03:58.595193: step: 268/526, loss: 0.008879052475094795 2023-01-22 19:03:59.642976: step: 272/526, loss: 7.940034265629947e-06 2023-01-22 19:04:00.716119: step: 276/526, loss: 0.00391066400334239 2023-01-22 19:04:01.774263: step: 280/526, loss: 1.1152409570058808e-05 2023-01-22 19:04:02.855076: step: 284/526, loss: 0.009206926450133324 2023-01-22 19:04:03.923457: step: 288/526, loss: 0.004390468820929527 2023-01-22 19:04:04.994376: step: 292/526, loss: 0.0010674571385607123 2023-01-22 19:04:06.067390: step: 296/526, loss: 0.009136535227298737 2023-01-22 19:04:07.128997: step: 300/526, loss: 0.00143871596083045 2023-01-22 19:04:08.189116: step: 304/526, loss: 0.006449790671467781 2023-01-22 19:04:09.258667: step: 308/526, loss: 0.002073228359222412 2023-01-22 19:04:10.339614: step: 312/526, loss: 0.0001982772082556039 2023-01-22 19:04:11.398780: step: 316/526, loss: 0.00011031299800379202 2023-01-22 19:04:12.456354: step: 320/526, loss: 0.0018698429921641946 2023-01-22 19:04:13.529752: step: 324/526, loss: 0.011391861364245415 2023-01-22 19:04:14.603157: step: 328/526, loss: 0.003047780366614461 2023-01-22 19:04:15.666564: step: 332/526, loss: 0.0006797324167564511 2023-01-22 19:04:16.744195: step: 336/526, loss: 0.0009688339196145535 2023-01-22 19:04:17.799929: step: 340/526, loss: 0.0008761750650592148 2023-01-22 19:04:18.875585: step: 344/526, loss: 0.00014412908058147877 2023-01-22 19:04:19.943626: step: 348/526, loss: 0.005223248619586229 2023-01-22 19:04:21.013496: step: 352/526, loss: 0.001717216451652348 2023-01-22 
19:04:22.074365: step: 356/526, loss: 0.0007923348457552493 2023-01-22 19:04:23.151665: step: 360/526, loss: 0.0024021712597459555 2023-01-22 19:04:24.234801: step: 364/526, loss: 0.0038250302895903587 2023-01-22 19:04:25.309002: step: 368/526, loss: 0.00015468662604689598 2023-01-22 19:04:26.373806: step: 372/526, loss: 0.008588100783526897 2023-01-22 19:04:27.448950: step: 376/526, loss: 7.460331107722595e-05 2023-01-22 19:04:28.520052: step: 380/526, loss: 0.0007693101069889963 2023-01-22 19:04:29.590113: step: 384/526, loss: 8.940690854331024e-09 2023-01-22 19:04:30.658012: step: 388/526, loss: 0.00017963080608751625 2023-01-22 19:04:31.743815: step: 392/526, loss: 0.004797870293259621 2023-01-22 19:04:32.799183: step: 396/526, loss: 0.004584247712045908 2023-01-22 19:04:33.855202: step: 400/526, loss: 0.0027248673141002655 2023-01-22 19:04:34.908713: step: 404/526, loss: 0.0012818766990676522 2023-01-22 19:04:35.979783: step: 408/526, loss: 0.000545281043741852 2023-01-22 19:04:37.043575: step: 412/526, loss: 5.995823084958829e-05 2023-01-22 19:04:38.106016: step: 416/526, loss: 0.005484024528414011 2023-01-22 19:04:39.166513: step: 420/526, loss: 0.01205903384834528 2023-01-22 19:04:40.270242: step: 424/526, loss: 0.004198983311653137 2023-01-22 19:04:41.332193: step: 428/526, loss: 0.004050122573971748 2023-01-22 19:04:42.402655: step: 432/526, loss: 4.898705583400442e-07 2023-01-22 19:04:43.480659: step: 436/526, loss: 0.0020249783992767334 2023-01-22 19:04:44.554069: step: 440/526, loss: 0.0012515069684013724 2023-01-22 19:04:45.656552: step: 444/526, loss: 0.007109955884516239 2023-01-22 19:04:46.722378: step: 448/526, loss: 9.808143659029156e-05 2023-01-22 19:04:47.777542: step: 452/526, loss: 4.350836206867825e-07 2023-01-22 19:04:48.866311: step: 456/526, loss: 0.0023045584093779325 2023-01-22 19:04:49.935086: step: 460/526, loss: 0.006414884701371193 2023-01-22 19:04:51.000577: step: 464/526, loss: 0.0014815098838880658 2023-01-22 19:04:52.090784: step: 468/526, loss: 0.003358148969709873 2023-01-22 19:04:53.153550: step: 472/526, loss: 0.00015327546861954033 2023-01-22 19:04:54.222172: step: 476/526, loss: 2.0948546080035158e-05 2023-01-22 19:04:55.310924: step: 480/526, loss: 0.00772051140666008 2023-01-22 19:04:56.364612: step: 484/526, loss: 0.0003301272518001497 2023-01-22 19:04:57.428962: step: 488/526, loss: 0.0018478522542864084 2023-01-22 19:04:58.496159: step: 492/526, loss: 0.0019110242137685418 2023-01-22 19:04:59.560686: step: 496/526, loss: 0.0033654216676950455 2023-01-22 19:05:00.628303: step: 500/526, loss: 0.0014292567502707243 2023-01-22 19:05:01.698813: step: 504/526, loss: 0.0013498679036274552 2023-01-22 19:05:02.767625: step: 508/526, loss: 0.00046547886449843645 2023-01-22 19:05:03.853124: step: 512/526, loss: 0.00325377332046628 2023-01-22 19:05:04.926947: step: 516/526, loss: 0.0006572949350811541 2023-01-22 19:05:06.004167: step: 520/526, loss: 0.01178732793778181 2023-01-22 19:05:07.079745: step: 524/526, loss: 0.04705970361828804 2023-01-22 19:05:08.161558: step: 528/526, loss: 0.010218554176390171 2023-01-22 19:05:09.213797: step: 532/526, loss: 0.004194215871393681 2023-01-22 19:05:10.274201: step: 536/526, loss: 0.000167710124514997 2023-01-22 19:05:11.346871: step: 540/526, loss: 0.0002781951043289155 2023-01-22 19:05:12.412778: step: 544/526, loss: 0.00947430357336998 2023-01-22 19:05:13.481828: step: 548/526, loss: 0.00024416757514700294 2023-01-22 19:05:14.563461: step: 552/526, loss: 0.007816259749233723 2023-01-22 19:05:15.645196: step: 
556/526, loss: 0.00991266593337059 2023-01-22 19:05:16.713358: step: 560/526, loss: 0.00022158684441819787 2023-01-22 19:05:17.789157: step: 564/526, loss: 0.00968110840767622 2023-01-22 19:05:18.841562: step: 568/526, loss: 0.00019517895998433232 2023-01-22 19:05:19.918305: step: 572/526, loss: 0.000621023413259536 2023-01-22 19:05:21.000229: step: 576/526, loss: 0.0002952921495307237 2023-01-22 19:05:22.047683: step: 580/526, loss: 0.012463895604014397 2023-01-22 19:05:23.123934: step: 584/526, loss: 0.045810893177986145 2023-01-22 19:05:24.189045: step: 588/526, loss: 0.00214147986844182 2023-01-22 19:05:25.250140: step: 592/526, loss: 0.005158303305506706 2023-01-22 19:05:26.320055: step: 596/526, loss: 1.2003584970443626e-07 2023-01-22 19:05:27.393851: step: 600/526, loss: 1.314815790465218e-06 2023-01-22 19:05:28.460436: step: 604/526, loss: 7.70184415159747e-05 2023-01-22 19:05:29.529495: step: 608/526, loss: 0.010479443706572056 2023-01-22 19:05:30.597404: step: 612/526, loss: 0.010963741689920425 2023-01-22 19:05:31.666863: step: 616/526, loss: 2.131580731656868e-05 2023-01-22 19:05:32.730338: step: 620/526, loss: 0.0013266350142657757 2023-01-22 19:05:33.798830: step: 624/526, loss: 0.029699038714170456 2023-01-22 19:05:34.860690: step: 628/526, loss: 1.816090843931306e-05 2023-01-22 19:05:35.941761: step: 632/526, loss: 0.0066762701608240604 2023-01-22 19:05:36.995071: step: 636/526, loss: 0.0032258988358080387 2023-01-22 19:05:38.064042: step: 640/526, loss: 0.0013715425739064813 2023-01-22 19:05:39.133973: step: 644/526, loss: 0.0016116806073114276 2023-01-22 19:05:40.202050: step: 648/526, loss: 0.0020830663852393627 2023-01-22 19:05:41.266555: step: 652/526, loss: 0.0019107782281935215 2023-01-22 19:05:42.325427: step: 656/526, loss: 0.00430784048512578 2023-01-22 19:05:43.395969: step: 660/526, loss: 0.0019466597586870193 2023-01-22 19:05:44.467715: step: 664/526, loss: 0.010582627728581429 2023-01-22 19:05:45.524522: step: 668/526, loss: 5.284410872263834e-05 2023-01-22 19:05:46.588559: step: 672/526, loss: 0.002617364749312401 2023-01-22 19:05:47.669895: step: 676/526, loss: 0.000698226154781878 2023-01-22 19:05:48.741364: step: 680/526, loss: 0.0015460714930668473 2023-01-22 19:05:49.807658: step: 684/526, loss: 0.0019059531623497605 2023-01-22 19:05:50.864343: step: 688/526, loss: 0.005433516576886177 2023-01-22 19:05:51.916364: step: 692/526, loss: 0.00041815851000137627 2023-01-22 19:05:52.994409: step: 696/526, loss: 0.006803176831454039 2023-01-22 19:05:54.062948: step: 700/526, loss: 0.005165285896509886 2023-01-22 19:05:55.122226: step: 704/526, loss: 0.0012575032887980342 2023-01-22 19:05:56.191351: step: 708/526, loss: 0.004025866277515888 2023-01-22 19:05:57.270673: step: 712/526, loss: 0.005767675116658211 2023-01-22 19:05:58.327359: step: 716/526, loss: 0.002313079545274377 2023-01-22 19:05:59.398703: step: 720/526, loss: 0.005921508651226759 2023-01-22 19:06:00.496449: step: 724/526, loss: 1.7197493434650823e-05 2023-01-22 19:06:01.558953: step: 728/526, loss: 0.0003048692306037992 2023-01-22 19:06:02.643549: step: 732/526, loss: 2.1102641767356545e-05 2023-01-22 19:06:03.693504: step: 736/526, loss: 0.0010254151420667768 2023-01-22 19:06:04.750802: step: 740/526, loss: 2.731672975642141e-05 2023-01-22 19:06:05.807636: step: 744/526, loss: 0.0015289149014279246 2023-01-22 19:06:06.884777: step: 748/526, loss: 0.004055901430547237 2023-01-22 19:06:07.972051: step: 752/526, loss: 0.0011888708686456084 2023-01-22 19:06:09.049967: step: 756/526, loss: 
0.001324503100477159 2023-01-22 19:06:10.110917: step: 760/526, loss: 0.0005707154050469398 2023-01-22 19:06:11.178502: step: 764/526, loss: 0.0003623607917688787 2023-01-22 19:06:12.245717: step: 768/526, loss: 0.017197584733366966 2023-01-22 19:06:13.320987: step: 772/526, loss: 0.007878645323216915 2023-01-22 19:06:14.394648: step: 776/526, loss: 0.0019487126264721155 2023-01-22 19:06:15.463355: step: 780/526, loss: 0.0001883176009869203 2023-01-22 19:06:16.529927: step: 784/526, loss: 0.00622760783880949 2023-01-22 19:06:17.589297: step: 788/526, loss: 0.020675070583820343 2023-01-22 19:06:18.662545: step: 792/526, loss: 0.0001200784754473716 2023-01-22 19:06:19.722531: step: 796/526, loss: 6.32395267530228e-06 2023-01-22 19:06:20.775858: step: 800/526, loss: 8.042090485105291e-05 2023-01-22 19:06:21.861542: step: 804/526, loss: 0.00407861452549696 2023-01-22 19:06:22.931965: step: 808/526, loss: 0.0008503625285811722 2023-01-22 19:06:24.005927: step: 812/526, loss: 0.0016875102883204818 2023-01-22 19:06:25.082805: step: 816/526, loss: 0.00010302245937054977 2023-01-22 19:06:26.176787: step: 820/526, loss: 0.016716904938220978 2023-01-22 19:06:27.253863: step: 824/526, loss: 0.008030310273170471 2023-01-22 19:06:28.334753: step: 828/526, loss: 0.004671158734709024 2023-01-22 19:06:29.421227: step: 832/526, loss: 0.0042756772600114346 2023-01-22 19:06:30.502930: step: 836/526, loss: 0.01802619732916355 2023-01-22 19:06:31.561739: step: 840/526, loss: 0.00265071471221745 2023-01-22 19:06:32.627018: step: 844/526, loss: 8.32009973237291e-05 2023-01-22 19:06:33.704984: step: 848/526, loss: 0.007865662686526775 2023-01-22 19:06:34.782326: step: 852/526, loss: 0.002822272013872862 2023-01-22 19:06:35.848747: step: 856/526, loss: 0.020397650077939034 2023-01-22 19:06:36.906257: step: 860/526, loss: 0.005402701906859875 2023-01-22 19:06:37.948665: step: 864/526, loss: 0.005068182945251465 2023-01-22 19:06:39.017541: step: 868/526, loss: 0.007363060489296913 2023-01-22 19:06:40.100278: step: 872/526, loss: 0.005407257936894894 2023-01-22 19:06:41.172751: step: 876/526, loss: 0.002812668215483427 2023-01-22 19:06:42.255780: step: 880/526, loss: 0.002861372660845518 2023-01-22 19:06:43.319924: step: 884/526, loss: 0.0036447602324187756 2023-01-22 19:06:44.380056: step: 888/526, loss: 0.0014477837830781937 2023-01-22 19:06:45.437858: step: 892/526, loss: 0.0039412034675478935 2023-01-22 19:06:46.496050: step: 896/526, loss: 0.0034513745922595263 2023-01-22 19:06:47.579974: step: 900/526, loss: 0.002188685117289424 2023-01-22 19:06:48.648844: step: 904/526, loss: 0.017191726714372635 2023-01-22 19:06:49.705234: step: 908/526, loss: 0.0036448754835873842 2023-01-22 19:06:50.772920: step: 912/526, loss: 0.00021355488570407033 2023-01-22 19:06:51.846657: step: 916/526, loss: 0.006277720909565687 2023-01-22 19:06:52.908252: step: 920/526, loss: 0.0027636305894702673 2023-01-22 19:06:53.960419: step: 924/526, loss: 0.0003872735833283514 2023-01-22 19:06:55.037779: step: 928/526, loss: 0.00211701774969697 2023-01-22 19:06:56.101580: step: 932/526, loss: 5.502950443769805e-06 2023-01-22 19:06:57.163642: step: 936/526, loss: 0.0 2023-01-22 19:06:58.221982: step: 940/526, loss: 0.023802487179636955 2023-01-22 19:06:59.286751: step: 944/526, loss: 0.0020583763252943754 2023-01-22 19:07:00.360610: step: 948/526, loss: 0.00654433760792017 2023-01-22 19:07:01.430654: step: 952/526, loss: 0.00029706076020374894 2023-01-22 19:07:02.487278: step: 956/526, loss: 1.2530629192042397e-06 2023-01-22 19:07:03.564541: 
step: 960/526, loss: 0.00025267916498705745 2023-01-22 19:07:04.651996: step: 964/526, loss: 0.011599645018577576 2023-01-22 19:07:05.715911: step: 968/526, loss: 0.00014393254241440445 2023-01-22 19:07:06.787094: step: 972/526, loss: 0.003187676426023245 2023-01-22 19:07:07.854207: step: 976/526, loss: 0.018875805661082268 2023-01-22 19:07:08.917897: step: 980/526, loss: 0.001086581964045763 2023-01-22 19:07:10.002970: step: 984/526, loss: 0.0055618006736040115 2023-01-22 19:07:11.064081: step: 988/526, loss: 0.0018091712845489383 2023-01-22 19:07:12.148738: step: 992/526, loss: 0.0011270674876868725 2023-01-22 19:07:13.245830: step: 996/526, loss: 0.003447149880230427 2023-01-22 19:07:14.320815: step: 1000/526, loss: 0.005919341463595629 2023-01-22 19:07:15.387711: step: 1004/526, loss: 9.845956810750067e-05 2023-01-22 19:07:16.450501: step: 1008/526, loss: 0.005300566554069519 2023-01-22 19:07:17.527450: step: 1012/526, loss: 0.023917904123663902 2023-01-22 19:07:18.607255: step: 1016/526, loss: 0.005917427595704794 2023-01-22 19:07:19.674827: step: 1020/526, loss: 1.5766294382046908e-05 2023-01-22 19:07:20.739036: step: 1024/526, loss: 0.0003110162215307355 2023-01-22 19:07:21.793461: step: 1028/526, loss: 0.014999203383922577 2023-01-22 19:07:22.863842: step: 1032/526, loss: 0.0016218553064391017 2023-01-22 19:07:23.919382: step: 1036/526, loss: 0.00023428162967320532 2023-01-22 19:07:24.988103: step: 1040/526, loss: 0.001272703055292368 2023-01-22 19:07:26.045210: step: 1044/526, loss: 0.00016677107487339526 2023-01-22 19:07:27.108591: step: 1048/526, loss: 0.001082417438738048 2023-01-22 19:07:28.178184: step: 1052/526, loss: 0.00884781964123249 2023-01-22 19:07:29.232420: step: 1056/526, loss: 3.5004750316147693e-06 2023-01-22 19:07:30.318809: step: 1060/526, loss: 0.00772174634039402 2023-01-22 19:07:31.370373: step: 1064/526, loss: 1.67637832504397e-08 2023-01-22 19:07:32.437843: step: 1068/526, loss: 0.003868556348606944 2023-01-22 19:07:33.504684: step: 1072/526, loss: 0.0027479268610477448 2023-01-22 19:07:34.563046: step: 1076/526, loss: 0.001488381065428257 2023-01-22 19:07:35.643328: step: 1080/526, loss: 0.000838089850731194 2023-01-22 19:07:36.707750: step: 1084/526, loss: 0.0033276726026088 2023-01-22 19:07:37.774876: step: 1088/526, loss: 0.010394815355539322 2023-01-22 19:07:38.841583: step: 1092/526, loss: 0.0001972942118300125 2023-01-22 19:07:39.906177: step: 1096/526, loss: 0.003735617734491825 2023-01-22 19:07:40.979025: step: 1100/526, loss: 0.002268086886033416 2023-01-22 19:07:42.045286: step: 1104/526, loss: 0.0003494498669169843 2023-01-22 19:07:43.127730: step: 1108/526, loss: 0.009836919605731964 2023-01-22 19:07:44.197042: step: 1112/526, loss: 0.0021283235400915146 2023-01-22 19:07:45.285370: step: 1116/526, loss: 0.001698526437394321 2023-01-22 19:07:46.347666: step: 1120/526, loss: 0.002202930161729455 2023-01-22 19:07:47.421653: step: 1124/526, loss: 0.0014717536978423595 2023-01-22 19:07:48.505762: step: 1128/526, loss: 0.0005914639332331717 2023-01-22 19:07:49.596463: step: 1132/526, loss: 0.0019307390321046114 2023-01-22 19:07:50.659065: step: 1136/526, loss: 0.0006913546239957213 2023-01-22 19:07:51.727040: step: 1140/526, loss: 2.4064775061560795e-07 2023-01-22 19:07:52.807810: step: 1144/526, loss: 0.0036357359495013952 2023-01-22 19:07:53.874239: step: 1148/526, loss: 0.00438983179628849 2023-01-22 19:07:54.952468: step: 1152/526, loss: 0.006356218364089727 2023-01-22 19:07:56.004380: step: 1156/526, loss: 6.725236016791314e-05 2023-01-22 
19:07:57.059882: step: 1160/526, loss: 0.004138583783060312 2023-01-22 19:07:58.129505: step: 1164/526, loss: 0.014911224134266376 2023-01-22 19:07:59.197818: step: 1168/526, loss: 0.0010892170248553157 2023-01-22 19:08:00.272097: step: 1172/526, loss: 0.0026417344342917204 2023-01-22 19:08:01.350175: step: 1176/526, loss: 0.0014654689002782106 2023-01-22 19:08:02.416398: step: 1180/526, loss: 0.0023486234713345766 2023-01-22 19:08:03.488266: step: 1184/526, loss: 0.016048137098550797 2023-01-22 19:08:04.553542: step: 1188/526, loss: 0.0032369846012443304 2023-01-22 19:08:05.629114: step: 1192/526, loss: 0.003851949004456401 2023-01-22 19:08:06.713026: step: 1196/526, loss: 0.0007502248627133667 2023-01-22 19:08:07.784742: step: 1200/526, loss: 0.0017704274505376816 2023-01-22 19:08:08.856723: step: 1204/526, loss: 0.000317789992550388 2023-01-22 19:08:09.919150: step: 1208/526, loss: 0.00018782414554152638 2023-01-22 19:08:10.992559: step: 1212/526, loss: 0.0005745368544012308 2023-01-22 19:08:12.042510: step: 1216/526, loss: 0.0016599230002611876 2023-01-22 19:08:13.104879: step: 1220/526, loss: 0.0050687422044575214 2023-01-22 19:08:14.168740: step: 1224/526, loss: 0.0016273499932140112 2023-01-22 19:08:15.221971: step: 1228/526, loss: 5.8454443205846474e-05 2023-01-22 19:08:16.301306: step: 1232/526, loss: 0.0006205542595125735 2023-01-22 19:08:17.362450: step: 1236/526, loss: 0.0033030908089131117 2023-01-22 19:08:18.418443: step: 1240/526, loss: 0.00029082567198202014 2023-01-22 19:08:19.495045: step: 1244/526, loss: 0.0015689934371039271 2023-01-22 19:08:20.546498: step: 1248/526, loss: 0.0007233127835206687 2023-01-22 19:08:21.618333: step: 1252/526, loss: 0.002532371086999774 2023-01-22 19:08:22.684119: step: 1256/526, loss: 2.2210946553968824e-05 2023-01-22 19:08:23.747507: step: 1260/526, loss: 0.002181590534746647 2023-01-22 19:08:24.818265: step: 1264/526, loss: 0.0019160900264978409 2023-01-22 19:08:25.894376: step: 1268/526, loss: 0.00016038064495660365 2023-01-22 19:08:26.954669: step: 1272/526, loss: 0.00285283918492496 2023-01-22 19:08:28.020107: step: 1276/526, loss: 0.00339349708519876 2023-01-22 19:08:29.072547: step: 1280/526, loss: 0.004356840159744024 2023-01-22 19:08:30.141519: step: 1284/526, loss: 3.193903830833733e-05 2023-01-22 19:08:31.198096: step: 1288/526, loss: 0.0013525394024327397 2023-01-22 19:08:32.250432: step: 1292/526, loss: 0.00020840868819504976 2023-01-22 19:08:33.319433: step: 1296/526, loss: 0.0008722965721972287 2023-01-22 19:08:34.391004: step: 1300/526, loss: 0.006697444710880518 2023-01-22 19:08:35.473719: step: 1304/526, loss: 0.00042540772119536996 2023-01-22 19:08:36.540215: step: 1308/526, loss: 0.0019329932983964682 2023-01-22 19:08:37.620677: step: 1312/526, loss: 0.0026747705414891243 2023-01-22 19:08:38.687460: step: 1316/526, loss: 0.00422718096524477 2023-01-22 19:08:39.733217: step: 1320/526, loss: 0.0002730051055550575 2023-01-22 19:08:40.809055: step: 1324/526, loss: 0.0023508635349571705 2023-01-22 19:08:41.870466: step: 1328/526, loss: 0.0005915589863434434 2023-01-22 19:08:42.933569: step: 1332/526, loss: 0.015235540457069874 2023-01-22 19:08:44.000823: step: 1336/526, loss: 3.590072810766287e-05 2023-01-22 19:08:45.081799: step: 1340/526, loss: 0.0007943271775729954 2023-01-22 19:08:46.146967: step: 1344/526, loss: 0.0016559530049562454 2023-01-22 19:08:47.207028: step: 1348/526, loss: 0.0007076248293742537 2023-01-22 19:08:48.278703: step: 1352/526, loss: 8.903425907647033e-08 2023-01-22 19:08:49.337496: step: 1356/526, 
loss: 0.001404323149472475 2023-01-22 19:08:50.406369: step: 1360/526, loss: 0.015384942293167114 2023-01-22 19:08:51.473275: step: 1364/526, loss: 0.012438459321856499 2023-01-22 19:08:52.540970: step: 1368/526, loss: 3.979983739554882e-05 2023-01-22 19:08:53.596947: step: 1372/526, loss: 4.5222627704788465e-06 2023-01-22 19:08:54.666585: step: 1376/526, loss: 1.7502095943200402e-05 2023-01-22 19:08:55.738155: step: 1380/526, loss: 0.002580471569672227 2023-01-22 19:08:56.813394: step: 1384/526, loss: 0.0015712663298472762 2023-01-22 19:08:57.892297: step: 1388/526, loss: 0.00015530420932918787 2023-01-22 19:08:58.979798: step: 1392/526, loss: 0.002688699634745717 2023-01-22 19:09:00.049849: step: 1396/526, loss: 0.0003553742717485875 2023-01-22 19:09:01.113152: step: 1400/526, loss: 0.0008208309882320464 2023-01-22 19:09:02.204182: step: 1404/526, loss: 0.0028692386113107204 2023-01-22 19:09:03.271715: step: 1408/526, loss: 0.007166000548750162 2023-01-22 19:09:04.347379: step: 1412/526, loss: 0.0019115055911242962 2023-01-22 19:09:05.401363: step: 1416/526, loss: 0.0001056921246345155 2023-01-22 19:09:06.471518: step: 1420/526, loss: 0.0048519521951675415 2023-01-22 19:09:07.534771: step: 1424/526, loss: 0.0022555729374289513 2023-01-22 19:09:08.619234: step: 1428/526, loss: 0.001717675942927599 2023-01-22 19:09:09.669587: step: 1432/526, loss: 0.0009089690865948796 2023-01-22 19:09:10.748214: step: 1436/526, loss: 0.0009131130645982921 2023-01-22 19:09:11.812377: step: 1440/526, loss: 3.941644899896346e-05 2023-01-22 19:09:12.885046: step: 1444/526, loss: 0.004647328983992338 2023-01-22 19:09:13.959891: step: 1448/526, loss: 0.004759537987411022 2023-01-22 19:09:15.043496: step: 1452/526, loss: 0.009364446625113487 2023-01-22 19:09:16.100222: step: 1456/526, loss: 0.004628939554095268 2023-01-22 19:09:17.163839: step: 1460/526, loss: 0.003136337734758854 2023-01-22 19:09:18.215857: step: 1464/526, loss: 0.0025901608169078827 2023-01-22 19:09:19.278242: step: 1468/526, loss: 0.002862700028344989 2023-01-22 19:09:20.348692: step: 1472/526, loss: 0.0007572476170025766 2023-01-22 19:09:21.424491: step: 1476/526, loss: 0.00041822122875601053 2023-01-22 19:09:22.480023: step: 1480/526, loss: 1.0058267996271297e-08 2023-01-22 19:09:23.536068: step: 1484/526, loss: 0.0030537082348018885 2023-01-22 19:09:24.608570: step: 1488/526, loss: 0.0032735567074269056 2023-01-22 19:09:25.679919: step: 1492/526, loss: 0.005973272956907749 2023-01-22 19:09:26.731960: step: 1496/526, loss: 0.0034401416778564453 2023-01-22 19:09:27.819828: step: 1500/526, loss: 0.0072583528235554695 2023-01-22 19:09:28.890627: step: 1504/526, loss: 0.027225736528635025 2023-01-22 19:09:29.953799: step: 1508/526, loss: 3.34796350216493e-05 2023-01-22 19:09:31.032121: step: 1512/526, loss: 0.0007972502498887479 2023-01-22 19:09:32.100566: step: 1516/526, loss: 0.002958337077870965 2023-01-22 19:09:33.161072: step: 1520/526, loss: 0.0003607078979257494 2023-01-22 19:09:34.218837: step: 1524/526, loss: 0.002616273704916239 2023-01-22 19:09:35.295767: step: 1528/526, loss: 0.03285061568021774 2023-01-22 19:09:36.366649: step: 1532/526, loss: 0.0025253540370613337 2023-01-22 19:09:37.453605: step: 1536/526, loss: 0.0006588302785530686 2023-01-22 19:09:38.520660: step: 1540/526, loss: 0.0011586386244744062 2023-01-22 19:09:39.587172: step: 1544/526, loss: 0.001132720848545432 2023-01-22 19:09:40.663776: step: 1548/526, loss: 0.0004962706007063389 2023-01-22 19:09:41.716551: step: 1552/526, loss: 0.008462740108370781 2023-01-22 
19:09:42.772145: step: 1556/526, loss: 0.0015544499037787318 2023-01-22 19:09:43.840823: step: 1560/526, loss: 0.002198633970692754 2023-01-22 19:09:44.923002: step: 1564/526, loss: 0.0031304911244660616 2023-01-22 19:09:46.007104: step: 1568/526, loss: 0.0003748323943000287 2023-01-22 19:09:47.099550: step: 1572/526, loss: 0.002680462319403887 2023-01-22 19:09:48.177598: step: 1576/526, loss: 0.0027973796240985394 2023-01-22 19:09:49.243605: step: 1580/526, loss: 2.9268176149344072e-05 2023-01-22 19:09:50.324762: step: 1584/526, loss: 0.0005433326005004346 2023-01-22 19:09:51.398449: step: 1588/526, loss: 0.004561637528240681 2023-01-22 19:09:52.457541: step: 1592/526, loss: 0.00035780001780949533 2023-01-22 19:09:53.517110: step: 1596/526, loss: 0.00047474136226810515 2023-01-22 19:09:54.575738: step: 1600/526, loss: 2.9299728339537978e-05 2023-01-22 19:09:55.644455: step: 1604/526, loss: 0.002011873759329319 2023-01-22 19:09:56.697678: step: 1608/526, loss: 5.00100250064861e-05 2023-01-22 19:09:57.739495: step: 1612/526, loss: 6.500922609120607e-06 2023-01-22 19:09:58.803479: step: 1616/526, loss: 0.006943310145288706 2023-01-22 19:09:59.875706: step: 1620/526, loss: 0.0010031199781224132 2023-01-22 19:10:00.947971: step: 1624/526, loss: 0.0005478385137394071 2023-01-22 19:10:02.020186: step: 1628/526, loss: 0.0005698002642020583 2023-01-22 19:10:03.100500: step: 1632/526, loss: 0.0018338001100346446 2023-01-22 19:10:04.180127: step: 1636/526, loss: 0.0005114403320476413 2023-01-22 19:10:05.243273: step: 1640/526, loss: 7.066483522066846e-05 2023-01-22 19:10:06.305618: step: 1644/526, loss: 2.204283737228252e-05 2023-01-22 19:10:07.378903: step: 1648/526, loss: 0.00647963210940361 2023-01-22 19:10:08.434868: step: 1652/526, loss: 0.003956041298806667 2023-01-22 19:10:09.511863: step: 1656/526, loss: 0.0016059986082836986 2023-01-22 19:10:10.594284: step: 1660/526, loss: 0.001439710846170783 2023-01-22 19:10:11.658421: step: 1664/526, loss: 0.0032056074123829603 2023-01-22 19:10:12.729343: step: 1668/526, loss: 0.0007851261761970818 2023-01-22 19:10:13.817313: step: 1672/526, loss: 0.003304389538243413 2023-01-22 19:10:14.886175: step: 1676/526, loss: 0.0013723867014050484 2023-01-22 19:10:15.959574: step: 1680/526, loss: 0.002044790191575885 2023-01-22 19:10:17.027777: step: 1684/526, loss: 0.0009177444153465331 2023-01-22 19:10:18.110434: step: 1688/526, loss: 0.0008950474439188838 2023-01-22 19:10:19.182409: step: 1692/526, loss: 0.002920059720054269 2023-01-22 19:10:20.265024: step: 1696/526, loss: 1.0374760677223094e-06 2023-01-22 19:10:21.318491: step: 1700/526, loss: 0.0005049766623415053 2023-01-22 19:10:22.378691: step: 1704/526, loss: 0.002634551841765642 2023-01-22 19:10:23.436882: step: 1708/526, loss: 0.0002857272047549486 2023-01-22 19:10:24.494737: step: 1712/526, loss: 2.25374515139265e-05 2023-01-22 19:10:25.562583: step: 1716/526, loss: 0.0007042968063615263 2023-01-22 19:10:26.641443: step: 1720/526, loss: 4.939256177749485e-05 2023-01-22 19:10:27.706199: step: 1724/526, loss: 0.001288863830268383 2023-01-22 19:10:28.773622: step: 1728/526, loss: 0.002705493476241827 2023-01-22 19:10:29.838200: step: 1732/526, loss: 0.0029062661342322826 2023-01-22 19:10:30.906924: step: 1736/526, loss: 2.4182270863093436e-05 2023-01-22 19:10:31.965672: step: 1740/526, loss: 0.005565832369029522 2023-01-22 19:10:33.051602: step: 1744/526, loss: 4.294197788112797e-05 2023-01-22 19:10:34.126056: step: 1748/526, loss: 0.0010161824757233262 2023-01-22 19:10:35.202356: step: 1752/526, loss: 
6.783211574656889e-06 2023-01-22 19:10:36.266347: step: 1756/526, loss: 0.043723803013563156 2023-01-22 19:10:37.343664: step: 1760/526, loss: 0.001600755611434579 2023-01-22 19:10:38.390513: step: 1764/526, loss: 5.468020845000865e-06 2023-01-22 19:10:39.449358: step: 1768/526, loss: 0.0036688607651740313 2023-01-22 19:10:40.517526: step: 1772/526, loss: 0.005449839401990175 2023-01-22 19:10:41.591977: step: 1776/526, loss: 0.004576548933982849 2023-01-22 19:10:42.671994: step: 1780/526, loss: 0.005206066649407148 2023-01-22 19:10:43.727688: step: 1784/526, loss: 3.613529386825576e-08 2023-01-22 19:10:44.796927: step: 1788/526, loss: 0.0015814868966117501 2023-01-22 19:10:45.877266: step: 1792/526, loss: 0.0016092988662421703 2023-01-22 19:10:46.946909: step: 1796/526, loss: 0.00021931059018243104 2023-01-22 19:10:48.018671: step: 1800/526, loss: 0.005094549153000116 2023-01-22 19:10:49.081911: step: 1804/526, loss: 0.0007954631582833827 2023-01-22 19:10:50.145698: step: 1808/526, loss: 0.0015829253243282437 2023-01-22 19:10:51.211191: step: 1812/526, loss: 0.003134468337520957 2023-01-22 19:10:52.280899: step: 1816/526, loss: 0.0036410358734428883 2023-01-22 19:10:53.359151: step: 1820/526, loss: 0.004590882919728756 2023-01-22 19:10:54.428293: step: 1824/526, loss: 0.0006469223299063742 2023-01-22 19:10:55.495752: step: 1828/526, loss: 0.0007000057958066463 2023-01-22 19:10:56.553565: step: 1832/526, loss: 0.0028438419103622437 2023-01-22 19:10:57.607462: step: 1836/526, loss: 0.000142493678140454 2023-01-22 19:10:58.661762: step: 1840/526, loss: 0.00486264331266284 2023-01-22 19:10:59.729976: step: 1844/526, loss: 0.0035808393731713295 2023-01-22 19:11:00.781834: step: 1848/526, loss: 0.0003518101875670254 2023-01-22 19:11:01.846086: step: 1852/526, loss: 0.002024126471951604 2023-01-22 19:11:02.913375: step: 1856/526, loss: 0.012850222177803516 2023-01-22 19:11:03.961293: step: 1860/526, loss: 0.00039805955020710826 2023-01-22 19:11:05.009646: step: 1864/526, loss: 0.0037232115864753723 2023-01-22 19:11:06.070646: step: 1868/526, loss: 0.005582768004387617 2023-01-22 19:11:07.128693: step: 1872/526, loss: 0.0005623808247037232 2023-01-22 19:11:08.210662: step: 1876/526, loss: 0.0016592019237577915 2023-01-22 19:11:09.277592: step: 1880/526, loss: 0.004121776670217514 2023-01-22 19:11:10.351705: step: 1884/526, loss: 0.007590590510517359 2023-01-22 19:11:11.433113: step: 1888/526, loss: 0.0072757345624268055 2023-01-22 19:11:12.495331: step: 1892/526, loss: 0.005597613751888275 2023-01-22 19:11:13.576616: step: 1896/526, loss: 0.0008631025557406247 2023-01-22 19:11:14.650192: step: 1900/526, loss: 0.0017087755259126425 2023-01-22 19:11:15.715908: step: 1904/526, loss: 0.004748784936964512 2023-01-22 19:11:16.788877: step: 1908/526, loss: 0.004131928086280823 2023-01-22 19:11:17.849133: step: 1912/526, loss: 0.0014519084943458438 2023-01-22 19:11:18.912188: step: 1916/526, loss: 0.00809105858206749 2023-01-22 19:11:19.979799: step: 1920/526, loss: 0.0004664697335101664 2023-01-22 19:11:21.044874: step: 1924/526, loss: 0.0006500197923742235 2023-01-22 19:11:22.109821: step: 1928/526, loss: 0.0017618017736822367 2023-01-22 19:11:23.164960: step: 1932/526, loss: 6.68510765535757e-05 2023-01-22 19:11:24.227110: step: 1936/526, loss: 0.002272934652864933 2023-01-22 19:11:25.311538: step: 1940/526, loss: 0.0025545170065015554 2023-01-22 19:11:26.381341: step: 1944/526, loss: 5.807965135318227e-05 2023-01-22 19:11:27.455982: step: 1948/526, loss: 0.0007770382217131555 2023-01-22 
19:11:28.518473: step: 1952/526, loss: 2.8514297810033895e-05 2023-01-22 19:11:29.614825: step: 1956/526, loss: 0.0037726627197116613 2023-01-22 19:11:30.670909: step: 1960/526, loss: 3.890847821708121e-08 2023-01-22 19:11:31.748946: step: 1964/526, loss: 0.0003265069972258061 2023-01-22 19:11:32.834223: step: 1968/526, loss: 0.0034496900625526905 2023-01-22 19:11:33.888321: step: 1972/526, loss: 0.005602105986326933 2023-01-22 19:11:34.950059: step: 1976/526, loss: 0.004039344377815723 2023-01-22 19:11:36.019805: step: 1980/526, loss: 0.0053483834490180016 2023-01-22 19:11:37.080511: step: 1984/526, loss: 0.032101910561323166 2023-01-22 19:11:38.152326: step: 1988/526, loss: 0.00018123260815627873 2023-01-22 19:11:39.237772: step: 1992/526, loss: 2.514551340482285e-07 2023-01-22 19:11:40.310114: step: 1996/526, loss: 0.0031049828976392746 2023-01-22 19:11:41.366259: step: 2000/526, loss: 0.00010156889766221866 2023-01-22 19:11:42.426716: step: 2004/526, loss: 0.0019236773950979114 2023-01-22 19:11:43.492207: step: 2008/526, loss: 0.004513517487794161 2023-01-22 19:11:44.580968: step: 2012/526, loss: 0.000804692623205483 2023-01-22 19:11:45.646507: step: 2016/526, loss: 0.013205349445343018 2023-01-22 19:11:46.716416: step: 2020/526, loss: 0.0024769336450845003 2023-01-22 19:11:47.797580: step: 2024/526, loss: 0.013315088115632534 2023-01-22 19:11:48.869485: step: 2028/526, loss: 0.007460259832441807 2023-01-22 19:11:49.937713: step: 2032/526, loss: 0.002822374226525426 2023-01-22 19:11:51.017152: step: 2036/526, loss: 0.0010779191507026553 2023-01-22 19:11:52.085173: step: 2040/526, loss: 0.001435117213986814 2023-01-22 19:11:53.139890: step: 2044/526, loss: 0.0010898748878389597 2023-01-22 19:11:54.190496: step: 2048/526, loss: 0.0006823171279393137 2023-01-22 19:11:55.276555: step: 2052/526, loss: 0.005039572715759277 2023-01-22 19:11:56.363664: step: 2056/526, loss: 0.008580667898058891 2023-01-22 19:11:57.418572: step: 2060/526, loss: 0.0006023873575031757 2023-01-22 19:11:58.481397: step: 2064/526, loss: 5.9271555073792115e-05 2023-01-22 19:11:59.555313: step: 2068/526, loss: 0.0025310919154435396 2023-01-22 19:12:00.623519: step: 2072/526, loss: 0.003380388719961047 2023-01-22 19:12:01.701369: step: 2076/526, loss: 0.0006637676269747317 2023-01-22 19:12:02.791375: step: 2080/526, loss: 0.004664552863687277 2023-01-22 19:12:03.877984: step: 2084/526, loss: 0.00042629099334590137 2023-01-22 19:12:04.921811: step: 2088/526, loss: 5.476576916407794e-06 2023-01-22 19:12:05.988053: step: 2092/526, loss: 0.0011100373230874538 2023-01-22 19:12:07.040615: step: 2096/526, loss: 0.0008667405345477164 2023-01-22 19:12:08.117601: step: 2100/526, loss: 0.001465075183659792 2023-01-22 19:12:09.178858: step: 2104/526, loss: 0.0012715288903564215 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3501817673378076, 'r': 0.29702324478178366, 'f1': 0.32141940451745377}, 'combined': 0.23683535069707118, 'stategy': 1, 'epoch': 18} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3405136827458256, 'r': 0.24010580193615907, 'f1': 0.2816278579100813}, 'combined': 0.15361519522368072, 'stategy': 1, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3143010992275698, 'r': 0.3345785895003162, 'f1': 0.32412300857843135}, 'combined': 
0.23882748000515994, 'stategy': 1, 'epoch': 18} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34248128672426803, 'r': 0.26884624337550045, 'f1': 0.3012290558784439}, 'combined': 0.16430675775187847, 'stategy': 1, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3272707583746313, 'r': 0.33223881542775663, 'f1': 0.3297360748218978}, 'combined': 0.24296342355297731, 'stategy': 1, 'epoch': 18} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33502409981910874, 'r': 0.2726086510856857, 'f1': 0.3006107481760582}, 'combined': 0.16396949900512264, 'stategy': 1, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38461538461538464, 'r': 0.43478260869565216, 'f1': 0.40816326530612246}, 'combined': 0.20408163265306123, 'stategy': 1, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.3017241379310345, 'f1': 0.3723404255319149}, 'combined': 0.2482269503546099, 'stategy': 1, 'epoch': 18} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35344101123595506, 'r': 0.29844639468690703, 'f1': 0.3236239711934157}, 'combined': 0.23845976824777995, 'stategy': 1, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3382607142857143, 'r': 0.2323219191522763, 'f1': 0.275456607724523}, 'combined': 0.15024905875883074, 'stategy': 1, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3143010992275698, 'r': 0.3345785895003162, 'f1': 0.32412300857843135}, 'combined': 0.23882748000515994, 'stategy': 1, 'epoch': 18} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34248128672426803, 'r': 0.26884624337550045, 'f1': 0.3012290558784439}, 'combined': 0.16430675775187847, 'stategy': 1, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38461538461538464, 'r': 0.43478260869565216, 'f1': 0.40816326530612246}, 'combined': 0.20408163265306123, 'stategy': 1, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 
'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 19 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 19:15:00.805414: step: 4/526, loss: 0.000569669995456934 2023-01-22 19:15:01.871971: step: 8/526, loss: 3.2244915928458795e-05 2023-01-22 19:15:02.937824: step: 12/526, loss: 0.0024412390775978565 2023-01-22 19:15:03.981683: step: 16/526, loss: 0.006111631169915199 2023-01-22 19:15:05.043514: step: 20/526, loss: 0.00016594465705566108 2023-01-22 19:15:06.096608: step: 24/526, loss: 0.0001973848120542243 2023-01-22 19:15:07.154504: step: 28/526, loss: 0.00048437825171276927 2023-01-22 19:15:08.208724: step: 32/526, loss: 0.00014461729733739048 2023-01-22 19:15:09.273830: step: 36/526, loss: 5.002100806450471e-05 2023-01-22 19:15:10.345571: step: 40/526, loss: 0.000659647339489311 2023-01-22 19:15:11.381765: step: 44/526, loss: 0.0010140965459868312 2023-01-22 19:15:12.446659: step: 48/526, loss: 0.0045945728197693825 2023-01-22 19:15:13.505959: step: 52/526, loss: 0.01648615673184395 2023-01-22 19:15:14.558696: step: 56/526, loss: 0.014753147959709167 2023-01-22 19:15:15.608432: step: 60/526, loss: 0.0036627831868827343 2023-01-22 19:15:16.662535: step: 64/526, loss: 0.0016998531064018607 2023-01-22 19:15:17.736104: step: 68/526, loss: 0.0015511858509853482 2023-01-22 19:15:18.788876: step: 72/526, loss: 0.0016527875559404492 2023-01-22 19:15:19.844957: step: 76/526, loss: 0.008284826762974262 2023-01-22 19:15:20.912217: step: 80/526, loss: 0.0019192583858966827 2023-01-22 19:15:21.972898: step: 84/526, loss: 0.00046214356552809477 2023-01-22 19:15:23.030288: step: 88/526, loss: 0.0023678361903876066 2023-01-22 19:15:24.092811: step: 92/526, loss: 0.0011270419927313924 2023-01-22 19:15:25.168253: step: 96/526, loss: 0.0011837695492431521 2023-01-22 19:15:26.243386: step: 100/526, loss: 0.005285175982862711 2023-01-22 19:15:27.295921: step: 104/526, loss: 0.00016043600044213235 2023-01-22 19:15:28.368634: step: 108/526, loss: 0.005015851464122534 2023-01-22 19:15:29.434985: step: 112/526, loss: 0.00028798842686228454 2023-01-22 19:15:30.515440: step: 116/526, loss: 0.0035528892185539007 2023-01-22 19:15:31.568477: step: 120/526, loss: 0.003343100193887949 2023-01-22 19:15:32.623982: step: 124/526, loss: 0.0009230113355442882 2023-01-22 19:15:33.675039: step: 128/526, loss: 0.0028537840116769075 2023-01-22 19:15:34.735654: step: 132/526, loss: 0.0014088028110563755 2023-01-22 19:15:35.789228: step: 136/526, loss: 0.00044063152745366096 2023-01-22 19:15:36.859735: step: 140/526, loss: 0.0011419329093769193 2023-01-22 19:15:37.909110: step: 144/526, loss: 0.00012337852967903018 2023-01-22 19:15:38.985371: step: 148/526, loss: 7.015644223429263e-05 2023-01-22 19:15:40.043220: step: 152/526, loss: 0.0028316678944975138 2023-01-22 19:15:41.112895: step: 156/526, loss: 0.0024008823093026876 2023-01-22 19:15:42.173556: step: 160/526, loss: 0.013563969172537327 2023-01-22 19:15:43.249781: step: 164/526, loss: 0.0017817881889641285 2023-01-22 19:15:44.327471: step: 168/526, loss: 0.006673426833003759 2023-01-22 19:15:45.382346: step: 172/526, loss: 4.339169754530303e-05 2023-01-22 19:15:46.448206: step: 176/526, loss: 0.001986489864066243 2023-01-22 
19:15:47.523229: step: 180/526, loss: 0.016091465950012207 2023-01-22 19:15:48.595289: step: 184/526, loss: 0.000978349125944078 2023-01-22 19:15:49.667692: step: 188/526, loss: 0.012071536853909492 2023-01-22 19:15:50.747957: step: 192/526, loss: 0.0010617480147629976 2023-01-22 19:15:51.819026: step: 196/526, loss: 2.8688744350802153e-05 2023-01-22 19:15:52.891111: step: 200/526, loss: 0.00386435491964221 2023-01-22 19:15:53.975131: step: 204/526, loss: 0.005183544475585222 2023-01-22 19:15:55.033072: step: 208/526, loss: 0.0015531701501458883 2023-01-22 19:15:56.093909: step: 212/526, loss: 0.04391736909747124 2023-01-22 19:15:57.146051: step: 216/526, loss: 0.001453329692594707 2023-01-22 19:15:58.212881: step: 220/526, loss: 0.0009062530589289963 2023-01-22 19:15:59.284722: step: 224/526, loss: 1.8443685689817357e-07 2023-01-22 19:16:00.352385: step: 228/526, loss: 0.003171028569340706 2023-01-22 19:16:01.421017: step: 232/526, loss: 0.012162331491708755 2023-01-22 19:16:02.495382: step: 236/526, loss: 0.0022634435445070267 2023-01-22 19:16:03.555793: step: 240/526, loss: 0.00045546016190201044 2023-01-22 19:16:04.641268: step: 244/526, loss: 0.0020304692443460226 2023-01-22 19:16:05.729112: step: 248/526, loss: 0.008071152493357658 2023-01-22 19:16:06.797767: step: 252/526, loss: 0.0001804004714358598 2023-01-22 19:16:07.894722: step: 256/526, loss: 0.006697654724121094 2023-01-22 19:16:08.961457: step: 260/526, loss: 0.016047224402427673 2023-01-22 19:16:10.029443: step: 264/526, loss: 0.005625705700367689 2023-01-22 19:16:11.088659: step: 268/526, loss: 0.0002054600336123258 2023-01-22 19:16:12.161946: step: 272/526, loss: 3.677611312014051e-05 2023-01-22 19:16:13.247290: step: 276/526, loss: 0.003997755236923695 2023-01-22 19:16:14.322918: step: 280/526, loss: 0.00029645985341630876 2023-01-22 19:16:15.383088: step: 284/526, loss: 0.0006751755136065185 2023-01-22 19:16:16.453809: step: 288/526, loss: 0.001563030993565917 2023-01-22 19:16:17.510824: step: 292/526, loss: 0.00112351484131068 2023-01-22 19:16:18.599179: step: 296/526, loss: 0.0025632455945014954 2023-01-22 19:16:19.653088: step: 300/526, loss: 0.005517404060810804 2023-01-22 19:16:20.711502: step: 304/526, loss: 8.595208055339754e-05 2023-01-22 19:16:21.786563: step: 308/526, loss: 0.0011672399705275893 2023-01-22 19:16:22.861678: step: 312/526, loss: 0.0028854880947619677 2023-01-22 19:16:23.928186: step: 316/526, loss: 0.0003983911301475018 2023-01-22 19:16:25.006579: step: 320/526, loss: 1.4268714039644692e-05 2023-01-22 19:16:26.069328: step: 324/526, loss: 0.002732402179390192 2023-01-22 19:16:27.133896: step: 328/526, loss: 0.003224008483812213 2023-01-22 19:16:28.210760: step: 332/526, loss: 9.395511006005108e-05 2023-01-22 19:16:29.284197: step: 336/526, loss: 0.0034900426398962736 2023-01-22 19:16:30.350216: step: 340/526, loss: 0.005861368961632252 2023-01-22 19:16:31.445661: step: 344/526, loss: 0.002362034749239683 2023-01-22 19:16:32.515804: step: 348/526, loss: 0.0017583552980795503 2023-01-22 19:16:33.582322: step: 352/526, loss: 0.008526742458343506 2023-01-22 19:16:34.652100: step: 356/526, loss: 0.000456963200122118 2023-01-22 19:16:35.723603: step: 360/526, loss: 0.004144969396293163 2023-01-22 19:16:36.792786: step: 364/526, loss: 0.0024426057934761047 2023-01-22 19:16:37.863321: step: 368/526, loss: 0.0011936324881389737 2023-01-22 19:16:38.937284: step: 372/526, loss: 2.695504690564121e-06 2023-01-22 19:16:40.011940: step: 376/526, loss: 0.0002537055697757751 2023-01-22 19:16:41.093430: step: 
380/526, loss: 0.0034151566214859486 2023-01-22 19:16:42.156570: step: 384/526, loss: 0.0006154229631647468 2023-01-22 19:16:43.231439: step: 388/526, loss: 0.00034621491795405746 2023-01-22 19:16:44.320335: step: 392/526, loss: 0.0012314682826399803 2023-01-22 19:16:45.396786: step: 396/526, loss: 0.00019274740770924836 2023-01-22 19:16:46.459086: step: 400/526, loss: 0.002828571479767561 2023-01-22 19:16:47.523440: step: 404/526, loss: 8.624631959719409e-07 2023-01-22 19:16:48.588963: step: 408/526, loss: 0.0013867750531062484 2023-01-22 19:16:49.654664: step: 412/526, loss: 1.3585689885076135e-05 2023-01-22 19:16:50.722256: step: 416/526, loss: 0.0030869045294821262 2023-01-22 19:16:51.794761: step: 420/526, loss: 5.697604592569405e-06 2023-01-22 19:16:52.868326: step: 424/526, loss: 0.0044848923571407795 2023-01-22 19:16:53.938294: step: 428/526, loss: 0.00022074829030316323 2023-01-22 19:16:55.020354: step: 432/526, loss: 0.000835778599139303 2023-01-22 19:16:56.098538: step: 436/526, loss: 2.2209900635061786e-05 2023-01-22 19:16:57.171514: step: 440/526, loss: 0.0015263669192790985 2023-01-22 19:16:58.233321: step: 444/526, loss: 0.0013343016617000103 2023-01-22 19:16:59.298596: step: 448/526, loss: 0.0004796128487214446 2023-01-22 19:17:00.372155: step: 452/526, loss: 0.0045418450608849525 2023-01-22 19:17:01.452565: step: 456/526, loss: 0.0002619755978230387 2023-01-22 19:17:02.529500: step: 460/526, loss: 0.003085214179009199 2023-01-22 19:17:03.592948: step: 464/526, loss: 6.343096174532548e-05 2023-01-22 19:17:04.665601: step: 468/526, loss: 0.0013672653585672379 2023-01-22 19:17:05.729598: step: 472/526, loss: 0.003144089598208666 2023-01-22 19:17:06.813511: step: 476/526, loss: 0.0060950350016355515 2023-01-22 19:17:07.892928: step: 480/526, loss: 0.00015191845886874944 2023-01-22 19:17:08.966077: step: 484/526, loss: 0.009082391858100891 2023-01-22 19:17:10.062200: step: 488/526, loss: 0.005123598035424948 2023-01-22 19:17:11.119287: step: 492/526, loss: 9.851953564066207e-07 2023-01-22 19:17:12.197330: step: 496/526, loss: 0.00010150056186830625 2023-01-22 19:17:13.256324: step: 500/526, loss: 0.0009043654426932335 2023-01-22 19:17:14.326375: step: 504/526, loss: 0.003390824655070901 2023-01-22 19:17:15.399253: step: 508/526, loss: 0.0026581601705402136 2023-01-22 19:17:16.470622: step: 512/526, loss: 0.0013180566020309925 2023-01-22 19:17:17.536042: step: 516/526, loss: 0.0028439939487725496 2023-01-22 19:17:18.616142: step: 520/526, loss: 0.0023930263705551624 2023-01-22 19:17:19.676380: step: 524/526, loss: 0.00011110230116173625 2023-01-22 19:17:20.737633: step: 528/526, loss: 0.0007991700549609959 2023-01-22 19:17:21.813706: step: 532/526, loss: 0.004436293616890907 2023-01-22 19:17:22.878496: step: 536/526, loss: 0.0008413216564804316 2023-01-22 19:17:23.954457: step: 540/526, loss: 0.0013426182558760047 2023-01-22 19:17:25.025873: step: 544/526, loss: 0.011862105689942837 2023-01-22 19:17:26.093766: step: 548/526, loss: 0.000603470194619149 2023-01-22 19:17:27.167376: step: 552/526, loss: 0.0012973761186003685 2023-01-22 19:17:28.229027: step: 556/526, loss: 0.005763054825365543 2023-01-22 19:17:29.297984: step: 560/526, loss: 0.0020640764851123095 2023-01-22 19:17:30.370025: step: 564/526, loss: 0.0017110321205109358 2023-01-22 19:17:31.435295: step: 568/526, loss: 0.0024442095309495926 2023-01-22 19:17:32.535055: step: 572/526, loss: 0.04741708189249039 2023-01-22 19:17:33.628137: step: 576/526, loss: 0.0006900339503772557 2023-01-22 19:17:34.711502: step: 580/526, 
loss: 0.00041801895713433623 2023-01-22 19:17:35.785456: step: 584/526, loss: 0.00019650156900752336 2023-01-22 19:17:36.865704: step: 588/526, loss: 0.00042187742656096816 2023-01-22 19:17:37.928094: step: 592/526, loss: 0.00012364753638394177 2023-01-22 19:17:39.005029: step: 596/526, loss: 0.00045142672024667263 2023-01-22 19:17:40.084080: step: 600/526, loss: 0.0035812421701848507 2023-01-22 19:17:41.152558: step: 604/526, loss: 0.0034070913679897785 2023-01-22 19:17:42.209366: step: 608/526, loss: 0.002796769142150879 2023-01-22 19:17:43.312773: step: 612/526, loss: 0.010457450523972511 2023-01-22 19:17:44.374257: step: 616/526, loss: 0.0021682356018573046 2023-01-22 19:17:45.447774: step: 620/526, loss: 0.0013233019271865487 2023-01-22 19:17:46.516318: step: 624/526, loss: 0.0010071101132780313 2023-01-22 19:17:47.584231: step: 628/526, loss: 0.001086207339540124 2023-01-22 19:17:48.643752: step: 632/526, loss: 0.0038121482357382774 2023-01-22 19:17:49.715755: step: 636/526, loss: 0.0034389120992273092 2023-01-22 19:17:50.789333: step: 640/526, loss: 0.0036294718738645315 2023-01-22 19:17:51.841476: step: 644/526, loss: 0.0021401431877166033 2023-01-22 19:17:52.901780: step: 648/526, loss: 0.0011671707034111023 2023-01-22 19:17:53.980211: step: 652/526, loss: 0.0002695779548957944 2023-01-22 19:17:55.042594: step: 656/526, loss: 0.0038533394690603018 2023-01-22 19:17:56.113513: step: 660/526, loss: 0.00017523662245366722 2023-01-22 19:17:57.185232: step: 664/526, loss: 4.067300324095413e-05 2023-01-22 19:17:58.247718: step: 668/526, loss: 0.002836147788912058 2023-01-22 19:17:59.322033: step: 672/526, loss: 0.0037578134797513485 2023-01-22 19:18:00.382570: step: 676/526, loss: 0.00014514665235765278 2023-01-22 19:18:01.454493: step: 680/526, loss: 0.0015787945594638586 2023-01-22 19:18:02.527161: step: 684/526, loss: 0.010320290923118591 2023-01-22 19:18:03.597561: step: 688/526, loss: 0.007087341509759426 2023-01-22 19:18:04.672550: step: 692/526, loss: 1.1761619134631474e-05 2023-01-22 19:18:05.720648: step: 696/526, loss: 0.0007945873658172786 2023-01-22 19:18:06.781605: step: 700/526, loss: 0.0002718539035413414 2023-01-22 19:18:07.843865: step: 704/526, loss: 0.0 2023-01-22 19:18:08.909116: step: 708/526, loss: 0.00011284255015198141 2023-01-22 19:18:09.970955: step: 712/526, loss: 3.4059776226058602e-06 2023-01-22 19:18:11.050428: step: 716/526, loss: 0.003200100501999259 2023-01-22 19:18:12.122031: step: 720/526, loss: 0.006048861891031265 2023-01-22 19:18:13.216057: step: 724/526, loss: 0.0012870689388364553 2023-01-22 19:18:14.289408: step: 728/526, loss: 2.732599023147486e-05 2023-01-22 19:18:15.375744: step: 732/526, loss: 0.0008709495887160301 2023-01-22 19:18:16.442960: step: 736/526, loss: 0.003461112268269062 2023-01-22 19:18:17.506747: step: 740/526, loss: 0.00013872893759980798 2023-01-22 19:18:18.573376: step: 744/526, loss: 0.007967110723257065 2023-01-22 19:18:19.650571: step: 748/526, loss: 0.0004007107054349035 2023-01-22 19:18:20.719232: step: 752/526, loss: 3.608710176195018e-05 2023-01-22 19:18:21.773500: step: 756/526, loss: 6.621971988352016e-05 2023-01-22 19:18:22.842556: step: 760/526, loss: 0.007780269719660282 2023-01-22 19:18:23.920137: step: 764/526, loss: 0.001570235239341855 2023-01-22 19:18:24.979167: step: 768/526, loss: 0.0058837831020355225 2023-01-22 19:18:26.040681: step: 772/526, loss: 3.5347639482097293e-07 2023-01-22 19:18:27.124643: step: 776/526, loss: 0.0017492821207270026 2023-01-22 19:18:28.183540: step: 780/526, loss: 
0.00011164666648255661 2023-01-22 19:18:29.241147: step: 784/526, loss: 0.01498086005449295 2023-01-22 19:18:30.308942: step: 788/526, loss: 0.00361587293446064 2023-01-22 19:18:31.381488: step: 792/526, loss: 3.799923797487281e-05 2023-01-22 19:18:32.440309: step: 796/526, loss: 0.0005075965891592205 2023-01-22 19:18:33.512476: step: 800/526, loss: 0.01113780029118061 2023-01-22 19:18:34.582316: step: 804/526, loss: 0.0006841511349193752 2023-01-22 19:18:35.648409: step: 808/526, loss: 0.0003625400713644922 2023-01-22 19:18:36.740037: step: 812/526, loss: 0.00024903123266994953 2023-01-22 19:18:37.797844: step: 816/526, loss: 0.0005136379622854292 2023-01-22 19:18:38.880453: step: 820/526, loss: 0.0004205251461826265 2023-01-22 19:18:39.960178: step: 824/526, loss: 0.0035737608559429646 2023-01-22 19:18:41.041416: step: 828/526, loss: 0.00017109981854446232 2023-01-22 19:18:42.106636: step: 832/526, loss: 0.001606510835699737 2023-01-22 19:18:43.180247: step: 836/526, loss: 0.00104121258482337 2023-01-22 19:18:44.246824: step: 840/526, loss: 0.016616296023130417 2023-01-22 19:18:45.336132: step: 844/526, loss: 0.004030216485261917 2023-01-22 19:18:46.401844: step: 848/526, loss: 0.00169152463786304 2023-01-22 19:18:47.459859: step: 852/526, loss: 3.169671253999695e-05 2023-01-22 19:18:48.533253: step: 856/526, loss: 0.027636200189590454 2023-01-22 19:18:49.598010: step: 860/526, loss: 0.0014092089841142297 2023-01-22 19:18:50.658530: step: 864/526, loss: 0.001951780985109508 2023-01-22 19:18:51.725247: step: 868/526, loss: 8.349808922503144e-05 2023-01-22 19:18:52.802753: step: 872/526, loss: 0.0006212798180058599 2023-01-22 19:18:53.866225: step: 876/526, loss: 0.0037468839436769485 2023-01-22 19:18:54.958282: step: 880/526, loss: 0.006238115485757589 2023-01-22 19:18:56.018235: step: 884/526, loss: 0.0010426564840599895 2023-01-22 19:18:57.084115: step: 888/526, loss: 0.006967831403017044 2023-01-22 19:18:58.141285: step: 892/526, loss: 0.0062560816295444965 2023-01-22 19:18:59.207188: step: 896/526, loss: 0.0051534236408770084 2023-01-22 19:19:00.286213: step: 900/526, loss: 0.009966417215764523 2023-01-22 19:19:01.343272: step: 904/526, loss: 0.02571197599172592 2023-01-22 19:19:02.420628: step: 908/526, loss: 0.0004537216736935079 2023-01-22 19:19:03.493005: step: 912/526, loss: 0.001871958957053721 2023-01-22 19:19:04.563965: step: 916/526, loss: 0.003109056269749999 2023-01-22 19:19:05.633698: step: 920/526, loss: 0.0003085378557443619 2023-01-22 19:19:06.701899: step: 924/526, loss: 0.014510254375636578 2023-01-22 19:19:07.759393: step: 928/526, loss: 0.00020686320203822106 2023-01-22 19:19:08.831805: step: 932/526, loss: 1.6763797461294416e-08 2023-01-22 19:19:09.919230: step: 936/526, loss: 0.00014371155702974647 2023-01-22 19:19:11.005686: step: 940/526, loss: 0.0007291205110959709 2023-01-22 19:19:12.064726: step: 944/526, loss: 0.006861106026917696 2023-01-22 19:19:13.145570: step: 948/526, loss: 0.0008004697156138718 2023-01-22 19:19:14.209260: step: 952/526, loss: 0.0031208854634314775 2023-01-22 19:19:15.285502: step: 956/526, loss: 0.0013908748514950275 2023-01-22 19:19:16.353422: step: 960/526, loss: 0.00047287711640819907 2023-01-22 19:19:17.423713: step: 964/526, loss: 0.0074398452416062355 2023-01-22 19:19:18.492108: step: 968/526, loss: 0.0003213495365343988 2023-01-22 19:19:19.559370: step: 972/526, loss: 0.003496794728562236 2023-01-22 19:19:20.621737: step: 976/526, loss: 0.0002683876664377749 2023-01-22 19:19:21.688773: step: 980/526, loss: 0.002398095326498151 
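
Note (editorial, a hedged reading of the step counter rather than a statement about train.py): the printed "step: N/526" numerator runs past its denominator (reaching 2104 = 526 * 4 at the end of epoch 18), which is consistent with the counter advancing by --accumulate_step (4) per logged batch while 526 is the number of batches per epoch. A minimal, hypothetical gradient-accumulation loop that would produce this numbering:

import datetime

def train_one_epoch(model, batches, optimizer, accumulate_step=4):
    # Hypothetical loop, not the project's code; model and optimizer are assumed
    # to be PyTorch objects and model(batch) to return a scalar loss tensor.
    model.train()
    for i, batch in enumerate(batches, start=1):
        loss = model(batch)
        (loss / accumulate_step).backward()      # scale so accumulated gradients average out
        if i % accumulate_step == 0:             # optimizer update every accumulate_step batches
            optimizer.step()
            optimizer.zero_grad()
        # Numerator advances by accumulate_step per batch: 4, 8, ..., len(batches) * 4.
        print(f"{datetime.datetime.now()}: step: {i * accumulate_step}/{len(batches)}, "
              f"loss: {loss.item()}")
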
2023-01-22 19:19:22.772661: step: 984/526, loss: 0.0013283496955409646 2023-01-22 19:19:23.845323: step: 988/526, loss: 0.004707751329988241 2023-01-22 19:19:24.908299: step: 992/526, loss: 0.0007992981700226665 2023-01-22 19:19:25.993383: step: 996/526, loss: 0.004032555967569351 2023-01-22 19:19:27.068742: step: 1000/526, loss: 0.002073394600301981 2023-01-22 19:19:28.143370: step: 1004/526, loss: 0.004528529476374388 2023-01-22 19:19:29.211046: step: 1008/526, loss: 0.001896018162369728 2023-01-22 19:19:30.270837: step: 1012/526, loss: 0.001797710545361042 2023-01-22 19:19:31.336981: step: 1016/526, loss: 0.012676495127379894 2023-01-22 19:19:32.396863: step: 1020/526, loss: 5.2017931011505425e-05 2023-01-22 19:19:33.477601: step: 1024/526, loss: 6.385130109265447e-05 2023-01-22 19:19:34.542832: step: 1028/526, loss: 0.0012638600310310721 2023-01-22 19:19:35.602852: step: 1032/526, loss: 0.0005611968226730824 2023-01-22 19:19:36.669763: step: 1036/526, loss: 0.005554665811359882 2023-01-22 19:19:37.745652: step: 1040/526, loss: 0.00988900475203991 2023-01-22 19:19:38.814828: step: 1044/526, loss: 0.003970308229327202 2023-01-22 19:19:39.869668: step: 1048/526, loss: 0.0005954879452474415 2023-01-22 19:19:40.938520: step: 1052/526, loss: 0.00013599077647086233 2023-01-22 19:19:41.999349: step: 1056/526, loss: 0.0007825624197721481 2023-01-22 19:19:43.074466: step: 1060/526, loss: 0.0034719433169811964 2023-01-22 19:19:44.146418: step: 1064/526, loss: 0.00015020959835965186 2023-01-22 19:19:45.213950: step: 1068/526, loss: 0.0036959724966436625 2023-01-22 19:19:46.280296: step: 1072/526, loss: 0.00019078415061812848 2023-01-22 19:19:47.348339: step: 1076/526, loss: 0.001046059071086347 2023-01-22 19:19:48.422839: step: 1080/526, loss: 0.00045754920574836433 2023-01-22 19:19:49.488698: step: 1084/526, loss: 0.001041437266394496 2023-01-22 19:19:50.546877: step: 1088/526, loss: 0.0008761699427850544 2023-01-22 19:19:51.616384: step: 1092/526, loss: 0.0007837422890588641 2023-01-22 19:19:52.681129: step: 1096/526, loss: 5.401765156420879e-05 2023-01-22 19:19:53.738657: step: 1100/526, loss: 0.00015237083425745368 2023-01-22 19:19:54.805161: step: 1104/526, loss: 0.004630605690181255 2023-01-22 19:19:55.857327: step: 1108/526, loss: 0.0014717730227857828 2023-01-22 19:19:56.899347: step: 1112/526, loss: 8.019447705009952e-05 2023-01-22 19:19:57.966070: step: 1116/526, loss: 0.00020317891903687268 2023-01-22 19:19:59.047239: step: 1120/526, loss: 0.004466088488698006 2023-01-22 19:20:00.134174: step: 1124/526, loss: 0.004833556246012449 2023-01-22 19:20:01.197066: step: 1128/526, loss: 4.4164062273921445e-05 2023-01-22 19:20:02.272012: step: 1132/526, loss: 0.0004955878830514848 2023-01-22 19:20:03.335277: step: 1136/526, loss: 0.0006191849242895842 2023-01-22 19:20:04.403069: step: 1140/526, loss: 0.004287369549274445 2023-01-22 19:20:05.474707: step: 1144/526, loss: 4.887943941866979e-05 2023-01-22 19:20:06.554803: step: 1148/526, loss: 0.00021259553614072502 2023-01-22 19:20:07.610948: step: 1152/526, loss: 0.00047071417793631554 2023-01-22 19:20:08.684336: step: 1156/526, loss: 0.0013863551430404186 2023-01-22 19:20:09.740884: step: 1160/526, loss: 0.0013225360307842493 2023-01-22 19:20:10.811675: step: 1164/526, loss: 0.0018136282451450825 2023-01-22 19:20:11.871879: step: 1168/526, loss: 0.023360004648566246 2023-01-22 19:20:12.939740: step: 1172/526, loss: 0.0027511168736964464 2023-01-22 19:20:13.999214: step: 1176/526, loss: 0.0030257117468863726 2023-01-22 19:20:15.064116: step: 
1180/526, loss: 9.190230048261583e-05 2023-01-22 19:20:16.128011: step: 1184/526, loss: 2.0414516654909676e-07 2023-01-22 19:20:17.197337: step: 1188/526, loss: 0.0011328563559800386 2023-01-22 19:20:18.271557: step: 1192/526, loss: 0.005524192471057177 2023-01-22 19:20:19.334038: step: 1196/526, loss: 0.00010769408982014284 2023-01-22 19:20:20.393660: step: 1200/526, loss: 0.0001532303140265867 2023-01-22 19:20:21.450194: step: 1204/526, loss: 0.0004255402891431004 2023-01-22 19:20:22.518612: step: 1208/526, loss: 7.860360346967354e-05 2023-01-22 19:20:23.566152: step: 1212/526, loss: 3.3908506793522974e-06 2023-01-22 19:20:24.638756: step: 1216/526, loss: 1.0914414815488271e-05 2023-01-22 19:20:25.706405: step: 1220/526, loss: 0.001297007780522108 2023-01-22 19:20:26.790917: step: 1224/526, loss: 0.0076791089959442616 2023-01-22 19:20:27.857784: step: 1228/526, loss: 0.0002545668394304812 2023-01-22 19:20:28.926401: step: 1232/526, loss: 0.010161765851080418 2023-01-22 19:20:29.979506: step: 1236/526, loss: 0.0006302600377239287 2023-01-22 19:20:31.060947: step: 1240/526, loss: 0.0034730457700788975 2023-01-22 19:20:32.127430: step: 1244/526, loss: 0.0014930617762729526 2023-01-22 19:20:33.192670: step: 1248/526, loss: 0.006188447121530771 2023-01-22 19:20:34.259051: step: 1252/526, loss: 0.004832875449210405 2023-01-22 19:20:35.331253: step: 1256/526, loss: 0.0005920781404711306 2023-01-22 19:20:36.416973: step: 1260/526, loss: 0.000316679390380159 2023-01-22 19:20:37.489139: step: 1264/526, loss: 0.00042659611790440977 2023-01-22 19:20:38.569756: step: 1268/526, loss: 0.0013608381850644946 2023-01-22 19:20:39.643323: step: 1272/526, loss: 0.000839449290651828 2023-01-22 19:20:40.712008: step: 1276/526, loss: 0.0024914476089179516 2023-01-22 19:20:41.772883: step: 1280/526, loss: 4.108694156457204e-06 2023-01-22 19:20:42.859061: step: 1284/526, loss: 9.43108261708403e-07 2023-01-22 19:20:43.930149: step: 1288/526, loss: 0.014416714198887348 2023-01-22 19:20:44.990186: step: 1292/526, loss: 0.0003294550988357514 2023-01-22 19:20:46.079657: step: 1296/526, loss: 0.0025182405952364206 2023-01-22 19:20:47.136566: step: 1300/526, loss: 0.0005686861695721745 2023-01-22 19:20:48.208638: step: 1304/526, loss: 3.637062764028087e-05 2023-01-22 19:20:49.290654: step: 1308/526, loss: 0.0015812882920727134 2023-01-22 19:20:50.353350: step: 1312/526, loss: 0.00033929257187992334 2023-01-22 19:20:51.421983: step: 1316/526, loss: 0.004336930811405182 2023-01-22 19:20:52.482601: step: 1320/526, loss: 0.005707069765776396 2023-01-22 19:20:53.557307: step: 1324/526, loss: 0.0019213082268834114 2023-01-22 19:20:54.623600: step: 1328/526, loss: 0.0014584719901904464 2023-01-22 19:20:55.687711: step: 1332/526, loss: 8.348702249350026e-05 2023-01-22 19:20:56.762045: step: 1336/526, loss: 0.0004102880193386227 2023-01-22 19:20:57.829758: step: 1340/526, loss: 0.00021070781804155558 2023-01-22 19:20:58.904081: step: 1344/526, loss: 0.002992538968101144 2023-01-22 19:20:59.966112: step: 1348/526, loss: 0.00023836577020119876 2023-01-22 19:21:01.035139: step: 1352/526, loss: 0.006522088311612606 2023-01-22 19:21:02.101327: step: 1356/526, loss: 0.00012933120888192207 2023-01-22 19:21:03.151813: step: 1360/526, loss: 0.0007604683632962406 2023-01-22 19:21:04.214386: step: 1364/526, loss: 0.0026534099597483873 2023-01-22 19:21:05.275142: step: 1368/526, loss: 0.00029512442415580153 2023-01-22 19:21:06.335813: step: 1372/526, loss: 0.004442297853529453 2023-01-22 19:21:07.407370: step: 1376/526, loss: 
0.0023686098866164684 2023-01-22 19:21:08.482339: step: 1380/526, loss: 0.0020333111751824617 2023-01-22 19:21:09.541974: step: 1384/526, loss: 0.0017943915445357561 2023-01-22 19:21:10.605582: step: 1388/526, loss: 0.0005311199347488582 2023-01-22 19:21:11.672676: step: 1392/526, loss: 0.001915119239129126 2023-01-22 19:21:12.747871: step: 1396/526, loss: 0.000977266812697053 2023-01-22 19:21:13.834011: step: 1400/526, loss: 0.011393888853490353 2023-01-22 19:21:14.889965: step: 1404/526, loss: 0.0007187350420281291 2023-01-22 19:21:15.964457: step: 1408/526, loss: 0.007377948146313429 2023-01-22 19:21:17.029229: step: 1412/526, loss: 0.001826529041863978 2023-01-22 19:21:18.090043: step: 1416/526, loss: 9.130862054007594e-06 2023-01-22 19:21:19.152288: step: 1420/526, loss: 7.097920752130449e-05 2023-01-22 19:21:20.221051: step: 1424/526, loss: 0.0007022854988463223 2023-01-22 19:21:21.297613: step: 1428/526, loss: 1.1935015209019184e-05 2023-01-22 19:21:22.359458: step: 1432/526, loss: 0.0018706510309129953 2023-01-22 19:21:23.447160: step: 1436/526, loss: 0.0015774505445733666 2023-01-22 19:21:24.510172: step: 1440/526, loss: 0.005842737387865782 2023-01-22 19:21:25.569993: step: 1444/526, loss: 0.0018355679931119084 2023-01-22 19:21:26.631205: step: 1448/526, loss: 0.0037214672192931175 2023-01-22 19:21:27.694433: step: 1452/526, loss: 0.005495049990713596 2023-01-22 19:21:28.756122: step: 1456/526, loss: 3.282867692178115e-05 2023-01-22 19:21:29.820375: step: 1460/526, loss: 0.00829373113811016 2023-01-22 19:21:30.907371: step: 1464/526, loss: 0.0008721364429220557 2023-01-22 19:21:31.975026: step: 1468/526, loss: 0.002679194789379835 2023-01-22 19:21:33.037175: step: 1472/526, loss: 0.0013993995962664485 2023-01-22 19:21:34.113349: step: 1476/526, loss: 0.0017318251775577664 2023-01-22 19:21:35.183381: step: 1480/526, loss: 0.0020512931514531374 2023-01-22 19:21:36.254744: step: 1484/526, loss: 0.0028425874188542366 2023-01-22 19:21:37.317740: step: 1488/526, loss: 0.001211543451063335 2023-01-22 19:21:38.385649: step: 1492/526, loss: 0.00012597184104379267 2023-01-22 19:21:39.445312: step: 1496/526, loss: 0.00021376338554546237 2023-01-22 19:21:40.501427: step: 1500/526, loss: 5.690865691576619e-06 2023-01-22 19:21:41.585868: step: 1504/526, loss: 0.009615123271942139 2023-01-22 19:21:42.656270: step: 1508/526, loss: 0.001319476985372603 2023-01-22 19:21:43.734509: step: 1512/526, loss: 0.00040984631050378084 2023-01-22 19:21:44.799886: step: 1516/526, loss: 0.001269143307581544 2023-01-22 19:21:45.878413: step: 1520/526, loss: 0.0027608280070126057 2023-01-22 19:21:46.940329: step: 1524/526, loss: 0.00368399266153574 2023-01-22 19:21:48.001552: step: 1528/526, loss: 0.003042002907022834 2023-01-22 19:21:49.093072: step: 1532/526, loss: 0.0037112620193511248 2023-01-22 19:21:50.162953: step: 1536/526, loss: 0.0006520982133224607 2023-01-22 19:21:51.215104: step: 1540/526, loss: 0.0001832563430070877 2023-01-22 19:21:52.276815: step: 1544/526, loss: 0.0007733324309810996 2023-01-22 19:21:53.354067: step: 1548/526, loss: 0.002382730133831501 2023-01-22 19:21:54.436380: step: 1552/526, loss: 0.0029309310484677553 2023-01-22 19:21:55.502014: step: 1556/526, loss: 0.00018036349501926452 2023-01-22 19:21:56.566951: step: 1560/526, loss: 0.007770819123834372 2023-01-22 19:21:57.651560: step: 1564/526, loss: 0.00019999477081000805 2023-01-22 19:21:58.719213: step: 1568/526, loss: 0.005768663249909878 2023-01-22 19:21:59.776320: step: 1572/526, loss: 0.0002435079513816163 2023-01-22 
19:22:00.845187: step: 1576/526, loss: 0.0025663760025054216 2023-01-22 19:22:01.909158: step: 1580/526, loss: 0.004356759134680033 2023-01-22 19:22:02.970654: step: 1584/526, loss: 0.0036898008547723293 2023-01-22 19:22:04.041233: step: 1588/526, loss: 0.011443507857620716 2023-01-22 19:22:05.096668: step: 1592/526, loss: 0.000976758892647922 2023-01-22 19:22:06.168259: step: 1596/526, loss: 0.002183235716074705 2023-01-22 19:22:07.219793: step: 1600/526, loss: 0.00021467276383191347 2023-01-22 19:22:08.288663: step: 1604/526, loss: 0.0006088180234655738 2023-01-22 19:22:09.346089: step: 1608/526, loss: 0.0012263018870726228 2023-01-22 19:22:10.401074: step: 1612/526, loss: 0.0009391807834617794 2023-01-22 19:22:11.468686: step: 1616/526, loss: 0.01722201704978943 2023-01-22 19:22:12.531665: step: 1620/526, loss: 0.0006972018163651228 2023-01-22 19:22:13.589269: step: 1624/526, loss: 0.0 2023-01-22 19:22:14.655961: step: 1628/526, loss: 2.719453107147274e-08 2023-01-22 19:22:15.725648: step: 1632/526, loss: 0.003350720275193453 2023-01-22 19:22:16.796897: step: 1636/526, loss: 0.012177029624581337 2023-01-22 19:22:17.872780: step: 1640/526, loss: 0.002311804797500372 2023-01-22 19:22:18.945763: step: 1644/526, loss: 3.591082349885255e-05 2023-01-22 19:22:20.010858: step: 1648/526, loss: 0.011936341412365437 2023-01-22 19:22:21.074645: step: 1652/526, loss: 0.0007209527539089322 2023-01-22 19:22:22.137064: step: 1656/526, loss: 0.002192405052483082 2023-01-22 19:22:23.213443: step: 1660/526, loss: 0.00033770635491237044 2023-01-22 19:22:24.263769: step: 1664/526, loss: 0.0022931727580726147 2023-01-22 19:22:25.336226: step: 1668/526, loss: 0.000733984517864883 2023-01-22 19:22:26.398341: step: 1672/526, loss: 0.0008263453491963446 2023-01-22 19:22:27.472580: step: 1676/526, loss: 0.001402566907927394 2023-01-22 19:22:28.548544: step: 1680/526, loss: 0.007053116336464882 2023-01-22 19:22:29.644809: step: 1684/526, loss: 0.005855896044522524 2023-01-22 19:22:30.721127: step: 1688/526, loss: 0.005505474284291267 2023-01-22 19:22:31.795799: step: 1692/526, loss: 0.0013290958013385534 2023-01-22 19:22:32.879836: step: 1696/526, loss: 0.0039756507612764835 2023-01-22 19:22:33.945420: step: 1700/526, loss: 0.0 2023-01-22 19:22:34.993669: step: 1704/526, loss: 0.002265496412292123 2023-01-22 19:22:36.069718: step: 1708/526, loss: 0.001697678118944168 2023-01-22 19:22:37.164285: step: 1712/526, loss: 0.0003724343259818852 2023-01-22 19:22:38.233574: step: 1716/526, loss: 0.00453084846958518 2023-01-22 19:22:39.288819: step: 1720/526, loss: 3.149261374346679e-06 2023-01-22 19:22:40.342307: step: 1724/526, loss: 0.0005596246337518096 2023-01-22 19:22:41.417030: step: 1728/526, loss: 0.002456620568409562 2023-01-22 19:22:42.492046: step: 1732/526, loss: 0.001171683892607689 2023-01-22 19:22:43.588702: step: 1736/526, loss: 0.006751475390046835 2023-01-22 19:22:44.658191: step: 1740/526, loss: 0.007621586322784424 2023-01-22 19:22:45.721810: step: 1744/526, loss: 0.0016836391296237707 2023-01-22 19:22:46.789846: step: 1748/526, loss: 0.0012501185992732644 2023-01-22 19:22:47.857373: step: 1752/526, loss: 0.00405002711340785 2023-01-22 19:22:48.948537: step: 1756/526, loss: 0.010960236191749573 2023-01-22 19:22:50.032256: step: 1760/526, loss: 0.0007426299271173775 2023-01-22 19:22:51.102867: step: 1764/526, loss: 0.0017698564333841205 2023-01-22 19:22:52.175021: step: 1768/526, loss: 0.0021087094210088253 2023-01-22 19:22:53.231859: step: 1772/526, loss: 0.0014048486482352018 2023-01-22 
19:22:54.311367: step: 1776/526, loss: 0.0007239112164825201 2023-01-22 19:22:55.380253: step: 1780/526, loss: 0.004561044741421938 2023-01-22 19:22:56.454653: step: 1784/526, loss: 0.0002426331047900021 2023-01-22 19:22:57.545399: step: 1788/526, loss: 0.00249788467772305 2023-01-22 19:22:58.615157: step: 1792/526, loss: 0.0013936988543719053 2023-01-22 19:22:59.691940: step: 1796/526, loss: 0.004205132834613323 2023-01-22 19:23:00.771541: step: 1800/526, loss: 0.003447645576670766 2023-01-22 19:23:01.835123: step: 1804/526, loss: 0.04244941845536232 2023-01-22 19:23:02.892222: step: 1808/526, loss: 0.0004102271341253072 2023-01-22 19:23:03.961259: step: 1812/526, loss: 0.001691601937636733 2023-01-22 19:23:05.039334: step: 1816/526, loss: 0.0008515746449120343 2023-01-22 19:23:06.107624: step: 1820/526, loss: 0.0035659619607031345 2023-01-22 19:23:07.178850: step: 1824/526, loss: 0.0009378705872222781 2023-01-22 19:23:08.245905: step: 1828/526, loss: 0.002529499586671591 2023-01-22 19:23:09.310200: step: 1832/526, loss: 0.00209238869138062 2023-01-22 19:23:10.367574: step: 1836/526, loss: 0.0014279825845733285 2023-01-22 19:23:11.453305: step: 1840/526, loss: 0.0037378163542598486 2023-01-22 19:23:12.521651: step: 1844/526, loss: 0.004979941062629223 2023-01-22 19:23:13.580848: step: 1848/526, loss: 0.012022201903164387 2023-01-22 19:23:14.656977: step: 1852/526, loss: 0.01618291810154915 2023-01-22 19:23:15.731239: step: 1856/526, loss: 0.0029926386196166277 2023-01-22 19:23:16.795761: step: 1860/526, loss: 0.00028098246548324823 2023-01-22 19:23:17.856899: step: 1864/526, loss: 0.0013977146008983254 2023-01-22 19:23:18.948195: step: 1868/526, loss: 0.0006358748651109636 2023-01-22 19:23:20.021053: step: 1872/526, loss: 0.00019496819004416466 2023-01-22 19:23:21.063563: step: 1876/526, loss: 0.0006264228140935302 2023-01-22 19:23:22.135689: step: 1880/526, loss: 0.0015917223645374179 2023-01-22 19:23:23.199818: step: 1884/526, loss: 0.005444049835205078 2023-01-22 19:23:24.282352: step: 1888/526, loss: 0.000791533850133419 2023-01-22 19:23:25.366529: step: 1892/526, loss: 0.004104716703295708 2023-01-22 19:23:26.425671: step: 1896/526, loss: 0.0005158140556886792 2023-01-22 19:23:27.482866: step: 1900/526, loss: 0.0015277708880603313 2023-01-22 19:23:28.535179: step: 1904/526, loss: 0.005023960955440998 2023-01-22 19:23:29.584002: step: 1908/526, loss: 0.016057617962360382 2023-01-22 19:23:30.649923: step: 1912/526, loss: 0.0055084978230297565 2023-01-22 19:23:31.694770: step: 1916/526, loss: 2.6908952349913307e-05 2023-01-22 19:23:32.776523: step: 1920/526, loss: 0.00023559413966722786 2023-01-22 19:23:33.849562: step: 1924/526, loss: 0.002671909285709262 2023-01-22 19:23:34.916699: step: 1928/526, loss: 0.0022257629316300154 2023-01-22 19:23:35.980867: step: 1932/526, loss: 0.004569669719785452 2023-01-22 19:23:37.048330: step: 1936/526, loss: 0.0007130609592422843 2023-01-22 19:23:38.124989: step: 1940/526, loss: 0.0005388118443079293 2023-01-22 19:23:39.205857: step: 1944/526, loss: 8.003956281754654e-06 2023-01-22 19:23:40.280949: step: 1948/526, loss: 0.000334419310092926 2023-01-22 19:23:41.347713: step: 1952/526, loss: 0.0001431743148714304 2023-01-22 19:23:42.417157: step: 1956/526, loss: 5.982616130495444e-05 2023-01-22 19:23:43.470591: step: 1960/526, loss: 1.8170852854382247e-05 2023-01-22 19:23:44.544632: step: 1964/526, loss: 0.00025206009740941226 2023-01-22 19:23:45.606639: step: 1968/526, loss: 3.678839129861444e-05 2023-01-22 19:23:46.659804: step: 1972/526, loss: 
0.0007890138658694923 2023-01-22 19:23:47.727452: step: 1976/526, loss: 0.00011741852358682081 2023-01-22 19:23:48.803156: step: 1980/526, loss: 0.002405744045972824 2023-01-22 19:23:49.886458: step: 1984/526, loss: 0.01120900921523571 2023-01-22 19:23:50.943893: step: 1988/526, loss: 0.006500423885881901 2023-01-22 19:23:51.994646: step: 1992/526, loss: 0.00035128547460772097 2023-01-22 19:23:53.086830: step: 1996/526, loss: 2.3441616576747037e-05 2023-01-22 19:23:54.151545: step: 2000/526, loss: 0.004306937102228403 2023-01-22 19:23:55.215121: step: 2004/526, loss: 0.0032469145953655243 2023-01-22 19:23:56.285298: step: 2008/526, loss: 0.0008283528732135892 2023-01-22 19:23:57.352009: step: 2012/526, loss: 0.001748239970766008 2023-01-22 19:23:58.415997: step: 2016/526, loss: 0.00016227687592618167 2023-01-22 19:23:59.489628: step: 2020/526, loss: 0.000812965095974505 2023-01-22 19:24:00.559784: step: 2024/526, loss: 8.890100434655324e-05 2023-01-22 19:24:01.633933: step: 2028/526, loss: 0.008077074773609638 2023-01-22 19:24:02.702490: step: 2032/526, loss: 0.0025476557202637196 2023-01-22 19:24:03.776018: step: 2036/526, loss: 0.005910987034440041 2023-01-22 19:24:04.855696: step: 2040/526, loss: 0.0006522894254885614 2023-01-22 19:24:05.924627: step: 2044/526, loss: 0.00014745125372428447 2023-01-22 19:24:06.994185: step: 2048/526, loss: 0.006271205842494965 2023-01-22 19:24:08.081428: step: 2052/526, loss: 0.006025245878845453 2023-01-22 19:24:09.145477: step: 2056/526, loss: 4.4928023271495476e-05 2023-01-22 19:24:10.217388: step: 2060/526, loss: 0.00013033021241426468 2023-01-22 19:24:11.275050: step: 2064/526, loss: 0.004828251898288727 2023-01-22 19:24:12.334975: step: 2068/526, loss: 0.0005814018659293652 2023-01-22 19:24:13.420438: step: 2072/526, loss: 0.004634057637304068 2023-01-22 19:24:14.491428: step: 2076/526, loss: 0.007452423218637705 2023-01-22 19:24:15.552987: step: 2080/526, loss: 0.0005268630338832736 2023-01-22 19:24:16.611624: step: 2084/526, loss: 0.007408125326037407 2023-01-22 19:24:17.678510: step: 2088/526, loss: 0.00011818054917966947 2023-01-22 19:24:18.747460: step: 2092/526, loss: 0.004184565506875515 2023-01-22 19:24:19.823957: step: 2096/526, loss: 0.0006894851103425026 2023-01-22 19:24:20.897093: step: 2100/526, loss: 5.0117723731091246e-05 2023-01-22 19:24:21.969771: step: 2104/526, loss: 2.5599050786695443e-05
==================================================
Loss: 0.003
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3532090807174888, 'r': 0.2989207779886148, 'f1': 0.3238052415210689}, 'combined': 0.23859333585762968, 'stategy': 1, 'epoch': 19}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34224117702271445, 'r': 0.23850323783359498, 'f1': 0.2811068922982037}, 'combined': 0.15333103216265653, 'stategy': 1, 'epoch': 19}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3140839296362552, 'r': 0.3331554395951929, 'f1': 0.32333870472682624}, 'combined': 0.23824957190397722, 'stategy': 1, 'epoch': 19}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3443450459474935, 'r': 0.2674738737506148, 'f1': 0.3010802718943584}, 'combined': 0.1642256028514682, 'stategy': 1, 'epoch': 19}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3264376987414056, 'r': 0.32953483060802236, 'f1': 0.32797895322082676}, 'combined': 0.24166870237324076, 'stategy': 1, 'epoch': 19}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33804342318151903, 'r': 0.27352189309481817, 'f1': 0.30237907414318616}, 'combined': 0.1649340404417379, 'stategy': 1, 'epoch': 19}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 19}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'stategy': 1, 'epoch': 19}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 19}
New best chinese model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3532090807174888, 'r': 0.2989207779886148, 'f1': 0.3238052415210689}, 'combined': 0.23859333585762968, 'stategy': 1, 'epoch': 19}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34224117702271445, 'r': 0.23850323783359498, 'f1': 0.2811068922982037}, 'combined': 0.15333103216265653, 'stategy': 1, 'epoch': 19}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 19}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3143010992275698, 'r': 0.3345785895003162, 'f1': 0.32412300857843135}, 'combined': 0.23882748000515994, 'stategy': 1, 'epoch': 18}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.34248128672426803, 'r': 0.26884624337550045, 'f1': 0.3012290558784439}, 'combined': 0.16430675775187847, 'stategy': 1, 'epoch': 18}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38461538461538464, 'r': 0.43478260869565216, 'f1': 0.40816326530612246}, 'combined': 0.20408163265306123, 'stategy': 1, 'epoch': 18}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32434039511067353, 'r': 0.3360338818414189, 'f1': 0.3300836080716268}, 'combined': 0.24321950068435658, 'stategy': 1, 'epoch': 1}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33738046554546325, 'r': 0.27329358259253506, 'f1': 0.3019742411088051}, 'combined': 0.16471322242298458, 'stategy': 1, 'epoch': 1}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5147058823529411, 'r': 0.3017241379310345, 'f1': 0.3804347826086956}, 'combined': 0.25362318840579706, 'stategy': 1, 'epoch': 1}
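Note: the 'combined' figure in each result dict above is consistent with the product of the template F1 and the slot F1 (e.g. Dev Chinese at epoch 19: 0.7368421052631579 * 0.3238052415210689 ≈ 0.23859333585762968). The following is a minimal Python sketch of that relationship, assuming this is indeed how the score is derived; the helper name combined_score is illustrative and not taken from train.py:

def combined_score(template_f1: float, slot_f1: float) -> float:
    # Combined metric as it appears in the log: template F1 multiplied by slot F1.
    return template_f1 * slot_f1

# Dev Chinese, epoch 19:
print(combined_score(0.7368421052631579, 0.3238052415210689))  # ~0.23859333585762968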