Command that produces this log: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trigger_label_ffn.layers.0.weight: torch.Size([450, 1024]) >>> trigger_label_ffn.layers.0.bias: torch.Size([450]) >>> trigger_label_ffn.layers.1.weight: torch.Size([233, 450]) >>> trigger_label_ffn.layers.1.bias: torch.Size([233]) >>> trigger_crf.transition: torch.Size([235, 235]) n_trainable_params: 560511990, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-22 23:22:34.036715: step: 4/531, loss: 3.0332765579223633 2023-01-22 23:22:35.171055: step: 8/531, loss: 3.9890761375427246 2023-01-22 23:22:36.313388: step: 12/531, loss: 24.841459274291992 2023-01-22 23:22:37.426861: step: 16/531, loss: 5.859591484069824 2023-01-22 23:22:38.540561: step: 20/531, loss: 26.823410034179688 2023-01-22 23:22:39.649299: step: 24/531, loss: 4.028316497802734 2023-01-22 23:22:40.774963: step: 28/531, loss: 13.13836669921875 2023-01-22 23:22:41.912925: step: 32/531, loss: 15.800943374633789 2023-01-22 23:22:43.033113: step: 36/531, loss: 13.201178550720215 2023-01-22 23:22:44.151902: step: 40/531, loss: 18.712141036987305 2023-01-22 23:22:45.275849: step: 44/531, loss: 2.714864730834961 2023-01-22 23:22:46.423968: step: 48/531, loss: 21.38660430908203 2023-01-22 23:22:47.516811: step: 52/531, loss: 3.945868968963623 2023-01-22 23:22:48.648364: step: 56/531, loss: 11.743212699890137 2023-01-22 23:22:49.783047: step: 60/531, loss: 13.900259017944336 2023-01-22 23:22:50.897052: step: 64/531, loss: 13.357820510864258 2023-01-22 23:22:52.035560: step: 68/531, loss: 3.7479476928710938 2023-01-22 23:22:53.145917: step: 72/531, loss: 3.0918235778808594 2023-01-22 23:22:54.247999: step: 76/531, loss: 3.387728691101074 2023-01-22 23:22:55.405257: step: 80/531, loss: 36.84928894042969 2023-01-22 23:22:56.509435: step: 84/531, loss: 23.01399803161621 2023-01-22 23:22:57.620418: step: 88/531, loss: 39.12159729003906 2023-01-22 23:22:58.730496: step: 92/531, loss: 3.9193806648254395 2023-01-22 23:22:59.850874: step: 96/531, loss: 6.501638889312744 2023-01-22 23:23:00.979837: step: 100/531, loss: 9.5567626953125 2023-01-22 23:23:02.147838: step: 104/531, loss: 13.800580978393555 2023-01-22 23:23:03.262764: step: 108/531, loss: 7.207483291625977 2023-01-22 23:23:04.403418: step: 112/531, loss: 32.66229248046875 2023-01-22 23:23:05.546217: step: 116/531, loss: 3.4956822395324707 2023-01-22 23:23:06.721312: step: 120/531, loss: 3.73321533203125 2023-01-22 23:23:07.867263: step: 124/531, loss: 5.19720458984375 2023-01-22 23:23:08.981937: step: 128/531, loss: 4.249058246612549 2023-01-22 23:23:10.104770: step: 132/531, loss: 5.777254104614258 2023-01-22 23:23:11.237087: step: 136/531, loss: 12.54652214050293 2023-01-22 23:23:12.400939: step: 140/531, loss: 23.855159759521484 2023-01-22 23:23:13.554109: step: 144/531, loss: 23.652910232543945 2023-01-22 23:23:14.651280: step: 148/531, loss: 4.233834266662598 2023-01-22 23:23:15.799863: step: 152/531, loss: 25.489444732666016 2023-01-22 23:23:16.940068: step: 156/531, loss: 3.089613914489746 2023-01-22 23:23:18.039879: step: 160/531, loss: 4.675300598144531 2023-01-22 23:23:19.134422: step: 164/531, loss: 3.804196357727051 2023-01-22 23:23:20.249908: step: 168/531, loss: 6.4520368576049805 2023-01-22 23:23:21.371936: step: 172/531, loss: 3.4101152420043945 2023-01-22 23:23:22.514393: step: 176/531, loss: 3.4412355422973633 2023-01-22 23:23:23.648468: step: 180/531, loss: 14.93376350402832 2023-01-22 23:23:24.774114: step: 184/531, loss: 2.9774580001831055 2023-01-22 23:23:25.876789: step: 188/531, loss: 18.464181900024414 2023-01-22 23:23:26.982276: step: 192/531, loss: 2.5480151176452637 2023-01-22 23:23:28.115983: step: 196/531, loss: 19.042381286621094 2023-01-22 23:23:29.225613: step: 200/531, loss: 15.893377304077148 2023-01-22 23:23:30.312662: step: 204/531, loss: 1.845534324645996 2023-01-22 23:23:31.467391: step: 208/531, loss: 15.35491943359375 2023-01-22 23:23:32.598899: step: 212/531, loss: 7.65878963470459 2023-01-22 23:23:33.708462: step: 216/531, loss: 14.374588012695312 2023-01-22 23:23:34.831055: step: 220/531, loss: 18.26699447631836 2023-01-22 23:23:35.922363: step: 224/531, loss: 15.5499906539917 2023-01-22 23:23:37.045050: step: 228/531, loss: 4.617379188537598 2023-01-22 23:23:38.171162: step: 232/531, loss: 8.427047729492188 2023-01-22 23:23:39.301897: step: 236/531, loss: 16.495691299438477 2023-01-22 23:23:40.412067: step: 240/531, loss: 2.6092939376831055 2023-01-22 23:23:41.508712: step: 244/531, loss: 2.978790283203125 2023-01-22 23:23:42.614093: step: 248/531, loss: 13.746543884277344 2023-01-22 23:23:43.747885: step: 252/531, loss: 16.243314743041992 2023-01-22 23:23:44.861551: step: 256/531, loss: 7.9986371994018555 2023-01-22 23:23:45.998120: step: 260/531, loss: 16.104475021362305 2023-01-22 23:23:47.131916: step: 264/531, loss: 10.928227424621582 2023-01-22 23:23:48.254873: step: 268/531, loss: 19.04630470275879 2023-01-22 23:23:49.375084: step: 272/531, loss: 14.556979179382324 2023-01-22 23:23:50.486546: step: 276/531, loss: 17.606996536254883 2023-01-22 23:23:51.615871: step: 280/531, loss: 9.844139099121094 2023-01-22 23:23:52.745143: step: 284/531, loss: 27.90496063232422 2023-01-22 23:23:53.880388: step: 288/531, loss: 2.5847291946411133 2023-01-22 23:23:54.992390: step: 292/531, loss: 3.0382254123687744 2023-01-22 23:23:56.102208: step: 296/531, loss: 3.6176180839538574 2023-01-22 23:23:57.236239: step: 300/531, loss: 21.48680877685547 2023-01-22 23:23:58.362472: step: 304/531, loss: 3.929943561553955 2023-01-22 23:23:59.493423: step: 308/531, loss: 8.297030448913574 2023-01-22 23:24:00.637839: step: 312/531, loss: 15.864788055419922 2023-01-22 23:24:01.824496: step: 316/531, loss: 32.826087951660156 2023-01-22 23:24:02.942133: step: 320/531, loss: 6.250124931335449 2023-01-22 23:24:04.035936: step: 324/531, loss: 10.671683311462402 2023-01-22 23:24:05.175852: step: 328/531, loss: 12.687604904174805 2023-01-22 23:24:06.307039: step: 332/531, loss: 18.26819610595703 2023-01-22 23:24:07.428361: step: 336/531, loss: 2.780642509460449 2023-01-22 23:24:08.553269: step: 340/531, loss: 16.283519744873047 2023-01-22 23:24:09.667585: step: 344/531, loss: 9.130254745483398 2023-01-22 23:24:10.814501: step: 348/531, loss: 8.334065437316895 2023-01-22 23:24:11.940333: step: 352/531, loss: 10.149765968322754 2023-01-22 23:24:13.085697: step: 356/531, loss: 26.10174560546875 2023-01-22 23:24:14.230927: step: 360/531, loss: 16.297813415527344 2023-01-22 23:24:15.363076: step: 364/531, loss: 3.450869560241699 2023-01-22 23:24:16.498231: step: 368/531, loss: 5.1178507804870605 2023-01-22 23:24:17.632464: step: 372/531, loss: 2.885474443435669 2023-01-22 23:24:18.757426: step: 376/531, loss: 30.20762825012207 2023-01-22 23:24:19.911096: step: 380/531, loss: 15.562658309936523 2023-01-22 23:24:21.030124: step: 384/531, loss: 2.6462650299072266 2023-01-22 23:24:22.140197: step: 388/531, loss: 5.7757768630981445 2023-01-22 23:24:23.253883: step: 392/531, loss: 15.065591812133789 2023-01-22 23:24:24.384054: step: 396/531, loss: 10.5269136428833 2023-01-22 23:24:25.519123: step: 400/531, loss: 7.367352485656738 2023-01-22 23:24:26.646600: step: 404/531, loss: 2.811690330505371 2023-01-22 23:24:27.752401: step: 408/531, loss: 10.781352043151855 2023-01-22 23:24:28.868584: step: 412/531, loss: 3.174560070037842 2023-01-22 23:24:29.997010: step: 416/531, loss: 14.93073844909668 2023-01-22 23:24:31.135423: step: 420/531, loss: 3.3398280143737793 2023-01-22 23:24:32.279320: step: 424/531, loss: 3.4559032917022705 2023-01-22 23:24:33.391790: step: 428/531, loss: 2.799764394760132 2023-01-22 23:24:34.532753: step: 432/531, loss: 1.5183038711547852 2023-01-22 23:24:35.667634: step: 436/531, loss: 1.6708240509033203 2023-01-22 23:24:36.813802: step: 440/531, loss: 19.957786560058594 2023-01-22 23:24:37.948148: step: 444/531, loss: 2.4036645889282227 2023-01-22 23:24:39.083437: step: 448/531, loss: 11.189226150512695 2023-01-22 23:24:40.202786: step: 452/531, loss: 7.232325553894043 2023-01-22 23:24:41.326491: step: 456/531, loss: 2.113729476928711 2023-01-22 23:24:42.469280: step: 460/531, loss: 1.9179741144180298 2023-01-22 23:24:43.601649: step: 464/531, loss: 8.944046020507812 2023-01-22 23:24:44.758407: step: 468/531, loss: 16.67998695373535 2023-01-22 23:24:45.891028: step: 472/531, loss: 12.318568229675293 2023-01-22 23:24:47.014703: step: 476/531, loss: 9.781846046447754 2023-01-22 23:24:48.120884: step: 480/531, loss: 7.681807518005371 2023-01-22 23:24:49.242704: step: 484/531, loss: 12.780887603759766 2023-01-22 23:24:50.388030: step: 488/531, loss: 1.7621686458587646 2023-01-22 23:24:51.499868: step: 492/531, loss: 5.095869541168213 2023-01-22 23:24:52.615733: step: 496/531, loss: 5.915456771850586 2023-01-22 23:24:53.745538: step: 500/531, loss: 7.6137542724609375 2023-01-22 23:24:54.872178: step: 504/531, loss: 4.71314001083374 2023-01-22 23:24:55.994373: step: 508/531, loss: 1.0998773574829102 2023-01-22 23:24:57.117522: step: 512/531, loss: 1.1988365650177002 2023-01-22 23:24:58.225155: step: 516/531, loss: 0.9550460577011108 2023-01-22 23:24:59.347344: step: 520/531, loss: 1.109716773033142 2023-01-22 23:25:00.463492: step: 524/531, loss: 13.10318660736084 2023-01-22 23:25:01.584332: step: 528/531, loss: 1.030605673789978 2023-01-22 23:25:02.726354: step: 532/531, loss: 4.0358686447143555 2023-01-22 23:25:03.859117: step: 536/531, loss: 1.6712257862091064 2023-01-22 23:25:04.999338: step: 540/531, loss: 0.9523038864135742 2023-01-22 23:25:06.108233: step: 544/531, loss: 1.0028724670410156 2023-01-22 23:25:07.275140: step: 548/531, loss: 2.5961227416992188 2023-01-22 23:25:08.394144: step: 552/531, loss: 0.8206800222396851 2023-01-22 23:25:09.502226: step: 556/531, loss: 1.3381786346435547 2023-01-22 23:25:10.620487: step: 560/531, loss: 0.770328164100647 2023-01-22 23:25:11.746625: step: 564/531, loss: 0.9549591541290283 2023-01-22 23:25:12.901804: step: 568/531, loss: 3.4985733032226562 2023-01-22 23:25:14.052888: step: 572/531, loss: 1.0681061744689941 2023-01-22 23:25:15.188124: step: 576/531, loss: 3.586304187774658 2023-01-22 23:25:16.299591: step: 580/531, loss: 0.9384206533432007 2023-01-22 23:25:17.416377: step: 584/531, loss: 0.8816757202148438 2023-01-22 23:25:18.542172: step: 588/531, loss: 4.1721367835998535 2023-01-22 23:25:19.681247: step: 592/531, loss: 3.3882291316986084 2023-01-22 23:25:20.812641: step: 596/531, loss: 3.445366382598877 2023-01-22 23:25:21.960028: step: 600/531, loss: 0.7376421689987183 2023-01-22 23:25:23.098806: step: 604/531, loss: 2.8214468955993652 2023-01-22 23:25:24.212056: step: 608/531, loss: 2.00473690032959 2023-01-22 23:25:25.322298: step: 612/531, loss: 6.6643571853637695 2023-01-22 23:25:26.464893: step: 616/531, loss: 1.9179348945617676 2023-01-22 23:25:27.564089: step: 620/531, loss: 0.5742745399475098 2023-01-22 23:25:28.680984: step: 624/531, loss: 0.745516836643219 2023-01-22 23:25:29.834565: step: 628/531, loss: 0.5563780069351196 2023-01-22 23:25:30.955053: step: 632/531, loss: 0.6017870903015137 2023-01-22 23:25:32.084131: step: 636/531, loss: 1.8896949291229248 2023-01-22 23:25:33.195687: step: 640/531, loss: 0.4716556668281555 2023-01-22 23:25:34.300709: step: 644/531, loss: 1.8702508211135864 2023-01-22 23:25:35.433624: step: 648/531, loss: 0.7170490026473999 2023-01-22 23:25:36.555909: step: 652/531, loss: 0.9703950881958008 2023-01-22 23:25:37.672180: step: 656/531, loss: 0.39847180247306824 2023-01-22 23:25:38.813982: step: 660/531, loss: 1.3900601863861084 2023-01-22 23:25:39.930697: step: 664/531, loss: 3.078333616256714 2023-01-22 23:25:41.038968: step: 668/531, loss: 2.5048274993896484 2023-01-22 23:25:42.162833: step: 672/531, loss: 2.422506093978882 2023-01-22 23:25:43.267370: step: 676/531, loss: 1.824657917022705 2023-01-22 23:25:44.373568: step: 680/531, loss: 0.5725464820861816 2023-01-22 23:25:45.495945: step: 684/531, loss: 0.49483758211135864 2023-01-22 23:25:46.663018: step: 688/531, loss: 1.2039530277252197 2023-01-22 23:25:47.771500: step: 692/531, loss: 1.5589475631713867 2023-01-22 23:25:48.903733: step: 696/531, loss: 1.5279383659362793 2023-01-22 23:25:50.024232: step: 700/531, loss: 1.0019121170043945 2023-01-22 23:25:51.126861: step: 704/531, loss: 0.6314219832420349 2023-01-22 23:25:52.230139: step: 708/531, loss: 0.6624341011047363 2023-01-22 23:25:53.362244: step: 712/531, loss: 1.38822603225708 2023-01-22 23:25:54.468135: step: 716/531, loss: 0.5042579174041748 2023-01-22 23:25:55.575674: step: 720/531, loss: 2.2914187908172607 2023-01-22 23:25:56.691604: step: 724/531, loss: 1.2736380100250244 2023-01-22 23:25:57.802971: step: 728/531, loss: 1.0720560550689697 2023-01-22 23:25:58.914432: step: 732/531, loss: 0.7303277850151062 2023-01-22 23:26:00.027019: step: 736/531, loss: 1.5819597244262695 2023-01-22 23:26:01.154793: step: 740/531, loss: 1.2175713777542114 2023-01-22 23:26:02.259670: step: 744/531, loss: 1.3924689292907715 2023-01-22 23:26:03.366222: step: 748/531, loss: 1.6670432090759277 2023-01-22 23:26:04.515773: step: 752/531, loss: 1.1271965503692627 2023-01-22 23:26:05.666048: step: 756/531, loss: 1.7324247360229492 2023-01-22 23:26:06.774080: step: 760/531, loss: 0.45162907242774963 2023-01-22 23:26:07.891301: step: 764/531, loss: 0.48444193601608276 2023-01-22 23:26:08.992911: step: 768/531, loss: 0.560933530330658 2023-01-22 23:26:10.094527: step: 772/531, loss: 1.4928064346313477 2023-01-22 23:26:11.196056: step: 776/531, loss: 5.680951118469238 2023-01-22 23:26:12.313652: step: 780/531, loss: 1.7607084512710571 2023-01-22 23:26:13.450912: step: 784/531, loss: 0.48723241686820984 2023-01-22 23:26:14.565540: step: 788/531, loss: 0.18759968876838684 2023-01-22 23:26:15.680765: step: 792/531, loss: 0.9270867109298706 2023-01-22 23:26:16.804649: step: 796/531, loss: 0.6372804641723633 2023-01-22 23:26:17.990663: step: 800/531, loss: 5.777713298797607 2023-01-22 23:26:19.102165: step: 804/531, loss: 3.051236629486084 2023-01-22 23:26:20.240114: step: 808/531, loss: 0.5228690505027771 2023-01-22 23:26:21.399696: step: 812/531, loss: 1.185579538345337 2023-01-22 23:26:22.499020: step: 816/531, loss: 0.5009186267852783 2023-01-22 23:26:23.664455: step: 820/531, loss: 10.1963472366333 2023-01-22 23:26:24.810842: step: 824/531, loss: 0.5036492943763733 2023-01-22 23:26:25.934422: step: 828/531, loss: 2.3226099014282227 2023-01-22 23:26:27.064179: step: 832/531, loss: 1.3302218914031982 2023-01-22 23:26:28.184553: step: 836/531, loss: 1.4813424348831177 2023-01-22 23:26:29.292383: step: 840/531, loss: 0.4985276460647583 2023-01-22 23:26:30.394277: step: 844/531, loss: 1.1232692003250122 2023-01-22 23:26:31.515227: step: 848/531, loss: 0.8728890419006348 2023-01-22 23:26:32.620456: step: 852/531, loss: 1.6100800037384033 2023-01-22 23:26:33.736251: step: 856/531, loss: 1.1191056966781616 2023-01-22 23:26:34.878692: step: 860/531, loss: 3.284076690673828 2023-01-22 23:26:35.986660: step: 864/531, loss: 5.448684215545654 2023-01-22 23:26:37.074136: step: 868/531, loss: 1.9500758647918701 2023-01-22 23:26:38.210990: step: 872/531, loss: 6.734179973602295 2023-01-22 23:26:39.324007: step: 876/531, loss: 0.44377148151397705 2023-01-22 23:26:40.420843: step: 880/531, loss: 0.7405997514724731 2023-01-22 23:26:41.552868: step: 884/531, loss: 0.6487605571746826 2023-01-22 23:26:42.658986: step: 888/531, loss: 2.0299644470214844 2023-01-22 23:26:43.763022: step: 892/531, loss: 2.1412789821624756 2023-01-22 23:26:44.903688: step: 896/531, loss: 0.3870377242565155 2023-01-22 23:26:45.983745: step: 900/531, loss: 3.136416435241699 2023-01-22 23:26:47.097052: step: 904/531, loss: 2.141172170639038 2023-01-22 23:26:48.231157: step: 908/531, loss: 0.57072913646698 2023-01-22 23:26:49.341974: step: 912/531, loss: 0.44198426604270935 2023-01-22 23:26:50.493546: step: 916/531, loss: 0.3706693649291992 2023-01-22 23:26:51.601427: step: 920/531, loss: 0.3774597644805908 2023-01-22 23:26:52.725611: step: 924/531, loss: 0.8894097208976746 2023-01-22 23:26:53.864479: step: 928/531, loss: 2.654102325439453 2023-01-22 23:26:54.969219: step: 932/531, loss: 1.4737154245376587 2023-01-22 23:26:56.058559: step: 936/531, loss: 0.783511757850647 2023-01-22 23:26:57.179300: step: 940/531, loss: 1.106475830078125 2023-01-22 23:26:58.312572: step: 944/531, loss: 1.2096598148345947 2023-01-22 23:26:59.448759: step: 948/531, loss: 0.6061283349990845 2023-01-22 23:27:00.619140: step: 952/531, loss: 0.2911093831062317 2023-01-22 23:27:01.755657: step: 956/531, loss: 0.28803348541259766 2023-01-22 23:27:02.882001: step: 960/531, loss: 1.0073535442352295 2023-01-22 23:27:04.012215: step: 964/531, loss: 0.9820934534072876 2023-01-22 23:27:05.123811: step: 968/531, loss: 0.3536796271800995 2023-01-22 23:27:06.255304: step: 972/531, loss: 0.42519691586494446 2023-01-22 23:27:07.393517: step: 976/531, loss: 5.102304458618164 2023-01-22 23:27:08.493956: step: 980/531, loss: 0.22068482637405396 2023-01-22 23:27:09.608709: step: 984/531, loss: 0.769264817237854 2023-01-22 23:27:10.750706: step: 988/531, loss: 0.506183922290802 2023-01-22 23:27:11.888226: step: 992/531, loss: 0.9405239820480347 2023-01-22 23:27:12.993559: step: 996/531, loss: 0.47805076837539673 2023-01-22 23:27:14.151521: step: 1000/531, loss: 1.0347998142242432 2023-01-22 23:27:15.257677: step: 1004/531, loss: 0.9928960800170898 2023-01-22 23:27:16.406175: step: 1008/531, loss: 0.5030802488327026 2023-01-22 23:27:17.503413: step: 1012/531, loss: 2.2416796684265137 2023-01-22 23:27:18.626149: step: 1016/531, loss: 0.5580966472625732 2023-01-22 23:27:19.729019: step: 1020/531, loss: 1.8231711387634277 2023-01-22 23:27:20.859754: step: 1024/531, loss: 0.2565525770187378 2023-01-22 23:27:21.982561: step: 1028/531, loss: 1.5125222206115723 2023-01-22 23:27:23.077284: step: 1032/531, loss: 0.364323228597641 2023-01-22 23:27:24.201075: step: 1036/531, loss: 0.39796924591064453 2023-01-22 23:27:25.325161: step: 1040/531, loss: 1.5682728290557861 2023-01-22 23:27:26.428596: step: 1044/531, loss: 1.653188943862915 2023-01-22 23:27:27.550980: step: 1048/531, loss: 2.6216962337493896 2023-01-22 23:27:28.675758: step: 1052/531, loss: 1.4048326015472412 2023-01-22 23:27:29.817096: step: 1056/531, loss: 1.5759382247924805 2023-01-22 23:27:30.926266: step: 1060/531, loss: 0.6790085434913635 2023-01-22 23:27:32.051588: step: 1064/531, loss: 2.1952245235443115 2023-01-22 23:27:33.184333: step: 1068/531, loss: 0.8539444208145142 2023-01-22 23:27:34.317525: step: 1072/531, loss: 0.5027569532394409 2023-01-22 23:27:35.454220: step: 1076/531, loss: 2.809027671813965 2023-01-22 23:27:36.605646: step: 1080/531, loss: 1.761759638786316 2023-01-22 23:27:37.738128: step: 1084/531, loss: 0.3076581060886383 2023-01-22 23:27:38.858046: step: 1088/531, loss: 1.8024475574493408 2023-01-22 23:27:39.965243: step: 1092/531, loss: 0.9613577127456665 2023-01-22 23:27:41.099178: step: 1096/531, loss: 2.433546304702759 2023-01-22 23:27:42.210938: step: 1100/531, loss: 2.8840386867523193 2023-01-22 23:27:43.313645: step: 1104/531, loss: 1.305079698562622 2023-01-22 23:27:44.447371: step: 1108/531, loss: 0.709696352481842 2023-01-22 23:27:45.578754: step: 1112/531, loss: 0.37777137756347656 2023-01-22 23:27:46.686270: step: 1116/531, loss: 0.8038275241851807 2023-01-22 23:27:47.776898: step: 1120/531, loss: 0.39626961946487427 2023-01-22 23:27:48.909401: step: 1124/531, loss: 2.3624799251556396 2023-01-22 23:27:50.023849: step: 1128/531, loss: 0.3900138735771179 2023-01-22 23:27:51.142731: step: 1132/531, loss: 0.6979865431785583 2023-01-22 23:27:52.291558: step: 1136/531, loss: 0.6186054348945618 2023-01-22 23:27:53.392718: step: 1140/531, loss: 0.16052737832069397 2023-01-22 23:27:54.598771: step: 1144/531, loss: 0.8662946224212646 2023-01-22 23:27:55.735692: step: 1148/531, loss: 0.2455580234527588 2023-01-22 23:27:56.831866: step: 1152/531, loss: 1.0361310243606567 2023-01-22 23:27:57.937282: step: 1156/531, loss: 0.462401807308197 2023-01-22 23:27:59.088149: step: 1160/531, loss: 0.3532995283603668 2023-01-22 23:28:00.238973: step: 1164/531, loss: 1.959795594215393 2023-01-22 23:28:01.367774: step: 1168/531, loss: 1.5334808826446533 2023-01-22 23:28:02.493875: step: 1172/531, loss: 0.28836435079574585 2023-01-22 23:28:03.602847: step: 1176/531, loss: 1.1528983116149902 2023-01-22 23:28:04.746886: step: 1180/531, loss: 1.2682372331619263 2023-01-22 23:28:05.873220: step: 1184/531, loss: 0.26979345083236694 2023-01-22 23:28:06.998045: step: 1188/531, loss: 1.1078568696975708 2023-01-22 23:28:08.124310: step: 1192/531, loss: 0.6036314964294434 2023-01-22 23:28:09.258241: step: 1196/531, loss: 2.5460143089294434 2023-01-22 23:28:10.387819: step: 1200/531, loss: 3.628701686859131 2023-01-22 23:28:11.485533: step: 1204/531, loss: 1.763791799545288 2023-01-22 23:28:12.601051: step: 1208/531, loss: 0.4431189298629761 2023-01-22 23:28:13.727923: step: 1212/531, loss: 0.7327397465705872 2023-01-22 23:28:14.868595: step: 1216/531, loss: 0.4342723786830902 2023-01-22 23:28:15.981016: step: 1220/531, loss: 1.902055263519287 2023-01-22 23:28:17.079912: step: 1224/531, loss: 0.4548925459384918 2023-01-22 23:28:18.197329: step: 1228/531, loss: 0.9176959991455078 2023-01-22 23:28:19.320709: step: 1232/531, loss: 3.0312373638153076 2023-01-22 23:28:20.456348: step: 1236/531, loss: 1.475813388824463 2023-01-22 23:28:21.573512: step: 1240/531, loss: 0.528725266456604 2023-01-22 23:28:22.703209: step: 1244/531, loss: 0.40829718112945557 2023-01-22 23:28:23.827960: step: 1248/531, loss: 0.5420728921890259 2023-01-22 23:28:24.949286: step: 1252/531, loss: 0.2994222044944763 2023-01-22 23:28:26.089113: step: 1256/531, loss: 0.635415256023407 2023-01-22 23:28:27.219761: step: 1260/531, loss: 0.9323607087135315 2023-01-22 23:28:28.344447: step: 1264/531, loss: 2.6936163902282715 2023-01-22 23:28:29.463566: step: 1268/531, loss: 0.6204828023910522 2023-01-22 23:28:30.637584: step: 1272/531, loss: 0.5789872407913208 2023-01-22 23:28:31.762553: step: 1276/531, loss: 1.1375057697296143 2023-01-22 23:28:32.863040: step: 1280/531, loss: 1.0385173559188843 2023-01-22 23:28:33.983824: step: 1284/531, loss: 1.3682106733322144 2023-01-22 23:28:35.133805: step: 1288/531, loss: 4.037505626678467 2023-01-22 23:28:36.295189: step: 1292/531, loss: 0.6877168416976929 2023-01-22 23:28:37.431964: step: 1296/531, loss: 1.7743664979934692 2023-01-22 23:28:38.565575: step: 1300/531, loss: 3.430102825164795 2023-01-22 23:28:39.692831: step: 1304/531, loss: 1.777139663696289 2023-01-22 23:28:40.855184: step: 1308/531, loss: 0.3965368866920471 2023-01-22 23:28:41.970975: step: 1312/531, loss: 0.520017147064209 2023-01-22 23:28:43.072164: step: 1316/531, loss: 1.033474326133728 2023-01-22 23:28:44.173326: step: 1320/531, loss: 0.37674418091773987 2023-01-22 23:28:45.289849: step: 1324/531, loss: 0.8251477479934692 2023-01-22 23:28:46.404751: step: 1328/531, loss: 0.4165104925632477 2023-01-22 23:28:47.544643: step: 1332/531, loss: 2.0840795040130615 2023-01-22 23:28:48.638702: step: 1336/531, loss: 0.6134458780288696 2023-01-22 23:28:49.766625: step: 1340/531, loss: 0.235723078250885 2023-01-22 23:28:50.874915: step: 1344/531, loss: 0.41169464588165283 2023-01-22 23:28:52.000214: step: 1348/531, loss: 1.4164056777954102 2023-01-22 23:28:53.135096: step: 1352/531, loss: 7.537428379058838 2023-01-22 23:28:54.278906: step: 1356/531, loss: 3.568732261657715 2023-01-22 23:28:55.388194: step: 1360/531, loss: 1.5798994302749634 2023-01-22 23:28:56.540381: step: 1364/531, loss: 0.4049384593963623 2023-01-22 23:28:57.678059: step: 1368/531, loss: 1.1719298362731934 2023-01-22 23:28:58.811351: step: 1372/531, loss: 0.35880422592163086 2023-01-22 23:28:59.946925: step: 1376/531, loss: 2.994121789932251 2023-01-22 23:29:01.072339: step: 1380/531, loss: 0.9692338705062866 2023-01-22 23:29:02.197851: step: 1384/531, loss: 1.0469532012939453 2023-01-22 23:29:03.333536: step: 1388/531, loss: 0.704371452331543 2023-01-22 23:29:04.448112: step: 1392/531, loss: 1.5928230285644531 2023-01-22 23:29:05.567977: step: 1396/531, loss: 0.19797860085964203 2023-01-22 23:29:06.697272: step: 1400/531, loss: 1.1524953842163086 2023-01-22 23:29:07.850692: step: 1404/531, loss: 0.6012471318244934 2023-01-22 23:29:08.959316: step: 1408/531, loss: 2.3841757774353027 2023-01-22 23:29:10.091315: step: 1412/531, loss: 0.5171011090278625 2023-01-22 23:29:11.226463: step: 1416/531, loss: 0.5391451120376587 2023-01-22 23:29:12.376369: step: 1420/531, loss: 0.5451816916465759 2023-01-22 23:29:13.504423: step: 1424/531, loss: 0.27809175848960876 2023-01-22 23:29:14.614089: step: 1428/531, loss: 0.49407845735549927 2023-01-22 23:29:15.773787: step: 1432/531, loss: 0.13293522596359253 2023-01-22 23:29:16.937443: step: 1436/531, loss: 1.519361138343811 2023-01-22 23:29:18.045574: step: 1440/531, loss: 1.7191133499145508 2023-01-22 23:29:19.170089: step: 1444/531, loss: 0.5537688732147217 2023-01-22 23:29:20.265604: step: 1448/531, loss: 6.81881046295166 2023-01-22 23:29:21.427058: step: 1452/531, loss: 1.1535937786102295 2023-01-22 23:29:22.554687: step: 1456/531, loss: 0.5681639909744263 2023-01-22 23:29:23.671123: step: 1460/531, loss: 1.02992582321167 2023-01-22 23:29:24.785448: step: 1464/531, loss: 0.42583152651786804 2023-01-22 23:29:25.934300: step: 1468/531, loss: 0.5356565713882446 2023-01-22 23:29:27.063354: step: 1472/531, loss: 0.4080018997192383 2023-01-22 23:29:28.180415: step: 1476/531, loss: 0.22704468667507172 2023-01-22 23:29:29.321244: step: 1480/531, loss: 0.8032234907150269 2023-01-22 23:29:30.440160: step: 1484/531, loss: 0.4029533863067627 2023-01-22 23:29:31.543718: step: 1488/531, loss: 2.214237689971924 2023-01-22 23:29:32.664793: step: 1492/531, loss: 1.0937788486480713 2023-01-22 23:29:33.780621: step: 1496/531, loss: 0.18486443161964417 2023-01-22 23:29:34.952170: step: 1500/531, loss: 2.1671459674835205 2023-01-22 23:29:36.096416: step: 1504/531, loss: 0.37394145131111145 2023-01-22 23:29:37.189953: step: 1508/531, loss: 2.1639952659606934 2023-01-22 23:29:38.332178: step: 1512/531, loss: 7.445359230041504 2023-01-22 23:29:39.456403: step: 1516/531, loss: 0.6151976585388184 2023-01-22 23:29:40.592835: step: 1520/531, loss: 0.7307574152946472 2023-01-22 23:29:41.748916: step: 1524/531, loss: 0.5783308744430542 2023-01-22 23:29:42.888530: step: 1528/531, loss: 1.0292999744415283 2023-01-22 23:29:44.005841: step: 1532/531, loss: 0.2526084780693054 2023-01-22 23:29:45.107449: step: 1536/531, loss: 1.0480601787567139 2023-01-22 23:29:46.217205: step: 1540/531, loss: 2.3040876388549805 2023-01-22 23:29:47.354067: step: 1544/531, loss: 0.45953500270843506 2023-01-22 23:29:48.474876: step: 1548/531, loss: 0.726035475730896 2023-01-22 23:29:49.597895: step: 1552/531, loss: 0.7123405337333679 2023-01-22 23:29:50.717156: step: 1556/531, loss: 1.239498257637024 2023-01-22 23:29:51.823566: step: 1560/531, loss: 2.332115888595581 2023-01-22 23:29:52.912591: step: 1564/531, loss: 0.8626034259796143 2023-01-22 23:29:54.045699: step: 1568/531, loss: 0.7825387716293335 2023-01-22 23:29:55.176566: step: 1572/531, loss: 0.20002436637878418 2023-01-22 23:29:56.301564: step: 1576/531, loss: 1.0079257488250732 2023-01-22 23:29:57.402793: step: 1580/531, loss: 0.4445771276950836 2023-01-22 23:29:58.530201: step: 1584/531, loss: 1.4051389694213867 2023-01-22 23:29:59.630187: step: 1588/531, loss: 0.6725057363510132 2023-01-22 23:30:00.760397: step: 1592/531, loss: 0.9567013382911682 2023-01-22 23:30:01.922484: step: 1596/531, loss: 0.27645760774612427 2023-01-22 23:30:03.045008: step: 1600/531, loss: 1.4702401161193848 2023-01-22 23:30:04.157812: step: 1604/531, loss: 2.8450229167938232 2023-01-22 23:30:05.300863: step: 1608/531, loss: 0.1912183314561844 2023-01-22 23:30:06.450372: step: 1612/531, loss: 0.4139157235622406 2023-01-22 23:30:07.565279: step: 1616/531, loss: 1.4575409889221191 2023-01-22 23:30:08.689841: step: 1620/531, loss: 0.4374315142631531 2023-01-22 23:30:09.851621: step: 1624/531, loss: 0.40041646361351013 2023-01-22 23:30:10.991492: step: 1628/531, loss: 0.9358422160148621 2023-01-22 23:30:12.125523: step: 1632/531, loss: 0.2858022451400757 2023-01-22 23:30:13.284885: step: 1636/531, loss: 0.40463021397590637 2023-01-22 23:30:14.375770: step: 1640/531, loss: 0.27782222628593445 2023-01-22 23:30:15.478867: step: 1644/531, loss: 0.8261849284172058 2023-01-22 23:30:16.609246: step: 1648/531, loss: 0.2743852734565735 2023-01-22 23:30:17.694071: step: 1652/531, loss: 1.1915918588638306 2023-01-22 23:30:18.809027: step: 1656/531, loss: 6.563152313232422 2023-01-22 23:30:19.913603: step: 1660/531, loss: 0.2767614424228668 2023-01-22 23:30:21.068863: step: 1664/531, loss: 8.009928703308105 2023-01-22 23:30:22.167732: step: 1668/531, loss: 2.7408249378204346 2023-01-22 23:30:23.304709: step: 1672/531, loss: 1.823009967803955 2023-01-22 23:30:24.408491: step: 1676/531, loss: 1.3707501888275146 2023-01-22 23:30:25.542986: step: 1680/531, loss: 0.9346147775650024 2023-01-22 23:30:26.674030: step: 1684/531, loss: 3.644580841064453 2023-01-22 23:30:27.789430: step: 1688/531, loss: 1.5002988576889038 2023-01-22 23:30:28.902458: step: 1692/531, loss: 0.40758341550827026 2023-01-22 23:30:30.009090: step: 1696/531, loss: 0.26690512895584106 2023-01-22 23:30:31.146993: step: 1700/531, loss: 1.9627392292022705 2023-01-22 23:30:32.280923: step: 1704/531, loss: 4.548556804656982 2023-01-22 23:30:33.403699: step: 1708/531, loss: 1.4685009717941284 2023-01-22 23:30:34.489288: step: 1712/531, loss: 1.336835265159607 2023-01-22 23:30:35.611584: step: 1716/531, loss: 0.24930821359157562 2023-01-22 23:30:36.735804: step: 1720/531, loss: 6.974177837371826 2023-01-22 23:30:37.882751: step: 1724/531, loss: 1.5804717540740967 2023-01-22 23:30:38.996243: step: 1728/531, loss: 0.610162615776062 2023-01-22 23:30:40.127019: step: 1732/531, loss: 0.3148822784423828 2023-01-22 23:30:41.244611: step: 1736/531, loss: 2.1768674850463867 2023-01-22 23:30:42.377472: step: 1740/531, loss: 1.2555155754089355 2023-01-22 23:30:43.535908: step: 1744/531, loss: 1.2791881561279297 2023-01-22 23:30:44.641671: step: 1748/531, loss: 0.405431866645813 2023-01-22 23:30:45.778810: step: 1752/531, loss: 0.6121499538421631 2023-01-22 23:30:46.874492: step: 1756/531, loss: 0.8035522103309631 2023-01-22 23:30:47.992504: step: 1760/531, loss: 1.099301815032959 2023-01-22 23:30:49.107959: step: 1764/531, loss: 0.8523430824279785 2023-01-22 23:30:50.224490: step: 1768/531, loss: 0.9650673866271973 2023-01-22 23:30:51.344241: step: 1772/531, loss: 1.7846542596817017 2023-01-22 23:30:52.459105: step: 1776/531, loss: 2.0832650661468506 2023-01-22 23:30:53.590285: step: 1780/531, loss: 1.1523033380508423 2023-01-22 23:30:54.729002: step: 1784/531, loss: 1.3105982542037964 2023-01-22 23:30:55.845770: step: 1788/531, loss: 0.28089332580566406 2023-01-22 23:30:56.979527: step: 1792/531, loss: 1.5918664932250977 2023-01-22 23:30:58.115729: step: 1796/531, loss: 1.4153391122817993 2023-01-22 23:30:59.266170: step: 1800/531, loss: 0.6680206060409546 2023-01-22 23:31:00.369956: step: 1804/531, loss: 1.9647587537765503 2023-01-22 23:31:01.513058: step: 1808/531, loss: 1.0230886936187744 2023-01-22 23:31:02.627337: step: 1812/531, loss: 0.40220046043395996 2023-01-22 23:31:03.769426: step: 1816/531, loss: 0.39898940920829773 2023-01-22 23:31:04.919831: step: 1820/531, loss: 1.1538629531860352 2023-01-22 23:31:06.027435: step: 1824/531, loss: 0.9606660604476929 2023-01-22 23:31:07.162401: step: 1828/531, loss: 0.2896497845649719 2023-01-22 23:31:08.302008: step: 1832/531, loss: 1.2962169647216797 2023-01-22 23:31:09.444847: step: 1836/531, loss: 0.5317136645317078 2023-01-22 23:31:10.582614: step: 1840/531, loss: 0.5023956894874573 2023-01-22 23:31:11.706369: step: 1844/531, loss: 0.21467125415802002 2023-01-22 23:31:12.847066: step: 1848/531, loss: 0.6295309662818909 2023-01-22 23:31:13.948095: step: 1852/531, loss: 7.116159915924072 2023-01-22 23:31:15.106513: step: 1856/531, loss: 1.039978265762329 2023-01-22 23:31:16.241646: step: 1860/531, loss: 0.49906837940216064 2023-01-22 23:31:17.360858: step: 1864/531, loss: 1.0414111614227295 2023-01-22 23:31:18.459361: step: 1868/531, loss: 1.4617364406585693 2023-01-22 23:31:19.588209: step: 1872/531, loss: 0.45923924446105957 2023-01-22 23:31:20.681616: step: 1876/531, loss: 0.5880622863769531 2023-01-22 23:31:21.796774: step: 1880/531, loss: 7.1724138259887695 2023-01-22 23:31:22.943035: step: 1884/531, loss: 1.0949327945709229 2023-01-22 23:31:24.056509: step: 1888/531, loss: 0.2629883289337158 2023-01-22 23:31:25.168512: step: 1892/531, loss: 1.5492472648620605 2023-01-22 23:31:26.307798: step: 1896/531, loss: 2.168962240219116 2023-01-22 23:31:27.415150: step: 1900/531, loss: 2.447136402130127 2023-01-22 23:31:28.531589: step: 1904/531, loss: 2.9105565547943115 2023-01-22 23:31:29.667968: step: 1908/531, loss: 0.4838661253452301 2023-01-22 23:31:30.788833: step: 1912/531, loss: 0.5327164530754089 2023-01-22 23:31:31.913004: step: 1916/531, loss: 0.6271045207977295 2023-01-22 23:31:33.037197: step: 1920/531, loss: 0.342769056558609 2023-01-22 23:31:34.174192: step: 1924/531, loss: 0.44412603974342346 2023-01-22 23:31:35.303878: step: 1928/531, loss: 7.712641716003418 2023-01-22 23:31:36.436458: step: 1932/531, loss: 0.17210501432418823 2023-01-22 23:31:37.528298: step: 1936/531, loss: 0.29074543714523315 2023-01-22 23:31:38.650398: step: 1940/531, loss: 0.5579442977905273 2023-01-22 23:31:39.775765: step: 1944/531, loss: 2.413166046142578 2023-01-22 23:31:40.884603: step: 1948/531, loss: 0.15081782639026642 2023-01-22 23:31:41.996734: step: 1952/531, loss: 0.11447057873010635 2023-01-22 23:31:43.119902: step: 1956/531, loss: 0.36604928970336914 2023-01-22 23:31:44.262336: step: 1960/531, loss: 0.4121372103691101 2023-01-22 23:31:45.382352: step: 1964/531, loss: 1.1306660175323486 2023-01-22 23:31:46.471675: step: 1968/531, loss: 0.19114255905151367 2023-01-22 23:31:47.591764: step: 1972/531, loss: 1.115263819694519 2023-01-22 23:31:48.713759: step: 1976/531, loss: 0.40050989389419556 2023-01-22 23:31:49.822530: step: 1980/531, loss: 0.27480548620224 2023-01-22 23:31:50.931768: step: 1984/531, loss: 1.353050708770752 2023-01-22 23:31:52.033438: step: 1988/531, loss: 0.8090643882751465 2023-01-22 23:31:53.160772: step: 1992/531, loss: 0.9668687582015991 2023-01-22 23:31:54.267153: step: 1996/531, loss: 1.2375690937042236 2023-01-22 23:31:55.383588: step: 2000/531, loss: 0.811989426612854 2023-01-22 23:31:56.526139: step: 2004/531, loss: 0.3476330041885376 2023-01-22 23:31:57.649079: step: 2008/531, loss: 0.6789736747741699 2023-01-22 23:31:58.751229: step: 2012/531, loss: 0.9745103120803833 2023-01-22 23:31:59.896191: step: 2016/531, loss: 2.3204092979431152 2023-01-22 23:32:01.045909: step: 2020/531, loss: 0.7785778045654297 2023-01-22 23:32:02.179513: step: 2024/531, loss: 0.38837215304374695 2023-01-22 23:32:03.307280: step: 2028/531, loss: 0.6144498586654663 2023-01-22 23:32:04.430392: step: 2032/531, loss: 1.802272081375122 2023-01-22 23:32:05.536806: step: 2036/531, loss: 1.1621789932250977 2023-01-22 23:32:06.670690: step: 2040/531, loss: 18.040464401245117 2023-01-22 23:32:07.807992: step: 2044/531, loss: 0.8983226418495178 2023-01-22 23:32:08.961045: step: 2048/531, loss: 0.4278659522533417 2023-01-22 23:32:10.091075: step: 2052/531, loss: 0.33591580390930176 2023-01-22 23:32:11.199188: step: 2056/531, loss: 0.42114487290382385 2023-01-22 23:32:12.348019: step: 2060/531, loss: 0.6154437065124512 2023-01-22 23:32:13.447096: step: 2064/531, loss: 3.467343807220459 2023-01-22 23:32:14.549429: step: 2068/531, loss: 0.7770359516143799 2023-01-22 23:32:15.705318: step: 2072/531, loss: 0.46303659677505493 2023-01-22 23:32:16.806243: step: 2076/531, loss: 0.5598751902580261 2023-01-22 23:32:17.961385: step: 2080/531, loss: 0.7088748216629028 2023-01-22 23:32:19.070396: step: 2084/531, loss: 0.27088069915771484 2023-01-22 23:32:20.204935: step: 2088/531, loss: 0.6107016801834106 2023-01-22 23:32:21.347621: step: 2092/531, loss: 0.5451753735542297 2023-01-22 23:32:22.479775: step: 2096/531, loss: 0.4906690716743469 2023-01-22 23:32:23.591118: step: 2100/531, loss: 0.4330683648586273 2023-01-22 23:32:24.712113: step: 2104/531, loss: 0.5274524688720703 2023-01-22 23:32:25.838765: step: 2108/531, loss: 0.22472038865089417 2023-01-22 23:32:26.968923: step: 2112/531, loss: 0.37288033962249756 2023-01-22 23:32:28.077420: step: 2116/531, loss: 0.20569263398647308 2023-01-22 23:32:29.205627: step: 2120/531, loss: 0.17711704969406128 2023-01-22 23:32:30.363616: step: 2124/531, loss: 1.4963927268981934 ================================================== Loss: 3.639 -------------------- Dev: {'event': {'p': 0.5924170616113744, 'r': 0.4993342210386152, 'f1': 0.541907514450867}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test: {'event': {'p': 0.48865248226950353, 'r': 0.4108527131782946, 'f1': 0.44638807904114025}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'event': {'p': 0.5714285714285714, 'r': 0.37037037037037035, 'f1': 0.449438202247191}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'event': {'p': 0.9230769230769231, 'r': 0.19047619047619047, 'f1': 0.31578947368421056}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'event': {'p': 0.625, 'r': 0.1388888888888889, 'f1': 0.2272727272727273}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.5924170616113744, 'r': 0.4993342210386152, 'f1': 0.541907514450867}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Eng Test for Chinese: {'event': {'p': 0.48865248226950353, 'r': 0.4108527131782946, 'f1': 0.44638807904114025}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'event': {'p': 0.5714285714285714, 'r': 0.37037037037037035, 'f1': 0.449438202247191}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Eng Dev for Korean: {'event': {'p': 0.5924170616113744, 'r': 0.4993342210386152, 'f1': 0.541907514450867}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Eng Test for Korean: {'event': {'p': 0.48865248226950353, 'r': 0.4108527131782946, 'f1': 0.44638807904114025}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'event': {'p': 0.9230769230769231, 'r': 0.19047619047619047, 'f1': 0.31578947368421056}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Eng Dev for Russian: {'event': {'p': 0.5924170616113744, 'r': 0.4993342210386152, 'f1': 0.541907514450867}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Eng Test for Russian: {'event': {'p': 0.48865248226950353, 'r': 0.4108527131782946, 'f1': 0.44638807904114025}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'event': {'p': 0.625, 'r': 0.1388888888888889, 'f1': 0.2272727272727273}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-22 23:33:33.838462: step: 4/531, loss: 1.4447047710418701 2023-01-22 23:33:34.965171: step: 8/531, loss: 1.7894995212554932 2023-01-22 23:33:36.083240: step: 12/531, loss: 1.1803804636001587 2023-01-22 23:33:37.189303: step: 16/531, loss: 0.3246711492538452 2023-01-22 23:33:38.323073: step: 20/531, loss: 1.0602116584777832 2023-01-22 23:33:39.420266: step: 24/531, loss: 0.4928525984287262 2023-01-22 23:33:40.579428: step: 28/531, loss: 0.7069123387336731 2023-01-22 23:33:41.708076: step: 32/531, loss: 0.26230937242507935 2023-01-22 23:33:42.848919: step: 36/531, loss: 0.3809455931186676 2023-01-22 23:33:43.966629: step: 40/531, loss: 1.4203050136566162 2023-01-22 23:33:45.102903: step: 44/531, loss: 0.2373761683702469 2023-01-22 23:33:46.214843: step: 48/531, loss: 0.3367355465888977 2023-01-22 23:33:47.336063: step: 52/531, loss: 0.3164003789424896 2023-01-22 23:33:48.444306: step: 56/531, loss: 0.2195574790239334 2023-01-22 23:33:49.586579: step: 60/531, loss: 0.18575087189674377 2023-01-22 23:33:50.728697: step: 64/531, loss: 0.489385724067688 2023-01-22 23:33:51.861483: step: 68/531, loss: 0.2584063410758972 2023-01-22 23:33:52.991704: step: 72/531, loss: 0.6360254287719727 2023-01-22 23:33:54.143611: step: 76/531, loss: 1.2348854541778564 2023-01-22 23:33:55.228284: step: 80/531, loss: 0.6211497187614441 2023-01-22 23:33:56.380460: step: 84/531, loss: 0.9589474201202393 2023-01-22 23:33:57.498628: step: 88/531, loss: 1.0409431457519531 2023-01-22 23:33:58.621036: step: 92/531, loss: 0.9349552392959595 2023-01-22 23:33:59.743968: step: 96/531, loss: 0.49208372831344604 2023-01-22 23:34:00.843077: step: 100/531, loss: 1.3125523328781128 2023-01-22 23:34:01.994102: step: 104/531, loss: 0.2161637395620346 2023-01-22 23:34:03.117387: step: 108/531, loss: 0.4276527762413025 2023-01-22 23:34:04.258358: step: 112/531, loss: 0.3313613533973694 2023-01-22 23:34:05.361696: step: 116/531, loss: 0.6031900644302368 2023-01-22 23:34:06.470087: step: 120/531, loss: 0.3204917907714844 2023-01-22 23:34:07.600790: step: 124/531, loss: 0.20592385530471802 2023-01-22 23:34:08.715732: step: 128/531, loss: 0.4562414288520813 2023-01-22 23:34:09.856598: step: 132/531, loss: 4.408691883087158 2023-01-22 23:34:10.978543: step: 136/531, loss: 0.3817763328552246 2023-01-22 23:34:12.132653: step: 140/531, loss: 0.5173056721687317 2023-01-22 23:34:13.266653: step: 144/531, loss: 1.3479859828948975 2023-01-22 23:34:14.384697: step: 148/531, loss: 1.0278340578079224 2023-01-22 23:34:15.485770: step: 152/531, loss: 0.10811326652765274 2023-01-22 23:34:16.598904: step: 156/531, loss: 0.43464717268943787 2023-01-22 23:34:17.705388: step: 160/531, loss: 0.7763004302978516 2023-01-22 23:34:18.817521: step: 164/531, loss: 0.2981727123260498 2023-01-22 23:34:19.939318: step: 168/531, loss: 1.2074049711227417 2023-01-22 23:34:21.042129: step: 172/531, loss: 1.7091926336288452 2023-01-22 23:34:22.132631: step: 176/531, loss: 0.40574541687965393 2023-01-22 23:34:23.235945: step: 180/531, loss: 0.2928807735443115 2023-01-22 23:34:24.359128: step: 184/531, loss: 1.7569458484649658 2023-01-22 23:34:25.464845: step: 188/531, loss: 0.2622080445289612 2023-01-22 23:34:26.631932: step: 192/531, loss: 0.32386958599090576 2023-01-22 23:34:27.730969: step: 196/531, loss: 0.36866387724876404 2023-01-22 23:34:28.879446: step: 200/531, loss: 2.2625324726104736 2023-01-22 23:34:30.017246: step: 204/531, loss: 1.0625203847885132 2023-01-22 23:34:31.146835: step: 208/531, loss: 0.29278287291526794 2023-01-22 23:34:32.277491: step: 212/531, loss: 0.8662877082824707 2023-01-22 23:34:33.419502: step: 216/531, loss: 0.6360157132148743 2023-01-22 23:34:34.526073: step: 220/531, loss: 1.511866807937622 2023-01-22 23:34:35.660124: step: 224/531, loss: 1.0980476140975952 2023-01-22 23:34:36.785411: step: 228/531, loss: 1.4812939167022705 2023-01-22 23:34:37.912174: step: 232/531, loss: 0.6922441720962524 2023-01-22 23:34:39.035380: step: 236/531, loss: 0.9902564883232117 2023-01-22 23:34:40.139093: step: 240/531, loss: 0.177326962351799 2023-01-22 23:34:41.274077: step: 244/531, loss: 0.20777606964111328 2023-01-22 23:34:42.393197: step: 248/531, loss: 1.7635724544525146 2023-01-22 23:34:43.517255: step: 252/531, loss: 0.11808653175830841 2023-01-22 23:34:44.638162: step: 256/531, loss: 0.3749334514141083 2023-01-22 23:34:45.807962: step: 260/531, loss: 1.1772258281707764 2023-01-22 23:34:46.922454: step: 264/531, loss: 0.7569869756698608 2023-01-22 23:34:48.043421: step: 268/531, loss: 0.5492221117019653 2023-01-22 23:34:49.172150: step: 272/531, loss: 0.6368181109428406 2023-01-22 23:34:50.291296: step: 276/531, loss: 0.6319532990455627 2023-01-22 23:34:51.421398: step: 280/531, loss: 1.2981876134872437 2023-01-22 23:34:52.523123: step: 284/531, loss: 0.15669222176074982 2023-01-22 23:34:53.662633: step: 288/531, loss: 0.6646470427513123 2023-01-22 23:34:54.768469: step: 292/531, loss: 0.369415283203125 2023-01-22 23:34:55.878485: step: 296/531, loss: 0.5761706233024597 2023-01-22 23:34:56.999429: step: 300/531, loss: 0.10725517570972443 2023-01-22 23:34:58.146615: step: 304/531, loss: 0.2761271595954895 2023-01-22 23:34:59.272118: step: 308/531, loss: 0.17902125418186188 2023-01-22 23:35:00.430259: step: 312/531, loss: 0.642677903175354 2023-01-22 23:35:01.544193: step: 316/531, loss: 0.32330718636512756 2023-01-22 23:35:02.723962: step: 320/531, loss: 0.31008490920066833 2023-01-22 23:35:03.847537: step: 324/531, loss: 0.1907430738210678 2023-01-22 23:35:04.969174: step: 328/531, loss: 0.41660165786743164 2023-01-22 23:35:06.070557: step: 332/531, loss: 0.20495377480983734 2023-01-22 23:35:07.205524: step: 336/531, loss: 0.34137028455734253 2023-01-22 23:35:08.338859: step: 340/531, loss: 0.5798617601394653 2023-01-22 23:35:09.459449: step: 344/531, loss: 0.37318092584609985 2023-01-22 23:35:10.587641: step: 348/531, loss: 0.21679158508777618 2023-01-22 23:35:11.681777: step: 352/531, loss: 0.5242207050323486 2023-01-22 23:35:12.797292: step: 356/531, loss: 0.2511359453201294 2023-01-22 23:35:13.940316: step: 360/531, loss: 1.7452383041381836 2023-01-22 23:35:15.043520: step: 364/531, loss: 0.5586540102958679 2023-01-22 23:35:16.164174: step: 368/531, loss: 0.34089863300323486 2023-01-22 23:35:17.285025: step: 372/531, loss: 0.2661881446838379 2023-01-22 23:35:18.426089: step: 376/531, loss: 1.7807663679122925 2023-01-22 23:35:19.576018: step: 380/531, loss: 2.899529457092285 2023-01-22 23:35:20.727009: step: 384/531, loss: 0.3713320791721344 2023-01-22 23:35:21.852913: step: 388/531, loss: 0.35799217224121094 2023-01-22 23:35:22.988920: step: 392/531, loss: 0.5195703506469727 2023-01-22 23:35:24.125386: step: 396/531, loss: 0.8480582237243652 2023-01-22 23:35:25.282163: step: 400/531, loss: 0.4078831374645233 2023-01-22 23:35:26.467423: step: 404/531, loss: 0.15243983268737793 2023-01-22 23:35:27.573809: step: 408/531, loss: 0.17399731278419495 2023-01-22 23:35:28.678675: step: 412/531, loss: 0.6364738345146179 2023-01-22 23:35:29.815043: step: 416/531, loss: 2.5777413845062256 2023-01-22 23:35:30.946041: step: 420/531, loss: 0.7492295503616333 2023-01-22 23:35:32.072226: step: 424/531, loss: 0.17711439728736877 2023-01-22 23:35:33.192307: step: 428/531, loss: 1.0871543884277344 2023-01-22 23:35:34.318881: step: 432/531, loss: 0.2248401641845703 2023-01-22 23:35:35.468820: step: 436/531, loss: 1.3650846481323242 2023-01-22 23:35:36.586703: step: 440/531, loss: 0.9823659658432007 2023-01-22 23:35:37.700183: step: 444/531, loss: 1.0744810104370117 2023-01-22 23:35:38.829527: step: 448/531, loss: 1.3013124465942383 2023-01-22 23:35:39.975436: step: 452/531, loss: 0.5785605311393738 2023-01-22 23:35:41.085445: step: 456/531, loss: 1.6107933521270752 2023-01-22 23:35:42.200307: step: 460/531, loss: 0.29660820960998535 2023-01-22 23:35:43.329090: step: 464/531, loss: 0.359740674495697 2023-01-22 23:35:44.443568: step: 468/531, loss: 0.11545324325561523 2023-01-22 23:35:45.566392: step: 472/531, loss: 0.24031469225883484 2023-01-22 23:35:46.666369: step: 476/531, loss: 0.6878279447555542 2023-01-22 23:35:47.784694: step: 480/531, loss: 1.9192399978637695 2023-01-22 23:35:48.923796: step: 484/531, loss: 0.7948795557022095 2023-01-22 23:35:50.043959: step: 488/531, loss: 1.035010814666748 2023-01-22 23:35:51.186686: step: 492/531, loss: 0.09685955196619034 2023-01-22 23:35:52.308160: step: 496/531, loss: 1.7673611640930176 2023-01-22 23:35:53.416052: step: 500/531, loss: 0.17317765951156616 2023-01-22 23:35:54.543058: step: 504/531, loss: 0.7733259797096252 2023-01-22 23:35:55.684641: step: 508/531, loss: 0.7451426982879639 2023-01-22 23:35:56.849217: step: 512/531, loss: 0.44408389925956726 2023-01-22 23:35:57.961780: step: 516/531, loss: 1.8669283390045166 2023-01-22 23:35:59.071292: step: 520/531, loss: 0.2147599756717682 2023-01-22 23:36:00.207211: step: 524/531, loss: 0.16766032576560974 2023-01-22 23:36:01.312789: step: 528/531, loss: 6.4270243644714355 2023-01-22 23:36:02.420273: step: 532/531, loss: 1.178396463394165 2023-01-22 23:36:03.554396: step: 536/531, loss: 1.7416846752166748 2023-01-22 23:36:04.682663: step: 540/531, loss: 1.717814326286316 2023-01-22 23:36:05.820731: step: 544/531, loss: 0.942686915397644 2023-01-22 23:36:06.959820: step: 548/531, loss: 1.3419599533081055 2023-01-22 23:36:08.076676: step: 552/531, loss: 0.16580715775489807 2023-01-22 23:36:09.203864: step: 556/531, loss: 0.14894671738147736 2023-01-22 23:36:10.331042: step: 560/531, loss: 0.12128090858459473 2023-01-22 23:36:11.461766: step: 564/531, loss: 0.9024069309234619 2023-01-22 23:36:12.628687: step: 568/531, loss: 0.264594167470932 2023-01-22 23:36:13.721730: step: 572/531, loss: 0.32906651496887207 2023-01-22 23:36:14.841463: step: 576/531, loss: 0.3746604025363922 2023-01-22 23:36:15.956107: step: 580/531, loss: 1.7179760932922363 2023-01-22 23:36:17.083857: step: 584/531, loss: 0.21175822615623474 2023-01-22 23:36:18.199679: step: 588/531, loss: 0.1195782870054245 2023-01-22 23:36:19.322620: step: 592/531, loss: 0.23276109993457794 2023-01-22 23:36:20.437363: step: 596/531, loss: 7.56704568862915 2023-01-22 23:36:21.557775: step: 600/531, loss: 0.6738176345825195 2023-01-22 23:36:22.691289: step: 604/531, loss: 0.5174084305763245 2023-01-22 23:36:23.814834: step: 608/531, loss: 0.24270924925804138 2023-01-22 23:36:24.957667: step: 612/531, loss: 1.7047746181488037 2023-01-22 23:36:26.069595: step: 616/531, loss: 0.8631380200386047 2023-01-22 23:36:27.201139: step: 620/531, loss: 0.09635944664478302 2023-01-22 23:36:28.334693: step: 624/531, loss: 0.8997295498847961 2023-01-22 23:36:29.460422: step: 628/531, loss: 0.23431158065795898 2023-01-22 23:36:30.592071: step: 632/531, loss: 0.4684382677078247 2023-01-22 23:36:31.699873: step: 636/531, loss: 0.24691133201122284 2023-01-22 23:36:32.849668: step: 640/531, loss: 0.5310478806495667 2023-01-22 23:36:33.959169: step: 644/531, loss: 0.34976208209991455 2023-01-22 23:36:35.070590: step: 648/531, loss: 0.566770076751709 2023-01-22 23:36:36.213961: step: 652/531, loss: 1.7421294450759888 2023-01-22 23:36:37.332571: step: 656/531, loss: 0.28855934739112854 2023-01-22 23:36:38.441393: step: 660/531, loss: 1.6107193231582642 2023-01-22 23:36:39.568227: step: 664/531, loss: 0.1847524642944336 2023-01-22 23:36:40.707847: step: 668/531, loss: 3.687346935272217 2023-01-22 23:36:41.867970: step: 672/531, loss: 1.1015892028808594 2023-01-22 23:36:42.960793: step: 676/531, loss: 0.37416964769363403 2023-01-22 23:36:44.094270: step: 680/531, loss: 0.2634064555168152 2023-01-22 23:36:45.242456: step: 684/531, loss: 0.8191621899604797 2023-01-22 23:36:46.362993: step: 688/531, loss: 1.3657824993133545 2023-01-22 23:36:47.478848: step: 692/531, loss: 0.16610869765281677 2023-01-22 23:36:48.646144: step: 696/531, loss: 0.2113826870918274 2023-01-22 23:36:49.771558: step: 700/531, loss: 0.9884800314903259 2023-01-22 23:36:50.888644: step: 704/531, loss: 1.5296320915222168 2023-01-22 23:36:52.017553: step: 708/531, loss: 0.5116531252861023 2023-01-22 23:36:53.126873: step: 712/531, loss: 1.908612608909607 2023-01-22 23:36:54.268654: step: 716/531, loss: 0.945914626121521 2023-01-22 23:36:55.434932: step: 720/531, loss: 0.44992390275001526 2023-01-22 23:36:56.567312: step: 724/531, loss: 0.41654056310653687 2023-01-22 23:36:57.692548: step: 728/531, loss: 1.8458174467086792 2023-01-22 23:36:58.815801: step: 732/531, loss: 0.9704311490058899 2023-01-22 23:36:59.940720: step: 736/531, loss: 1.451930046081543 2023-01-22 23:37:01.087400: step: 740/531, loss: 0.772199273109436 2023-01-22 23:37:02.203118: step: 744/531, loss: 6.651556491851807 2023-01-22 23:37:03.318509: step: 748/531, loss: 0.6743013858795166 2023-01-22 23:37:04.440805: step: 752/531, loss: 1.1714826822280884 2023-01-22 23:37:05.597459: step: 756/531, loss: 0.17997589707374573 2023-01-22 23:37:06.684234: step: 760/531, loss: 0.240447536110878 2023-01-22 23:37:07.809943: step: 764/531, loss: 0.18628492951393127 2023-01-22 23:37:08.956443: step: 768/531, loss: 1.2768070697784424 2023-01-22 23:37:10.107536: step: 772/531, loss: 0.9290516376495361 2023-01-22 23:37:11.227453: step: 776/531, loss: 1.3223081827163696 2023-01-22 23:37:12.378639: step: 780/531, loss: 1.5617501735687256 2023-01-22 23:37:13.501094: step: 784/531, loss: 6.992527484893799 2023-01-22 23:37:14.618783: step: 788/531, loss: 1.130629062652588 2023-01-22 23:37:15.736925: step: 792/531, loss: 0.7078399658203125 2023-01-22 23:37:16.842053: step: 796/531, loss: 0.23568393290042877 2023-01-22 23:37:17.945137: step: 800/531, loss: 0.41420382261276245 2023-01-22 23:37:19.056419: step: 804/531, loss: 0.2370687574148178 2023-01-22 23:37:20.196315: step: 808/531, loss: 0.24226799607276917 2023-01-22 23:37:21.324357: step: 812/531, loss: 1.7401185035705566 2023-01-22 23:37:22.454931: step: 816/531, loss: 0.4222102165222168 2023-01-22 23:37:23.572821: step: 820/531, loss: 1.0131925344467163 2023-01-22 23:37:24.689442: step: 824/531, loss: 0.4274725914001465 2023-01-22 23:37:25.801694: step: 828/531, loss: 1.739496111869812 2023-01-22 23:37:26.928715: step: 832/531, loss: 0.4572395384311676 2023-01-22 23:37:28.089085: step: 836/531, loss: 0.2601722776889801 2023-01-22 23:37:29.206715: step: 840/531, loss: 1.272942066192627 2023-01-22 23:37:30.333958: step: 844/531, loss: 0.3978622853755951 2023-01-22 23:37:31.470385: step: 848/531, loss: 0.2559117376804352 2023-01-22 23:37:32.601364: step: 852/531, loss: 0.11607237160205841 2023-01-22 23:37:33.759840: step: 856/531, loss: 6.901358604431152 2023-01-22 23:37:34.892352: step: 860/531, loss: 0.14067640900611877 2023-01-22 23:37:36.027118: step: 864/531, loss: 0.23895572125911713 2023-01-22 23:37:37.169232: step: 868/531, loss: 0.5042967796325684 2023-01-22 23:37:38.285044: step: 872/531, loss: 0.29004794359207153 2023-01-22 23:37:39.391549: step: 876/531, loss: 0.13914261758327484 2023-01-22 23:37:40.537442: step: 880/531, loss: 0.8387675881385803 2023-01-22 23:37:41.667819: step: 884/531, loss: 0.23050576448440552 2023-01-22 23:37:42.803766: step: 888/531, loss: 1.108525037765503 2023-01-22 23:37:43.936133: step: 892/531, loss: 0.5829653143882751 2023-01-22 23:37:45.083122: step: 896/531, loss: 0.650887668132782 2023-01-22 23:37:46.208077: step: 900/531, loss: 0.7675807476043701 2023-01-22 23:37:47.324505: step: 904/531, loss: 0.19485878944396973 2023-01-22 23:37:48.455284: step: 908/531, loss: 0.16979295015335083 2023-01-22 23:37:49.589172: step: 912/531, loss: 1.896610975265503 2023-01-22 23:37:50.704274: step: 916/531, loss: 0.7971558570861816 2023-01-22 23:37:51.820554: step: 920/531, loss: 1.45060396194458 2023-01-22 23:37:52.960916: step: 924/531, loss: 0.5280710458755493 2023-01-22 23:37:54.100040: step: 928/531, loss: 0.2047538310289383 2023-01-22 23:37:55.203257: step: 932/531, loss: 0.9925472140312195 2023-01-22 23:37:56.304876: step: 936/531, loss: 0.04657275974750519 2023-01-22 23:37:57.473654: step: 940/531, loss: 7.417726516723633 2023-01-22 23:37:58.594864: step: 944/531, loss: 2.8799166679382324 2023-01-22 23:37:59.723416: step: 948/531, loss: 0.5087589025497437 2023-01-22 23:38:00.835441: step: 952/531, loss: 1.0417362451553345 2023-01-22 23:38:01.966579: step: 956/531, loss: 0.17657442390918732 2023-01-22 23:38:03.133858: step: 960/531, loss: 0.413053423166275 2023-01-22 23:38:04.236399: step: 964/531, loss: 0.9740074276924133 2023-01-22 23:38:05.351132: step: 968/531, loss: 0.08871526271104813 2023-01-22 23:38:06.449489: step: 972/531, loss: 0.5364027619361877 2023-01-22 23:38:07.539931: step: 976/531, loss: 0.18806371092796326 2023-01-22 23:38:08.660228: step: 980/531, loss: 0.3420529365539551 2023-01-22 23:38:09.788223: step: 984/531, loss: 0.16767436265945435 2023-01-22 23:38:10.894674: step: 988/531, loss: 0.12904635071754456 2023-01-22 23:38:12.037041: step: 992/531, loss: 0.5927817821502686 2023-01-22 23:38:13.177072: step: 996/531, loss: 0.5910190343856812 2023-01-22 23:38:14.315367: step: 1000/531, loss: 0.239983469247818 2023-01-22 23:38:15.432773: step: 1004/531, loss: 1.172353744506836 2023-01-22 23:38:16.549180: step: 1008/531, loss: 0.2604988217353821 2023-01-22 23:38:17.657552: step: 1012/531, loss: 0.2758321762084961 2023-01-22 23:38:18.765934: step: 1016/531, loss: 1.3930798768997192 2023-01-22 23:38:19.896426: step: 1020/531, loss: 0.3904854953289032 2023-01-22 23:38:21.014937: step: 1024/531, loss: 1.5742791891098022 2023-01-22 23:38:22.157100: step: 1028/531, loss: 0.36612045764923096 2023-01-22 23:38:23.318971: step: 1032/531, loss: 0.46194028854370117 2023-01-22 23:38:24.438929: step: 1036/531, loss: 0.18834352493286133 2023-01-22 23:38:25.567325: step: 1040/531, loss: 0.24331551790237427 2023-01-22 23:38:26.685585: step: 1044/531, loss: 0.30071860551834106 2023-01-22 23:38:27.803568: step: 1048/531, loss: 0.1441446840763092 2023-01-22 23:38:28.900450: step: 1052/531, loss: 1.0711448192596436 2023-01-22 23:38:30.004621: step: 1056/531, loss: 0.5804270505905151 2023-01-22 23:38:31.104758: step: 1060/531, loss: 7.240081310272217 2023-01-22 23:38:32.231346: step: 1064/531, loss: 0.500418484210968 2023-01-22 23:38:33.368715: step: 1068/531, loss: 2.0883078575134277 2023-01-22 23:38:34.473584: step: 1072/531, loss: 0.23637351393699646 2023-01-22 23:38:35.579353: step: 1076/531, loss: 0.8518012762069702 2023-01-22 23:38:36.717580: step: 1080/531, loss: 0.24204093217849731 2023-01-22 23:38:37.837484: step: 1084/531, loss: 1.2486172914505005 2023-01-22 23:38:38.944411: step: 1088/531, loss: 0.16274595260620117 2023-01-22 23:38:40.072800: step: 1092/531, loss: 1.5050017833709717 2023-01-22 23:38:41.179272: step: 1096/531, loss: 0.9276976585388184 2023-01-22 23:38:42.303241: step: 1100/531, loss: 0.19396352767944336 2023-01-22 23:38:43.398516: step: 1104/531, loss: 0.21584434807300568 2023-01-22 23:38:44.532200: step: 1108/531, loss: 1.2082065343856812 2023-01-22 23:38:45.628457: step: 1112/531, loss: 0.2518351078033447 2023-01-22 23:38:46.734958: step: 1116/531, loss: 0.1923869550228119 2023-01-22 23:38:47.850112: step: 1120/531, loss: 0.26678207516670227 2023-01-22 23:38:48.962542: step: 1124/531, loss: 0.2565642297267914 2023-01-22 23:38:50.061880: step: 1128/531, loss: 0.5560347437858582 2023-01-22 23:38:51.168204: step: 1132/531, loss: 0.1654636412858963 2023-01-22 23:38:52.288222: step: 1136/531, loss: 1.2767598628997803 2023-01-22 23:38:53.414705: step: 1140/531, loss: 1.0399624109268188 2023-01-22 23:38:54.529965: step: 1144/531, loss: 0.0939173698425293 2023-01-22 23:38:55.641040: step: 1148/531, loss: 0.837897777557373 2023-01-22 23:38:56.716266: step: 1152/531, loss: 0.10107071697711945 2023-01-22 23:38:57.821166: step: 1156/531, loss: 0.24848470091819763 2023-01-22 23:38:58.925422: step: 1160/531, loss: 0.2140188217163086 2023-01-22 23:39:00.023927: step: 1164/531, loss: 0.6957235932350159 2023-01-22 23:39:01.150926: step: 1168/531, loss: 0.62116938829422 2023-01-22 23:39:02.259270: step: 1172/531, loss: 0.23200102150440216 2023-01-22 23:39:03.399677: step: 1176/531, loss: 0.23393641412258148 2023-01-22 23:39:04.499903: step: 1180/531, loss: 0.18609829246997833 2023-01-22 23:39:05.613513: step: 1184/531, loss: 0.7403040528297424 2023-01-22 23:39:06.719957: step: 1188/531, loss: 0.2572787404060364 2023-01-22 23:39:07.833185: step: 1192/531, loss: 1.2990471124649048 2023-01-22 23:39:08.966964: step: 1196/531, loss: 0.29179847240448 2023-01-22 23:39:10.067130: step: 1200/531, loss: 0.2972412109375 2023-01-22 23:39:11.177743: step: 1204/531, loss: 0.9895287752151489 2023-01-22 23:39:12.349754: step: 1208/531, loss: 0.13477206230163574 2023-01-22 23:39:13.508484: step: 1212/531, loss: 0.9927616119384766 2023-01-22 23:39:14.627658: step: 1216/531, loss: 0.2719815671443939 2023-01-22 23:39:15.753015: step: 1220/531, loss: 0.3135610520839691 2023-01-22 23:39:16.892536: step: 1224/531, loss: 1.4710155725479126 2023-01-22 23:39:18.032465: step: 1228/531, loss: 0.15041828155517578 2023-01-22 23:39:19.137391: step: 1232/531, loss: 0.7769324779510498 2023-01-22 23:39:20.297865: step: 1236/531, loss: 1.044642686843872 2023-01-22 23:39:21.438681: step: 1240/531, loss: 0.27401572465896606 2023-01-22 23:39:22.576770: step: 1244/531, loss: 0.3988594710826874 2023-01-22 23:39:23.740443: step: 1248/531, loss: 0.1301170289516449 2023-01-22 23:39:24.868055: step: 1252/531, loss: 0.4545459747314453 2023-01-22 23:39:25.962428: step: 1256/531, loss: 0.7234287858009338 2023-01-22 23:39:27.103043: step: 1260/531, loss: 1.6857560873031616 2023-01-22 23:39:28.242797: step: 1264/531, loss: 0.3886871337890625 2023-01-22 23:39:29.352539: step: 1268/531, loss: 1.5109837055206299 2023-01-22 23:39:30.450232: step: 1272/531, loss: 0.09156426787376404 2023-01-22 23:39:31.552262: step: 1276/531, loss: 0.169049471616745 2023-01-22 23:39:32.665615: step: 1280/531, loss: 0.8284401297569275 2023-01-22 23:39:33.792965: step: 1284/531, loss: 0.26794034242630005 2023-01-22 23:39:34.914872: step: 1288/531, loss: 0.3205471634864807 2023-01-22 23:39:36.081335: step: 1292/531, loss: 1.0547372102737427 2023-01-22 23:39:37.211272: step: 1296/531, loss: 0.4735472500324249 2023-01-22 23:39:38.342679: step: 1300/531, loss: 0.9224669337272644 2023-01-22 23:39:39.451421: step: 1304/531, loss: 0.2162257730960846 2023-01-22 23:39:40.571845: step: 1308/531, loss: 0.8526097536087036 2023-01-22 23:39:41.725514: step: 1312/531, loss: 0.805574893951416 2023-01-22 23:39:42.838871: step: 1316/531, loss: 0.20904913544654846 2023-01-22 23:39:44.010315: step: 1320/531, loss: 0.2312142252922058 2023-01-22 23:39:45.120335: step: 1324/531, loss: 1.9592543840408325 2023-01-22 23:39:46.196475: step: 1328/531, loss: 0.1831745207309723 2023-01-22 23:39:47.308309: step: 1332/531, loss: 0.9486944675445557 2023-01-22 23:39:48.436973: step: 1336/531, loss: 0.161957785487175 2023-01-22 23:39:49.534180: step: 1340/531, loss: 0.2659211754798889 2023-01-22 23:39:50.698369: step: 1344/531, loss: 0.8168730139732361 2023-01-22 23:39:51.829026: step: 1348/531, loss: 0.20484352111816406 2023-01-22 23:39:52.962676: step: 1352/531, loss: 2.036332845687866 2023-01-22 23:39:54.113963: step: 1356/531, loss: 1.3080036640167236 2023-01-22 23:39:55.215056: step: 1360/531, loss: 0.35485154390335083 2023-01-22 23:39:56.354168: step: 1364/531, loss: 0.2512446641921997 2023-01-22 23:39:57.458761: step: 1368/531, loss: 0.39393502473831177 2023-01-22 23:39:58.584670: step: 1372/531, loss: 0.8258788585662842 2023-01-22 23:39:59.710076: step: 1376/531, loss: 0.9475477337837219 2023-01-22 23:40:00.847331: step: 1380/531, loss: 0.5336613655090332 2023-01-22 23:40:01.954984: step: 1384/531, loss: 0.7308098077774048 2023-01-22 23:40:03.064902: step: 1388/531, loss: 0.4056418836116791 2023-01-22 23:40:04.190734: step: 1392/531, loss: 0.22033196687698364 2023-01-22 23:40:05.288697: step: 1396/531, loss: 0.7989135980606079 2023-01-22 23:40:06.439132: step: 1400/531, loss: 1.441643476486206 2023-01-22 23:40:07.553814: step: 1404/531, loss: 0.3538009524345398 2023-01-22 23:40:08.686260: step: 1408/531, loss: 1.0300137996673584 2023-01-22 23:40:09.803860: step: 1412/531, loss: 0.3359490633010864 2023-01-22 23:40:10.911118: step: 1416/531, loss: 0.17974421381950378 2023-01-22 23:40:12.028357: step: 1420/531, loss: 0.7635331749916077 2023-01-22 23:40:13.150538: step: 1424/531, loss: 2.332240104675293 2023-01-22 23:40:14.255156: step: 1428/531, loss: 1.2522529363632202 2023-01-22 23:40:15.412953: step: 1432/531, loss: 0.28223592042922974 2023-01-22 23:40:16.549168: step: 1436/531, loss: 0.2710130214691162 2023-01-22 23:40:17.663902: step: 1440/531, loss: 4.314457893371582 2023-01-22 23:40:18.803075: step: 1444/531, loss: 0.4161422848701477 2023-01-22 23:40:19.910212: step: 1448/531, loss: 0.7519227266311646 2023-01-22 23:40:21.053748: step: 1452/531, loss: 0.3558284640312195 2023-01-22 23:40:22.167790: step: 1456/531, loss: 1.498359203338623 2023-01-22 23:40:23.290408: step: 1460/531, loss: 0.6741830110549927 2023-01-22 23:40:24.435235: step: 1464/531, loss: 0.05815839767456055 2023-01-22 23:40:25.555890: step: 1468/531, loss: 0.20585794746875763 2023-01-22 23:40:26.670822: step: 1472/531, loss: 0.41033706068992615 2023-01-22 23:40:27.813964: step: 1476/531, loss: 1.3723766803741455 2023-01-22 23:40:28.958329: step: 1480/531, loss: 1.0911204814910889 2023-01-22 23:40:30.071830: step: 1484/531, loss: 0.49098461866378784 2023-01-22 23:40:31.197735: step: 1488/531, loss: 0.31540995836257935 2023-01-22 23:40:32.330505: step: 1492/531, loss: 0.1768076866865158 2023-01-22 23:40:33.466467: step: 1496/531, loss: 1.1858489513397217 2023-01-22 23:40:34.600042: step: 1500/531, loss: 0.8010136485099792 2023-01-22 23:40:35.745010: step: 1504/531, loss: 0.43002554774284363 2023-01-22 23:40:36.896114: step: 1508/531, loss: 0.13285726308822632 2023-01-22 23:40:38.014644: step: 1512/531, loss: 0.42237424850463867 2023-01-22 23:40:39.145658: step: 1516/531, loss: 0.17222046852111816 2023-01-22 23:40:40.253267: step: 1520/531, loss: 0.16028109192848206 2023-01-22 23:40:41.367459: step: 1524/531, loss: 0.35287314653396606 2023-01-22 23:40:42.481810: step: 1528/531, loss: 0.3377394378185272 2023-01-22 23:40:43.581123: step: 1532/531, loss: 0.6800830960273743 2023-01-22 23:40:44.714526: step: 1536/531, loss: 0.25599777698516846 2023-01-22 23:40:45.859407: step: 1540/531, loss: 0.2045327126979828 2023-01-22 23:40:46.981176: step: 1544/531, loss: 0.08804045617580414 2023-01-22 23:40:48.098912: step: 1548/531, loss: 0.8169205784797668 2023-01-22 23:40:49.227034: step: 1552/531, loss: 0.9265896081924438 2023-01-22 23:40:50.329779: step: 1556/531, loss: 0.2756344676017761 2023-01-22 23:40:51.442689: step: 1560/531, loss: 0.11664175987243652 2023-01-22 23:40:52.572786: step: 1564/531, loss: 0.4637576937675476 2023-01-22 23:40:53.744732: step: 1568/531, loss: 2.064606189727783 2023-01-22 23:40:54.854723: step: 1572/531, loss: 0.033939555287361145 2023-01-22 23:40:56.003130: step: 1576/531, loss: 0.9088577032089233 2023-01-22 23:40:57.113922: step: 1580/531, loss: 0.08482623100280762 2023-01-22 23:40:58.239170: step: 1584/531, loss: 0.13899503648281097 2023-01-22 23:40:59.386231: step: 1588/531, loss: 0.13772006332874298 2023-01-22 23:41:00.481432: step: 1592/531, loss: 1.553917407989502 2023-01-22 23:41:01.586998: step: 1596/531, loss: 0.4175397753715515 2023-01-22 23:41:02.713661: step: 1600/531, loss: 0.5677341222763062 2023-01-22 23:41:03.823417: step: 1604/531, loss: 0.11777696758508682 2023-01-22 23:41:04.950632: step: 1608/531, loss: 0.2342889904975891 2023-01-22 23:41:06.096413: step: 1612/531, loss: 0.18349990248680115 2023-01-22 23:41:07.212760: step: 1616/531, loss: 0.2129518687725067 2023-01-22 23:41:08.334031: step: 1620/531, loss: 0.1986038088798523 2023-01-22 23:41:09.464200: step: 1624/531, loss: 0.3257094621658325 2023-01-22 23:41:10.576921: step: 1628/531, loss: 0.6993228793144226 2023-01-22 23:41:11.699162: step: 1632/531, loss: 1.3127716779708862 2023-01-22 23:41:12.792570: step: 1636/531, loss: 0.11155405640602112 2023-01-22 23:41:13.904689: step: 1640/531, loss: 1.4729888439178467 2023-01-22 23:41:14.997953: step: 1644/531, loss: 0.0621953047811985 2023-01-22 23:41:16.109264: step: 1648/531, loss: 0.3179476857185364 2023-01-22 23:41:17.232315: step: 1652/531, loss: 7.031080722808838 2023-01-22 23:41:18.383637: step: 1656/531, loss: 7.330111503601074 2023-01-22 23:41:19.501234: step: 1660/531, loss: 0.4803520739078522 2023-01-22 23:41:20.622752: step: 1664/531, loss: 0.38560056686401367 2023-01-22 23:41:21.728531: step: 1668/531, loss: 0.24395470321178436 2023-01-22 23:41:22.866785: step: 1672/531, loss: 0.8088110685348511 2023-01-22 23:41:23.987976: step: 1676/531, loss: 0.2811763882637024 2023-01-22 23:41:25.086278: step: 1680/531, loss: 1.0032514333724976 2023-01-22 23:41:26.195114: step: 1684/531, loss: 0.21879087388515472 2023-01-22 23:41:27.334753: step: 1688/531, loss: 0.38532471656799316 2023-01-22 23:41:28.473894: step: 1692/531, loss: 0.17665715515613556 2023-01-22 23:41:29.585830: step: 1696/531, loss: 0.9016225934028625 2023-01-22 23:41:30.729016: step: 1700/531, loss: 0.5829874277114868 2023-01-22 23:41:31.838140: step: 1704/531, loss: 0.7834721803665161 2023-01-22 23:41:32.951680: step: 1708/531, loss: 0.23218432068824768 2023-01-22 23:41:34.068665: step: 1712/531, loss: 0.25406843423843384 2023-01-22 23:41:35.193016: step: 1716/531, loss: 0.9642157554626465 2023-01-22 23:41:36.336074: step: 1720/531, loss: 0.20871314406394958 2023-01-22 23:41:37.453172: step: 1724/531, loss: 0.17053963243961334 2023-01-22 23:41:38.585363: step: 1728/531, loss: 1.1743921041488647 2023-01-22 23:41:39.736180: step: 1732/531, loss: 0.23276901245117188 2023-01-22 23:41:40.833541: step: 1736/531, loss: 0.8446066379547119 2023-01-22 23:41:41.949175: step: 1740/531, loss: 1.8968336582183838 2023-01-22 23:41:43.075747: step: 1744/531, loss: 0.6809177398681641 2023-01-22 23:41:44.197291: step: 1748/531, loss: 0.40739890933036804 2023-01-22 23:41:45.337663: step: 1752/531, loss: 0.09254312515258789 2023-01-22 23:41:46.455239: step: 1756/531, loss: 0.22147217392921448 2023-01-22 23:41:47.583680: step: 1760/531, loss: 0.2090633362531662 2023-01-22 23:41:48.722478: step: 1764/531, loss: 0.20166674256324768 2023-01-22 23:41:49.845323: step: 1768/531, loss: 0.2949308753013611 2023-01-22 23:41:50.950067: step: 1772/531, loss: 0.3551998734474182 2023-01-22 23:41:52.107679: step: 1776/531, loss: 0.2173646092414856 2023-01-22 23:41:53.215047: step: 1780/531, loss: 0.25806522369384766 2023-01-22 23:41:54.330694: step: 1784/531, loss: 0.5489709973335266 2023-01-22 23:41:55.450132: step: 1788/531, loss: 0.2526020109653473 2023-01-22 23:41:56.574320: step: 1792/531, loss: 3.2206082344055176 2023-01-22 23:41:57.708078: step: 1796/531, loss: 0.8189810514450073 2023-01-22 23:41:58.822165: step: 1800/531, loss: 0.46924546360969543 2023-01-22 23:41:59.942838: step: 1804/531, loss: 0.8861564993858337 2023-01-22 23:42:01.049977: step: 1808/531, loss: 0.166741281747818 2023-01-22 23:42:02.154924: step: 1812/531, loss: 1.0572699308395386 2023-01-22 23:42:03.307038: step: 1816/531, loss: 1.3276479244232178 2023-01-22 23:42:04.457894: step: 1820/531, loss: 0.23045936226844788 2023-01-22 23:42:05.561435: step: 1824/531, loss: 0.4996519088745117 2023-01-22 23:42:06.681134: step: 1828/531, loss: 1.5433249473571777 2023-01-22 23:42:07.819377: step: 1832/531, loss: 0.3330448269844055 2023-01-22 23:42:08.922914: step: 1836/531, loss: 0.0652044266462326 2023-01-22 23:42:10.030406: step: 1840/531, loss: 0.28855496644973755 2023-01-22 23:42:11.145459: step: 1844/531, loss: 0.1495048999786377 2023-01-22 23:42:12.262151: step: 1848/531, loss: 0.6744166612625122 2023-01-22 23:42:13.402795: step: 1852/531, loss: 1.1349947452545166 2023-01-22 23:42:14.527002: step: 1856/531, loss: 0.6290029287338257 2023-01-22 23:42:15.675862: step: 1860/531, loss: 0.5774468183517456 2023-01-22 23:42:16.813739: step: 1864/531, loss: 2.1828465461730957 2023-01-22 23:42:17.977671: step: 1868/531, loss: 0.538384735584259 2023-01-22 23:42:19.094157: step: 1872/531, loss: 0.4154299795627594 2023-01-22 23:42:20.224679: step: 1876/531, loss: 2.072601795196533 2023-01-22 23:42:21.352806: step: 1880/531, loss: 0.21358928084373474 2023-01-22 23:42:22.493893: step: 1884/531, loss: 1.8182247877120972 2023-01-22 23:42:23.635551: step: 1888/531, loss: 0.28505173325538635 2023-01-22 23:42:24.754740: step: 1892/531, loss: 0.7993797063827515 2023-01-22 23:42:25.844286: step: 1896/531, loss: 1.0267996788024902 2023-01-22 23:42:26.966325: step: 1900/531, loss: 0.18323755264282227 2023-01-22 23:42:28.138719: step: 1904/531, loss: 0.7881519198417664 2023-01-22 23:42:29.275331: step: 1908/531, loss: 0.4433140754699707 2023-01-22 23:42:30.395424: step: 1912/531, loss: 0.20907936990261078 2023-01-22 23:42:31.509224: step: 1916/531, loss: 1.629726529121399 2023-01-22 23:42:32.627131: step: 1920/531, loss: 0.33906078338623047 2023-01-22 23:42:33.770150: step: 1924/531, loss: 1.1163712739944458 2023-01-22 23:42:34.903649: step: 1928/531, loss: 0.09826003015041351 2023-01-22 23:42:36.041450: step: 1932/531, loss: 0.7715063691139221 2023-01-22 23:42:37.151505: step: 1936/531, loss: 0.15876717865467072 2023-01-22 23:42:38.286122: step: 1940/531, loss: 0.42607760429382324 2023-01-22 23:42:39.424432: step: 1944/531, loss: 0.6965678930282593 2023-01-22 23:42:40.537417: step: 1948/531, loss: 0.09287256002426147 2023-01-22 23:42:41.645850: step: 1952/531, loss: 1.5991343259811401 2023-01-22 23:42:42.747138: step: 1956/531, loss: 0.19623929262161255 2023-01-22 23:42:43.841096: step: 1960/531, loss: 0.8158611059188843 2023-01-22 23:42:44.963987: step: 1964/531, loss: 2.6786370277404785 2023-01-22 23:42:46.082567: step: 1968/531, loss: 0.12565556168556213 2023-01-22 23:42:47.211388: step: 1972/531, loss: 0.04360482841730118 2023-01-22 23:42:48.360245: step: 1976/531, loss: 0.9402376413345337 2023-01-22 23:42:49.514430: step: 1980/531, loss: 0.6813391447067261 2023-01-22 23:42:50.652330: step: 1984/531, loss: 0.17815256118774414 2023-01-22 23:42:51.760149: step: 1988/531, loss: 0.5153025388717651 2023-01-22 23:42:52.853948: step: 1992/531, loss: 0.20263013243675232 2023-01-22 23:42:53.976483: step: 1996/531, loss: 0.6896774172782898 2023-01-22 23:42:55.107414: step: 2000/531, loss: 0.3980448246002197 2023-01-22 23:42:56.246041: step: 2004/531, loss: 0.48645612597465515 2023-01-22 23:42:57.350417: step: 2008/531, loss: 0.13880328834056854 2023-01-22 23:42:58.454681: step: 2012/531, loss: 2.4375839233398438 2023-01-22 23:42:59.567795: step: 2016/531, loss: 0.891746997833252 2023-01-22 23:43:00.689840: step: 2020/531, loss: 0.2686363160610199 2023-01-22 23:43:01.807837: step: 2024/531, loss: 0.9690882563591003 2023-01-22 23:43:02.913625: step: 2028/531, loss: 0.4365391731262207 2023-01-22 23:43:04.019526: step: 2032/531, loss: 0.11934394389390945 2023-01-22 23:43:05.163266: step: 2036/531, loss: 0.16340236365795135 2023-01-22 23:43:06.309482: step: 2040/531, loss: 1.2615885734558105 2023-01-22 23:43:07.452325: step: 2044/531, loss: 0.687390148639679 2023-01-22 23:43:08.560480: step: 2048/531, loss: 0.08409624546766281 2023-01-22 23:43:09.710833: step: 2052/531, loss: 0.32873356342315674 2023-01-22 23:43:10.806606: step: 2056/531, loss: 1.5646607875823975 2023-01-22 23:43:11.939746: step: 2060/531, loss: 0.8692156076431274 2023-01-22 23:43:13.074877: step: 2064/531, loss: 0.2723952531814575 2023-01-22 23:43:14.213383: step: 2068/531, loss: 0.6442745327949524 2023-01-22 23:43:15.303868: step: 2072/531, loss: 0.7862272262573242 2023-01-22 23:43:16.450971: step: 2076/531, loss: 1.035030484199524 2023-01-22 23:43:17.572606: step: 2080/531, loss: 0.6777375936508179 2023-01-22 23:43:18.736237: step: 2084/531, loss: 0.14923977851867676 2023-01-22 23:43:19.911334: step: 2088/531, loss: 0.9863994121551514 2023-01-22 23:43:21.019142: step: 2092/531, loss: 0.3320446014404297 2023-01-22 23:43:22.123251: step: 2096/531, loss: 0.39623701572418213 2023-01-22 23:43:23.235886: step: 2100/531, loss: 0.16563501954078674 2023-01-22 23:43:24.374884: step: 2104/531, loss: 0.21453677117824554 2023-01-22 23:43:25.516304: step: 2108/531, loss: 0.3409889340400696 2023-01-22 23:43:26.664797: step: 2112/531, loss: 1.567966103553772 2023-01-22 23:43:27.804792: step: 2116/531, loss: 0.8170315623283386 2023-01-22 23:43:28.900148: step: 2120/531, loss: 1.0186183452606201 2023-01-22 23:43:30.072539: step: 2124/531, loss: 0.23631054162979126 ================================================== Loss: 0.784 -------------------- Dev: {'event': {'p': 0.5747001090512541, 'r': 0.7017310252996005, 'f1': 0.6318944844124701}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test: {'event': {'p': 0.530718336483932, 'r': 0.6696481812760883, 'f1': 0.5921434220933298}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Chinese: {'event': {'p': 0.5806451612903226, 'r': 0.6666666666666666, 'f1': 0.6206896551724138}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Korean: {'event': {'p': 0.8125, 'r': 0.4126984126984127, 'f1': 0.5473684210526316}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Russian: {'event': {'p': 0.5588235294117647, 'r': 0.5277777777777778, 'f1': 0.5428571428571428}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.5747001090512541, 'r': 0.7017310252996005, 'f1': 0.6318944844124701}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Eng Test for Chinese: {'event': {'p': 0.530718336483932, 'r': 0.6696481812760883, 'f1': 0.5921434220933298}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'event': {'p': 0.5806451612903226, 'r': 0.6666666666666666, 'f1': 0.6206896551724138}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} -------------------- Eng Dev for Korean: {'event': {'p': 0.5747001090512541, 'r': 0.7017310252996005, 'f1': 0.6318944844124701}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Eng Test for Korean: {'event': {'p': 0.530718336483932, 'r': 0.6696481812760883, 'f1': 0.5921434220933298}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'event': {'p': 0.8125, 'r': 0.4126984126984127, 'f1': 0.5473684210526316}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} -------------------- Eng Dev for Russian: {'event': {'p': 0.5747001090512541, 'r': 0.7017310252996005, 'f1': 0.6318944844124701}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Eng Test for Russian: {'event': {'p': 0.530718336483932, 'r': 0.6696481812760883, 'f1': 0.5921434220933298}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'event': {'p': 0.5588235294117647, 'r': 0.5277777777777778, 'f1': 0.5428571428571428}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-22 23:44:32.784305: step: 4/531, loss: 0.1804010421037674 2023-01-22 23:44:33.917568: step: 8/531, loss: 0.6254034042358398 2023-01-22 23:44:35.053710: step: 12/531, loss: 0.8860158920288086 2023-01-22 23:44:36.189884: step: 16/531, loss: 0.2502374053001404 2023-01-22 23:44:37.297856: step: 20/531, loss: 0.15954260528087616 2023-01-22 23:44:38.446692: step: 24/531, loss: 0.6917909383773804 2023-01-22 23:44:39.558204: step: 28/531, loss: 0.2389553189277649 2023-01-22 23:44:40.685293: step: 32/531, loss: 0.20649877190589905 2023-01-22 23:44:41.808449: step: 36/531, loss: 0.07758722454309464 2023-01-22 23:44:42.906030: step: 40/531, loss: 1.024478793144226 2023-01-22 23:44:44.025081: step: 44/531, loss: 0.41793936491012573 2023-01-22 23:44:45.163098: step: 48/531, loss: 0.23780226707458496 2023-01-22 23:44:46.276662: step: 52/531, loss: 0.06887403130531311 2023-01-22 23:44:47.395560: step: 56/531, loss: 0.15385523438453674 2023-01-22 23:44:48.522159: step: 60/531, loss: 0.50139981508255 2023-01-22 23:44:49.616336: step: 64/531, loss: 1.1274677515029907 2023-01-22 23:44:50.735271: step: 68/531, loss: 1.458704948425293 2023-01-22 23:44:51.854280: step: 72/531, loss: 0.9071649312973022 2023-01-22 23:44:52.991617: step: 76/531, loss: 0.20013427734375 2023-01-22 23:44:54.113030: step: 80/531, loss: 0.1279170960187912 2023-01-22 23:44:55.227229: step: 84/531, loss: 1.2136847972869873 2023-01-22 23:44:56.366303: step: 88/531, loss: 0.21601973474025726 2023-01-22 23:44:57.497769: step: 92/531, loss: 0.9196099638938904 2023-01-22 23:44:58.617638: step: 96/531, loss: 0.42274338006973267 2023-01-22 23:44:59.745916: step: 100/531, loss: 0.9508174657821655 2023-01-22 23:45:00.889380: step: 104/531, loss: 0.1686171591281891 2023-01-22 23:45:02.040439: step: 108/531, loss: 0.8195101022720337 2023-01-22 23:45:03.174489: step: 112/531, loss: 0.657781183719635 2023-01-22 23:45:04.300393: step: 116/531, loss: 0.19910478591918945 2023-01-22 23:45:05.489541: step: 120/531, loss: 0.27922117710113525 2023-01-22 23:45:06.610770: step: 124/531, loss: 0.038541603833436966 2023-01-22 23:45:07.748817: step: 128/531, loss: 0.08344163745641708 2023-01-22 23:45:08.866690: step: 132/531, loss: 0.1253976821899414 2023-01-22 23:45:09.989999: step: 136/531, loss: 0.0702425017952919 2023-01-22 23:45:11.095438: step: 140/531, loss: 0.32205361127853394 2023-01-22 23:45:12.210148: step: 144/531, loss: 0.2390725016593933 2023-01-22 23:45:13.336544: step: 148/531, loss: 0.17802448570728302 2023-01-22 23:45:14.482517: step: 152/531, loss: 0.29456859827041626 2023-01-22 23:45:15.587695: step: 156/531, loss: 0.7989634275436401 2023-01-22 23:45:16.740982: step: 160/531, loss: 0.16000080108642578 2023-01-22 23:45:17.874256: step: 164/531, loss: 0.08040676265954971 2023-01-22 23:45:18.967646: step: 168/531, loss: 0.08630891144275665 2023-01-22 23:45:20.112097: step: 172/531, loss: 0.17356902360916138 2023-01-22 23:45:21.246575: step: 176/531, loss: 0.532572865486145 2023-01-22 23:45:22.348603: step: 180/531, loss: 0.08776288479566574 2023-01-22 23:45:23.460371: step: 184/531, loss: 0.8369118571281433 2023-01-22 23:45:24.583620: step: 188/531, loss: 0.8102593421936035 2023-01-22 23:45:25.717067: step: 192/531, loss: 0.08340568840503693 2023-01-22 23:45:26.843960: step: 196/531, loss: 0.32099205255508423 2023-01-22 23:45:28.013894: step: 200/531, loss: 0.2582084536552429 2023-01-22 23:45:29.147620: step: 204/531, loss: 0.8237835764884949 2023-01-22 23:45:30.260604: step: 208/531, loss: 0.36807554960250854 2023-01-22 23:45:31.401041: step: 212/531, loss: 1.1978293657302856 2023-01-22 23:45:32.526524: step: 216/531, loss: 0.20412281155586243 2023-01-22 23:45:33.675175: step: 220/531, loss: 2.2512669563293457 2023-01-22 23:45:34.784929: step: 224/531, loss: 1.0043058395385742 2023-01-22 23:45:35.891683: step: 228/531, loss: 1.3984246253967285 2023-01-22 23:45:37.023045: step: 232/531, loss: 0.10905656963586807 2023-01-22 23:45:38.160334: step: 236/531, loss: 1.242911696434021 2023-01-22 23:45:39.263204: step: 240/531, loss: 0.269389808177948 2023-01-22 23:45:40.390699: step: 244/531, loss: 0.7974693179130554 2023-01-22 23:45:41.526527: step: 248/531, loss: 0.1078215166926384 2023-01-22 23:45:42.649341: step: 252/531, loss: 0.06884270161390305 2023-01-22 23:45:43.791367: step: 256/531, loss: 0.21344806253910065 2023-01-22 23:45:44.925536: step: 260/531, loss: 0.404558390378952 2023-01-22 23:45:46.029506: step: 264/531, loss: 0.0639638900756836 2023-01-22 23:45:47.168506: step: 268/531, loss: 0.10782432556152344 2023-01-22 23:45:48.322586: step: 272/531, loss: 0.2297256588935852 2023-01-22 23:45:49.438490: step: 276/531, loss: 0.5807439088821411 2023-01-22 23:45:50.556157: step: 280/531, loss: 0.14174138009548187 2023-01-22 23:45:51.665276: step: 284/531, loss: 0.17653116583824158 2023-01-22 23:45:52.799333: step: 288/531, loss: 0.2736678719520569 2023-01-22 23:45:53.936456: step: 292/531, loss: 6.68790864944458 2023-01-22 23:45:55.053698: step: 296/531, loss: 0.3079667091369629 2023-01-22 23:45:56.162189: step: 300/531, loss: 0.12260589748620987 2023-01-22 23:45:57.298356: step: 304/531, loss: 0.18704338371753693 2023-01-22 23:45:58.424670: step: 308/531, loss: 0.13155046105384827 2023-01-22 23:45:59.554415: step: 312/531, loss: 0.600986123085022 2023-01-22 23:46:00.661527: step: 316/531, loss: 0.4282132685184479 2023-01-22 23:46:01.808296: step: 320/531, loss: 0.3611065149307251 2023-01-22 23:46:02.918617: step: 324/531, loss: 0.09567099064588547 2023-01-22 23:46:04.023086: step: 328/531, loss: 0.7234969735145569 2023-01-22 23:46:05.133339: step: 332/531, loss: 0.4623526632785797 2023-01-22 23:46:06.285970: step: 336/531, loss: 0.06090545654296875 2023-01-22 23:46:07.403961: step: 340/531, loss: 0.5487068891525269 2023-01-22 23:46:08.520321: step: 344/531, loss: 0.14757975935935974 2023-01-22 23:46:09.621938: step: 348/531, loss: 0.012281417846679688 2023-01-22 23:46:10.765012: step: 352/531, loss: 0.0668662041425705 2023-01-22 23:46:11.891172: step: 356/531, loss: 0.14945173263549805 2023-01-22 23:46:13.001096: step: 360/531, loss: 0.2041628062725067 2023-01-22 23:46:14.137638: step: 364/531, loss: 0.23457203805446625 2023-01-22 23:46:15.248411: step: 368/531, loss: 0.10594773292541504 2023-01-22 23:46:16.379130: step: 372/531, loss: 0.9441040754318237 2023-01-22 23:46:17.533969: step: 376/531, loss: 0.08234281092882156 2023-01-22 23:46:18.668274: step: 380/531, loss: 0.10490860790014267 2023-01-22 23:46:19.794966: step: 384/531, loss: 0.07585316151380539 2023-01-22 23:46:20.906182: step: 388/531, loss: 0.12614183127880096 2023-01-22 23:46:22.023965: step: 392/531, loss: 0.3918529450893402 2023-01-22 23:46:23.145473: step: 396/531, loss: 0.3381730616092682 2023-01-22 23:46:24.271107: step: 400/531, loss: 1.1836628913879395 2023-01-22 23:46:25.382502: step: 404/531, loss: 0.770467221736908 2023-01-22 23:46:26.481193: step: 408/531, loss: 0.09366574883460999 2023-01-22 23:46:27.606183: step: 412/531, loss: 0.19394254684448242 2023-01-22 23:46:28.754614: step: 416/531, loss: 0.23999567329883575 2023-01-22 23:46:29.871308: step: 420/531, loss: 0.1803724318742752 2023-01-22 23:46:30.975708: step: 424/531, loss: 0.15346011519432068 2023-01-22 23:46:32.084909: step: 428/531, loss: 0.07764768600463867 2023-01-22 23:46:33.206656: step: 432/531, loss: 0.18345585465431213 2023-01-22 23:46:34.357342: step: 436/531, loss: 0.7415252327919006 2023-01-22 23:46:35.506911: step: 440/531, loss: 0.7271220684051514 2023-01-22 23:46:36.629022: step: 444/531, loss: 0.8113651275634766 2023-01-22 23:46:37.771175: step: 448/531, loss: 0.1729355901479721 2023-01-22 23:46:38.899426: step: 452/531, loss: 0.06541948765516281 2023-01-22 23:46:40.032088: step: 456/531, loss: 0.23413439095020294 2023-01-22 23:46:41.163457: step: 460/531, loss: 0.6707006096839905 2023-01-22 23:46:42.316161: step: 464/531, loss: 0.6349808573722839 2023-01-22 23:46:43.442946: step: 468/531, loss: 0.5122022032737732 2023-01-22 23:46:44.547105: step: 472/531, loss: 0.6229504942893982 2023-01-22 23:46:45.696318: step: 476/531, loss: 0.20829248428344727 2023-01-22 23:46:46.841713: step: 480/531, loss: 2.0307044982910156 2023-01-22 23:46:47.973799: step: 484/531, loss: 0.780998706817627 2023-01-22 23:46:49.110197: step: 488/531, loss: 0.3710038363933563 2023-01-22 23:46:50.235157: step: 492/531, loss: 0.698712944984436 2023-01-22 23:46:51.371681: step: 496/531, loss: 0.6875631213188171 2023-01-22 23:46:52.502149: step: 500/531, loss: 0.6885471343994141 2023-01-22 23:46:53.643245: step: 504/531, loss: 0.23836584389209747 2023-01-22 23:46:54.757815: step: 508/531, loss: 0.3633994162082672 2023-01-22 23:46:55.866389: step: 512/531, loss: 0.1445978283882141 2023-01-22 23:46:57.003888: step: 516/531, loss: 2.135205030441284 2023-01-22 23:46:58.147768: step: 520/531, loss: 0.19791412353515625 2023-01-22 23:46:59.256865: step: 524/531, loss: 0.8024535179138184 2023-01-22 23:47:00.376494: step: 528/531, loss: 0.1619952768087387 2023-01-22 23:47:01.487049: step: 532/531, loss: 0.1395251303911209 2023-01-22 23:47:02.605118: step: 536/531, loss: 0.9045207500457764 2023-01-22 23:47:03.763115: step: 540/531, loss: 0.7106897830963135 2023-01-22 23:47:04.894738: step: 544/531, loss: 0.2746366560459137 2023-01-22 23:47:06.029863: step: 548/531, loss: 0.21125774085521698 2023-01-22 23:47:07.159189: step: 552/531, loss: 0.3815675973892212 2023-01-22 23:47:08.277081: step: 556/531, loss: 0.11512966454029083 2023-01-22 23:47:09.402460: step: 560/531, loss: 0.14159874618053436 2023-01-22 23:47:10.554681: step: 564/531, loss: 0.08137235790491104 2023-01-22 23:47:11.669706: step: 568/531, loss: 0.08827038109302521 2023-01-22 23:47:12.803038: step: 572/531, loss: 0.21972376108169556 2023-01-22 23:47:13.982572: step: 576/531, loss: 0.6301490664482117 2023-01-22 23:47:15.086116: step: 580/531, loss: 0.16319340467453003 2023-01-22 23:47:16.187222: step: 584/531, loss: 0.12621183693408966 2023-01-22 23:47:17.327654: step: 588/531, loss: 0.1114928275346756 2023-01-22 23:47:18.475458: step: 592/531, loss: 0.26103147864341736 2023-01-22 23:47:19.622472: step: 596/531, loss: 0.057212162762880325 2023-01-22 23:47:20.769871: step: 600/531, loss: 0.42674845457077026 2023-01-22 23:47:21.897843: step: 604/531, loss: 0.07332558184862137 2023-01-22 23:47:23.023889: step: 608/531, loss: 0.30532127618789673 2023-01-22 23:47:24.165833: step: 612/531, loss: 0.23980112373828888 2023-01-22 23:47:25.276438: step: 616/531, loss: 0.8976439237594604 2023-01-22 23:47:26.428634: step: 620/531, loss: 2.906677007675171 2023-01-22 23:47:27.548096: step: 624/531, loss: 0.7280469536781311 2023-01-22 23:47:28.702162: step: 628/531, loss: 1.1542143821716309 2023-01-22 23:47:29.836848: step: 632/531, loss: 0.3346903920173645 2023-01-22 23:47:30.963061: step: 636/531, loss: 0.34560537338256836 2023-01-22 23:47:32.080159: step: 640/531, loss: 0.1651010513305664 2023-01-22 23:47:33.220068: step: 644/531, loss: 0.21281543374061584 2023-01-22 23:47:34.334991: step: 648/531, loss: 0.2954367399215698 2023-01-22 23:47:35.489179: step: 652/531, loss: 0.1339561492204666 2023-01-22 23:47:36.617894: step: 656/531, loss: 0.1886063665151596 2023-01-22 23:47:37.743524: step: 660/531, loss: 1.0190188884735107 2023-01-22 23:47:38.851996: step: 664/531, loss: 0.11133213341236115 2023-01-22 23:47:39.983008: step: 668/531, loss: 0.2437620311975479 2023-01-22 23:47:41.120185: step: 672/531, loss: 0.21872201561927795 2023-01-22 23:47:42.242539: step: 676/531, loss: 0.19212254881858826 2023-01-22 23:47:43.385620: step: 680/531, loss: 1.5248701572418213 2023-01-22 23:47:44.554465: step: 684/531, loss: 0.2010609209537506 2023-01-22 23:47:45.655197: step: 688/531, loss: 0.12344007939100266 2023-01-22 23:47:46.774857: step: 692/531, loss: 0.4252944886684418 2023-01-22 23:47:47.902153: step: 696/531, loss: 1.312186360359192 2023-01-22 23:47:49.021699: step: 700/531, loss: 1.0798553228378296 2023-01-22 23:47:50.143512: step: 704/531, loss: 0.5881645083427429 2023-01-22 23:47:51.254837: step: 708/531, loss: 0.10628495365381241 2023-01-22 23:47:52.401859: step: 712/531, loss: 0.159512460231781 2023-01-22 23:47:53.541172: step: 716/531, loss: 2.050825595855713 2023-01-22 23:47:54.685168: step: 720/531, loss: 0.09985098242759705 2023-01-22 23:47:55.792098: step: 724/531, loss: 0.30086416006088257 2023-01-22 23:47:56.917006: step: 728/531, loss: 0.23234796524047852 2023-01-22 23:47:58.062711: step: 732/531, loss: 0.17717213928699493 2023-01-22 23:47:59.199814: step: 736/531, loss: 0.20781327784061432 2023-01-22 23:48:00.308670: step: 740/531, loss: 0.7929614186286926 2023-01-22 23:48:01.431536: step: 744/531, loss: 0.7031111121177673 2023-01-22 23:48:02.544112: step: 748/531, loss: 0.17271780967712402 2023-01-22 23:48:03.640443: step: 752/531, loss: 0.15759292244911194 2023-01-22 23:48:04.799417: step: 756/531, loss: 0.2638086676597595 2023-01-22 23:48:05.911052: step: 760/531, loss: 0.13789749145507812 2023-01-22 23:48:07.033918: step: 764/531, loss: 1.042675256729126 2023-01-22 23:48:08.142007: step: 768/531, loss: 0.15152187645435333 2023-01-22 23:48:09.279982: step: 772/531, loss: 0.29762402176856995 2023-01-22 23:48:10.413836: step: 776/531, loss: 0.8096060752868652 2023-01-22 23:48:11.550596: step: 780/531, loss: 0.6018036007881165 2023-01-22 23:48:12.671200: step: 784/531, loss: 0.7044198513031006 2023-01-22 23:48:13.822867: step: 788/531, loss: 1.0509707927703857 2023-01-22 23:48:14.954570: step: 792/531, loss: 0.1526416838169098 2023-01-22 23:48:16.096614: step: 796/531, loss: 1.2137681245803833 2023-01-22 23:48:17.237572: step: 800/531, loss: 0.20632153749465942 2023-01-22 23:48:18.339775: step: 804/531, loss: 0.2633296847343445 2023-01-22 23:48:19.473369: step: 808/531, loss: 0.18680515885353088 2023-01-22 23:48:20.606302: step: 812/531, loss: 0.7810570001602173 2023-01-22 23:48:21.724163: step: 816/531, loss: 2.1240315437316895 2023-01-22 23:48:22.830951: step: 820/531, loss: 1.6482373476028442 2023-01-22 23:48:23.948863: step: 824/531, loss: 0.8680394291877747 2023-01-22 23:48:25.087026: step: 828/531, loss: 0.17143592238426208 2023-01-22 23:48:26.213776: step: 832/531, loss: 0.15712718665599823 2023-01-22 23:48:27.338182: step: 836/531, loss: 6.5789875984191895 2023-01-22 23:48:28.455565: step: 840/531, loss: 0.5327132940292358 2023-01-22 23:48:29.580024: step: 844/531, loss: 0.14803513884544373 2023-01-22 23:48:30.730711: step: 848/531, loss: 0.22047024965286255 2023-01-22 23:48:31.861876: step: 852/531, loss: 0.12683305144309998 2023-01-22 23:48:32.988137: step: 856/531, loss: 0.11187329143285751 2023-01-22 23:48:34.106777: step: 860/531, loss: 0.28370827436447144 2023-01-22 23:48:35.245270: step: 864/531, loss: 0.17166957259178162 2023-01-22 23:48:36.372713: step: 868/531, loss: 0.050062134861946106 2023-01-22 23:48:37.496412: step: 872/531, loss: 0.6749389171600342 2023-01-22 23:48:38.608178: step: 876/531, loss: 1.137378454208374 2023-01-22 23:48:39.716436: step: 880/531, loss: 0.20644298195838928 2023-01-22 23:48:40.864510: step: 884/531, loss: 0.06601281464099884 2023-01-22 23:48:41.948601: step: 888/531, loss: 0.7395771741867065 2023-01-22 23:48:43.063056: step: 892/531, loss: 0.06680784374475479 2023-01-22 23:48:44.210166: step: 896/531, loss: 0.25551241636276245 2023-01-22 23:48:45.321149: step: 900/531, loss: 0.11387459933757782 2023-01-22 23:48:46.433814: step: 904/531, loss: 0.2727457880973816 2023-01-22 23:48:47.561529: step: 908/531, loss: 0.13785429298877716 2023-01-22 23:48:48.671694: step: 912/531, loss: 1.1785967350006104 2023-01-22 23:48:49.784616: step: 916/531, loss: 0.1641356498003006 2023-01-22 23:48:50.918809: step: 920/531, loss: 0.5948185920715332 2023-01-22 23:48:52.044615: step: 924/531, loss: 0.2540041208267212 2023-01-22 23:48:53.178068: step: 928/531, loss: 0.6472973823547363 2023-01-22 23:48:54.316802: step: 932/531, loss: 0.10078860074281693 2023-01-22 23:48:55.441644: step: 936/531, loss: 0.3583838641643524 2023-01-22 23:48:56.542702: step: 940/531, loss: 0.07218985259532928 2023-01-22 23:48:57.656382: step: 944/531, loss: 0.17330054938793182 2023-01-22 23:48:58.797078: step: 948/531, loss: 0.3355116546154022 2023-01-22 23:48:59.914425: step: 952/531, loss: 0.25009462237358093 2023-01-22 23:49:01.038084: step: 956/531, loss: 0.13683763146400452 2023-01-22 23:49:02.167200: step: 960/531, loss: 0.38340532779693604 2023-01-22 23:49:03.276133: step: 964/531, loss: 0.3999234139919281 2023-01-22 23:49:04.416090: step: 968/531, loss: 0.22345657646656036 2023-01-22 23:49:05.537644: step: 972/531, loss: 0.9508110284805298 2023-01-22 23:49:06.696410: step: 976/531, loss: 0.8545024394989014 2023-01-22 23:49:07.814858: step: 980/531, loss: 0.17120656371116638 2023-01-22 23:49:08.995141: step: 984/531, loss: 0.11361608654260635 2023-01-22 23:49:10.104924: step: 988/531, loss: 0.20901842415332794 2023-01-22 23:49:11.215510: step: 992/531, loss: 0.6007456183433533 2023-01-22 23:49:12.377343: step: 996/531, loss: 1.1552215814590454 2023-01-22 23:49:13.499885: step: 1000/531, loss: 0.8901445269584656 2023-01-22 23:49:14.593929: step: 1004/531, loss: 0.36755266785621643 2023-01-22 23:49:15.718659: step: 1008/531, loss: 0.11657439172267914 2023-01-22 23:49:16.848018: step: 1012/531, loss: 0.7590698003768921 2023-01-22 23:49:17.976086: step: 1016/531, loss: 0.4374852180480957 2023-01-22 23:49:19.070815: step: 1020/531, loss: 0.947029709815979 2023-01-22 23:49:20.161153: step: 1024/531, loss: 0.10592050850391388 2023-01-22 23:49:21.347764: step: 1028/531, loss: 0.5956710577011108 2023-01-22 23:49:22.470610: step: 1032/531, loss: 1.4725477695465088 2023-01-22 23:49:23.593517: step: 1036/531, loss: 0.41024237871170044 2023-01-22 23:49:24.688930: step: 1040/531, loss: 0.5856024622917175 2023-01-22 23:49:25.806170: step: 1044/531, loss: 0.7835206389427185 2023-01-22 23:49:26.923370: step: 1048/531, loss: 3.6335926055908203 2023-01-22 23:49:28.060672: step: 1052/531, loss: 0.08657512813806534 2023-01-22 23:49:29.179296: step: 1056/531, loss: 0.9906864166259766 2023-01-22 23:49:30.282574: step: 1060/531, loss: 0.09550638496875763 2023-01-22 23:49:31.389765: step: 1064/531, loss: 0.11956606060266495 2023-01-22 23:49:32.528652: step: 1068/531, loss: 0.9163941144943237 2023-01-22 23:49:33.651388: step: 1072/531, loss: 0.5700295567512512 2023-01-22 23:49:34.785274: step: 1076/531, loss: 6.957311153411865 2023-01-22 23:49:35.893045: step: 1080/531, loss: 0.1573740541934967 2023-01-22 23:49:37.004162: step: 1084/531, loss: 1.2304725646972656 2023-01-22 23:49:38.119558: step: 1088/531, loss: 1.3342170715332031 2023-01-22 23:49:39.261469: step: 1092/531, loss: 0.12902946770191193 2023-01-22 23:49:40.385431: step: 1096/531, loss: 0.06106915324926376 2023-01-22 23:49:41.502572: step: 1100/531, loss: 1.06157648563385 2023-01-22 23:49:42.634432: step: 1104/531, loss: 0.06626663357019424 2023-01-22 23:49:43.746717: step: 1108/531, loss: 0.06310644000768661 2023-01-22 23:49:44.854882: step: 1112/531, loss: 0.09823315590620041 2023-01-22 23:49:45.979006: step: 1116/531, loss: 0.7527143359184265 2023-01-22 23:49:47.123792: step: 1120/531, loss: 0.74085533618927 2023-01-22 23:49:48.275895: step: 1124/531, loss: 1.3974148035049438 2023-01-22 23:49:49.387446: step: 1128/531, loss: 1.5040168762207031 2023-01-22 23:49:50.484541: step: 1132/531, loss: 0.6995537877082825 2023-01-22 23:49:51.572106: step: 1136/531, loss: 0.2084212303161621 2023-01-22 23:49:52.724057: step: 1140/531, loss: 1.5815051794052124 2023-01-22 23:49:53.816120: step: 1144/531, loss: 0.7678499817848206 2023-01-22 23:49:54.933780: step: 1148/531, loss: 0.1555301547050476 2023-01-22 23:49:56.101897: step: 1152/531, loss: 0.6561551690101624 2023-01-22 23:49:57.201274: step: 1156/531, loss: 0.034768246114254 2023-01-22 23:49:58.344144: step: 1160/531, loss: 0.9606888890266418 2023-01-22 23:49:59.448609: step: 1164/531, loss: 0.6768432855606079 2023-01-22 23:50:00.564922: step: 1168/531, loss: 0.41532501578330994 2023-01-22 23:50:01.706079: step: 1172/531, loss: 0.1155424565076828 2023-01-22 23:50:02.841515: step: 1176/531, loss: 0.4982238709926605 2023-01-22 23:50:03.960294: step: 1180/531, loss: 7.609128475189209 2023-01-22 23:50:05.085058: step: 1184/531, loss: 0.6324636340141296 2023-01-22 23:50:06.211531: step: 1188/531, loss: 0.1454913169145584 2023-01-22 23:50:07.331805: step: 1192/531, loss: 1.7525665760040283 2023-01-22 23:50:08.440022: step: 1196/531, loss: 0.2776854634284973 2023-01-22 23:50:09.571303: step: 1200/531, loss: 0.09683418273925781 2023-01-22 23:50:10.742114: step: 1204/531, loss: 1.0877107381820679 2023-01-22 23:50:11.884254: step: 1208/531, loss: 0.19226941466331482 2023-01-22 23:50:13.000968: step: 1212/531, loss: 0.2268226593732834 2023-01-22 23:50:14.118162: step: 1216/531, loss: 0.6520306468009949 2023-01-22 23:50:15.276220: step: 1220/531, loss: 0.12727731466293335 2023-01-22 23:50:16.392768: step: 1224/531, loss: 0.09743762016296387 2023-01-22 23:50:17.541122: step: 1228/531, loss: 1.691659927368164 2023-01-22 23:50:18.661059: step: 1232/531, loss: 0.6641461849212646 2023-01-22 23:50:19.779656: step: 1236/531, loss: 0.20985403656959534 2023-01-22 23:50:20.921997: step: 1240/531, loss: 0.2602899670600891 2023-01-22 23:50:22.028007: step: 1244/531, loss: 0.7859292030334473 2023-01-22 23:50:23.155693: step: 1248/531, loss: 0.08878204226493835 2023-01-22 23:50:24.257800: step: 1252/531, loss: 0.4219377338886261 2023-01-22 23:50:25.356243: step: 1256/531, loss: 0.1345120519399643 2023-01-22 23:50:26.444696: step: 1260/531, loss: 0.4237781763076782 2023-01-22 23:50:27.572082: step: 1264/531, loss: 3.7702183723449707 2023-01-22 23:50:28.697194: step: 1268/531, loss: 1.4639090299606323 2023-01-22 23:50:29.812473: step: 1272/531, loss: 0.546227753162384 2023-01-22 23:50:30.940827: step: 1276/531, loss: 0.4485281705856323 2023-01-22 23:50:32.091940: step: 1280/531, loss: 0.9650321006774902 2023-01-22 23:50:33.239688: step: 1284/531, loss: 0.37040644884109497 2023-01-22 23:50:34.404888: step: 1288/531, loss: 0.2140875607728958 2023-01-22 23:50:35.524022: step: 1292/531, loss: 0.4569013714790344 2023-01-22 23:50:36.625384: step: 1296/531, loss: 1.2681976556777954 2023-01-22 23:50:37.734661: step: 1300/531, loss: 2.1809608936309814 2023-01-22 23:50:38.861057: step: 1304/531, loss: 0.4685884714126587 2023-01-22 23:50:40.009854: step: 1308/531, loss: 0.12381152808666229 2023-01-22 23:50:41.155873: step: 1312/531, loss: 0.43081074953079224 2023-01-22 23:50:42.284407: step: 1316/531, loss: 0.17219844460487366 2023-01-22 23:50:43.404594: step: 1320/531, loss: 0.15136300027370453 2023-01-22 23:50:44.552572: step: 1324/531, loss: 1.1252928972244263 2023-01-22 23:50:45.633251: step: 1328/531, loss: 0.7586947679519653 2023-01-22 23:50:46.739704: step: 1332/531, loss: 0.049904439598321915 2023-01-22 23:50:47.864954: step: 1336/531, loss: 0.24002304673194885 2023-01-22 23:50:48.970226: step: 1340/531, loss: 0.31283408403396606 2023-01-22 23:50:50.098864: step: 1344/531, loss: 0.13884754478931427 2023-01-22 23:50:51.215723: step: 1348/531, loss: 0.21160832047462463 2023-01-22 23:50:52.327054: step: 1352/531, loss: 0.2823036313056946 2023-01-22 23:50:53.472133: step: 1356/531, loss: 0.17182999849319458 2023-01-22 23:50:54.600305: step: 1360/531, loss: 0.059093572199344635 2023-01-22 23:50:55.747480: step: 1364/531, loss: 0.2501261532306671 2023-01-22 23:50:56.863240: step: 1368/531, loss: 0.1321430206298828 2023-01-22 23:50:57.995427: step: 1372/531, loss: 0.20583033561706543 2023-01-22 23:50:59.143105: step: 1376/531, loss: 0.2908773422241211 2023-01-22 23:51:00.309193: step: 1380/531, loss: 0.138828843832016 2023-01-22 23:51:01.431742: step: 1384/531, loss: 0.590828537940979 2023-01-22 23:51:02.589242: step: 1388/531, loss: 0.9859359264373779 2023-01-22 23:51:03.710310: step: 1392/531, loss: 1.1036908626556396 2023-01-22 23:51:04.832300: step: 1396/531, loss: 0.7704716324806213 2023-01-22 23:51:05.967531: step: 1400/531, loss: 1.3098336458206177 2023-01-22 23:51:07.100936: step: 1404/531, loss: 0.9489328861236572 2023-01-22 23:51:08.221055: step: 1408/531, loss: 0.3856930136680603 2023-01-22 23:51:09.328843: step: 1412/531, loss: 0.23342999815940857 2023-01-22 23:51:10.471338: step: 1416/531, loss: 0.10315561294555664 2023-01-22 23:51:11.608871: step: 1420/531, loss: 0.4391617774963379 2023-01-22 23:51:12.740222: step: 1424/531, loss: 0.23436251282691956 2023-01-22 23:51:13.865297: step: 1428/531, loss: 0.17315179109573364 2023-01-22 23:51:14.974155: step: 1432/531, loss: 0.03181004524230957 2023-01-22 23:51:16.072323: step: 1436/531, loss: 0.21297875046730042 2023-01-22 23:51:17.204114: step: 1440/531, loss: 0.1252715140581131 2023-01-22 23:51:18.346769: step: 1444/531, loss: 1.476365566253662 2023-01-22 23:51:19.506548: step: 1448/531, loss: 0.4087343215942383 2023-01-22 23:51:20.634137: step: 1452/531, loss: 0.3203123211860657 2023-01-22 23:51:21.732895: step: 1456/531, loss: 0.21265503764152527 2023-01-22 23:51:22.857046: step: 1460/531, loss: 0.1920381486415863 2023-01-22 23:51:23.983266: step: 1464/531, loss: 0.2612139582633972 2023-01-22 23:51:25.117592: step: 1468/531, loss: 0.545148491859436 2023-01-22 23:51:26.220081: step: 1472/531, loss: 0.28278470039367676 2023-01-22 23:51:27.337503: step: 1476/531, loss: 1.2949702739715576 2023-01-22 23:51:28.451459: step: 1480/531, loss: 0.0972374975681305 2023-01-22 23:51:29.582491: step: 1484/531, loss: 0.8514845371246338 2023-01-22 23:51:30.664714: step: 1488/531, loss: 0.7473111152648926 2023-01-22 23:51:31.782159: step: 1492/531, loss: 8.381614685058594 2023-01-22 23:51:32.912573: step: 1496/531, loss: 0.5208867788314819 2023-01-22 23:51:34.066781: step: 1500/531, loss: 0.37298718094825745 2023-01-22 23:51:35.193476: step: 1504/531, loss: 0.15591751039028168 2023-01-22 23:51:36.311725: step: 1508/531, loss: 0.24155083298683167 2023-01-22 23:51:37.407234: step: 1512/531, loss: 0.14065274596214294 2023-01-22 23:51:38.538671: step: 1516/531, loss: 0.3229680061340332 2023-01-22 23:51:39.670144: step: 1520/531, loss: 0.4380077123641968 2023-01-22 23:51:40.811933: step: 1524/531, loss: 0.147398442029953 2023-01-22 23:51:41.941149: step: 1528/531, loss: 0.225171759724617 2023-01-22 23:51:43.043139: step: 1532/531, loss: 0.5240567922592163 2023-01-22 23:51:44.171667: step: 1536/531, loss: 0.7758234739303589 2023-01-22 23:51:45.286388: step: 1540/531, loss: 1.0318593978881836 2023-01-22 23:51:46.401073: step: 1544/531, loss: 0.7325069904327393 2023-01-22 23:51:47.532029: step: 1548/531, loss: 0.11525993794202805 2023-01-22 23:51:48.661950: step: 1552/531, loss: 0.8383235335350037 2023-01-22 23:51:49.792801: step: 1556/531, loss: 1.8379714488983154 2023-01-22 23:51:50.892126: step: 1560/531, loss: 0.19917169213294983 2023-01-22 23:51:52.025871: step: 1564/531, loss: 0.08856458961963654 2023-01-22 23:51:53.155191: step: 1568/531, loss: 0.7376327514648438 2023-01-22 23:51:54.266243: step: 1572/531, loss: 0.0812767967581749 2023-01-22 23:51:55.389142: step: 1576/531, loss: 0.12987622618675232 2023-01-22 23:51:56.523384: step: 1580/531, loss: 0.5027222633361816 2023-01-22 23:51:57.661363: step: 1584/531, loss: 0.442804217338562 2023-01-22 23:51:58.780151: step: 1588/531, loss: 0.8244544863700867 2023-01-22 23:51:59.904177: step: 1592/531, loss: 0.09971772134304047 2023-01-22 23:52:01.010327: step: 1596/531, loss: 0.5062020421028137 2023-01-22 23:52:02.139209: step: 1600/531, loss: 0.08735175430774689 2023-01-22 23:52:03.293366: step: 1604/531, loss: 0.13679353892803192 2023-01-22 23:52:04.468249: step: 1608/531, loss: 0.33079707622528076 2023-01-22 23:52:05.569685: step: 1612/531, loss: 0.3879395127296448 2023-01-22 23:52:06.670450: step: 1616/531, loss: 0.18667078018188477 2023-01-22 23:52:07.759184: step: 1620/531, loss: 0.36524903774261475 2023-01-22 23:52:08.906701: step: 1624/531, loss: 0.9339926242828369 2023-01-22 23:52:10.042438: step: 1628/531, loss: 0.3150537610054016 2023-01-22 23:52:11.169388: step: 1632/531, loss: 0.2344045639038086 2023-01-22 23:52:12.272946: step: 1636/531, loss: 0.3411739468574524 2023-01-22 23:52:13.374418: step: 1640/531, loss: 0.02496981807053089 2023-01-22 23:52:14.500259: step: 1644/531, loss: 0.14544254541397095 2023-01-22 23:52:15.624525: step: 1648/531, loss: 0.17730173468589783 2023-01-22 23:52:16.761070: step: 1652/531, loss: 1.1998558044433594 2023-01-22 23:52:17.883066: step: 1656/531, loss: 1.4463233947753906 2023-01-22 23:52:19.038843: step: 1660/531, loss: 0.9938532114028931 2023-01-22 23:52:20.151098: step: 1664/531, loss: 0.2296905517578125 2023-01-22 23:52:21.257185: step: 1668/531, loss: 0.7235745787620544 2023-01-22 23:52:22.354334: step: 1672/531, loss: 0.3931038975715637 2023-01-22 23:52:23.496118: step: 1676/531, loss: 0.08223333209753036 2023-01-22 23:52:24.615724: step: 1680/531, loss: 0.11543980240821838 2023-01-22 23:52:25.725079: step: 1684/531, loss: 0.23564620316028595 2023-01-22 23:52:26.869083: step: 1688/531, loss: 0.11182551085948944 2023-01-22 23:52:28.034379: step: 1692/531, loss: 0.26423460245132446 2023-01-22 23:52:29.145183: step: 1696/531, loss: 0.8914171457290649 2023-01-22 23:52:30.259293: step: 1700/531, loss: 0.2571353018283844 2023-01-22 23:52:31.381700: step: 1704/531, loss: 0.14939013123512268 2023-01-22 23:52:32.553212: step: 1708/531, loss: 0.34691277146339417 2023-01-22 23:52:33.626297: step: 1712/531, loss: 1.6872406005859375 2023-01-22 23:52:34.740800: step: 1716/531, loss: 0.7784949541091919 2023-01-22 23:52:35.889405: step: 1720/531, loss: 0.12170334160327911 2023-01-22 23:52:37.035132: step: 1724/531, loss: 0.22595274448394775 2023-01-22 23:52:38.137030: step: 1728/531, loss: 0.6139885783195496 2023-01-22 23:52:39.280945: step: 1732/531, loss: 0.33781617879867554 2023-01-22 23:52:40.396125: step: 1736/531, loss: 0.319772332906723 2023-01-22 23:52:41.526936: step: 1740/531, loss: 0.6939656138420105 2023-01-22 23:52:42.653305: step: 1744/531, loss: 0.40353959798812866 2023-01-22 23:52:43.796128: step: 1748/531, loss: 0.7860513925552368 2023-01-22 23:52:44.917760: step: 1752/531, loss: 0.07775941491127014 2023-01-22 23:52:46.012340: step: 1756/531, loss: 0.18229876458644867 2023-01-22 23:52:47.119631: step: 1760/531, loss: 0.08175616711378098 2023-01-22 23:52:48.247302: step: 1764/531, loss: 0.5313694477081299 2023-01-22 23:52:49.396234: step: 1768/531, loss: 0.29177960753440857 2023-01-22 23:52:50.520250: step: 1772/531, loss: 0.8129252791404724 2023-01-22 23:52:51.627923: step: 1776/531, loss: 0.3290082812309265 2023-01-22 23:52:52.759588: step: 1780/531, loss: 0.12709903717041016 2023-01-22 23:52:53.881320: step: 1784/531, loss: 1.948046088218689 2023-01-22 23:52:55.010660: step: 1788/531, loss: 0.39866989850997925 2023-01-22 23:52:56.129544: step: 1792/531, loss: 0.31562525033950806 2023-01-22 23:52:57.271425: step: 1796/531, loss: 6.704441070556641 2023-01-22 23:52:58.389293: step: 1800/531, loss: 0.8341490030288696 2023-01-22 23:52:59.518136: step: 1804/531, loss: 0.17107801139354706 2023-01-22 23:53:00.643689: step: 1808/531, loss: 0.6423975229263306 2023-01-22 23:53:01.769951: step: 1812/531, loss: 0.6129862070083618 2023-01-22 23:53:02.907679: step: 1816/531, loss: 1.418178915977478 2023-01-22 23:53:04.042904: step: 1820/531, loss: 0.36889734864234924 2023-01-22 23:53:05.149415: step: 1824/531, loss: 7.297482013702393 2023-01-22 23:53:06.267000: step: 1828/531, loss: 0.10019774734973907 2023-01-22 23:53:07.384129: step: 1832/531, loss: 0.46965092420578003 2023-01-22 23:53:08.496725: step: 1836/531, loss: 0.2980436384677887 2023-01-22 23:53:09.635977: step: 1840/531, loss: 1.2087880373001099 2023-01-22 23:53:10.738707: step: 1844/531, loss: 0.37208208441734314 2023-01-22 23:53:11.877843: step: 1848/531, loss: 0.1479925662279129 2023-01-22 23:53:12.994609: step: 1852/531, loss: 0.37395450472831726 2023-01-22 23:53:14.141656: step: 1856/531, loss: 0.9796489477157593 2023-01-22 23:53:15.273288: step: 1860/531, loss: 0.15056085586547852 2023-01-22 23:53:16.428233: step: 1864/531, loss: 0.11423273384571075 2023-01-22 23:53:17.563902: step: 1868/531, loss: 0.3338344693183899 2023-01-22 23:53:18.675227: step: 1872/531, loss: 0.9630193114280701 2023-01-22 23:53:19.825306: step: 1876/531, loss: 1.8121334314346313 2023-01-22 23:53:20.943639: step: 1880/531, loss: 0.11071091145277023 2023-01-22 23:53:22.085707: step: 1884/531, loss: 0.2831290066242218 2023-01-22 23:53:23.214022: step: 1888/531, loss: 0.5267331600189209 2023-01-22 23:53:24.337786: step: 1892/531, loss: 0.10727138817310333 2023-01-22 23:53:25.454660: step: 1896/531, loss: 0.06220896542072296 2023-01-22 23:53:26.580616: step: 1900/531, loss: 0.33919304609298706 2023-01-22 23:53:27.725304: step: 1904/531, loss: 0.04836144298315048 2023-01-22 23:53:28.836908: step: 1908/531, loss: 6.447678565979004 2023-01-22 23:53:29.966470: step: 1912/531, loss: 0.29288217425346375 2023-01-22 23:53:31.096715: step: 1916/531, loss: 0.3692001402378082 2023-01-22 23:53:32.194476: step: 1920/531, loss: 0.08110976219177246 2023-01-22 23:53:33.294914: step: 1924/531, loss: 0.3174844980239868 2023-01-22 23:53:34.412234: step: 1928/531, loss: 0.3076438903808594 2023-01-22 23:53:35.539241: step: 1932/531, loss: 0.299083948135376 2023-01-22 23:53:36.659628: step: 1936/531, loss: 0.35193508863449097 2023-01-22 23:53:37.770130: step: 1940/531, loss: 0.13347235321998596 2023-01-22 23:53:38.867954: step: 1944/531, loss: 0.7443598508834839 2023-01-22 23:53:40.000230: step: 1948/531, loss: 0.15762268006801605 2023-01-22 23:53:41.147174: step: 1952/531, loss: 0.8865430951118469 2023-01-22 23:53:42.267821: step: 1956/531, loss: 1.0948612689971924 2023-01-22 23:53:43.383163: step: 1960/531, loss: 0.08130589127540588 2023-01-22 23:53:44.516559: step: 1964/531, loss: 0.40301501750946045 2023-01-22 23:53:45.651604: step: 1968/531, loss: 1.0839444398880005 2023-01-22 23:53:46.759457: step: 1972/531, loss: 0.0632549300789833 2023-01-22 23:53:47.860851: step: 1976/531, loss: 0.2433350533246994 2023-01-22 23:53:48.969760: step: 1980/531, loss: 0.15397948026657104 2023-01-22 23:53:50.078613: step: 1984/531, loss: 0.251697838306427 2023-01-22 23:53:51.198845: step: 1988/531, loss: 0.15375256538391113 2023-01-22 23:53:52.328209: step: 1992/531, loss: 0.09616260975599289 2023-01-22 23:53:53.465506: step: 1996/531, loss: 1.0166057348251343 2023-01-22 23:53:54.584379: step: 2000/531, loss: 0.16484537720680237 2023-01-22 23:53:55.686767: step: 2004/531, loss: 0.04665394127368927 2023-01-22 23:53:56.810486: step: 2008/531, loss: 0.10111980140209198 2023-01-22 23:53:57.951474: step: 2012/531, loss: 2.2535128593444824 2023-01-22 23:53:59.050007: step: 2016/531, loss: 0.9195954203605652 2023-01-22 23:54:00.165391: step: 2020/531, loss: 0.10869503021240234 2023-01-22 23:54:01.298763: step: 2024/531, loss: 2.0339207649230957 2023-01-22 23:54:02.406261: step: 2028/531, loss: 6.240901947021484 2023-01-22 23:54:03.521307: step: 2032/531, loss: 0.21875745058059692 2023-01-22 23:54:04.629410: step: 2036/531, loss: 0.425650417804718 2023-01-22 23:54:05.789004: step: 2040/531, loss: 0.1376427710056305 2023-01-22 23:54:06.939880: step: 2044/531, loss: 0.304365873336792 2023-01-22 23:54:08.106609: step: 2048/531, loss: 0.09956922382116318 2023-01-22 23:54:09.228872: step: 2052/531, loss: 0.08913564682006836 2023-01-22 23:54:10.332627: step: 2056/531, loss: 0.29691264033317566 2023-01-22 23:54:11.452906: step: 2060/531, loss: 0.0633930191397667 2023-01-22 23:54:12.611099: step: 2064/531, loss: 0.13934431970119476 2023-01-22 23:54:13.719547: step: 2068/531, loss: 0.13103389739990234 2023-01-22 23:54:14.817027: step: 2072/531, loss: 0.7008161544799805 2023-01-22 23:54:15.937878: step: 2076/531, loss: 1.5216033458709717 2023-01-22 23:54:17.076899: step: 2080/531, loss: 1.4950681924819946 2023-01-22 23:54:18.218069: step: 2084/531, loss: 0.2535172402858734 2023-01-22 23:54:19.333062: step: 2088/531, loss: 1.3668227195739746 2023-01-22 23:54:20.471314: step: 2092/531, loss: 0.15920992195606232 2023-01-22 23:54:21.594677: step: 2096/531, loss: 1.681749939918518 2023-01-22 23:54:22.713766: step: 2100/531, loss: 0.15518221259117126 2023-01-22 23:54:23.845517: step: 2104/531, loss: 0.2543899416923523 2023-01-22 23:54:24.975099: step: 2108/531, loss: 0.13458938896656036 2023-01-22 23:54:26.107905: step: 2112/531, loss: 1.3520259857177734 2023-01-22 23:54:27.240593: step: 2116/531, loss: 0.2652145326137543 2023-01-22 23:54:28.373773: step: 2120/531, loss: 0.4456610679626465 2023-01-22 23:54:29.498826: step: 2124/531, loss: 0.06604361534118652 ================================================== Loss: 0.598 -------------------- Dev: {'event': {'p': 0.6514360313315927, 'r': 0.6644474034620506, 'f1': 0.6578773895847067}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Test: {'event': {'p': 0.6570926143024619, 'r': 0.6684555754323196, 'f1': 0.6627253916642033}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Chinese: {'event': {'p': 0.6271186440677966, 'r': 0.6851851851851852, 'f1': 0.6548672566371682}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Korean: {'event': {'p': 0.8, 'r': 0.31746031746031744, 'f1': 0.45454545454545453}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Russian: {'event': {'p': 0.6071428571428571, 'r': 0.4722222222222222, 'f1': 0.53125}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6514360313315927, 'r': 0.6644474034620506, 'f1': 0.6578773895847067}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Eng Test for Chinese: {'event': {'p': 0.6570926143024619, 'r': 0.6684555754323196, 'f1': 0.6627253916642033}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Chinese: {'event': {'p': 0.6271186440677966, 'r': 0.6851851851851852, 'f1': 0.6548672566371682}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Eng Dev for Korean: {'event': {'p': 0.5747001090512541, 'r': 0.7017310252996005, 'f1': 0.6318944844124701}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Eng Test for Korean: {'event': {'p': 0.530718336483932, 'r': 0.6696481812760883, 'f1': 0.5921434220933298}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'event': {'p': 0.8125, 'r': 0.4126984126984127, 'f1': 0.5473684210526316}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} -------------------- Eng Dev for Russian: {'event': {'p': 0.6514360313315927, 'r': 0.6644474034620506, 'f1': 0.6578773895847067}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Eng Test for Russian: {'event': {'p': 0.6570926143024619, 'r': 0.6684555754323196, 'f1': 0.6627253916642033}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'event': {'p': 0.6071428571428571, 'r': 0.4722222222222222, 'f1': 0.53125}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-22 23:55:24.355109: step: 4/531, loss: 0.16662216186523438 2023-01-22 23:55:25.480142: step: 8/531, loss: 0.6138704419136047 2023-01-22 23:55:26.583309: step: 12/531, loss: 0.043197158724069595 2023-01-22 23:55:27.727767: step: 16/531, loss: 0.9986713528633118 2023-01-22 23:55:28.824210: step: 20/531, loss: 0.25728529691696167 2023-01-22 23:55:29.929470: step: 24/531, loss: 0.24201565980911255 2023-01-22 23:55:31.059528: step: 28/531, loss: 0.5085304379463196 2023-01-22 23:55:32.168795: step: 32/531, loss: 0.36001381278038025 2023-01-22 23:55:33.282288: step: 36/531, loss: 1.0557341575622559 2023-01-22 23:55:34.424221: step: 40/531, loss: 0.05920562893152237 2023-01-22 23:55:35.570476: step: 44/531, loss: 0.10665875673294067 2023-01-22 23:55:36.701614: step: 48/531, loss: 1.6024012565612793 2023-01-22 23:55:37.827802: step: 52/531, loss: 0.23823794722557068 2023-01-22 23:55:38.938519: step: 56/531, loss: 0.12140689045190811 2023-01-22 23:55:40.047387: step: 60/531, loss: 0.3833610415458679 2023-01-22 23:55:41.202623: step: 64/531, loss: 0.10561180859804153 2023-01-22 23:55:42.367465: step: 68/531, loss: 1.0837523937225342 2023-01-22 23:55:43.535006: step: 72/531, loss: 1.2641563415527344 2023-01-22 23:55:44.644975: step: 76/531, loss: 0.17548207938671112 2023-01-22 23:55:45.755895: step: 80/531, loss: 0.7715210914611816 2023-01-22 23:55:46.900041: step: 84/531, loss: 0.1898891031742096 2023-01-22 23:55:48.054457: step: 88/531, loss: 0.26800501346588135 2023-01-22 23:55:49.189317: step: 92/531, loss: 0.1271069496870041 2023-01-22 23:55:50.316226: step: 96/531, loss: 0.15137191116809845 2023-01-22 23:55:51.482509: step: 100/531, loss: 0.9224390387535095 2023-01-22 23:55:52.593106: step: 104/531, loss: 0.39085257053375244 2023-01-22 23:55:53.718810: step: 108/531, loss: 0.806428074836731 2023-01-22 23:55:54.841492: step: 112/531, loss: 0.670211672782898 2023-01-22 23:55:55.985179: step: 116/531, loss: 0.9572925567626953 2023-01-22 23:55:57.090224: step: 120/531, loss: 0.1352705955505371 2023-01-22 23:55:58.239360: step: 124/531, loss: 0.8628254532814026 2023-01-22 23:55:59.357530: step: 128/531, loss: 0.07888732105493546 2023-01-22 23:56:00.483370: step: 132/531, loss: 0.49997806549072266 2023-01-22 23:56:01.623142: step: 136/531, loss: 0.12863436341285706 2023-01-22 23:56:02.800978: step: 140/531, loss: 0.1377452313899994 2023-01-22 23:56:03.911458: step: 144/531, loss: 0.31675663590431213 2023-01-22 23:56:05.017304: step: 148/531, loss: 0.17679738998413086 2023-01-22 23:56:06.136247: step: 152/531, loss: 0.14813052117824554 2023-01-22 23:56:07.263143: step: 156/531, loss: 0.21542072296142578 2023-01-22 23:56:08.420206: step: 160/531, loss: 0.0798526257276535 2023-01-22 23:56:09.549368: step: 164/531, loss: 0.27220889925956726 2023-01-22 23:56:10.695778: step: 168/531, loss: 0.1726182997226715 2023-01-22 23:56:11.870851: step: 172/531, loss: 0.17931662499904633 2023-01-22 23:56:12.965801: step: 176/531, loss: 0.04673619568347931 2023-01-22 23:56:14.094249: step: 180/531, loss: 0.15547513961791992 2023-01-22 23:56:15.219635: step: 184/531, loss: 0.06568518280982971 2023-01-22 23:56:16.344147: step: 188/531, loss: 0.26724153757095337 2023-01-22 23:56:17.441335: step: 192/531, loss: 0.6732152104377747 2023-01-22 23:56:18.608185: step: 196/531, loss: 0.4713152050971985 2023-01-22 23:56:19.744414: step: 200/531, loss: 0.04192962870001793 2023-01-22 23:56:20.884064: step: 204/531, loss: 0.8031841516494751 2023-01-22 23:56:22.042419: step: 208/531, loss: 0.6889842748641968 2023-01-22 23:56:23.172420: step: 212/531, loss: 0.967907190322876 2023-01-22 23:56:24.302678: step: 216/531, loss: 0.8394032716751099 2023-01-22 23:56:25.391455: step: 220/531, loss: 0.12822183966636658 2023-01-22 23:56:26.508977: step: 224/531, loss: 0.22148971259593964 2023-01-22 23:56:27.649569: step: 228/531, loss: 0.11873769760131836 2023-01-22 23:56:28.765271: step: 232/531, loss: 1.1610593795776367 2023-01-22 23:56:29.878801: step: 236/531, loss: 0.6590182185173035 2023-01-22 23:56:31.019256: step: 240/531, loss: 0.6668195724487305 2023-01-22 23:56:32.132451: step: 244/531, loss: 0.8663637042045593 2023-01-22 23:56:33.254195: step: 248/531, loss: 0.21116772294044495 2023-01-22 23:56:34.365439: step: 252/531, loss: 0.5122771859169006 2023-01-22 23:56:35.484934: step: 256/531, loss: 1.1016069650650024 2023-01-22 23:56:36.619364: step: 260/531, loss: 0.3089834153652191 2023-01-22 23:56:37.719740: step: 264/531, loss: 0.3933687210083008 2023-01-22 23:56:38.845322: step: 268/531, loss: 0.1246330738067627 2023-01-22 23:56:39.931922: step: 272/531, loss: 0.21756906807422638 2023-01-22 23:56:41.060703: step: 276/531, loss: 0.735102117061615 2023-01-22 23:56:42.189432: step: 280/531, loss: 0.3586246371269226 2023-01-22 23:56:43.311947: step: 284/531, loss: 0.21320581436157227 2023-01-22 23:56:44.424226: step: 288/531, loss: 0.342446893453598 2023-01-22 23:56:45.542931: step: 292/531, loss: 0.23088237643241882 2023-01-22 23:56:46.702553: step: 296/531, loss: 0.4778937101364136 2023-01-22 23:56:47.847833: step: 300/531, loss: 0.8178250193595886 2023-01-22 23:56:48.952256: step: 304/531, loss: 0.7688889503479004 2023-01-22 23:56:50.066472: step: 308/531, loss: 0.4121326208114624 2023-01-22 23:56:51.193932: step: 312/531, loss: 0.15973415970802307 2023-01-22 23:56:52.291111: step: 316/531, loss: 0.14084920287132263 2023-01-22 23:56:53.408709: step: 320/531, loss: 0.13389372825622559 2023-01-22 23:56:54.527181: step: 324/531, loss: 0.7516636252403259 2023-01-22 23:56:55.637996: step: 328/531, loss: 0.6399877667427063 2023-01-22 23:56:56.756897: step: 332/531, loss: 0.09671802818775177 2023-01-22 23:56:57.895990: step: 336/531, loss: 0.19902664422988892 2023-01-22 23:56:59.009579: step: 340/531, loss: 0.07357347011566162 2023-01-22 23:57:00.138873: step: 344/531, loss: 0.14418679475784302 2023-01-22 23:57:01.283114: step: 348/531, loss: 0.15957948565483093 2023-01-22 23:57:02.437393: step: 352/531, loss: 0.2750186026096344 2023-01-22 23:57:03.514320: step: 356/531, loss: 6.368100166320801 2023-01-22 23:57:04.652968: step: 360/531, loss: 0.3465927839279175 2023-01-22 23:57:05.789639: step: 364/531, loss: 0.4368191957473755 2023-01-22 23:57:06.910277: step: 368/531, loss: 0.3170282244682312 2023-01-22 23:57:08.048592: step: 372/531, loss: 0.17959733307361603 2023-01-22 23:57:09.201487: step: 376/531, loss: 0.245753675699234 2023-01-22 23:57:10.336812: step: 380/531, loss: 0.23004131019115448 2023-01-22 23:57:11.483554: step: 384/531, loss: 0.11193791031837463 2023-01-22 23:57:12.630800: step: 388/531, loss: 0.20531444251537323 2023-01-22 23:57:13.758564: step: 392/531, loss: 0.18397626280784607 2023-01-22 23:57:14.883888: step: 396/531, loss: 0.1100132018327713 2023-01-22 23:57:15.983289: step: 400/531, loss: 0.30163049697875977 2023-01-22 23:57:17.115205: step: 404/531, loss: 0.6225482225418091 2023-01-22 23:57:18.234762: step: 408/531, loss: 0.4819822311401367 2023-01-22 23:57:19.371393: step: 412/531, loss: 0.14583168923854828 2023-01-22 23:57:20.492236: step: 416/531, loss: 0.5499014258384705 2023-01-22 23:57:21.683107: step: 420/531, loss: 0.19112345576286316 2023-01-22 23:57:22.830407: step: 424/531, loss: 1.0183470249176025 2023-01-22 23:57:23.963844: step: 428/531, loss: 0.19668178260326385 2023-01-22 23:57:25.089555: step: 432/531, loss: 0.5788142681121826 2023-01-22 23:57:26.215881: step: 436/531, loss: 0.8933883905410767 2023-01-22 23:57:27.358705: step: 440/531, loss: 0.10386767983436584 2023-01-22 23:57:28.484287: step: 444/531, loss: 0.07568464428186417 2023-01-22 23:57:29.624998: step: 448/531, loss: 0.17182761430740356 2023-01-22 23:57:30.752327: step: 452/531, loss: 0.44821253418922424 2023-01-22 23:57:31.877963: step: 456/531, loss: 1.8453174829483032 2023-01-22 23:57:33.000347: step: 460/531, loss: 0.36006927490234375 2023-01-22 23:57:34.114594: step: 464/531, loss: 0.12083110958337784 2023-01-22 23:57:35.252473: step: 468/531, loss: 0.47441715002059937 2023-01-22 23:57:36.356265: step: 472/531, loss: 0.060214996337890625 2023-01-22 23:57:37.454986: step: 476/531, loss: 0.05754494667053223 2023-01-22 23:57:38.580197: step: 480/531, loss: 0.0638250783085823 2023-01-22 23:57:39.687111: step: 484/531, loss: 0.7962153553962708 2023-01-22 23:57:40.840882: step: 488/531, loss: 0.3979437053203583 2023-01-22 23:57:41.959757: step: 492/531, loss: 0.5276994705200195 2023-01-22 23:57:43.102143: step: 496/531, loss: 0.1254461258649826 2023-01-22 23:57:44.223589: step: 500/531, loss: 0.10908088833093643 2023-01-22 23:57:45.345971: step: 504/531, loss: 0.1849699467420578 2023-01-22 23:57:46.449383: step: 508/531, loss: 0.1103767454624176 2023-01-22 23:57:47.588979: step: 512/531, loss: 0.2388903647661209 2023-01-22 23:57:48.700932: step: 516/531, loss: 0.08560748398303986 2023-01-22 23:57:49.828654: step: 520/531, loss: 1.113990068435669 2023-01-22 23:57:50.934895: step: 524/531, loss: 0.15884310007095337 2023-01-22 23:57:52.058478: step: 528/531, loss: 0.06486983597278595 2023-01-22 23:57:53.210516: step: 532/531, loss: 0.15931951999664307 2023-01-22 23:57:54.334909: step: 536/531, loss: 0.07498688995838165 2023-01-22 23:57:55.469081: step: 540/531, loss: 0.37991830706596375 2023-01-22 23:57:56.572168: step: 544/531, loss: 0.2313407063484192 2023-01-22 23:57:57.701786: step: 548/531, loss: 0.5404362678527832 2023-01-22 23:57:58.847414: step: 552/531, loss: 0.16544213891029358 2023-01-22 23:57:59.951354: step: 556/531, loss: 0.13815440237522125 2023-01-22 23:58:01.094207: step: 560/531, loss: 0.04776563495397568 2023-01-22 23:58:02.207686: step: 564/531, loss: 0.09709911793470383 2023-01-22 23:58:03.366253: step: 568/531, loss: 0.1378200501203537 2023-01-22 23:58:04.486133: step: 572/531, loss: 0.6328756809234619 2023-01-22 23:58:05.620106: step: 576/531, loss: 0.9069112539291382 2023-01-22 23:58:06.777809: step: 580/531, loss: 0.34874820709228516 2023-01-22 23:58:07.922141: step: 584/531, loss: 0.07492513954639435 2023-01-22 23:58:09.022195: step: 588/531, loss: 0.03363952785730362 2023-01-22 23:58:10.145102: step: 592/531, loss: 0.16928711533546448 2023-01-22 23:58:11.283744: step: 596/531, loss: 0.37546029686927795 2023-01-22 23:58:12.419549: step: 600/531, loss: 0.0743885487318039 2023-01-22 23:58:13.559286: step: 604/531, loss: 0.12271638214588165 2023-01-22 23:58:14.681873: step: 608/531, loss: 0.10387945175170898 2023-01-22 23:58:15.797518: step: 612/531, loss: 0.35221025347709656 2023-01-22 23:58:16.935616: step: 616/531, loss: 0.23270520567893982 2023-01-22 23:58:18.062548: step: 620/531, loss: 0.4657605290412903 2023-01-22 23:58:19.185064: step: 624/531, loss: 0.2584255337715149 2023-01-22 23:58:20.301443: step: 628/531, loss: 1.6028679609298706 2023-01-22 23:58:21.443286: step: 632/531, loss: 0.24047031998634338 2023-01-22 23:58:22.603645: step: 636/531, loss: 0.15306806564331055 2023-01-22 23:58:23.745509: step: 640/531, loss: 3.189697265625 2023-01-22 23:58:24.829451: step: 644/531, loss: 0.1187511458992958 2023-01-22 23:58:25.956464: step: 648/531, loss: 0.14820004999637604 2023-01-22 23:58:27.117701: step: 652/531, loss: 0.7147862911224365 2023-01-22 23:58:28.230323: step: 656/531, loss: 0.8112518191337585 2023-01-22 23:58:29.351783: step: 660/531, loss: 0.6644281148910522 2023-01-22 23:58:30.481342: step: 664/531, loss: 0.2926095724105835 2023-01-22 23:58:31.607663: step: 668/531, loss: 0.5058299899101257 2023-01-22 23:58:32.724323: step: 672/531, loss: 0.09663629531860352 2023-01-22 23:58:33.825016: step: 676/531, loss: 0.28038692474365234 2023-01-22 23:58:34.964063: step: 680/531, loss: 0.16675615310668945 2023-01-22 23:58:36.112891: step: 684/531, loss: 0.16124801337718964 2023-01-22 23:58:37.255414: step: 688/531, loss: 0.6827594041824341 2023-01-22 23:58:38.370651: step: 692/531, loss: 0.6800993084907532 2023-01-22 23:58:39.496906: step: 696/531, loss: 0.20764131844043732 2023-01-22 23:58:40.621805: step: 700/531, loss: 0.1090860366821289 2023-01-22 23:58:41.730803: step: 704/531, loss: 0.15023556351661682 2023-01-22 23:58:42.877135: step: 708/531, loss: 0.5590397119522095 2023-01-22 23:58:44.008290: step: 712/531, loss: 0.21748369932174683 2023-01-22 23:58:45.145196: step: 716/531, loss: 0.23442602157592773 2023-01-22 23:58:46.266569: step: 720/531, loss: 1.1037371158599854 2023-01-22 23:58:47.372683: step: 724/531, loss: 0.19483137130737305 2023-01-22 23:58:48.515923: step: 728/531, loss: 0.2529425621032715 2023-01-22 23:58:49.640061: step: 732/531, loss: 0.06408672034740448 2023-01-22 23:58:50.749258: step: 736/531, loss: 0.11017031967639923 2023-01-22 23:58:51.848679: step: 740/531, loss: 0.13152790069580078 2023-01-22 23:58:52.997153: step: 744/531, loss: 0.1627214550971985 2023-01-22 23:58:54.112335: step: 748/531, loss: 0.05002836883068085 2023-01-22 23:58:55.253982: step: 752/531, loss: 0.0850561186671257 2023-01-22 23:58:56.376260: step: 756/531, loss: 0.26949211955070496 2023-01-22 23:58:57.490440: step: 760/531, loss: 0.09312200546264648 2023-01-22 23:58:58.613737: step: 764/531, loss: 0.4535209536552429 2023-01-22 23:58:59.717576: step: 768/531, loss: 0.8326729536056519 2023-01-22 23:59:00.864015: step: 772/531, loss: 0.39291179180145264 2023-01-22 23:59:01.990360: step: 776/531, loss: 0.14687509834766388 2023-01-22 23:59:03.095925: step: 780/531, loss: 0.11059560626745224 2023-01-22 23:59:04.217887: step: 784/531, loss: 0.22882090508937836 2023-01-22 23:59:05.351258: step: 788/531, loss: 0.9857144355773926 2023-01-22 23:59:06.473915: step: 792/531, loss: 0.37069380283355713 2023-01-22 23:59:07.635064: step: 796/531, loss: 0.15048182010650635 2023-01-22 23:59:08.757907: step: 800/531, loss: 0.11518535763025284 2023-01-22 23:59:09.899591: step: 804/531, loss: 0.6279888153076172 2023-01-22 23:59:11.007714: step: 808/531, loss: 0.554506778717041 2023-01-22 23:59:12.165854: step: 812/531, loss: 0.17894954979419708 2023-01-22 23:59:13.290191: step: 816/531, loss: 0.37503328919410706 2023-01-22 23:59:14.407962: step: 820/531, loss: 0.12509837746620178 2023-01-22 23:59:15.510015: step: 824/531, loss: 0.2600063383579254 2023-01-22 23:59:16.620971: step: 828/531, loss: 0.16963835060596466 2023-01-22 23:59:17.733539: step: 832/531, loss: 0.25909852981567383 2023-01-22 23:59:18.861365: step: 836/531, loss: 0.0875040590763092 2023-01-22 23:59:19.988898: step: 840/531, loss: 0.04245062172412872 2023-01-22 23:59:21.104261: step: 844/531, loss: 0.49918240308761597 2023-01-22 23:59:22.247368: step: 848/531, loss: 1.3098195791244507 2023-01-22 23:59:23.355505: step: 852/531, loss: 0.16476936638355255 2023-01-22 23:59:24.493267: step: 856/531, loss: 0.134425550699234 2023-01-22 23:59:25.617504: step: 860/531, loss: 0.8511238694190979 2023-01-22 23:59:26.738098: step: 864/531, loss: 0.13095274567604065 2023-01-22 23:59:27.898210: step: 868/531, loss: 1.5890244245529175 2023-01-22 23:59:29.004955: step: 872/531, loss: 0.04851360619068146 2023-01-22 23:59:30.111984: step: 876/531, loss: 0.6740989685058594 2023-01-22 23:59:31.229691: step: 880/531, loss: 0.2112097144126892 2023-01-22 23:59:32.354672: step: 884/531, loss: 0.6060776710510254 2023-01-22 23:59:33.485726: step: 888/531, loss: 6.26407527923584 2023-01-22 23:59:34.604532: step: 892/531, loss: 0.17175836861133575 2023-01-22 23:59:35.711577: step: 896/531, loss: 0.10602875053882599 2023-01-22 23:59:36.875566: step: 900/531, loss: 0.16825968027114868 2023-01-22 23:59:37.976390: step: 904/531, loss: 0.08746328949928284 2023-01-22 23:59:39.097243: step: 908/531, loss: 0.24208231270313263 2023-01-22 23:59:40.227918: step: 912/531, loss: 0.24104900658130646 2023-01-22 23:59:41.324471: step: 916/531, loss: 0.05861306190490723 2023-01-22 23:59:42.465604: step: 920/531, loss: 0.12055106461048126 2023-01-22 23:59:43.596687: step: 924/531, loss: 0.293590247631073 2023-01-22 23:59:44.756742: step: 928/531, loss: 0.18651580810546875 2023-01-22 23:59:45.902280: step: 932/531, loss: 0.17169369757175446 2023-01-22 23:59:47.014979: step: 936/531, loss: 2.6086864471435547 2023-01-22 23:59:48.124106: step: 940/531, loss: 0.6810840964317322 2023-01-22 23:59:49.231343: step: 944/531, loss: 0.2558404803276062 2023-01-22 23:59:50.340917: step: 948/531, loss: 0.43018078804016113 2023-01-22 23:59:51.442273: step: 952/531, loss: 0.03276710584759712 2023-01-22 23:59:52.572856: step: 956/531, loss: 0.731675922870636 2023-01-22 23:59:53.754934: step: 960/531, loss: 0.31705302000045776 2023-01-22 23:59:54.870244: step: 964/531, loss: 1.105121374130249 2023-01-22 23:59:55.988790: step: 968/531, loss: 0.02999272383749485 2023-01-22 23:59:57.113505: step: 972/531, loss: 0.29633599519729614 2023-01-22 23:59:58.234485: step: 976/531, loss: 0.5533487200737 2023-01-22 23:59:59.333071: step: 980/531, loss: 0.2234947383403778 2023-01-23 00:00:00.477281: step: 984/531, loss: 0.8931765556335449 2023-01-23 00:00:01.626218: step: 988/531, loss: 0.10681305080652237 2023-01-23 00:00:02.716336: step: 992/531, loss: 0.16969099640846252 2023-01-23 00:00:03.846865: step: 996/531, loss: 0.5271251797676086 2023-01-23 00:00:04.940999: step: 1000/531, loss: 0.02947692945599556 2023-01-23 00:00:06.064177: step: 1004/531, loss: 0.21456784009933472 2023-01-23 00:00:07.189664: step: 1008/531, loss: 0.10657548904418945 2023-01-23 00:00:08.315491: step: 1012/531, loss: 0.14209675788879395 2023-01-23 00:00:09.462568: step: 1016/531, loss: 0.23482033610343933 2023-01-23 00:00:10.609093: step: 1020/531, loss: 0.3176805377006531 2023-01-23 00:00:11.742591: step: 1024/531, loss: 0.06163635477423668 2023-01-23 00:00:12.916313: step: 1028/531, loss: 0.07156524807214737 2023-01-23 00:00:14.055350: step: 1032/531, loss: 0.13432101905345917 2023-01-23 00:00:15.201409: step: 1036/531, loss: 1.5836384296417236 2023-01-23 00:00:16.322615: step: 1040/531, loss: 0.3377200961112976 2023-01-23 00:00:17.437116: step: 1044/531, loss: 0.14433574676513672 2023-01-23 00:00:18.576323: step: 1048/531, loss: 0.16090470552444458 2023-01-23 00:00:19.696950: step: 1052/531, loss: 0.6653269529342651 2023-01-23 00:00:20.831376: step: 1056/531, loss: 0.10369344055652618 2023-01-23 00:00:21.986870: step: 1060/531, loss: 0.30237817764282227 2023-01-23 00:00:23.126631: step: 1064/531, loss: 0.1858772337436676 2023-01-23 00:00:24.224742: step: 1068/531, loss: 6.350717544555664 2023-01-23 00:00:25.352057: step: 1072/531, loss: 0.6866821050643921 2023-01-23 00:00:26.492254: step: 1076/531, loss: 0.13230451941490173 2023-01-23 00:00:27.632377: step: 1080/531, loss: 0.7150003910064697 2023-01-23 00:00:28.756151: step: 1084/531, loss: 0.24188528954982758 2023-01-23 00:00:29.921002: step: 1088/531, loss: 0.2658001780509949 2023-01-23 00:00:31.061062: step: 1092/531, loss: 0.14182662963867188 2023-01-23 00:00:32.153580: step: 1096/531, loss: 0.14601083099842072 2023-01-23 00:00:33.289746: step: 1100/531, loss: 0.33536964654922485 2023-01-23 00:00:34.408132: step: 1104/531, loss: 1.2487106323242188 2023-01-23 00:00:35.513997: step: 1108/531, loss: 0.04573507606983185 2023-01-23 00:00:36.640115: step: 1112/531, loss: 0.19724483788013458 2023-01-23 00:00:37.777933: step: 1116/531, loss: 6.658953666687012 2023-01-23 00:00:38.965821: step: 1120/531, loss: 0.07531404495239258 2023-01-23 00:00:40.080176: step: 1124/531, loss: 0.2183738797903061 2023-01-23 00:00:41.199550: step: 1128/531, loss: 0.09226799011230469 2023-01-23 00:00:42.306016: step: 1132/531, loss: 0.8653135299682617 2023-01-23 00:00:43.413637: step: 1136/531, loss: 0.7104889154434204 2023-01-23 00:00:44.508585: step: 1140/531, loss: 0.26601505279541016 2023-01-23 00:00:45.644758: step: 1144/531, loss: 0.11831030994653702 2023-01-23 00:00:46.757676: step: 1148/531, loss: 0.7495932579040527 2023-01-23 00:00:47.877313: step: 1152/531, loss: 0.3176819384098053 2023-01-23 00:00:49.005284: step: 1156/531, loss: 0.28096848726272583 2023-01-23 00:00:50.166784: step: 1160/531, loss: 0.12641488015651703 2023-01-23 00:00:51.312083: step: 1164/531, loss: 0.6406083106994629 2023-01-23 00:00:52.432544: step: 1168/531, loss: 0.44876059889793396 2023-01-23 00:00:53.550975: step: 1172/531, loss: 0.08108501881361008 2023-01-23 00:00:54.681066: step: 1176/531, loss: 0.12720216810703278 2023-01-23 00:00:55.776348: step: 1180/531, loss: 1.2504231929779053 2023-01-23 00:00:56.895909: step: 1184/531, loss: 0.08104820549488068 2023-01-23 00:00:58.014725: step: 1188/531, loss: 0.2751450538635254 2023-01-23 00:00:59.147271: step: 1192/531, loss: 0.16054968535900116 2023-01-23 00:01:00.264873: step: 1196/531, loss: 0.10385838150978088 2023-01-23 00:01:01.395158: step: 1200/531, loss: 0.22699794173240662 2023-01-23 00:01:02.510225: step: 1204/531, loss: 0.06376848369836807 2023-01-23 00:01:03.614358: step: 1208/531, loss: 0.14717711508274078 2023-01-23 00:01:04.733429: step: 1212/531, loss: 0.1023891419172287 2023-01-23 00:01:05.865676: step: 1216/531, loss: 0.4369228482246399 2023-01-23 00:01:07.012730: step: 1220/531, loss: 0.48825111985206604 2023-01-23 00:01:08.163218: step: 1224/531, loss: 0.49807554483413696 2023-01-23 00:01:09.314400: step: 1228/531, loss: 0.2706001400947571 2023-01-23 00:01:10.435128: step: 1232/531, loss: 0.8898267149925232 2023-01-23 00:01:11.542637: step: 1236/531, loss: 0.6811284422874451 2023-01-23 00:01:12.679867: step: 1240/531, loss: 0.22493773698806763 2023-01-23 00:01:13.789883: step: 1244/531, loss: 0.14221876859664917 2023-01-23 00:01:14.913410: step: 1248/531, loss: 0.24322977662086487 2023-01-23 00:01:16.023768: step: 1252/531, loss: 0.12687310576438904 2023-01-23 00:01:17.124887: step: 1256/531, loss: 0.12520436942577362 2023-01-23 00:01:18.227249: step: 1260/531, loss: 0.5926680564880371 2023-01-23 00:01:19.356218: step: 1264/531, loss: 1.5512936115264893 2023-01-23 00:01:20.455277: step: 1268/531, loss: 0.2939271926879883 2023-01-23 00:01:21.579978: step: 1272/531, loss: 0.16870155930519104 2023-01-23 00:01:22.731557: step: 1276/531, loss: 0.19709309935569763 2023-01-23 00:01:23.849878: step: 1280/531, loss: 0.26249590516090393 2023-01-23 00:01:24.966509: step: 1284/531, loss: 0.3689180314540863 2023-01-23 00:01:26.086406: step: 1288/531, loss: 1.9986234903335571 2023-01-23 00:01:27.207149: step: 1292/531, loss: 0.20813560485839844 2023-01-23 00:01:28.331477: step: 1296/531, loss: 0.2829445004463196 2023-01-23 00:01:29.457830: step: 1300/531, loss: 0.46262797713279724 2023-01-23 00:01:30.582263: step: 1304/531, loss: 1.4950883388519287 2023-01-23 00:01:31.720447: step: 1308/531, loss: 0.7825860977172852 2023-01-23 00:01:32.858229: step: 1312/531, loss: 0.17398595809936523 2023-01-23 00:01:33.960849: step: 1316/531, loss: 1.5933424234390259 2023-01-23 00:01:35.072679: step: 1320/531, loss: 0.3585663139820099 2023-01-23 00:01:36.207088: step: 1324/531, loss: 6.111742973327637 2023-01-23 00:01:37.310595: step: 1328/531, loss: 6.710764408111572 2023-01-23 00:01:38.422008: step: 1332/531, loss: 0.42534637451171875 2023-01-23 00:01:39.562312: step: 1336/531, loss: 6.452024459838867 2023-01-23 00:01:40.666734: step: 1340/531, loss: 0.026490593329072 2023-01-23 00:01:41.803143: step: 1344/531, loss: 0.21514034271240234 2023-01-23 00:01:42.921853: step: 1348/531, loss: 0.11302419006824493 2023-01-23 00:01:44.041986: step: 1352/531, loss: 0.17592263221740723 2023-01-23 00:01:45.156809: step: 1356/531, loss: 0.04754643514752388 2023-01-23 00:01:46.269335: step: 1360/531, loss: 0.07143106311559677 2023-01-23 00:01:47.414657: step: 1364/531, loss: 0.10684919357299805 2023-01-23 00:01:48.534970: step: 1368/531, loss: 0.1371428519487381 2023-01-23 00:01:49.656052: step: 1372/531, loss: 0.1619555503129959 2023-01-23 00:01:50.806083: step: 1376/531, loss: 0.10991425812244415 2023-01-23 00:01:51.922085: step: 1380/531, loss: 0.18041697144508362 2023-01-23 00:01:53.022215: step: 1384/531, loss: 0.13861122727394104 2023-01-23 00:01:54.138973: step: 1388/531, loss: 1.261899471282959 2023-01-23 00:01:55.251947: step: 1392/531, loss: 0.09035201370716095 2023-01-23 00:01:56.354118: step: 1396/531, loss: 0.09430161118507385 2023-01-23 00:01:57.493821: step: 1400/531, loss: 0.2554810643196106 2023-01-23 00:01:58.646974: step: 1404/531, loss: 0.11138715595006943 2023-01-23 00:01:59.766232: step: 1408/531, loss: 0.14705419540405273 2023-01-23 00:02:00.889772: step: 1412/531, loss: 0.08975587040185928 2023-01-23 00:02:02.008916: step: 1416/531, loss: 0.17991198599338531 2023-01-23 00:02:03.123004: step: 1420/531, loss: 0.4787808656692505 2023-01-23 00:02:04.251918: step: 1424/531, loss: 0.6533840298652649 2023-01-23 00:02:05.382955: step: 1428/531, loss: 1.1327035427093506 2023-01-23 00:02:06.512004: step: 1432/531, loss: 0.31544196605682373 2023-01-23 00:02:07.651210: step: 1436/531, loss: 0.19829073548316956 2023-01-23 00:02:08.773318: step: 1440/531, loss: 0.7922886610031128 2023-01-23 00:02:09.886994: step: 1444/531, loss: 0.45242154598236084 2023-01-23 00:02:10.997945: step: 1448/531, loss: 0.0868907943367958 2023-01-23 00:02:12.127483: step: 1452/531, loss: 6.72774600982666 2023-01-23 00:02:13.246168: step: 1456/531, loss: 0.6551020741462708 2023-01-23 00:02:14.365894: step: 1460/531, loss: 0.38183632493019104 2023-01-23 00:02:15.467787: step: 1464/531, loss: 0.17334146797657013 2023-01-23 00:02:16.600385: step: 1468/531, loss: 0.06473560631275177 2023-01-23 00:02:17.730467: step: 1472/531, loss: 1.2650949954986572 2023-01-23 00:02:18.862213: step: 1476/531, loss: 0.9869539141654968 2023-01-23 00:02:19.997627: step: 1480/531, loss: 0.7808462977409363 2023-01-23 00:02:21.112909: step: 1484/531, loss: 0.29303520917892456 2023-01-23 00:02:22.250053: step: 1488/531, loss: 0.31090497970581055 2023-01-23 00:02:23.379811: step: 1492/531, loss: 0.509864091873169 2023-01-23 00:02:24.531449: step: 1496/531, loss: 0.2444847822189331 2023-01-23 00:02:25.669360: step: 1500/531, loss: 0.7062308192253113 2023-01-23 00:02:26.808022: step: 1504/531, loss: 0.12572498619556427 2023-01-23 00:02:27.913687: step: 1508/531, loss: 0.10840673744678497 2023-01-23 00:02:29.067383: step: 1512/531, loss: 0.21810230612754822 2023-01-23 00:02:30.205774: step: 1516/531, loss: 0.1108594462275505 2023-01-23 00:02:31.335521: step: 1520/531, loss: 0.13676194846630096 2023-01-23 00:02:32.455351: step: 1524/531, loss: 1.5128180980682373 2023-01-23 00:02:33.584974: step: 1528/531, loss: 0.31835395097732544 2023-01-23 00:02:34.720616: step: 1532/531, loss: 0.18029403686523438 2023-01-23 00:02:35.831019: step: 1536/531, loss: 0.0620429553091526 2023-01-23 00:02:36.943161: step: 1540/531, loss: 0.9665861129760742 2023-01-23 00:02:38.064333: step: 1544/531, loss: 0.23005767166614532 2023-01-23 00:02:39.194482: step: 1548/531, loss: 0.09529547393321991 2023-01-23 00:02:40.332283: step: 1552/531, loss: 0.15144681930541992 2023-01-23 00:02:41.452989: step: 1556/531, loss: 1.3491935729980469 2023-01-23 00:02:42.572170: step: 1560/531, loss: 0.4917375445365906 2023-01-23 00:02:43.713300: step: 1564/531, loss: 1.0351355075836182 2023-01-23 00:02:44.814926: step: 1568/531, loss: 0.24221372604370117 2023-01-23 00:02:45.949303: step: 1572/531, loss: 0.6614864468574524 2023-01-23 00:02:47.080655: step: 1576/531, loss: 0.1420409232378006 2023-01-23 00:02:48.185778: step: 1580/531, loss: 0.5234269499778748 2023-01-23 00:02:49.299143: step: 1584/531, loss: 0.2370121031999588 2023-01-23 00:02:50.445153: step: 1588/531, loss: 0.12778696417808533 2023-01-23 00:02:51.554584: step: 1592/531, loss: 0.07138128578662872 2023-01-23 00:02:52.678818: step: 1596/531, loss: 0.14819002151489258 2023-01-23 00:02:53.837479: step: 1600/531, loss: 0.6488267779350281 2023-01-23 00:02:54.969951: step: 1604/531, loss: 0.1852748841047287 2023-01-23 00:02:56.085272: step: 1608/531, loss: 1.0953336954116821 2023-01-23 00:02:57.235320: step: 1612/531, loss: 0.6988387107849121 2023-01-23 00:02:58.376295: step: 1616/531, loss: 0.14900445938110352 2023-01-23 00:02:59.557577: step: 1620/531, loss: 0.38943299651145935 2023-01-23 00:03:00.707645: step: 1624/531, loss: 0.33376407623291016 2023-01-23 00:03:01.849874: step: 1628/531, loss: 0.144989013671875 2023-01-23 00:03:02.957493: step: 1632/531, loss: 0.09367694705724716 2023-01-23 00:03:04.069547: step: 1636/531, loss: 0.9546205401420593 2023-01-23 00:03:05.184769: step: 1640/531, loss: 0.20469646155834198 2023-01-23 00:03:06.285505: step: 1644/531, loss: 0.4632951021194458 2023-01-23 00:03:07.408928: step: 1648/531, loss: 1.105305790901184 2023-01-23 00:03:08.522965: step: 1652/531, loss: 0.059732675552368164 2023-01-23 00:03:09.636008: step: 1656/531, loss: 0.736891508102417 2023-01-23 00:03:10.763806: step: 1660/531, loss: 0.0957973450422287 2023-01-23 00:03:11.888260: step: 1664/531, loss: 0.05791821703314781 2023-01-23 00:03:13.019904: step: 1668/531, loss: 0.2219129502773285 2023-01-23 00:03:14.149820: step: 1672/531, loss: 0.19374127686023712 2023-01-23 00:03:15.256046: step: 1676/531, loss: 0.10098528861999512 2023-01-23 00:03:16.369065: step: 1680/531, loss: 0.2915896475315094 2023-01-23 00:03:17.511165: step: 1684/531, loss: 0.13615313172340393 2023-01-23 00:03:18.664627: step: 1688/531, loss: 1.0060828924179077 2023-01-23 00:03:19.802268: step: 1692/531, loss: 0.7194678783416748 2023-01-23 00:03:20.921247: step: 1696/531, loss: 0.06060910224914551 2023-01-23 00:03:22.032382: step: 1700/531, loss: 0.0589454285800457 2023-01-23 00:03:23.157965: step: 1704/531, loss: 0.21398936212062836 2023-01-23 00:03:24.279738: step: 1708/531, loss: 0.118120476603508 2023-01-23 00:03:25.396455: step: 1712/531, loss: 0.2954292297363281 2023-01-23 00:03:26.517725: step: 1716/531, loss: 0.14494939148426056 2023-01-23 00:03:27.693522: step: 1720/531, loss: 0.5078255534172058 2023-01-23 00:03:28.828789: step: 1724/531, loss: 0.10290718078613281 2023-01-23 00:03:29.954368: step: 1728/531, loss: 0.1213926300406456 2023-01-23 00:03:31.041151: step: 1732/531, loss: 0.6624854803085327 2023-01-23 00:03:32.166948: step: 1736/531, loss: 0.033014774322509766 2023-01-23 00:03:33.284520: step: 1740/531, loss: 0.24623575806617737 2023-01-23 00:03:34.409578: step: 1744/531, loss: 6.728213310241699 2023-01-23 00:03:35.544593: step: 1748/531, loss: 0.049927666783332825 2023-01-23 00:03:36.652280: step: 1752/531, loss: 0.10648477077484131 2023-01-23 00:03:37.787847: step: 1756/531, loss: 0.32085666060447693 2023-01-23 00:03:38.934370: step: 1760/531, loss: 0.5215115547180176 2023-01-23 00:03:40.105573: step: 1764/531, loss: 0.0846908837556839 2023-01-23 00:03:41.246706: step: 1768/531, loss: 0.5208784341812134 2023-01-23 00:03:42.374666: step: 1772/531, loss: 0.23698830604553223 2023-01-23 00:03:43.485160: step: 1776/531, loss: 0.5973255634307861 2023-01-23 00:03:44.590599: step: 1780/531, loss: 0.4342270791530609 2023-01-23 00:03:45.724193: step: 1784/531, loss: 0.1437152922153473 2023-01-23 00:03:46.849383: step: 1788/531, loss: 0.5214222073554993 2023-01-23 00:03:47.973927: step: 1792/531, loss: 0.14058314263820648 2023-01-23 00:03:49.129457: step: 1796/531, loss: 0.23124293982982635 2023-01-23 00:03:50.236782: step: 1800/531, loss: 0.16713590919971466 2023-01-23 00:03:51.354182: step: 1804/531, loss: 0.21768590807914734 2023-01-23 00:03:52.496807: step: 1808/531, loss: 0.03388547897338867 2023-01-23 00:03:53.626118: step: 1812/531, loss: 0.43925586342811584 2023-01-23 00:03:54.756306: step: 1816/531, loss: 0.42096811532974243 2023-01-23 00:03:55.880198: step: 1820/531, loss: 0.9137694239616394 2023-01-23 00:03:56.995952: step: 1824/531, loss: 0.035216882824897766 2023-01-23 00:03:58.124820: step: 1828/531, loss: 0.12874551117420197 2023-01-23 00:03:59.237068: step: 1832/531, loss: 0.115388423204422 2023-01-23 00:04:00.336610: step: 1836/531, loss: 6.278397560119629 2023-01-23 00:04:01.441932: step: 1840/531, loss: 0.22608862817287445 2023-01-23 00:04:02.560944: step: 1844/531, loss: 0.1218988448381424 2023-01-23 00:04:03.698935: step: 1848/531, loss: 0.2171352356672287 2023-01-23 00:04:04.840604: step: 1852/531, loss: 0.05553038418292999 2023-01-23 00:04:05.950900: step: 1856/531, loss: 0.1022796630859375 2023-01-23 00:04:07.091896: step: 1860/531, loss: 0.12974414229393005 2023-01-23 00:04:08.229238: step: 1864/531, loss: 1.0389344692230225 2023-01-23 00:04:09.310750: step: 1868/531, loss: 0.10201707482337952 2023-01-23 00:04:10.428174: step: 1872/531, loss: 1.1278913021087646 2023-01-23 00:04:11.555131: step: 1876/531, loss: 0.0420563705265522 2023-01-23 00:04:12.657858: step: 1880/531, loss: 0.16171474754810333 2023-01-23 00:04:13.766283: step: 1884/531, loss: 0.058796025812625885 2023-01-23 00:04:14.877853: step: 1888/531, loss: 0.09874926507472992 2023-01-23 00:04:15.985354: step: 1892/531, loss: 0.32869645953178406 2023-01-23 00:04:17.123909: step: 1896/531, loss: 1.0184884071350098 2023-01-23 00:04:18.261144: step: 1900/531, loss: 0.05754880607128143 2023-01-23 00:04:19.403477: step: 1904/531, loss: 0.07989501953125 2023-01-23 00:04:20.536554: step: 1908/531, loss: 0.101422980427742 2023-01-23 00:04:21.650649: step: 1912/531, loss: 0.12306462228298187 2023-01-23 00:04:22.741438: step: 1916/531, loss: 0.10077142715454102 2023-01-23 00:04:23.891162: step: 1920/531, loss: 0.16026315093040466 2023-01-23 00:04:25.014239: step: 1924/531, loss: 0.15832313895225525 2023-01-23 00:04:26.138203: step: 1928/531, loss: 0.11770325154066086 2023-01-23 00:04:27.272817: step: 1932/531, loss: 0.10809460282325745 2023-01-23 00:04:28.411135: step: 1936/531, loss: 0.26179322600364685 2023-01-23 00:04:29.546541: step: 1940/531, loss: 0.04438791424036026 2023-01-23 00:04:30.666177: step: 1944/531, loss: 0.6509518027305603 2023-01-23 00:04:31.799652: step: 1948/531, loss: 0.09031243622303009 2023-01-23 00:04:32.917941: step: 1952/531, loss: 0.7060879468917847 2023-01-23 00:04:34.042931: step: 1956/531, loss: 0.05508151277899742 2023-01-23 00:04:35.151607: step: 1960/531, loss: 0.5473034381866455 2023-01-23 00:04:36.277768: step: 1964/531, loss: 0.30443769693374634 2023-01-23 00:04:37.423694: step: 1968/531, loss: 0.1759936809539795 2023-01-23 00:04:38.550781: step: 1972/531, loss: 0.14455018937587738 2023-01-23 00:04:39.699711: step: 1976/531, loss: 0.752804160118103 2023-01-23 00:04:40.836825: step: 1980/531, loss: 0.18311309814453125 2023-01-23 00:04:41.952619: step: 1984/531, loss: 0.3597634434700012 2023-01-23 00:04:43.071221: step: 1988/531, loss: 0.12116880714893341 2023-01-23 00:04:44.219644: step: 1992/531, loss: 0.9020876884460449 2023-01-23 00:04:45.361380: step: 1996/531, loss: 0.11265754699707031 2023-01-23 00:04:46.448544: step: 2000/531, loss: 0.20572224259376526 2023-01-23 00:04:47.585214: step: 2004/531, loss: 0.5403975248336792 2023-01-23 00:04:48.714290: step: 2008/531, loss: 0.4854414463043213 2023-01-23 00:04:49.868024: step: 2012/531, loss: 0.6149066090583801 2023-01-23 00:04:51.003661: step: 2016/531, loss: 0.1524006873369217 2023-01-23 00:04:52.133592: step: 2020/531, loss: 0.32329440116882324 2023-01-23 00:04:53.255388: step: 2024/531, loss: 0.08343849331140518 2023-01-23 00:04:54.362033: step: 2028/531, loss: 0.22176915407180786 2023-01-23 00:04:55.476274: step: 2032/531, loss: 0.09827204048633575 2023-01-23 00:04:56.630303: step: 2036/531, loss: 0.13515625894069672 2023-01-23 00:04:57.763739: step: 2040/531, loss: 0.16757479310035706 2023-01-23 00:04:58.891814: step: 2044/531, loss: 0.37889060378074646 2023-01-23 00:05:00.032128: step: 2048/531, loss: 0.24546527862548828 2023-01-23 00:05:01.155628: step: 2052/531, loss: 0.651732325553894 2023-01-23 00:05:02.233459: step: 2056/531, loss: 0.6140133738517761 2023-01-23 00:05:03.372351: step: 2060/531, loss: 0.3050972819328308 2023-01-23 00:05:04.492556: step: 2064/531, loss: 0.12994174659252167 2023-01-23 00:05:05.612587: step: 2068/531, loss: 0.528088390827179 2023-01-23 00:05:06.741354: step: 2072/531, loss: 0.0399443656206131 2023-01-23 00:05:07.874089: step: 2076/531, loss: 0.19521847367286682 2023-01-23 00:05:09.007410: step: 2080/531, loss: 0.3238506317138672 2023-01-23 00:05:10.117317: step: 2084/531, loss: 0.8641220331192017 2023-01-23 00:05:11.225851: step: 2088/531, loss: 0.1053767204284668 2023-01-23 00:05:12.345428: step: 2092/531, loss: 0.6582637429237366 2023-01-23 00:05:13.440804: step: 2096/531, loss: 0.12176046520471573 2023-01-23 00:05:14.562174: step: 2100/531, loss: 0.5314443111419678 2023-01-23 00:05:15.701273: step: 2104/531, loss: 0.15437598526477814 2023-01-23 00:05:16.817618: step: 2108/531, loss: 0.7992597222328186 2023-01-23 00:05:17.959937: step: 2112/531, loss: 0.13027337193489075 2023-01-23 00:05:19.077333: step: 2116/531, loss: 0.2880975604057312 2023-01-23 00:05:20.217821: step: 2120/531, loss: 0.06377029418945312 2023-01-23 00:05:21.342123: step: 2124/531, loss: 0.15702953934669495 ================================================== Loss: 0.483 -------------------- Dev: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Test: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Korean: {'event': {'p': 0.71875, 'r': 0.36507936507936506, 'f1': 0.4842105263157895}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Russian: {'event': {'p': 0.6956521739130435, 'r': 0.4444444444444444, 'f1': 0.5423728813559322}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.5747001090512541, 'r': 0.7017310252996005, 'f1': 0.6318944844124701}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Eng Test for Korean: {'event': {'p': 0.530718336483932, 'r': 0.6696481812760883, 'f1': 0.5921434220933298}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'event': {'p': 0.8125, 'r': 0.4126984126984127, 'f1': 0.5473684210526316}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} -------------------- Eng Dev for Russian: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.6956521739130435, 'r': 0.4444444444444444, 'f1': 0.5423728813559322}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:06:18.468954: step: 4/531, loss: 0.20924778282642365 2023-01-23 00:06:19.635171: step: 8/531, loss: 0.33616772294044495 2023-01-23 00:06:20.743410: step: 12/531, loss: 0.042319297790527344 2023-01-23 00:06:21.867030: step: 16/531, loss: 8.283539772033691 2023-01-23 00:06:22.954290: step: 20/531, loss: 0.04022035747766495 2023-01-23 00:06:24.067369: step: 24/531, loss: 0.06882859021425247 2023-01-23 00:06:25.192317: step: 28/531, loss: 0.12509828805923462 2023-01-23 00:06:26.317675: step: 32/531, loss: 0.6324887275695801 2023-01-23 00:06:27.441159: step: 36/531, loss: 0.11299276351928711 2023-01-23 00:06:28.590535: step: 40/531, loss: 0.07704886794090271 2023-01-23 00:06:29.694660: step: 44/531, loss: 0.32479286193847656 2023-01-23 00:06:30.828513: step: 48/531, loss: 0.0973147451877594 2023-01-23 00:06:31.964088: step: 52/531, loss: 0.19197535514831543 2023-01-23 00:06:33.089563: step: 56/531, loss: 0.1737435758113861 2023-01-23 00:06:34.210986: step: 60/531, loss: 0.097254179418087 2023-01-23 00:06:35.314547: step: 64/531, loss: 0.08131766319274902 2023-01-23 00:06:36.436291: step: 68/531, loss: 0.9577943682670593 2023-01-23 00:06:37.571007: step: 72/531, loss: 0.16581693291664124 2023-01-23 00:06:38.742825: step: 76/531, loss: 0.07871170341968536 2023-01-23 00:06:39.853615: step: 80/531, loss: 0.14577293395996094 2023-01-23 00:06:40.976678: step: 84/531, loss: 0.1412622481584549 2023-01-23 00:06:42.115505: step: 88/531, loss: 0.7363821268081665 2023-01-23 00:06:43.261236: step: 92/531, loss: 0.22683724761009216 2023-01-23 00:06:44.379929: step: 96/531, loss: 0.05515570566058159 2023-01-23 00:06:45.524297: step: 100/531, loss: 0.10706081986427307 2023-01-23 00:06:46.651738: step: 104/531, loss: 0.05919227749109268 2023-01-23 00:06:47.789016: step: 108/531, loss: 1.2315568923950195 2023-01-23 00:06:48.924225: step: 112/531, loss: 0.711255669593811 2023-01-23 00:06:50.061240: step: 116/531, loss: 1.5955922603607178 2023-01-23 00:06:51.176023: step: 120/531, loss: 0.11847896873950958 2023-01-23 00:06:52.312955: step: 124/531, loss: 0.07389245182275772 2023-01-23 00:06:53.435288: step: 128/531, loss: 0.09024009853601456 2023-01-23 00:06:54.549590: step: 132/531, loss: 0.7096128463745117 2023-01-23 00:06:55.676356: step: 136/531, loss: 0.13612642884254456 2023-01-23 00:06:56.823965: step: 140/531, loss: 0.14436140656471252 2023-01-23 00:06:57.946329: step: 144/531, loss: 0.2555411458015442 2023-01-23 00:06:59.091008: step: 148/531, loss: 0.1854534149169922 2023-01-23 00:07:00.204544: step: 152/531, loss: 5.722158432006836 2023-01-23 00:07:01.327244: step: 156/531, loss: 0.15536102652549744 2023-01-23 00:07:02.447883: step: 160/531, loss: 0.21621212363243103 2023-01-23 00:07:03.559663: step: 164/531, loss: 0.12443169951438904 2023-01-23 00:07:04.666798: step: 168/531, loss: 0.1415562629699707 2023-01-23 00:07:05.800187: step: 172/531, loss: 0.7281516790390015 2023-01-23 00:07:06.945536: step: 176/531, loss: 0.19980435073375702 2023-01-23 00:07:08.084422: step: 180/531, loss: 0.6728843450546265 2023-01-23 00:07:09.235409: step: 184/531, loss: 0.15898753702640533 2023-01-23 00:07:10.360820: step: 188/531, loss: 0.11105690151453018 2023-01-23 00:07:11.484926: step: 192/531, loss: 0.36551544070243835 2023-01-23 00:07:12.643708: step: 196/531, loss: 0.052789025008678436 2023-01-23 00:07:13.756404: step: 200/531, loss: 0.08533172309398651 2023-01-23 00:07:14.897066: step: 204/531, loss: 0.11238765716552734 2023-01-23 00:07:16.013105: step: 208/531, loss: 0.6849081516265869 2023-01-23 00:07:17.120600: step: 212/531, loss: 0.47072678804397583 2023-01-23 00:07:18.254311: step: 216/531, loss: 0.7471441030502319 2023-01-23 00:07:19.379690: step: 220/531, loss: 0.5334821939468384 2023-01-23 00:07:20.520844: step: 224/531, loss: 0.2592759132385254 2023-01-23 00:07:21.654287: step: 228/531, loss: 0.9253948926925659 2023-01-23 00:07:22.792188: step: 232/531, loss: 0.09056291729211807 2023-01-23 00:07:23.905569: step: 236/531, loss: 1.8205885887145996 2023-01-23 00:07:25.034339: step: 240/531, loss: 0.24185581505298615 2023-01-23 00:07:26.168916: step: 244/531, loss: 0.1812501847743988 2023-01-23 00:07:27.344087: step: 248/531, loss: 0.1305214911699295 2023-01-23 00:07:28.453749: step: 252/531, loss: 0.23132172226905823 2023-01-23 00:07:29.571512: step: 256/531, loss: 0.5506876111030579 2023-01-23 00:07:30.688357: step: 260/531, loss: 0.09004230797290802 2023-01-23 00:07:31.839709: step: 264/531, loss: 0.18773241341114044 2023-01-23 00:07:32.974864: step: 268/531, loss: 0.2571328580379486 2023-01-23 00:07:34.119707: step: 272/531, loss: 0.03196907043457031 2023-01-23 00:07:35.231193: step: 276/531, loss: 0.5614147186279297 2023-01-23 00:07:36.340533: step: 280/531, loss: 5.829814434051514 2023-01-23 00:07:37.464105: step: 284/531, loss: 1.4233050346374512 2023-01-23 00:07:38.597415: step: 288/531, loss: 0.14850550889968872 2023-01-23 00:07:39.735058: step: 292/531, loss: 0.031071018427610397 2023-01-23 00:07:40.887616: step: 296/531, loss: 0.08955345302820206 2023-01-23 00:07:42.022567: step: 300/531, loss: 0.135033518075943 2023-01-23 00:07:43.171945: step: 304/531, loss: 0.3570517897605896 2023-01-23 00:07:44.327702: step: 308/531, loss: 0.1132669448852539 2023-01-23 00:07:45.441650: step: 312/531, loss: 0.1917099952697754 2023-01-23 00:07:46.561645: step: 316/531, loss: 0.12852011620998383 2023-01-23 00:07:47.669678: step: 320/531, loss: 0.7748472094535828 2023-01-23 00:07:48.810006: step: 324/531, loss: 0.6677180528640747 2023-01-23 00:07:49.929861: step: 328/531, loss: 0.14297866821289062 2023-01-23 00:07:51.046297: step: 332/531, loss: 0.11147364974021912 2023-01-23 00:07:52.157203: step: 336/531, loss: 0.6438290476799011 2023-01-23 00:07:53.324545: step: 340/531, loss: 0.2291916459798813 2023-01-23 00:07:54.452290: step: 344/531, loss: 0.08800859749317169 2023-01-23 00:07:55.593801: step: 348/531, loss: 0.10606296360492706 2023-01-23 00:07:56.738617: step: 352/531, loss: 1.613391637802124 2023-01-23 00:07:57.862168: step: 356/531, loss: 0.36305904388427734 2023-01-23 00:07:58.978123: step: 360/531, loss: 0.16070961952209473 2023-01-23 00:08:00.119365: step: 364/531, loss: 0.1045873686671257 2023-01-23 00:08:01.256309: step: 368/531, loss: 0.6269897222518921 2023-01-23 00:08:02.389680: step: 372/531, loss: 0.055081795901060104 2023-01-23 00:08:03.549199: step: 376/531, loss: 0.22504273056983948 2023-01-23 00:08:04.717072: step: 380/531, loss: 0.11127491295337677 2023-01-23 00:08:05.867201: step: 384/531, loss: 0.09604225307703018 2023-01-23 00:08:06.977013: step: 388/531, loss: 0.04972877725958824 2023-01-23 00:08:08.110069: step: 392/531, loss: 0.7691006064414978 2023-01-23 00:08:09.236958: step: 396/531, loss: 0.07439041137695312 2023-01-23 00:08:10.374721: step: 400/531, loss: 0.3480495512485504 2023-01-23 00:08:11.477452: step: 404/531, loss: 0.14600592851638794 2023-01-23 00:08:12.628201: step: 408/531, loss: 0.1575869619846344 2023-01-23 00:08:13.750138: step: 412/531, loss: 0.3718557357788086 2023-01-23 00:08:14.860524: step: 416/531, loss: 0.39317190647125244 2023-01-23 00:08:15.995622: step: 420/531, loss: 0.4093666076660156 2023-01-23 00:08:17.145886: step: 424/531, loss: 1.0968222618103027 2023-01-23 00:08:18.267461: step: 428/531, loss: 0.12537908554077148 2023-01-23 00:08:19.404646: step: 432/531, loss: 0.03981904685497284 2023-01-23 00:08:20.537989: step: 436/531, loss: 0.15898437798023224 2023-01-23 00:08:21.653390: step: 440/531, loss: 0.21721288561820984 2023-01-23 00:08:22.787580: step: 444/531, loss: 0.11548252403736115 2023-01-23 00:08:23.893340: step: 448/531, loss: 0.2627193033695221 2023-01-23 00:08:25.045404: step: 452/531, loss: 0.0582522414624691 2023-01-23 00:08:26.190606: step: 456/531, loss: 0.05561819300055504 2023-01-23 00:08:27.573427: step: 460/531, loss: 0.1412040740251541 2023-01-23 00:08:29.032747: step: 464/531, loss: 0.004426288418471813 2023-01-23 00:08:30.168143: step: 468/531, loss: 0.20757563412189484 2023-01-23 00:08:31.301757: step: 472/531, loss: 0.16466303169727325 2023-01-23 00:08:32.406300: step: 476/531, loss: 0.113164521753788 2023-01-23 00:08:33.516148: step: 480/531, loss: 0.3526877760887146 2023-01-23 00:08:34.633388: step: 484/531, loss: 0.24685317277908325 2023-01-23 00:08:35.746087: step: 488/531, loss: 0.051558591425418854 2023-01-23 00:08:36.880370: step: 492/531, loss: 0.19835443794727325 2023-01-23 00:08:38.001469: step: 496/531, loss: 1.2674591541290283 2023-01-23 00:08:39.170272: step: 500/531, loss: 0.08375606685876846 2023-01-23 00:08:40.303498: step: 504/531, loss: 0.632394015789032 2023-01-23 00:08:41.419304: step: 508/531, loss: 0.2275291532278061 2023-01-23 00:08:42.534489: step: 512/531, loss: 0.11012879014015198 2023-01-23 00:08:43.634707: step: 516/531, loss: 0.5363385081291199 2023-01-23 00:08:44.773478: step: 520/531, loss: 0.15113544464111328 2023-01-23 00:08:45.912739: step: 524/531, loss: 0.09087934345006943 2023-01-23 00:08:47.004047: step: 528/531, loss: 0.026700545102357864 2023-01-23 00:08:48.146705: step: 532/531, loss: 0.2245170623064041 2023-01-23 00:08:49.280548: step: 536/531, loss: 0.2834951877593994 2023-01-23 00:08:50.419776: step: 540/531, loss: 0.6439535617828369 2023-01-23 00:08:51.574986: step: 544/531, loss: 0.02203512191772461 2023-01-23 00:08:52.691736: step: 548/531, loss: 0.15543846786022186 2023-01-23 00:08:53.799185: step: 552/531, loss: 0.09425106644630432 2023-01-23 00:08:54.930484: step: 556/531, loss: 0.3638821840286255 2023-01-23 00:08:56.076852: step: 560/531, loss: 0.07064905017614365 2023-01-23 00:08:57.217350: step: 564/531, loss: 0.7152858972549438 2023-01-23 00:08:58.373145: step: 568/531, loss: 0.047632791101932526 2023-01-23 00:08:59.507626: step: 572/531, loss: 0.16318674385547638 2023-01-23 00:09:00.643830: step: 576/531, loss: 0.12817439436912537 2023-01-23 00:09:01.765899: step: 580/531, loss: 1.096173882484436 2023-01-23 00:09:02.897770: step: 584/531, loss: 0.09704256802797318 2023-01-23 00:09:04.007888: step: 588/531, loss: 0.2589954435825348 2023-01-23 00:09:05.149399: step: 592/531, loss: 0.0656011626124382 2023-01-23 00:09:06.251887: step: 596/531, loss: 0.019220566377043724 2023-01-23 00:09:07.361801: step: 600/531, loss: 0.12142118811607361 2023-01-23 00:09:08.483354: step: 604/531, loss: 0.11405091732740402 2023-01-23 00:09:09.595687: step: 608/531, loss: 0.13228702545166016 2023-01-23 00:09:10.713501: step: 612/531, loss: 0.13077107071876526 2023-01-23 00:09:11.876094: step: 616/531, loss: 1.9059685468673706 2023-01-23 00:09:12.984450: step: 620/531, loss: 0.09871216118335724 2023-01-23 00:09:14.185150: step: 624/531, loss: 0.10402616858482361 2023-01-23 00:09:15.342697: step: 628/531, loss: 0.40327155590057373 2023-01-23 00:09:16.456849: step: 632/531, loss: 0.09236516803503036 2023-01-23 00:09:17.554170: step: 636/531, loss: 0.19326673448085785 2023-01-23 00:09:18.702654: step: 640/531, loss: 1.2940990924835205 2023-01-23 00:09:19.862265: step: 644/531, loss: 0.2951418161392212 2023-01-23 00:09:20.965571: step: 648/531, loss: 6.206678867340088 2023-01-23 00:09:22.118857: step: 652/531, loss: 0.8177210092544556 2023-01-23 00:09:23.261622: step: 656/531, loss: 0.09443321824073792 2023-01-23 00:09:24.407599: step: 660/531, loss: 0.31081074476242065 2023-01-23 00:09:25.503821: step: 664/531, loss: 0.13477078080177307 2023-01-23 00:09:26.663098: step: 668/531, loss: 0.05327220261096954 2023-01-23 00:09:27.823668: step: 672/531, loss: 0.14896708726882935 2023-01-23 00:09:28.941212: step: 676/531, loss: 0.9148578643798828 2023-01-23 00:09:30.056510: step: 680/531, loss: 0.24008570611476898 2023-01-23 00:09:31.165853: step: 684/531, loss: 0.13330745697021484 2023-01-23 00:09:32.269699: step: 688/531, loss: 0.13182926177978516 2023-01-23 00:09:33.416133: step: 692/531, loss: 0.7367156147956848 2023-01-23 00:09:34.544824: step: 696/531, loss: 0.1440873146057129 2023-01-23 00:09:35.687956: step: 700/531, loss: 0.1531650573015213 2023-01-23 00:09:36.818692: step: 704/531, loss: 0.13811349868774414 2023-01-23 00:09:37.913168: step: 708/531, loss: 0.18078504502773285 2023-01-23 00:09:39.052555: step: 712/531, loss: 0.34571951627731323 2023-01-23 00:09:40.178466: step: 716/531, loss: 0.12638406455516815 2023-01-23 00:09:41.287624: step: 720/531, loss: 0.9698561429977417 2023-01-23 00:09:42.428982: step: 724/531, loss: 0.23368030786514282 2023-01-23 00:09:43.550245: step: 728/531, loss: 0.5958910584449768 2023-01-23 00:09:44.672018: step: 732/531, loss: 0.11590452492237091 2023-01-23 00:09:45.776142: step: 736/531, loss: 0.08392305672168732 2023-01-23 00:09:46.887891: step: 740/531, loss: 0.212868794798851 2023-01-23 00:09:48.003711: step: 744/531, loss: 0.25697699189186096 2023-01-23 00:09:49.132365: step: 748/531, loss: 1.6109669208526611 2023-01-23 00:09:50.272335: step: 752/531, loss: 0.0461517795920372 2023-01-23 00:09:51.392220: step: 756/531, loss: 0.2585142254829407 2023-01-23 00:09:52.544545: step: 760/531, loss: 0.22301316261291504 2023-01-23 00:09:53.666221: step: 764/531, loss: 0.08022680133581161 2023-01-23 00:09:54.775440: step: 768/531, loss: 0.513279914855957 2023-01-23 00:09:55.903887: step: 772/531, loss: 0.2278485894203186 2023-01-23 00:09:57.031361: step: 776/531, loss: 0.1086762472987175 2023-01-23 00:09:58.148005: step: 780/531, loss: 0.13428373634815216 2023-01-23 00:09:59.248301: step: 784/531, loss: 0.12129803001880646 2023-01-23 00:10:00.375808: step: 788/531, loss: 0.049597643315792084 2023-01-23 00:10:01.504666: step: 792/531, loss: 0.1002727597951889 2023-01-23 00:10:02.652901: step: 796/531, loss: 0.7631046175956726 2023-01-23 00:10:03.790324: step: 800/531, loss: 0.08250617980957031 2023-01-23 00:10:04.925784: step: 804/531, loss: 0.15585081279277802 2023-01-23 00:10:06.065105: step: 808/531, loss: 0.12457437813282013 2023-01-23 00:10:07.179225: step: 812/531, loss: 0.14095354080200195 2023-01-23 00:10:08.325395: step: 816/531, loss: 0.24597658216953278 2023-01-23 00:10:09.472469: step: 820/531, loss: 0.061493776738643646 2023-01-23 00:10:10.583618: step: 824/531, loss: 0.14348164200782776 2023-01-23 00:10:11.719650: step: 828/531, loss: 0.06926098465919495 2023-01-23 00:10:12.855494: step: 832/531, loss: 0.14651158452033997 2023-01-23 00:10:14.003222: step: 836/531, loss: 0.6801068782806396 2023-01-23 00:10:15.146672: step: 840/531, loss: 0.07741355895996094 2023-01-23 00:10:16.281735: step: 844/531, loss: 0.11129383742809296 2023-01-23 00:10:17.396020: step: 848/531, loss: 0.13678894937038422 2023-01-23 00:10:18.519117: step: 852/531, loss: 0.09396753460168839 2023-01-23 00:10:19.656670: step: 856/531, loss: 0.17143774032592773 2023-01-23 00:10:20.801867: step: 860/531, loss: 0.14983521401882172 2023-01-23 00:10:21.893081: step: 864/531, loss: 0.03581710159778595 2023-01-23 00:10:23.003259: step: 868/531, loss: 0.060904692858457565 2023-01-23 00:10:24.183748: step: 872/531, loss: 0.7633885145187378 2023-01-23 00:10:25.299616: step: 876/531, loss: 0.8558177947998047 2023-01-23 00:10:26.440942: step: 880/531, loss: 0.649976909160614 2023-01-23 00:10:27.540350: step: 884/531, loss: 0.21141234040260315 2023-01-23 00:10:28.689632: step: 888/531, loss: 0.21001359820365906 2023-01-23 00:10:29.808829: step: 892/531, loss: 0.7254459857940674 2023-01-23 00:10:30.933909: step: 896/531, loss: 0.28088387846946716 2023-01-23 00:10:32.057731: step: 900/531, loss: 0.053021907806396484 2023-01-23 00:10:33.202370: step: 904/531, loss: 0.10650554299354553 2023-01-23 00:10:34.320787: step: 908/531, loss: 0.31854599714279175 2023-01-23 00:10:35.457270: step: 912/531, loss: 0.4917248785495758 2023-01-23 00:10:36.557523: step: 916/531, loss: 0.05721016228199005 2023-01-23 00:10:37.662088: step: 920/531, loss: 0.043104078620672226 2023-01-23 00:10:38.784617: step: 924/531, loss: 0.7895603179931641 2023-01-23 00:10:39.905119: step: 928/531, loss: 0.16299457848072052 2023-01-23 00:10:41.039166: step: 932/531, loss: 0.0969444215297699 2023-01-23 00:10:42.166996: step: 936/531, loss: 0.07045784592628479 2023-01-23 00:10:43.288478: step: 940/531, loss: 0.13676562905311584 2023-01-23 00:10:44.397190: step: 944/531, loss: 0.3364361822605133 2023-01-23 00:10:45.532242: step: 948/531, loss: 0.6041025519371033 2023-01-23 00:10:46.637676: step: 952/531, loss: 0.12712879478931427 2023-01-23 00:10:47.747925: step: 956/531, loss: 0.7844495177268982 2023-01-23 00:10:48.873328: step: 960/531, loss: 0.3680562973022461 2023-01-23 00:10:49.999961: step: 964/531, loss: 0.819412350654602 2023-01-23 00:10:51.099972: step: 968/531, loss: 0.2795478403568268 2023-01-23 00:10:52.211480: step: 972/531, loss: 0.6028388738632202 2023-01-23 00:10:53.315434: step: 976/531, loss: 0.26359063386917114 2023-01-23 00:10:54.436594: step: 980/531, loss: 0.9468530416488647 2023-01-23 00:10:55.570240: step: 984/531, loss: 0.1083495169878006 2023-01-23 00:10:56.719131: step: 988/531, loss: 0.11399345844984055 2023-01-23 00:10:57.836631: step: 992/531, loss: 1.3465616703033447 2023-01-23 00:10:58.960331: step: 996/531, loss: 0.9024931788444519 2023-01-23 00:11:00.114986: step: 1000/531, loss: 0.5502773523330688 2023-01-23 00:11:01.252196: step: 1004/531, loss: 0.8719649910926819 2023-01-23 00:11:02.420093: step: 1008/531, loss: 0.6857746243476868 2023-01-23 00:11:03.560966: step: 1012/531, loss: 0.30992692708969116 2023-01-23 00:11:04.687869: step: 1016/531, loss: 2.2987313270568848 2023-01-23 00:11:05.780378: step: 1020/531, loss: 0.04828248172998428 2023-01-23 00:11:06.924894: step: 1024/531, loss: 0.03621535748243332 2023-01-23 00:11:08.056518: step: 1028/531, loss: 0.155064195394516 2023-01-23 00:11:09.190407: step: 1032/531, loss: 0.17044906318187714 2023-01-23 00:11:10.297062: step: 1036/531, loss: 0.2105582356452942 2023-01-23 00:11:11.456028: step: 1040/531, loss: 0.6857356429100037 2023-01-23 00:11:12.623742: step: 1044/531, loss: 0.4065222144126892 2023-01-23 00:11:13.767322: step: 1048/531, loss: 0.17205926775932312 2023-01-23 00:11:14.895085: step: 1052/531, loss: 0.14728470146656036 2023-01-23 00:11:16.014272: step: 1056/531, loss: 0.8519327044487 2023-01-23 00:11:17.135637: step: 1060/531, loss: 0.11078529059886932 2023-01-23 00:11:18.257188: step: 1064/531, loss: 0.08406877517700195 2023-01-23 00:11:19.402669: step: 1068/531, loss: 0.3707761764526367 2023-01-23 00:11:20.515004: step: 1072/531, loss: 0.25494199991226196 2023-01-23 00:11:21.629258: step: 1076/531, loss: 0.14766664803028107 2023-01-23 00:11:22.739524: step: 1080/531, loss: 0.08800826221704483 2023-01-23 00:11:23.910091: step: 1084/531, loss: 0.05193691700696945 2023-01-23 00:11:25.048512: step: 1088/531, loss: 0.15417805314064026 2023-01-23 00:11:26.180575: step: 1092/531, loss: 0.7052686214447021 2023-01-23 00:11:27.343061: step: 1096/531, loss: 0.4181533753871918 2023-01-23 00:11:28.492568: step: 1100/531, loss: 0.41178396344184875 2023-01-23 00:11:29.639675: step: 1104/531, loss: 0.12480497360229492 2023-01-23 00:11:30.751114: step: 1108/531, loss: 5.68168306350708 2023-01-23 00:11:31.882654: step: 1112/531, loss: 0.014397859573364258 2023-01-23 00:11:33.026331: step: 1116/531, loss: 0.22462302446365356 2023-01-23 00:11:34.145004: step: 1120/531, loss: 0.0618349052965641 2023-01-23 00:11:35.254231: step: 1124/531, loss: 0.1266128122806549 2023-01-23 00:11:36.344543: step: 1128/531, loss: 0.0977567732334137 2023-01-23 00:11:37.473941: step: 1132/531, loss: 0.15257807075977325 2023-01-23 00:11:38.615235: step: 1136/531, loss: 0.10911617428064346 2023-01-23 00:11:39.726655: step: 1140/531, loss: 0.7101627588272095 2023-01-23 00:11:40.836819: step: 1144/531, loss: 0.2168382704257965 2023-01-23 00:11:41.950806: step: 1148/531, loss: 0.5644658207893372 2023-01-23 00:11:43.083097: step: 1152/531, loss: 0.6557202935218811 2023-01-23 00:11:44.191369: step: 1156/531, loss: 0.20778541266918182 2023-01-23 00:11:45.336732: step: 1160/531, loss: 0.14528942108154297 2023-01-23 00:11:46.468370: step: 1164/531, loss: 0.5801851749420166 2023-01-23 00:11:47.598160: step: 1168/531, loss: 0.47758749127388 2023-01-23 00:11:48.733979: step: 1172/531, loss: 0.08182573318481445 2023-01-23 00:11:49.878451: step: 1176/531, loss: 0.080461785197258 2023-01-23 00:11:50.992078: step: 1180/531, loss: 0.054553888738155365 2023-01-23 00:11:52.101981: step: 1184/531, loss: 0.04999861866235733 2023-01-23 00:11:53.218480: step: 1188/531, loss: 0.041826628148555756 2023-01-23 00:11:54.331051: step: 1192/531, loss: 0.3056960999965668 2023-01-23 00:11:55.469943: step: 1196/531, loss: 0.3413318395614624 2023-01-23 00:11:56.635517: step: 1200/531, loss: 0.0916716605424881 2023-01-23 00:11:57.755654: step: 1204/531, loss: 0.09605570137500763 2023-01-23 00:11:58.885861: step: 1208/531, loss: 0.7817696332931519 2023-01-23 00:11:59.986432: step: 1212/531, loss: 0.13069410622119904 2023-01-23 00:12:01.104811: step: 1216/531, loss: 0.5280987620353699 2023-01-23 00:12:02.205652: step: 1220/531, loss: 0.4246279001235962 2023-01-23 00:12:03.342046: step: 1224/531, loss: 0.45986607670783997 2023-01-23 00:12:04.456698: step: 1228/531, loss: 0.3266821801662445 2023-01-23 00:12:05.586759: step: 1232/531, loss: 0.29743510484695435 2023-01-23 00:12:06.703749: step: 1236/531, loss: 0.16992712020874023 2023-01-23 00:12:07.841653: step: 1240/531, loss: 0.02969083935022354 2023-01-23 00:12:09.021632: step: 1244/531, loss: 0.4778744578361511 2023-01-23 00:12:10.133351: step: 1248/531, loss: 0.088726706802845 2023-01-23 00:12:11.249335: step: 1252/531, loss: 0.17096376419067383 2023-01-23 00:12:12.382553: step: 1256/531, loss: 0.16469764709472656 2023-01-23 00:12:13.521206: step: 1260/531, loss: 1.0106450319290161 2023-01-23 00:12:14.644166: step: 1264/531, loss: 0.09521637111902237 2023-01-23 00:12:15.787197: step: 1268/531, loss: 0.4491819441318512 2023-01-23 00:12:16.915685: step: 1272/531, loss: 0.12090234458446503 2023-01-23 00:12:18.061702: step: 1276/531, loss: 0.0921357199549675 2023-01-23 00:12:19.181821: step: 1280/531, loss: 0.29400739073753357 2023-01-23 00:12:20.294761: step: 1284/531, loss: 0.1099323034286499 2023-01-23 00:12:21.427185: step: 1288/531, loss: 1.3374457359313965 2023-01-23 00:12:22.553279: step: 1292/531, loss: 0.07441673427820206 2023-01-23 00:12:23.677825: step: 1296/531, loss: 0.21961519122123718 2023-01-23 00:12:24.786194: step: 1300/531, loss: 0.21229895949363708 2023-01-23 00:12:25.904494: step: 1304/531, loss: 0.1740400195121765 2023-01-23 00:12:27.066555: step: 1308/531, loss: 0.6765531301498413 2023-01-23 00:12:28.210837: step: 1312/531, loss: 0.6090461015701294 2023-01-23 00:12:29.319698: step: 1316/531, loss: 0.06302328407764435 2023-01-23 00:12:30.428131: step: 1320/531, loss: 0.20242176949977875 2023-01-23 00:12:31.562607: step: 1324/531, loss: 0.6414319276809692 2023-01-23 00:12:32.707269: step: 1328/531, loss: 0.5580018758773804 2023-01-23 00:12:33.834950: step: 1332/531, loss: 0.03660574182868004 2023-01-23 00:12:34.943392: step: 1336/531, loss: 0.09093776345252991 2023-01-23 00:12:36.092133: step: 1340/531, loss: 0.10649824142456055 2023-01-23 00:12:37.237233: step: 1344/531, loss: 0.24700093269348145 2023-01-23 00:12:38.379164: step: 1348/531, loss: 0.1675254851579666 2023-01-23 00:12:39.498765: step: 1352/531, loss: 0.11417227238416672 2023-01-23 00:12:40.624477: step: 1356/531, loss: 0.06530604511499405 2023-01-23 00:12:41.782493: step: 1360/531, loss: 0.09662504494190216 2023-01-23 00:12:42.887870: step: 1364/531, loss: 0.09656266868114471 2023-01-23 00:12:44.009910: step: 1368/531, loss: 0.13258257508277893 2023-01-23 00:12:45.135553: step: 1372/531, loss: 0.07394042611122131 2023-01-23 00:12:46.285358: step: 1376/531, loss: 0.09123239666223526 2023-01-23 00:12:47.400024: step: 1380/531, loss: 0.3923642039299011 2023-01-23 00:12:48.541171: step: 1384/531, loss: 1.2973023653030396 2023-01-23 00:12:49.672926: step: 1388/531, loss: 0.08193626254796982 2023-01-23 00:12:50.811969: step: 1392/531, loss: 0.07066097110509872 2023-01-23 00:12:51.930716: step: 1396/531, loss: 0.5165959596633911 2023-01-23 00:12:53.086674: step: 1400/531, loss: 0.6549649238586426 2023-01-23 00:12:54.205010: step: 1404/531, loss: 0.5978779196739197 2023-01-23 00:12:55.331655: step: 1408/531, loss: 0.3496781587600708 2023-01-23 00:12:56.462817: step: 1412/531, loss: 0.5873646140098572 2023-01-23 00:12:57.578308: step: 1416/531, loss: 0.14626769721508026 2023-01-23 00:12:58.691470: step: 1420/531, loss: 0.13425074517726898 2023-01-23 00:12:59.796674: step: 1424/531, loss: 0.28182631731033325 2023-01-23 00:13:00.936146: step: 1428/531, loss: 0.6556992530822754 2023-01-23 00:13:02.059694: step: 1432/531, loss: 0.08122219890356064 2023-01-23 00:13:03.179863: step: 1436/531, loss: 3.662775754928589 2023-01-23 00:13:04.287640: step: 1440/531, loss: 0.10315761715173721 2023-01-23 00:13:05.448778: step: 1444/531, loss: 0.16700410842895508 2023-01-23 00:13:06.541688: step: 1448/531, loss: 0.139937162399292 2023-01-23 00:13:07.655281: step: 1452/531, loss: 0.4600054919719696 2023-01-23 00:13:08.778912: step: 1456/531, loss: 0.05763426050543785 2023-01-23 00:13:09.904840: step: 1460/531, loss: 0.74662846326828 2023-01-23 00:13:11.041279: step: 1464/531, loss: 0.40083014965057373 2023-01-23 00:13:12.193323: step: 1468/531, loss: 0.3536946475505829 2023-01-23 00:13:13.301094: step: 1472/531, loss: 0.10159354656934738 2023-01-23 00:13:14.423779: step: 1476/531, loss: 0.2691704332828522 2023-01-23 00:13:15.535378: step: 1480/531, loss: 0.6508985757827759 2023-01-23 00:13:16.672779: step: 1484/531, loss: 0.16213497519493103 2023-01-23 00:13:17.831128: step: 1488/531, loss: 0.13104447722434998 2023-01-23 00:13:18.975341: step: 1492/531, loss: 0.4676194190979004 2023-01-23 00:13:20.110328: step: 1496/531, loss: 0.13090524077415466 2023-01-23 00:13:21.227894: step: 1500/531, loss: 0.295690655708313 2023-01-23 00:13:22.341013: step: 1504/531, loss: 0.10624993592500687 2023-01-23 00:13:23.486424: step: 1508/531, loss: 0.3152994215488434 2023-01-23 00:13:24.589503: step: 1512/531, loss: 0.5497206449508667 2023-01-23 00:13:25.703182: step: 1516/531, loss: 0.5568131804466248 2023-01-23 00:13:26.827831: step: 1520/531, loss: 0.13320913910865784 2023-01-23 00:13:27.963232: step: 1524/531, loss: 0.37717312574386597 2023-01-23 00:13:29.102940: step: 1528/531, loss: 0.10049843788146973 2023-01-23 00:13:30.247292: step: 1532/531, loss: 0.30002593994140625 2023-01-23 00:13:31.351343: step: 1536/531, loss: 0.2790975570678711 2023-01-23 00:13:32.487493: step: 1540/531, loss: 0.19938716292381287 2023-01-23 00:13:33.627096: step: 1544/531, loss: 0.3546084761619568 2023-01-23 00:13:34.734286: step: 1548/531, loss: 0.005626106634736061 2023-01-23 00:13:35.897335: step: 1552/531, loss: 0.9757555723190308 2023-01-23 00:13:37.020598: step: 1556/531, loss: 0.7603006362915039 2023-01-23 00:13:38.178493: step: 1560/531, loss: 1.0087209939956665 2023-01-23 00:13:39.321208: step: 1564/531, loss: 2.440735340118408 2023-01-23 00:13:40.454404: step: 1568/531, loss: 0.025527238845825195 2023-01-23 00:13:41.545391: step: 1572/531, loss: 0.03502168506383896 2023-01-23 00:13:42.689318: step: 1576/531, loss: 0.23762908577919006 2023-01-23 00:13:43.813794: step: 1580/531, loss: 0.05060539394617081 2023-01-23 00:13:44.937169: step: 1584/531, loss: 0.46256598830223083 2023-01-23 00:13:46.056352: step: 1588/531, loss: 0.7527855038642883 2023-01-23 00:13:47.205380: step: 1592/531, loss: 0.700222373008728 2023-01-23 00:13:48.345943: step: 1596/531, loss: 0.1076175719499588 2023-01-23 00:13:49.457260: step: 1600/531, loss: 0.2346941977739334 2023-01-23 00:13:50.569790: step: 1604/531, loss: 0.19995194673538208 2023-01-23 00:13:51.701049: step: 1608/531, loss: 0.09300251305103302 2023-01-23 00:13:52.817927: step: 1612/531, loss: 0.7779393196105957 2023-01-23 00:13:53.941387: step: 1616/531, loss: 0.024361802265048027 2023-01-23 00:13:55.060416: step: 1620/531, loss: 0.846703827381134 2023-01-23 00:13:56.182490: step: 1624/531, loss: 0.3886374533176422 2023-01-23 00:13:57.291260: step: 1628/531, loss: 0.22510060667991638 2023-01-23 00:13:58.414309: step: 1632/531, loss: 0.10726480931043625 2023-01-23 00:13:59.534072: step: 1636/531, loss: 0.1618444323539734 2023-01-23 00:14:00.652943: step: 1640/531, loss: 0.07965681701898575 2023-01-23 00:14:01.770063: step: 1644/531, loss: 0.24817800521850586 2023-01-23 00:14:02.862875: step: 1648/531, loss: 0.39364901185035706 2023-01-23 00:14:03.989897: step: 1652/531, loss: 0.9929062724113464 2023-01-23 00:14:05.103335: step: 1656/531, loss: 2.6192328929901123 2023-01-23 00:14:06.260436: step: 1660/531, loss: 0.6866773962974548 2023-01-23 00:14:07.416986: step: 1664/531, loss: 0.1754910945892334 2023-01-23 00:14:08.516779: step: 1668/531, loss: 0.07648658752441406 2023-01-23 00:14:09.666388: step: 1672/531, loss: 0.23588821291923523 2023-01-23 00:14:10.800048: step: 1676/531, loss: 1.1988518238067627 2023-01-23 00:14:11.938143: step: 1680/531, loss: 0.266082763671875 2023-01-23 00:14:13.046739: step: 1684/531, loss: 0.16698479652404785 2023-01-23 00:14:14.170282: step: 1688/531, loss: 0.2625657320022583 2023-01-23 00:14:15.310352: step: 1692/531, loss: 0.0856928825378418 2023-01-23 00:14:16.464101: step: 1696/531, loss: 0.1346217691898346 2023-01-23 00:14:17.594798: step: 1700/531, loss: 0.20297470688819885 2023-01-23 00:14:18.725927: step: 1704/531, loss: 0.1562003195285797 2023-01-23 00:14:19.841490: step: 1708/531, loss: 0.07241010665893555 2023-01-23 00:14:20.976874: step: 1712/531, loss: 0.2211158275604248 2023-01-23 00:14:22.092512: step: 1716/531, loss: 0.02001185342669487 2023-01-23 00:14:23.177934: step: 1720/531, loss: 0.08154688030481339 2023-01-23 00:14:24.286880: step: 1724/531, loss: 0.30368930101394653 2023-01-23 00:14:25.418598: step: 1728/531, loss: 0.217662051320076 2023-01-23 00:14:26.554009: step: 1732/531, loss: 0.05589735507965088 2023-01-23 00:14:27.678461: step: 1736/531, loss: 0.22880081832408905 2023-01-23 00:14:28.818906: step: 1740/531, loss: 0.6000949144363403 2023-01-23 00:14:29.947513: step: 1744/531, loss: 0.1450720876455307 2023-01-23 00:14:31.055897: step: 1748/531, loss: 0.2290956974029541 2023-01-23 00:14:32.174589: step: 1752/531, loss: 0.022517014294862747 2023-01-23 00:14:33.296532: step: 1756/531, loss: 0.26492661237716675 2023-01-23 00:14:34.455804: step: 1760/531, loss: 0.09574466198682785 2023-01-23 00:14:35.571308: step: 1764/531, loss: 0.5451765060424805 2023-01-23 00:14:36.704598: step: 1768/531, loss: 0.23022547364234924 2023-01-23 00:14:37.873688: step: 1772/531, loss: 0.7778403759002686 2023-01-23 00:14:39.020365: step: 1776/531, loss: 0.038529351353645325 2023-01-23 00:14:40.163623: step: 1780/531, loss: 0.1612699031829834 2023-01-23 00:14:41.281426: step: 1784/531, loss: 0.11677742004394531 2023-01-23 00:14:42.415973: step: 1788/531, loss: 0.19737768173217773 2023-01-23 00:14:43.535078: step: 1792/531, loss: 0.11662693321704865 2023-01-23 00:14:44.647309: step: 1796/531, loss: 0.27351251244544983 2023-01-23 00:14:45.768994: step: 1800/531, loss: 0.08493033051490784 2023-01-23 00:14:46.885549: step: 1804/531, loss: 0.23884770274162292 2023-01-23 00:14:47.981149: step: 1808/531, loss: 0.3763754963874817 2023-01-23 00:14:49.141301: step: 1812/531, loss: 0.8610997796058655 2023-01-23 00:14:50.332871: step: 1816/531, loss: 0.28109779953956604 2023-01-23 00:14:51.443121: step: 1820/531, loss: 0.14995689690113068 2023-01-23 00:14:52.557172: step: 1824/531, loss: 0.1383594572544098 2023-01-23 00:14:53.691895: step: 1828/531, loss: 0.3315194845199585 2023-01-23 00:14:54.802530: step: 1832/531, loss: 0.6067302823066711 2023-01-23 00:14:55.922632: step: 1836/531, loss: 0.35284996032714844 2023-01-23 00:14:57.052862: step: 1840/531, loss: 0.12823915481567383 2023-01-23 00:14:58.185340: step: 1844/531, loss: 0.04045300558209419 2023-01-23 00:14:59.291110: step: 1848/531, loss: 0.24100752174854279 2023-01-23 00:15:00.410855: step: 1852/531, loss: 0.40012702345848083 2023-01-23 00:15:01.545705: step: 1856/531, loss: 0.18638353049755096 2023-01-23 00:15:02.650494: step: 1860/531, loss: 0.07514981925487518 2023-01-23 00:15:03.792032: step: 1864/531, loss: 0.2203844040632248 2023-01-23 00:15:04.937008: step: 1868/531, loss: 0.1560761034488678 2023-01-23 00:15:06.060743: step: 1872/531, loss: 0.5877777338027954 2023-01-23 00:15:07.169307: step: 1876/531, loss: 0.21020327508449554 2023-01-23 00:15:08.293115: step: 1880/531, loss: 0.4344820976257324 2023-01-23 00:15:09.435374: step: 1884/531, loss: 3.293607234954834 2023-01-23 00:15:10.535338: step: 1888/531, loss: 0.652442216873169 2023-01-23 00:15:11.633823: step: 1892/531, loss: 0.3659190535545349 2023-01-23 00:15:12.741516: step: 1896/531, loss: 0.18559308350086212 2023-01-23 00:15:13.857750: step: 1900/531, loss: 1.751516342163086 2023-01-23 00:15:14.996666: step: 1904/531, loss: 0.8812487125396729 2023-01-23 00:15:16.136246: step: 1908/531, loss: 0.40928924083709717 2023-01-23 00:15:17.268928: step: 1912/531, loss: 0.3764389157295227 2023-01-23 00:15:18.381480: step: 1916/531, loss: 0.11168127506971359 2023-01-23 00:15:19.529649: step: 1920/531, loss: 0.29050570726394653 2023-01-23 00:15:20.644586: step: 1924/531, loss: 0.31890085339546204 2023-01-23 00:15:21.755265: step: 1928/531, loss: 0.6248596906661987 2023-01-23 00:15:22.869493: step: 1932/531, loss: 0.043284133076667786 2023-01-23 00:15:23.993699: step: 1936/531, loss: 0.13437804579734802 2023-01-23 00:15:25.151472: step: 1940/531, loss: 0.23680981993675232 2023-01-23 00:15:26.281244: step: 1944/531, loss: 0.7789955139160156 2023-01-23 00:15:27.412715: step: 1948/531, loss: 0.18439841270446777 2023-01-23 00:15:28.554366: step: 1952/531, loss: 0.09562158584594727 2023-01-23 00:15:29.667958: step: 1956/531, loss: 0.250238299369812 2023-01-23 00:15:30.770622: step: 1960/531, loss: 0.007459831424057484 2023-01-23 00:15:31.893473: step: 1964/531, loss: 0.11854877322912216 2023-01-23 00:15:32.993432: step: 1968/531, loss: 1.391023874282837 2023-01-23 00:15:34.132701: step: 1972/531, loss: 0.10419883579015732 2023-01-23 00:15:35.257892: step: 1976/531, loss: 0.3989860415458679 2023-01-23 00:15:36.405090: step: 1980/531, loss: 0.16777630150318146 2023-01-23 00:15:37.541683: step: 1984/531, loss: 0.6184223294258118 2023-01-23 00:15:38.674649: step: 1988/531, loss: 0.09461212158203125 2023-01-23 00:15:39.780416: step: 1992/531, loss: 0.34774529933929443 2023-01-23 00:15:40.870999: step: 1996/531, loss: 3.4562437534332275 2023-01-23 00:15:41.987297: step: 2000/531, loss: 0.2627391815185547 2023-01-23 00:15:43.112075: step: 2004/531, loss: 1.653003454208374 2023-01-23 00:15:44.236069: step: 2008/531, loss: 0.08642206341028214 2023-01-23 00:15:45.372232: step: 2012/531, loss: 0.5813787579536438 2023-01-23 00:15:46.506393: step: 2016/531, loss: 0.6316620707511902 2023-01-23 00:15:47.632313: step: 2020/531, loss: 1.0356746912002563 2023-01-23 00:15:48.723254: step: 2024/531, loss: 0.1543656885623932 2023-01-23 00:15:49.827698: step: 2028/531, loss: 0.3369404077529907 2023-01-23 00:15:50.953218: step: 2032/531, loss: 0.24396809935569763 2023-01-23 00:15:52.067383: step: 2036/531, loss: 0.5506110191345215 2023-01-23 00:15:53.151698: step: 2040/531, loss: 0.5011934041976929 2023-01-23 00:15:54.281276: step: 2044/531, loss: 0.39471155405044556 2023-01-23 00:15:55.406701: step: 2048/531, loss: 0.14521732926368713 2023-01-23 00:15:56.517549: step: 2052/531, loss: 1.0103720426559448 2023-01-23 00:15:57.638984: step: 2056/531, loss: 1.0780736207962036 2023-01-23 00:15:58.762901: step: 2060/531, loss: 0.30792713165283203 2023-01-23 00:15:59.869241: step: 2064/531, loss: 0.5562599897384644 2023-01-23 00:16:00.985187: step: 2068/531, loss: 0.41382914781570435 2023-01-23 00:16:02.109841: step: 2072/531, loss: 0.31271201372146606 2023-01-23 00:16:03.229501: step: 2076/531, loss: 0.048189926892519 2023-01-23 00:16:04.366244: step: 2080/531, loss: 0.208033949136734 2023-01-23 00:16:05.484887: step: 2084/531, loss: 0.03415379673242569 2023-01-23 00:16:06.637180: step: 2088/531, loss: 0.18081635236740112 2023-01-23 00:16:07.765520: step: 2092/531, loss: 0.15192671120166779 2023-01-23 00:16:08.868113: step: 2096/531, loss: 0.19004955887794495 2023-01-23 00:16:09.982813: step: 2100/531, loss: 0.03775153309106827 2023-01-23 00:16:11.104937: step: 2104/531, loss: 0.498241126537323 2023-01-23 00:16:12.214868: step: 2108/531, loss: 0.1664467453956604 2023-01-23 00:16:13.313391: step: 2112/531, loss: 0.1313120424747467 2023-01-23 00:16:14.458769: step: 2116/531, loss: 0.3278893530368805 2023-01-23 00:16:15.605100: step: 2120/531, loss: 0.1384870558977127 2023-01-23 00:16:16.731020: step: 2124/531, loss: 0.14387187361717224 ================================================== Loss: 0.404 -------------------- Dev: {'event': {'p': 0.624384236453202, 'r': 0.6750998668442078, 'f1': 0.6487523992322456}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Test: {'event': {'p': 0.6409844836811129, 'r': 0.714370900417412, 'f1': 0.6756909193457418}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Chinese: {'event': {'p': 0.6515151515151515, 'r': 0.7962962962962963, 'f1': 0.7166666666666667}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Korean: {'event': {'p': 0.7105263157894737, 'r': 0.42857142857142855, 'f1': 0.5346534653465346}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Russian: {'event': {'p': 0.34615384615384615, 'r': 0.25, 'f1': 0.2903225806451613}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} New best korean model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.624384236453202, 'r': 0.6750998668442078, 'f1': 0.6487523992322456}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Eng Test for Korean: {'event': {'p': 0.6409844836811129, 'r': 0.714370900417412, 'f1': 0.6756909193457418}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Korean: {'event': {'p': 0.7105263157894737, 'r': 0.42857142857142855, 'f1': 0.5346534653465346}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Eng Dev for Russian: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.6956521739130435, 'r': 0.4444444444444444, 'f1': 0.5423728813559322}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 5 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:17:03.205229: step: 4/531, loss: 0.13327942788600922 2023-01-23 00:17:04.345499: step: 8/531, loss: 0.07551944255828857 2023-01-23 00:17:05.465980: step: 12/531, loss: 2.286137104034424 2023-01-23 00:17:06.598026: step: 16/531, loss: 0.17033815383911133 2023-01-23 00:17:07.711351: step: 20/531, loss: 0.16239529848098755 2023-01-23 00:17:08.835099: step: 24/531, loss: 0.05642290413379669 2023-01-23 00:17:09.955696: step: 28/531, loss: 0.08547820895910263 2023-01-23 00:17:11.076324: step: 32/531, loss: 0.10226593166589737 2023-01-23 00:17:12.158583: step: 36/531, loss: 0.06672248989343643 2023-01-23 00:17:13.264096: step: 40/531, loss: 0.0877053290605545 2023-01-23 00:17:14.374913: step: 44/531, loss: 0.0523744598031044 2023-01-23 00:17:15.476973: step: 48/531, loss: 0.23877525329589844 2023-01-23 00:17:16.587312: step: 52/531, loss: 0.31593093276023865 2023-01-23 00:17:17.730180: step: 56/531, loss: 4.361067295074463 2023-01-23 00:17:18.845447: step: 60/531, loss: 0.17816415429115295 2023-01-23 00:17:19.964511: step: 64/531, loss: 0.10122337192296982 2023-01-23 00:17:21.101663: step: 68/531, loss: 0.12873688340187073 2023-01-23 00:17:22.239859: step: 72/531, loss: 0.09878092259168625 2023-01-23 00:17:23.375910: step: 76/531, loss: 0.07462453842163086 2023-01-23 00:17:24.487375: step: 80/531, loss: 1.0296183824539185 2023-01-23 00:17:25.611070: step: 84/531, loss: 0.21318235993385315 2023-01-23 00:17:26.752840: step: 88/531, loss: 0.5464677810668945 2023-01-23 00:17:27.879362: step: 92/531, loss: 0.08990707993507385 2023-01-23 00:17:29.002070: step: 96/531, loss: 0.19647157192230225 2023-01-23 00:17:30.118977: step: 100/531, loss: 0.05043353885412216 2023-01-23 00:17:31.224927: step: 104/531, loss: 0.025876427069306374 2023-01-23 00:17:32.353962: step: 108/531, loss: 0.09851732850074768 2023-01-23 00:17:33.458378: step: 112/531, loss: 0.10849341750144958 2023-01-23 00:17:34.585029: step: 116/531, loss: 0.23804521560668945 2023-01-23 00:17:35.698079: step: 120/531, loss: 0.7247149348258972 2023-01-23 00:17:36.836854: step: 124/531, loss: 0.27154821157455444 2023-01-23 00:17:37.960985: step: 128/531, loss: 0.1360892355442047 2023-01-23 00:17:39.083317: step: 132/531, loss: 0.5307134389877319 2023-01-23 00:17:40.187603: step: 136/531, loss: 0.059553682804107666 2023-01-23 00:17:41.271076: step: 140/531, loss: 0.1077481359243393 2023-01-23 00:17:42.380799: step: 144/531, loss: 0.43255600333213806 2023-01-23 00:17:43.504060: step: 148/531, loss: 0.08051643520593643 2023-01-23 00:17:44.629242: step: 152/531, loss: 0.24660225212574005 2023-01-23 00:17:45.749960: step: 156/531, loss: 0.1206233948469162 2023-01-23 00:17:46.857416: step: 160/531, loss: 0.060358334332704544 2023-01-23 00:17:48.015413: step: 164/531, loss: 0.0969579666852951 2023-01-23 00:17:49.128192: step: 168/531, loss: 0.07474646717309952 2023-01-23 00:17:50.295246: step: 172/531, loss: 0.6261759996414185 2023-01-23 00:17:51.419992: step: 176/531, loss: 0.15111775696277618 2023-01-23 00:17:52.560583: step: 180/531, loss: 0.11831426620483398 2023-01-23 00:17:53.677053: step: 184/531, loss: 0.0626072883605957 2023-01-23 00:17:54.791578: step: 188/531, loss: 1.5190863609313965 2023-01-23 00:17:55.900149: step: 192/531, loss: 0.16070805490016937 2023-01-23 00:17:57.032692: step: 196/531, loss: 0.566563606262207 2023-01-23 00:17:58.135930: step: 200/531, loss: 0.0715511366724968 2023-01-23 00:17:59.257224: step: 204/531, loss: 0.7877327799797058 2023-01-23 00:18:00.386357: step: 208/531, loss: 0.12577266991138458 2023-01-23 00:18:01.501775: step: 212/531, loss: 0.22464542090892792 2023-01-23 00:18:02.630739: step: 216/531, loss: 0.2676597535610199 2023-01-23 00:18:03.755635: step: 220/531, loss: 0.09776349365711212 2023-01-23 00:18:04.940217: step: 224/531, loss: 0.14313706755638123 2023-01-23 00:18:06.062287: step: 228/531, loss: 0.007430077064782381 2023-01-23 00:18:07.198325: step: 232/531, loss: 0.03798205778002739 2023-01-23 00:18:08.283920: step: 236/531, loss: 0.015440607443451881 2023-01-23 00:18:09.415911: step: 240/531, loss: 0.1368858367204666 2023-01-23 00:18:10.541522: step: 244/531, loss: 0.17660722136497498 2023-01-23 00:18:11.661941: step: 248/531, loss: 0.07572473585605621 2023-01-23 00:18:12.789042: step: 252/531, loss: 0.13013730943202972 2023-01-23 00:18:13.903963: step: 256/531, loss: 0.14868679642677307 2023-01-23 00:18:15.016949: step: 260/531, loss: 0.10700082778930664 2023-01-23 00:18:16.143796: step: 264/531, loss: 0.5462125539779663 2023-01-23 00:18:17.281958: step: 268/531, loss: 0.20810189843177795 2023-01-23 00:18:18.382854: step: 272/531, loss: 0.08502139896154404 2023-01-23 00:18:19.506651: step: 276/531, loss: 0.14979496598243713 2023-01-23 00:18:20.628794: step: 280/531, loss: 0.18747034668922424 2023-01-23 00:18:21.743470: step: 284/531, loss: 0.3113807737827301 2023-01-23 00:18:22.880777: step: 288/531, loss: 0.36427801847457886 2023-01-23 00:18:23.997357: step: 292/531, loss: 1.1106621026992798 2023-01-23 00:18:25.138253: step: 296/531, loss: 0.0223146453499794 2023-01-23 00:18:26.258763: step: 300/531, loss: 0.1053074300289154 2023-01-23 00:18:27.358061: step: 304/531, loss: 0.3404029905796051 2023-01-23 00:18:28.504452: step: 308/531, loss: 0.25511685013771057 2023-01-23 00:18:29.624312: step: 312/531, loss: 1.8216387033462524 2023-01-23 00:18:30.750485: step: 316/531, loss: 1.1487311124801636 2023-01-23 00:18:31.881546: step: 320/531, loss: 0.14093279838562012 2023-01-23 00:18:32.973445: step: 324/531, loss: 0.06238756328821182 2023-01-23 00:18:34.090561: step: 328/531, loss: 0.12706175446510315 2023-01-23 00:18:35.234239: step: 332/531, loss: 0.13536815345287323 2023-01-23 00:18:36.349397: step: 336/531, loss: 0.13423609733581543 2023-01-23 00:18:37.486087: step: 340/531, loss: 0.07471199333667755 2023-01-23 00:18:38.582340: step: 344/531, loss: 3.4741203784942627 2023-01-23 00:18:39.695938: step: 348/531, loss: 0.22138512134552002 2023-01-23 00:18:40.817291: step: 352/531, loss: 0.39459818601608276 2023-01-23 00:18:41.952606: step: 356/531, loss: 0.029367590323090553 2023-01-23 00:18:43.114906: step: 360/531, loss: 0.1014915481209755 2023-01-23 00:18:44.260779: step: 364/531, loss: 0.32232171297073364 2023-01-23 00:18:45.372136: step: 368/531, loss: 0.19875365495681763 2023-01-23 00:18:46.490244: step: 372/531, loss: 0.1061868667602539 2023-01-23 00:18:47.658722: step: 376/531, loss: 0.29672110080718994 2023-01-23 00:18:48.773581: step: 380/531, loss: 0.2829223573207855 2023-01-23 00:18:49.890416: step: 384/531, loss: 0.11531827598810196 2023-01-23 00:18:51.004766: step: 388/531, loss: 0.0637456402182579 2023-01-23 00:18:52.124849: step: 392/531, loss: 0.14168529212474823 2023-01-23 00:18:53.271591: step: 396/531, loss: 0.225851908326149 2023-01-23 00:18:54.379093: step: 400/531, loss: 0.4207419157028198 2023-01-23 00:18:55.503250: step: 404/531, loss: 0.12054505944252014 2023-01-23 00:18:56.630542: step: 408/531, loss: 0.21454288065433502 2023-01-23 00:18:57.759730: step: 412/531, loss: 0.005415058229118586 2023-01-23 00:18:58.866302: step: 416/531, loss: 0.1372077912092209 2023-01-23 00:19:00.022994: step: 420/531, loss: 0.06324926018714905 2023-01-23 00:19:01.131286: step: 424/531, loss: 0.5226781964302063 2023-01-23 00:19:02.247720: step: 428/531, loss: 0.11421461403369904 2023-01-23 00:19:03.372896: step: 432/531, loss: 0.17379578948020935 2023-01-23 00:19:04.478789: step: 436/531, loss: 0.7596912980079651 2023-01-23 00:19:05.582291: step: 440/531, loss: 0.32616668939590454 2023-01-23 00:19:06.698740: step: 444/531, loss: 0.1080353707075119 2023-01-23 00:19:07.813019: step: 448/531, loss: 0.11216773837804794 2023-01-23 00:19:08.938247: step: 452/531, loss: 0.08574571460485458 2023-01-23 00:19:10.017960: step: 456/531, loss: 0.12597227096557617 2023-01-23 00:19:11.121536: step: 460/531, loss: 0.2102283537387848 2023-01-23 00:19:12.262059: step: 464/531, loss: 0.3382846713066101 2023-01-23 00:19:13.404163: step: 468/531, loss: 0.7317107319831848 2023-01-23 00:19:14.535947: step: 472/531, loss: 0.20980873703956604 2023-01-23 00:19:15.661175: step: 476/531, loss: 0.06017189472913742 2023-01-23 00:19:16.790404: step: 480/531, loss: 0.22499194741249084 2023-01-23 00:19:17.917674: step: 484/531, loss: 0.2171640843153 2023-01-23 00:19:19.022070: step: 488/531, loss: 0.058008477091789246 2023-01-23 00:19:20.141957: step: 492/531, loss: 0.15389442443847656 2023-01-23 00:19:21.260075: step: 496/531, loss: 0.09659262001514435 2023-01-23 00:19:22.392746: step: 500/531, loss: 0.11291804909706116 2023-01-23 00:19:23.524623: step: 504/531, loss: 0.31916096806526184 2023-01-23 00:19:24.664668: step: 508/531, loss: 0.08025255799293518 2023-01-23 00:19:25.804310: step: 512/531, loss: 0.5968890190124512 2023-01-23 00:19:26.948591: step: 516/531, loss: 0.4367731213569641 2023-01-23 00:19:28.111589: step: 520/531, loss: 0.3811578154563904 2023-01-23 00:19:29.220849: step: 524/531, loss: 0.4929841458797455 2023-01-23 00:19:30.342785: step: 528/531, loss: 0.22546052932739258 2023-01-23 00:19:31.483837: step: 532/531, loss: 0.15466861426830292 2023-01-23 00:19:32.616896: step: 536/531, loss: 0.4843331575393677 2023-01-23 00:19:33.726400: step: 540/531, loss: 0.07414913177490234 2023-01-23 00:19:34.852144: step: 544/531, loss: 1.7784169912338257 2023-01-23 00:19:35.959830: step: 548/531, loss: 0.11431710422039032 2023-01-23 00:19:37.094091: step: 552/531, loss: 0.147385835647583 2023-01-23 00:19:38.245857: step: 556/531, loss: 0.7561390995979309 2023-01-23 00:19:39.370183: step: 560/531, loss: 0.14396890997886658 2023-01-23 00:19:40.496312: step: 564/531, loss: 0.0835152193903923 2023-01-23 00:19:41.665928: step: 568/531, loss: 0.41781753301620483 2023-01-23 00:19:42.793858: step: 572/531, loss: 0.1418953835964203 2023-01-23 00:19:43.922115: step: 576/531, loss: 0.259294331073761 2023-01-23 00:19:45.051450: step: 580/531, loss: 0.09604254364967346 2023-01-23 00:19:46.162026: step: 584/531, loss: 0.21384435892105103 2023-01-23 00:19:47.304425: step: 588/531, loss: 0.12788324058055878 2023-01-23 00:19:48.432838: step: 592/531, loss: 0.1271975040435791 2023-01-23 00:19:49.558419: step: 596/531, loss: 0.12607315182685852 2023-01-23 00:19:50.662442: step: 600/531, loss: 0.30766287446022034 2023-01-23 00:19:51.780677: step: 604/531, loss: 0.7762675285339355 2023-01-23 00:19:52.927466: step: 608/531, loss: 0.15716032683849335 2023-01-23 00:19:54.088576: step: 612/531, loss: 0.07950840145349503 2023-01-23 00:19:55.226218: step: 616/531, loss: 0.09453105926513672 2023-01-23 00:19:56.344369: step: 620/531, loss: 0.8622385263442993 2023-01-23 00:19:57.483252: step: 624/531, loss: 0.03307478502392769 2023-01-23 00:19:58.620594: step: 628/531, loss: 0.12056894600391388 2023-01-23 00:19:59.757531: step: 632/531, loss: 0.200147807598114 2023-01-23 00:20:00.892125: step: 636/531, loss: 0.15794605016708374 2023-01-23 00:20:02.037958: step: 640/531, loss: 0.12747633457183838 2023-01-23 00:20:03.154042: step: 644/531, loss: 0.3461719751358032 2023-01-23 00:20:04.277884: step: 648/531, loss: 0.03799867630004883 2023-01-23 00:20:05.398780: step: 652/531, loss: 0.7810134887695312 2023-01-23 00:20:06.536143: step: 656/531, loss: 0.08401909470558167 2023-01-23 00:20:07.636696: step: 660/531, loss: 0.1714201420545578 2023-01-23 00:20:08.743078: step: 664/531, loss: 0.39346712827682495 2023-01-23 00:20:09.873926: step: 668/531, loss: 0.09842925518751144 2023-01-23 00:20:10.992944: step: 672/531, loss: 0.2554939389228821 2023-01-23 00:20:12.130284: step: 676/531, loss: 0.01188054122030735 2023-01-23 00:20:13.236312: step: 680/531, loss: 0.11830110847949982 2023-01-23 00:20:14.381849: step: 684/531, loss: 0.7243600487709045 2023-01-23 00:20:15.523568: step: 688/531, loss: 0.2884639799594879 2023-01-23 00:20:16.659029: step: 692/531, loss: 0.0690801590681076 2023-01-23 00:20:17.756155: step: 696/531, loss: 0.10876226425170898 2023-01-23 00:20:18.912515: step: 700/531, loss: 0.07742643356323242 2023-01-23 00:20:20.029917: step: 704/531, loss: 0.039069078862667084 2023-01-23 00:20:21.151877: step: 708/531, loss: 0.1261400282382965 2023-01-23 00:20:22.272478: step: 712/531, loss: 0.10678987205028534 2023-01-23 00:20:23.374450: step: 716/531, loss: 0.21546411514282227 2023-01-23 00:20:24.514205: step: 720/531, loss: 0.6676244139671326 2023-01-23 00:20:25.631465: step: 724/531, loss: 0.4401053786277771 2023-01-23 00:20:26.769374: step: 728/531, loss: 0.018947506323456764 2023-01-23 00:20:27.911933: step: 732/531, loss: 0.4283602833747864 2023-01-23 00:20:29.036276: step: 736/531, loss: 0.11649742722511292 2023-01-23 00:20:30.195885: step: 740/531, loss: 0.14506417512893677 2023-01-23 00:20:31.322900: step: 744/531, loss: 3.3566036224365234 2023-01-23 00:20:32.443863: step: 748/531, loss: 0.4281378984451294 2023-01-23 00:20:33.523904: step: 752/531, loss: 0.023123862221837044 2023-01-23 00:20:34.646282: step: 756/531, loss: 0.09989605098962784 2023-01-23 00:20:35.768350: step: 760/531, loss: 0.06815844029188156 2023-01-23 00:20:36.881556: step: 764/531, loss: 0.07789120823144913 2023-01-23 00:20:38.019188: step: 768/531, loss: 0.05248665809631348 2023-01-23 00:20:39.176073: step: 772/531, loss: 0.3559483587741852 2023-01-23 00:20:40.331536: step: 776/531, loss: 0.6388660669326782 2023-01-23 00:20:41.439746: step: 780/531, loss: 0.13060155510902405 2023-01-23 00:20:42.595048: step: 784/531, loss: 0.5392071008682251 2023-01-23 00:20:43.716200: step: 788/531, loss: 0.4878729581832886 2023-01-23 00:20:44.857045: step: 792/531, loss: 0.1716877818107605 2023-01-23 00:20:45.988579: step: 796/531, loss: 0.38911306858062744 2023-01-23 00:20:47.111499: step: 800/531, loss: 0.09696026146411896 2023-01-23 00:20:48.219170: step: 804/531, loss: 0.3628661334514618 2023-01-23 00:20:49.345456: step: 808/531, loss: 0.1736392080783844 2023-01-23 00:20:50.460957: step: 812/531, loss: 0.34399205446243286 2023-01-23 00:20:51.625018: step: 816/531, loss: 0.1590687781572342 2023-01-23 00:20:52.766751: step: 820/531, loss: 0.5829407572746277 2023-01-23 00:20:53.925216: step: 824/531, loss: 0.4002405107021332 2023-01-23 00:20:55.062530: step: 828/531, loss: 0.13225403428077698 2023-01-23 00:20:56.210775: step: 832/531, loss: 0.1279522031545639 2023-01-23 00:20:57.300966: step: 836/531, loss: 0.09511509537696838 2023-01-23 00:20:58.452110: step: 840/531, loss: 0.13932590186595917 2023-01-23 00:20:59.621745: step: 844/531, loss: 0.13361340761184692 2023-01-23 00:21:00.722743: step: 848/531, loss: 0.5909035205841064 2023-01-23 00:21:01.824490: step: 852/531, loss: 0.17041346430778503 2023-01-23 00:21:02.960920: step: 856/531, loss: 0.11356143653392792 2023-01-23 00:21:04.091537: step: 860/531, loss: 0.2798663377761841 2023-01-23 00:21:05.242292: step: 864/531, loss: 0.18520203232765198 2023-01-23 00:21:06.344991: step: 868/531, loss: 0.22399906814098358 2023-01-23 00:21:07.455974: step: 872/531, loss: 0.4984610080718994 2023-01-23 00:21:08.628650: step: 876/531, loss: 0.06039772182703018 2023-01-23 00:21:09.743004: step: 880/531, loss: 0.28380468487739563 2023-01-23 00:21:10.866669: step: 884/531, loss: 0.3702622056007385 2023-01-23 00:21:11.992092: step: 888/531, loss: 0.18391229212284088 2023-01-23 00:21:13.095052: step: 892/531, loss: 0.012537956237792969 2023-01-23 00:21:14.222555: step: 896/531, loss: 0.10259237140417099 2023-01-23 00:21:15.344129: step: 900/531, loss: 0.07060952484607697 2023-01-23 00:21:16.465732: step: 904/531, loss: 0.19390204548835754 2023-01-23 00:21:17.576127: step: 908/531, loss: 0.2734799385070801 2023-01-23 00:21:18.683231: step: 912/531, loss: 0.04100747033953667 2023-01-23 00:21:19.802991: step: 916/531, loss: 0.43919527530670166 2023-01-23 00:21:20.931536: step: 920/531, loss: 0.09465637803077698 2023-01-23 00:21:22.071638: step: 924/531, loss: 0.28514137864112854 2023-01-23 00:21:23.189205: step: 928/531, loss: 0.15421992540359497 2023-01-23 00:21:24.326947: step: 932/531, loss: 0.12785452604293823 2023-01-23 00:21:25.435265: step: 936/531, loss: 0.5910702347755432 2023-01-23 00:21:26.580195: step: 940/531, loss: 0.299083411693573 2023-01-23 00:21:27.705740: step: 944/531, loss: 0.08141613006591797 2023-01-23 00:21:28.804365: step: 948/531, loss: 0.03309335559606552 2023-01-23 00:21:29.926356: step: 952/531, loss: 0.407720685005188 2023-01-23 00:21:31.062170: step: 956/531, loss: 0.11979393661022186 2023-01-23 00:21:32.198374: step: 960/531, loss: 0.13426253199577332 2023-01-23 00:21:33.325814: step: 964/531, loss: 0.18411245942115784 2023-01-23 00:21:34.469528: step: 968/531, loss: 0.14016684889793396 2023-01-23 00:21:35.603657: step: 972/531, loss: 0.1574666053056717 2023-01-23 00:21:36.717891: step: 976/531, loss: 0.15568019449710846 2023-01-23 00:21:37.834839: step: 980/531, loss: 0.1557566225528717 2023-01-23 00:21:38.938760: step: 984/531, loss: 0.05853700637817383 2023-01-23 00:21:40.059089: step: 988/531, loss: 0.08996076136827469 2023-01-23 00:21:41.198037: step: 992/531, loss: 0.336512953042984 2023-01-23 00:21:42.358973: step: 996/531, loss: 0.0432555228471756 2023-01-23 00:21:43.476350: step: 1000/531, loss: 0.4580022096633911 2023-01-23 00:21:44.624256: step: 1004/531, loss: 0.0792016088962555 2023-01-23 00:21:45.741190: step: 1008/531, loss: 0.23022013902664185 2023-01-23 00:21:46.861745: step: 1012/531, loss: 0.5968620181083679 2023-01-23 00:21:47.997079: step: 1016/531, loss: 0.11794266104698181 2023-01-23 00:21:49.095859: step: 1020/531, loss: 0.5145123600959778 2023-01-23 00:21:50.200223: step: 1024/531, loss: 0.0805201306939125 2023-01-23 00:21:51.303280: step: 1028/531, loss: 0.43894654512405396 2023-01-23 00:21:52.407669: step: 1032/531, loss: 0.1105382889509201 2023-01-23 00:21:53.531659: step: 1036/531, loss: 0.25686168670654297 2023-01-23 00:21:54.692111: step: 1040/531, loss: 0.1647787094116211 2023-01-23 00:21:55.806775: step: 1044/531, loss: 0.6482565999031067 2023-01-23 00:21:56.934398: step: 1048/531, loss: 0.05835747718811035 2023-01-23 00:21:58.054151: step: 1052/531, loss: 0.4489502012729645 2023-01-23 00:21:59.160424: step: 1056/531, loss: 0.12999510765075684 2023-01-23 00:22:00.316931: step: 1060/531, loss: 0.40457284450531006 2023-01-23 00:22:01.435576: step: 1064/531, loss: 0.08496780693531036 2023-01-23 00:22:02.568743: step: 1068/531, loss: 0.9429113864898682 2023-01-23 00:22:03.674148: step: 1072/531, loss: 0.07517362385988235 2023-01-23 00:22:04.809590: step: 1076/531, loss: 0.06246475130319595 2023-01-23 00:22:05.932598: step: 1080/531, loss: 0.10489077866077423 2023-01-23 00:22:07.039784: step: 1084/531, loss: 0.05943968519568443 2023-01-23 00:22:08.154628: step: 1088/531, loss: 0.15815573930740356 2023-01-23 00:22:09.284111: step: 1092/531, loss: 0.3370331823825836 2023-01-23 00:22:10.419414: step: 1096/531, loss: 0.08954358100891113 2023-01-23 00:22:11.532515: step: 1100/531, loss: 0.02264227718114853 2023-01-23 00:22:12.652878: step: 1104/531, loss: 0.44786348938941956 2023-01-23 00:22:13.763806: step: 1108/531, loss: 0.49628493189811707 2023-01-23 00:22:14.907433: step: 1112/531, loss: 0.09058018028736115 2023-01-23 00:22:16.001214: step: 1116/531, loss: 0.08966894447803497 2023-01-23 00:22:17.125327: step: 1120/531, loss: 0.20239944756031036 2023-01-23 00:22:18.232583: step: 1124/531, loss: 0.023293495178222656 2023-01-23 00:22:19.354552: step: 1128/531, loss: 0.2353372573852539 2023-01-23 00:22:20.488461: step: 1132/531, loss: 0.9897315502166748 2023-01-23 00:22:21.627686: step: 1136/531, loss: 0.03459892049431801 2023-01-23 00:22:22.766923: step: 1140/531, loss: 0.05682554095983505 2023-01-23 00:22:23.895307: step: 1144/531, loss: 0.044228505343198776 2023-01-23 00:22:24.999366: step: 1148/531, loss: 0.6215946078300476 2023-01-23 00:22:26.146838: step: 1152/531, loss: 0.4609766900539398 2023-01-23 00:22:27.286233: step: 1156/531, loss: 0.1382693350315094 2023-01-23 00:22:28.408982: step: 1160/531, loss: 0.5196768641471863 2023-01-23 00:22:29.543042: step: 1164/531, loss: 0.043473437428474426 2023-01-23 00:22:30.671978: step: 1168/531, loss: 0.21080002188682556 2023-01-23 00:22:31.810792: step: 1172/531, loss: 0.3833649754524231 2023-01-23 00:22:32.918264: step: 1176/531, loss: 0.06717577576637268 2023-01-23 00:22:34.039111: step: 1180/531, loss: 0.046009257435798645 2023-01-23 00:22:35.155383: step: 1184/531, loss: 0.03521165996789932 2023-01-23 00:22:36.262993: step: 1188/531, loss: 0.2972188889980316 2023-01-23 00:22:37.385970: step: 1192/531, loss: 0.8312904238700867 2023-01-23 00:22:38.497685: step: 1196/531, loss: 0.977934718132019 2023-01-23 00:22:39.623125: step: 1200/531, loss: 0.05278778076171875 2023-01-23 00:22:40.768919: step: 1204/531, loss: 0.12949152290821075 2023-01-23 00:22:41.894257: step: 1208/531, loss: 0.24591580033302307 2023-01-23 00:22:43.019552: step: 1212/531, loss: 0.0878174751996994 2023-01-23 00:22:44.126991: step: 1216/531, loss: 0.3329910635948181 2023-01-23 00:22:45.263611: step: 1220/531, loss: 0.4611121416091919 2023-01-23 00:22:46.398284: step: 1224/531, loss: 0.09945712238550186 2023-01-23 00:22:47.523776: step: 1228/531, loss: 0.044197943061590195 2023-01-23 00:22:48.645466: step: 1232/531, loss: 0.11266021430492401 2023-01-23 00:22:49.760801: step: 1236/531, loss: 0.1609579175710678 2023-01-23 00:22:50.889955: step: 1240/531, loss: 0.09072819352149963 2023-01-23 00:22:52.022360: step: 1244/531, loss: 5.270782947540283 2023-01-23 00:22:53.134785: step: 1248/531, loss: 0.12462158501148224 2023-01-23 00:22:54.252400: step: 1252/531, loss: 0.08513360470533371 2023-01-23 00:22:55.365408: step: 1256/531, loss: 0.1756526529788971 2023-01-23 00:22:56.518771: step: 1260/531, loss: 0.3278857469558716 2023-01-23 00:22:57.646435: step: 1264/531, loss: 0.5369536280632019 2023-01-23 00:22:58.767660: step: 1268/531, loss: 0.6144624948501587 2023-01-23 00:22:59.900070: step: 1272/531, loss: 0.5206581950187683 2023-01-23 00:23:01.014994: step: 1276/531, loss: 0.05089038610458374 2023-01-23 00:23:02.160341: step: 1280/531, loss: 0.32959622144699097 2023-01-23 00:23:03.289084: step: 1284/531, loss: 0.390646755695343 2023-01-23 00:23:04.405358: step: 1288/531, loss: 0.07557649910449982 2023-01-23 00:23:05.508624: step: 1292/531, loss: 0.20201599597930908 2023-01-23 00:23:06.646661: step: 1296/531, loss: 0.4787057638168335 2023-01-23 00:23:07.793309: step: 1300/531, loss: 0.1810092031955719 2023-01-23 00:23:08.940435: step: 1304/531, loss: 0.10041961818933487 2023-01-23 00:23:10.062629: step: 1308/531, loss: 0.18631476163864136 2023-01-23 00:23:11.167115: step: 1312/531, loss: 0.7809675335884094 2023-01-23 00:23:12.326474: step: 1316/531, loss: 0.1162814125418663 2023-01-23 00:23:13.451823: step: 1320/531, loss: 0.7988578081130981 2023-01-23 00:23:14.561160: step: 1324/531, loss: 0.13178615272045135 2023-01-23 00:23:15.698932: step: 1328/531, loss: 0.49795836210250854 2023-01-23 00:23:16.831464: step: 1332/531, loss: 0.12801781296730042 2023-01-23 00:23:17.948206: step: 1336/531, loss: 0.12133064866065979 2023-01-23 00:23:19.072803: step: 1340/531, loss: 0.036447882652282715 2023-01-23 00:23:20.227957: step: 1344/531, loss: 0.3469217121601105 2023-01-23 00:23:21.340483: step: 1348/531, loss: 0.10699406266212463 2023-01-23 00:23:22.489904: step: 1352/531, loss: 0.10263185203075409 2023-01-23 00:23:23.643487: step: 1356/531, loss: 0.03931427001953125 2023-01-23 00:23:24.775862: step: 1360/531, loss: 0.21205520629882812 2023-01-23 00:23:25.908680: step: 1364/531, loss: 0.17296457290649414 2023-01-23 00:23:27.043139: step: 1368/531, loss: 0.15259799361228943 2023-01-23 00:23:28.141165: step: 1372/531, loss: 0.0536830872297287 2023-01-23 00:23:29.270268: step: 1376/531, loss: 0.28342801332473755 2023-01-23 00:23:30.404413: step: 1380/531, loss: 0.2268989086151123 2023-01-23 00:23:31.560882: step: 1384/531, loss: 0.40731558203697205 2023-01-23 00:23:32.742712: step: 1388/531, loss: 0.3068438768386841 2023-01-23 00:23:33.886354: step: 1392/531, loss: 0.16849274933338165 2023-01-23 00:23:35.009919: step: 1396/531, loss: 0.1644456833600998 2023-01-23 00:23:36.111746: step: 1400/531, loss: 0.951788067817688 2023-01-23 00:23:37.255119: step: 1404/531, loss: 0.3371749818325043 2023-01-23 00:23:38.379934: step: 1408/531, loss: 0.43408751487731934 2023-01-23 00:23:39.487555: step: 1412/531, loss: 1.5530601739883423 2023-01-23 00:23:40.569767: step: 1416/531, loss: 0.365949422121048 2023-01-23 00:23:41.673827: step: 1420/531, loss: 0.2542448937892914 2023-01-23 00:23:42.798035: step: 1424/531, loss: 0.12949238717556 2023-01-23 00:23:43.970251: step: 1428/531, loss: 0.09009018540382385 2023-01-23 00:23:45.104188: step: 1432/531, loss: 0.1276037096977234 2023-01-23 00:23:46.230791: step: 1436/531, loss: 0.09900350868701935 2023-01-23 00:23:47.379877: step: 1440/531, loss: 0.3038250803947449 2023-01-23 00:23:48.485871: step: 1444/531, loss: 0.28426557779312134 2023-01-23 00:23:49.622122: step: 1448/531, loss: 0.4755532443523407 2023-01-23 00:23:50.744565: step: 1452/531, loss: 0.13073988258838654 2023-01-23 00:23:51.864185: step: 1456/531, loss: 0.11859126389026642 2023-01-23 00:23:53.001175: step: 1460/531, loss: 0.16933254897594452 2023-01-23 00:23:54.139264: step: 1464/531, loss: 0.060035038739442825 2023-01-23 00:23:55.263890: step: 1468/531, loss: 0.019108964130282402 2023-01-23 00:23:56.380670: step: 1472/531, loss: 0.10508136451244354 2023-01-23 00:23:57.484258: step: 1476/531, loss: 0.13732466101646423 2023-01-23 00:23:58.608479: step: 1480/531, loss: 0.22092753648757935 2023-01-23 00:23:59.725355: step: 1484/531, loss: 0.053449537605047226 2023-01-23 00:24:00.840048: step: 1488/531, loss: 0.13331981003284454 2023-01-23 00:24:01.989062: step: 1492/531, loss: 0.0746951550245285 2023-01-23 00:24:03.068465: step: 1496/531, loss: 0.12361302971839905 2023-01-23 00:24:04.153602: step: 1500/531, loss: 0.1742846518754959 2023-01-23 00:24:05.272625: step: 1504/531, loss: 0.6790333390235901 2023-01-23 00:24:06.400337: step: 1508/531, loss: 0.06733884662389755 2023-01-23 00:24:07.514692: step: 1512/531, loss: 0.19083049893379211 2023-01-23 00:24:08.630079: step: 1516/531, loss: 0.27966246008872986 2023-01-23 00:24:09.762534: step: 1520/531, loss: 0.08095188438892365 2023-01-23 00:24:10.906585: step: 1524/531, loss: 0.14948931336402893 2023-01-23 00:24:12.050790: step: 1528/531, loss: 0.7838238477706909 2023-01-23 00:24:13.164904: step: 1532/531, loss: 0.17595674097537994 2023-01-23 00:24:14.294010: step: 1536/531, loss: 0.05603218078613281 2023-01-23 00:24:15.446499: step: 1540/531, loss: 0.25855112075805664 2023-01-23 00:24:16.568586: step: 1544/531, loss: 0.10620498657226562 2023-01-23 00:24:17.699467: step: 1548/531, loss: 0.3945713937282562 2023-01-23 00:24:18.832077: step: 1552/531, loss: 0.1955706626176834 2023-01-23 00:24:19.957850: step: 1556/531, loss: 0.10373048484325409 2023-01-23 00:24:21.072459: step: 1560/531, loss: 0.056058548390865326 2023-01-23 00:24:22.198382: step: 1564/531, loss: 0.13536995649337769 2023-01-23 00:24:23.318428: step: 1568/531, loss: 0.0990375503897667 2023-01-23 00:24:24.482271: step: 1572/531, loss: 0.11423464119434357 2023-01-23 00:24:25.578939: step: 1576/531, loss: 0.17716436088085175 2023-01-23 00:24:26.715022: step: 1580/531, loss: 0.2681174874305725 2023-01-23 00:24:27.847328: step: 1584/531, loss: 0.11104698479175568 2023-01-23 00:24:29.021519: step: 1588/531, loss: 0.12791967391967773 2023-01-23 00:24:30.145022: step: 1592/531, loss: 0.08090706169605255 2023-01-23 00:24:31.285676: step: 1596/531, loss: 0.0628213882446289 2023-01-23 00:24:32.393224: step: 1600/531, loss: 0.1080193966627121 2023-01-23 00:24:33.522618: step: 1604/531, loss: 0.13725414872169495 2023-01-23 00:24:34.654255: step: 1608/531, loss: 0.12615986168384552 2023-01-23 00:24:35.781920: step: 1612/531, loss: 0.10770893096923828 2023-01-23 00:24:36.895888: step: 1616/531, loss: 0.41494157910346985 2023-01-23 00:24:38.038038: step: 1620/531, loss: 0.07492885738611221 2023-01-23 00:24:39.148738: step: 1624/531, loss: 0.09369926154613495 2023-01-23 00:24:40.265443: step: 1628/531, loss: 0.26386985182762146 2023-01-23 00:24:41.385053: step: 1632/531, loss: 0.0204620361328125 2023-01-23 00:24:42.490711: step: 1636/531, loss: 0.21918782591819763 2023-01-23 00:24:43.611432: step: 1640/531, loss: 0.08974028378725052 2023-01-23 00:24:44.691197: step: 1644/531, loss: 0.3051663041114807 2023-01-23 00:24:45.787438: step: 1648/531, loss: 0.3446905314922333 2023-01-23 00:24:46.919945: step: 1652/531, loss: 0.039138033986091614 2023-01-23 00:24:48.023530: step: 1656/531, loss: 0.19138669967651367 2023-01-23 00:24:49.135929: step: 1660/531, loss: 0.2975608706474304 2023-01-23 00:24:50.252819: step: 1664/531, loss: 0.36778098344802856 2023-01-23 00:24:51.386838: step: 1668/531, loss: 0.1063106581568718 2023-01-23 00:24:52.517252: step: 1672/531, loss: 1.1621308326721191 2023-01-23 00:24:53.637651: step: 1676/531, loss: 0.23349857330322266 2023-01-23 00:24:54.751180: step: 1680/531, loss: 0.3977741301059723 2023-01-23 00:24:55.840357: step: 1684/531, loss: 0.48294752836227417 2023-01-23 00:24:56.936541: step: 1688/531, loss: 0.13870978355407715 2023-01-23 00:24:58.026032: step: 1692/531, loss: 0.21510955691337585 2023-01-23 00:24:59.140111: step: 1696/531, loss: 0.21706832945346832 2023-01-23 00:25:00.250791: step: 1700/531, loss: 0.09922724217176437 2023-01-23 00:25:01.379445: step: 1704/531, loss: 0.06827382743358612 2023-01-23 00:25:02.517665: step: 1708/531, loss: 0.11864328384399414 2023-01-23 00:25:03.664174: step: 1712/531, loss: 0.15477409958839417 2023-01-23 00:25:04.780424: step: 1716/531, loss: 0.29711437225341797 2023-01-23 00:25:05.923684: step: 1720/531, loss: 0.20394659042358398 2023-01-23 00:25:07.019007: step: 1724/531, loss: 0.23727789521217346 2023-01-23 00:25:08.143174: step: 1728/531, loss: 0.1402730941772461 2023-01-23 00:25:09.276234: step: 1732/531, loss: 0.052642822265625 2023-01-23 00:25:10.385880: step: 1736/531, loss: 0.15474733710289001 2023-01-23 00:25:11.488480: step: 1740/531, loss: 0.42271554470062256 2023-01-23 00:25:12.637864: step: 1744/531, loss: 0.8821865916252136 2023-01-23 00:25:13.770129: step: 1748/531, loss: 0.3389485478401184 2023-01-23 00:25:14.919918: step: 1752/531, loss: 0.18968543410301208 2023-01-23 00:25:16.065941: step: 1756/531, loss: 0.06896381080150604 2023-01-23 00:25:17.185109: step: 1760/531, loss: 0.37530672550201416 2023-01-23 00:25:18.321461: step: 1764/531, loss: 0.17176242172718048 2023-01-23 00:25:19.461401: step: 1768/531, loss: 0.11363659054040909 2023-01-23 00:25:20.608112: step: 1772/531, loss: 0.08463610708713531 2023-01-23 00:25:21.763070: step: 1776/531, loss: 0.05079937353730202 2023-01-23 00:25:22.926506: step: 1780/531, loss: 0.08702945709228516 2023-01-23 00:25:24.024090: step: 1784/531, loss: 0.31351909041404724 2023-01-23 00:25:25.151498: step: 1788/531, loss: 0.08429765701293945 2023-01-23 00:25:26.299963: step: 1792/531, loss: 0.09733138233423233 2023-01-23 00:25:27.416163: step: 1796/531, loss: 0.21036525070667267 2023-01-23 00:25:28.547004: step: 1800/531, loss: 0.48478659987449646 2023-01-23 00:25:29.671214: step: 1804/531, loss: 0.17424841225147247 2023-01-23 00:25:30.792859: step: 1808/531, loss: 0.40230417251586914 2023-01-23 00:25:31.935449: step: 1812/531, loss: 2.430882453918457 2023-01-23 00:25:33.054831: step: 1816/531, loss: 0.937486469745636 2023-01-23 00:25:34.189358: step: 1820/531, loss: 0.6594923138618469 2023-01-23 00:25:35.281864: step: 1824/531, loss: 0.04879341274499893 2023-01-23 00:25:36.422724: step: 1828/531, loss: 0.2868717312812805 2023-01-23 00:25:37.536965: step: 1832/531, loss: 0.07998818159103394 2023-01-23 00:25:38.656851: step: 1836/531, loss: 0.6160796880722046 2023-01-23 00:25:39.773847: step: 1840/531, loss: 2.504883289337158 2023-01-23 00:25:40.876433: step: 1844/531, loss: 0.024641752243041992 2023-01-23 00:25:42.024852: step: 1848/531, loss: 0.25491833686828613 2023-01-23 00:25:43.202278: step: 1852/531, loss: 0.17909449338912964 2023-01-23 00:25:44.337878: step: 1856/531, loss: 0.03929818421602249 2023-01-23 00:25:45.479956: step: 1860/531, loss: 0.20805053412914276 2023-01-23 00:25:46.570523: step: 1864/531, loss: 0.3360249698162079 2023-01-23 00:25:47.717432: step: 1868/531, loss: 0.2190142571926117 2023-01-23 00:25:48.839750: step: 1872/531, loss: 0.18401947617530823 2023-01-23 00:25:49.974041: step: 1876/531, loss: 0.03933882713317871 2023-01-23 00:25:51.123856: step: 1880/531, loss: 0.5655359029769897 2023-01-23 00:25:52.233784: step: 1884/531, loss: 0.15157060325145721 2023-01-23 00:25:53.384188: step: 1888/531, loss: 0.20729702711105347 2023-01-23 00:25:54.554335: step: 1892/531, loss: 0.4299635887145996 2023-01-23 00:25:55.668560: step: 1896/531, loss: 0.2888050079345703 2023-01-23 00:25:56.829406: step: 1900/531, loss: 0.11375733464956284 2023-01-23 00:25:57.970180: step: 1904/531, loss: 0.10378637909889221 2023-01-23 00:25:59.098989: step: 1908/531, loss: 0.10378704220056534 2023-01-23 00:26:00.204933: step: 1912/531, loss: 0.22663506865501404 2023-01-23 00:26:01.324934: step: 1916/531, loss: 0.34136760234832764 2023-01-23 00:26:02.424003: step: 1920/531, loss: 0.19974803924560547 2023-01-23 00:26:03.574380: step: 1924/531, loss: 0.25381433963775635 2023-01-23 00:26:04.732639: step: 1928/531, loss: 0.13586612045764923 2023-01-23 00:26:05.842955: step: 1932/531, loss: 0.19036856293678284 2023-01-23 00:26:07.018221: step: 1936/531, loss: 1.5728070735931396 2023-01-23 00:26:08.138212: step: 1940/531, loss: 0.09194259345531464 2023-01-23 00:26:09.229130: step: 1944/531, loss: 0.06685075908899307 2023-01-23 00:26:10.331417: step: 1948/531, loss: 0.03318953514099121 2023-01-23 00:26:11.460562: step: 1952/531, loss: 0.5354709625244141 2023-01-23 00:26:12.585831: step: 1956/531, loss: 0.3382452130317688 2023-01-23 00:26:13.714508: step: 1960/531, loss: 0.11215820163488388 2023-01-23 00:26:14.841349: step: 1964/531, loss: 0.8525193333625793 2023-01-23 00:26:15.996418: step: 1968/531, loss: 0.5534533858299255 2023-01-23 00:26:17.112608: step: 1972/531, loss: 0.5061460137367249 2023-01-23 00:26:18.258276: step: 1976/531, loss: 0.6987929344177246 2023-01-23 00:26:19.392326: step: 1980/531, loss: 0.12794113159179688 2023-01-23 00:26:20.505146: step: 1984/531, loss: 0.12068863213062286 2023-01-23 00:26:21.636400: step: 1988/531, loss: 0.07859029620885849 2023-01-23 00:26:22.759053: step: 1992/531, loss: 0.3634530007839203 2023-01-23 00:26:23.891295: step: 1996/531, loss: 0.39075756072998047 2023-01-23 00:26:25.009205: step: 2000/531, loss: 0.0685429573059082 2023-01-23 00:26:26.167146: step: 2004/531, loss: 0.09246974438428879 2023-01-23 00:26:27.277620: step: 2008/531, loss: 0.3992319703102112 2023-01-23 00:26:28.445425: step: 2012/531, loss: 0.2583286166191101 2023-01-23 00:26:29.577112: step: 2016/531, loss: 1.7008168697357178 2023-01-23 00:26:30.722009: step: 2020/531, loss: 0.09999027848243713 2023-01-23 00:26:31.824634: step: 2024/531, loss: 1.4721925258636475 2023-01-23 00:26:32.950133: step: 2028/531, loss: 0.23009443283081055 2023-01-23 00:26:34.102785: step: 2032/531, loss: 0.049252700060606 2023-01-23 00:26:35.211057: step: 2036/531, loss: 0.1880214661359787 2023-01-23 00:26:36.341842: step: 2040/531, loss: 0.3344150483608246 2023-01-23 00:26:37.453015: step: 2044/531, loss: 0.1133275032043457 2023-01-23 00:26:38.554569: step: 2048/531, loss: 1.1327979564666748 2023-01-23 00:26:39.675783: step: 2052/531, loss: 0.38301506638526917 2023-01-23 00:26:40.817852: step: 2056/531, loss: 0.0619143471121788 2023-01-23 00:26:41.945298: step: 2060/531, loss: 0.2871108949184418 2023-01-23 00:26:43.087723: step: 2064/531, loss: 0.12272000312805176 2023-01-23 00:26:44.195591: step: 2068/531, loss: 0.4739369750022888 2023-01-23 00:26:45.331385: step: 2072/531, loss: 0.09282936900854111 2023-01-23 00:26:46.441185: step: 2076/531, loss: 0.5756852626800537 2023-01-23 00:26:47.588842: step: 2080/531, loss: 0.4304693639278412 2023-01-23 00:26:48.692991: step: 2084/531, loss: 0.25591841340065 2023-01-23 00:26:49.819787: step: 2088/531, loss: 0.15246042609214783 2023-01-23 00:26:50.948903: step: 2092/531, loss: 0.8338353633880615 2023-01-23 00:26:52.059474: step: 2096/531, loss: 0.1795164942741394 2023-01-23 00:26:53.198720: step: 2100/531, loss: 0.21658101677894592 2023-01-23 00:26:54.319514: step: 2104/531, loss: 0.0930359810590744 2023-01-23 00:26:55.440984: step: 2108/531, loss: 0.09251256287097931 2023-01-23 00:26:56.541368: step: 2112/531, loss: 0.1334594339132309 2023-01-23 00:26:57.660620: step: 2116/531, loss: 0.07223796844482422 2023-01-23 00:26:58.819614: step: 2120/531, loss: 0.5539063811302185 2023-01-23 00:26:59.938800: step: 2124/531, loss: 0.09299664944410324 ================================================== Loss: 0.297 -------------------- Dev: {'event': {'p': 0.5440900562851783, 'r': 0.7723035952063915, 'f1': 0.6384149697303247}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Test: {'event': {'p': 0.5672823218997362, 'r': 0.7692307692307693, 'f1': 0.6529992406985574}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Chinese: {'event': {'p': 0.5108695652173914, 'r': 0.8703703703703703, 'f1': 0.6438356164383562}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Korean: {'event': {'p': 0.6557377049180327, 'r': 0.6349206349206349, 'f1': 0.6451612903225806}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Russian: {'event': {'p': 0.43478260869565216, 'r': 0.5555555555555556, 'f1': 0.4878048780487805}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} New best korean model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.5440900562851783, 'r': 0.7723035952063915, 'f1': 0.6384149697303247}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Korean: {'event': {'p': 0.5672823218997362, 'r': 0.7692307692307693, 'f1': 0.6529992406985574}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Korean: {'event': {'p': 0.6557377049180327, 'r': 0.6349206349206349, 'f1': 0.6451612903225806}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Russian: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.6956521739130435, 'r': 0.4444444444444444, 'f1': 0.5423728813559322}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 6 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:27:46.689320: step: 4/531, loss: 0.15086175501346588 2023-01-23 00:27:47.812809: step: 8/531, loss: 0.44519805908203125 2023-01-23 00:27:48.978779: step: 12/531, loss: 0.058432817459106445 2023-01-23 00:27:50.099769: step: 16/531, loss: 0.10795927047729492 2023-01-23 00:27:51.193470: step: 20/531, loss: 0.11459669470787048 2023-01-23 00:27:52.334647: step: 24/531, loss: 0.21477949619293213 2023-01-23 00:27:53.473292: step: 28/531, loss: 0.02445411868393421 2023-01-23 00:27:54.595405: step: 32/531, loss: 0.6534051299095154 2023-01-23 00:27:55.739232: step: 36/531, loss: 0.10216904431581497 2023-01-23 00:27:56.870970: step: 40/531, loss: 0.12439584732055664 2023-01-23 00:27:57.972406: step: 44/531, loss: 0.1372111290693283 2023-01-23 00:27:59.079289: step: 48/531, loss: 0.31181374192237854 2023-01-23 00:28:00.200593: step: 52/531, loss: 0.07597608864307404 2023-01-23 00:28:01.326146: step: 56/531, loss: 0.18333645164966583 2023-01-23 00:28:02.441778: step: 60/531, loss: 0.10311757028102875 2023-01-23 00:28:03.590444: step: 64/531, loss: 0.15589380264282227 2023-01-23 00:28:04.715973: step: 68/531, loss: 0.10180147737264633 2023-01-23 00:28:05.840136: step: 72/531, loss: 0.02370734140276909 2023-01-23 00:28:06.952393: step: 76/531, loss: 0.04161777347326279 2023-01-23 00:28:08.062050: step: 80/531, loss: 0.3113865852355957 2023-01-23 00:28:09.185077: step: 84/531, loss: 0.3803182542324066 2023-01-23 00:28:10.305602: step: 88/531, loss: 0.08583936840295792 2023-01-23 00:28:11.415366: step: 92/531, loss: 0.13607101142406464 2023-01-23 00:28:12.525322: step: 96/531, loss: 1.4133756160736084 2023-01-23 00:28:13.652500: step: 100/531, loss: 0.34161150455474854 2023-01-23 00:28:14.746091: step: 104/531, loss: 0.066541388630867 2023-01-23 00:28:15.873306: step: 108/531, loss: 0.2006741464138031 2023-01-23 00:28:16.996993: step: 112/531, loss: 0.07147273421287537 2023-01-23 00:28:18.108859: step: 116/531, loss: 0.05142202600836754 2023-01-23 00:28:19.241927: step: 120/531, loss: 0.19109898805618286 2023-01-23 00:28:20.364841: step: 124/531, loss: 0.11739455163478851 2023-01-23 00:28:21.488160: step: 128/531, loss: 0.22635670006275177 2023-01-23 00:28:22.623122: step: 132/531, loss: 0.052965451031923294 2023-01-23 00:28:23.749405: step: 136/531, loss: 0.1725083440542221 2023-01-23 00:28:24.865380: step: 140/531, loss: 0.20802326500415802 2023-01-23 00:28:25.990506: step: 144/531, loss: 0.14394885301589966 2023-01-23 00:28:27.126655: step: 148/531, loss: 0.3046075701713562 2023-01-23 00:28:28.232914: step: 152/531, loss: 0.08211908489465714 2023-01-23 00:28:29.333369: step: 156/531, loss: 0.09998159110546112 2023-01-23 00:28:30.431269: step: 160/531, loss: 0.12419387698173523 2023-01-23 00:28:31.542197: step: 164/531, loss: 0.10426750034093857 2023-01-23 00:28:32.664760: step: 168/531, loss: 0.023827863857150078 2023-01-23 00:28:33.785273: step: 172/531, loss: 0.7307791709899902 2023-01-23 00:28:34.921615: step: 176/531, loss: 0.2617862820625305 2023-01-23 00:28:36.044187: step: 180/531, loss: 0.27173247933387756 2023-01-23 00:28:37.190847: step: 184/531, loss: 0.2729070782661438 2023-01-23 00:28:38.319816: step: 188/531, loss: 0.08730888366699219 2023-01-23 00:28:39.432451: step: 192/531, loss: 0.05043325573205948 2023-01-23 00:28:40.573228: step: 196/531, loss: 0.06647205352783203 2023-01-23 00:28:41.681383: step: 200/531, loss: 0.7661253213882446 2023-01-23 00:28:42.804076: step: 204/531, loss: 0.03455343469977379 2023-01-23 00:28:43.897532: step: 208/531, loss: 0.5179017186164856 2023-01-23 00:28:45.021995: step: 212/531, loss: 0.2024114727973938 2023-01-23 00:28:46.149295: step: 216/531, loss: 0.10241232067346573 2023-01-23 00:28:47.290443: step: 220/531, loss: 0.3304611146450043 2023-01-23 00:28:48.411359: step: 224/531, loss: 0.13528771698474884 2023-01-23 00:28:49.520409: step: 228/531, loss: 0.026763249188661575 2023-01-23 00:28:50.628507: step: 232/531, loss: 0.034537121653556824 2023-01-23 00:28:51.757533: step: 236/531, loss: 0.09236855804920197 2023-01-23 00:28:52.882620: step: 240/531, loss: 0.10965538024902344 2023-01-23 00:28:53.999804: step: 244/531, loss: 0.11902637034654617 2023-01-23 00:28:55.112100: step: 248/531, loss: 0.18522052466869354 2023-01-23 00:28:56.237371: step: 252/531, loss: 0.18175888061523438 2023-01-23 00:28:57.376545: step: 256/531, loss: 0.9082954525947571 2023-01-23 00:28:58.506537: step: 260/531, loss: 0.17686797678470612 2023-01-23 00:28:59.598048: step: 264/531, loss: 3.1346275806427 2023-01-23 00:29:00.749267: step: 268/531, loss: 0.18814867734909058 2023-01-23 00:29:01.874719: step: 272/531, loss: 0.2829253077507019 2023-01-23 00:29:02.985749: step: 276/531, loss: 0.014850925654172897 2023-01-23 00:29:04.130319: step: 280/531, loss: 0.17604750394821167 2023-01-23 00:29:05.264475: step: 284/531, loss: 0.052352048456668854 2023-01-23 00:29:06.421322: step: 288/531, loss: 0.057363320142030716 2023-01-23 00:29:07.544810: step: 292/531, loss: 0.12430057674646378 2023-01-23 00:29:08.671519: step: 296/531, loss: 0.09099988639354706 2023-01-23 00:29:09.812715: step: 300/531, loss: 0.11884613335132599 2023-01-23 00:29:10.921923: step: 304/531, loss: 0.058777764439582825 2023-01-23 00:29:12.031773: step: 308/531, loss: 0.4404264986515045 2023-01-23 00:29:13.151640: step: 312/531, loss: 0.047658681869506836 2023-01-23 00:29:14.261677: step: 316/531, loss: 0.14133863151073456 2023-01-23 00:29:15.393459: step: 320/531, loss: 0.10661507397890091 2023-01-23 00:29:16.526665: step: 324/531, loss: 0.16122861206531525 2023-01-23 00:29:17.650690: step: 328/531, loss: 0.10744023323059082 2023-01-23 00:29:18.750325: step: 332/531, loss: 0.1173584908246994 2023-01-23 00:29:19.863160: step: 336/531, loss: 0.07502040266990662 2023-01-23 00:29:20.978237: step: 340/531, loss: 0.07976703345775604 2023-01-23 00:29:22.112457: step: 344/531, loss: 0.07326493412256241 2023-01-23 00:29:23.255730: step: 348/531, loss: 0.1508421003818512 2023-01-23 00:29:24.377228: step: 352/531, loss: 0.05692138522863388 2023-01-23 00:29:25.496442: step: 356/531, loss: 0.04296588897705078 2023-01-23 00:29:26.594209: step: 360/531, loss: 0.07064338028430939 2023-01-23 00:29:27.715117: step: 364/531, loss: 0.0837317481637001 2023-01-23 00:29:28.847329: step: 368/531, loss: 0.5004148483276367 2023-01-23 00:29:29.980189: step: 372/531, loss: 0.10927124321460724 2023-01-23 00:29:31.180188: step: 376/531, loss: 0.144053652882576 2023-01-23 00:29:32.295155: step: 380/531, loss: 0.07450933754444122 2023-01-23 00:29:33.406248: step: 384/531, loss: 0.07858972251415253 2023-01-23 00:29:34.529118: step: 388/531, loss: 0.10665154457092285 2023-01-23 00:29:35.652079: step: 392/531, loss: 0.39680424332618713 2023-01-23 00:29:36.798052: step: 396/531, loss: 0.08854503929615021 2023-01-23 00:29:37.925226: step: 400/531, loss: 0.13796578347682953 2023-01-23 00:29:39.021895: step: 404/531, loss: 0.03624967113137245 2023-01-23 00:29:40.161138: step: 408/531, loss: 0.12699785828590393 2023-01-23 00:29:41.288043: step: 412/531, loss: 0.12611037492752075 2023-01-23 00:29:42.416344: step: 416/531, loss: 0.14851084351539612 2023-01-23 00:29:43.547197: step: 420/531, loss: 0.16934546828269958 2023-01-23 00:29:44.653755: step: 424/531, loss: 0.051219940185546875 2023-01-23 00:29:45.792924: step: 428/531, loss: 0.17968197166919708 2023-01-23 00:29:46.950135: step: 432/531, loss: 0.10771141946315765 2023-01-23 00:29:48.078367: step: 436/531, loss: 0.18436498939990997 2023-01-23 00:29:49.207151: step: 440/531, loss: 0.11564745754003525 2023-01-23 00:29:50.348178: step: 444/531, loss: 0.4060850739479065 2023-01-23 00:29:51.450626: step: 448/531, loss: 0.17578983306884766 2023-01-23 00:29:52.575932: step: 452/531, loss: 0.6279041767120361 2023-01-23 00:29:53.695885: step: 456/531, loss: 0.03740043565630913 2023-01-23 00:29:54.801026: step: 460/531, loss: 0.05715646594762802 2023-01-23 00:29:55.924815: step: 464/531, loss: 0.02419424243271351 2023-01-23 00:29:57.073369: step: 468/531, loss: 0.29996854066848755 2023-01-23 00:29:58.206092: step: 472/531, loss: 0.05887563154101372 2023-01-23 00:29:59.309695: step: 476/531, loss: 0.11404214054346085 2023-01-23 00:30:00.422213: step: 480/531, loss: 0.047014858573675156 2023-01-23 00:30:01.565891: step: 484/531, loss: 0.06032724305987358 2023-01-23 00:30:02.691787: step: 488/531, loss: 0.10072794556617737 2023-01-23 00:30:03.806953: step: 492/531, loss: 0.14510297775268555 2023-01-23 00:30:04.930920: step: 496/531, loss: 0.6866494417190552 2023-01-23 00:30:06.014532: step: 500/531, loss: 0.07605952769517899 2023-01-23 00:30:07.129231: step: 504/531, loss: 0.12150907516479492 2023-01-23 00:30:08.297607: step: 508/531, loss: 0.4043623208999634 2023-01-23 00:30:09.427700: step: 512/531, loss: 0.1951828896999359 2023-01-23 00:30:10.537334: step: 516/531, loss: 0.11251011490821838 2023-01-23 00:30:11.658390: step: 520/531, loss: 0.17454767227172852 2023-01-23 00:30:12.779135: step: 524/531, loss: 0.27790889143943787 2023-01-23 00:30:13.900386: step: 528/531, loss: 0.2730432450771332 2023-01-23 00:30:15.021369: step: 532/531, loss: 0.037294577807188034 2023-01-23 00:30:16.143716: step: 536/531, loss: 0.1082620620727539 2023-01-23 00:30:17.249383: step: 540/531, loss: 0.27188369631767273 2023-01-23 00:30:18.396410: step: 544/531, loss: 0.599617600440979 2023-01-23 00:30:19.508569: step: 548/531, loss: 0.13083668053150177 2023-01-23 00:30:20.663537: step: 552/531, loss: 0.13458271324634552 2023-01-23 00:30:21.767945: step: 556/531, loss: 0.21781493723392487 2023-01-23 00:30:22.931140: step: 560/531, loss: 0.17681331932544708 2023-01-23 00:30:24.063613: step: 564/531, loss: 0.18365205824375153 2023-01-23 00:30:25.207144: step: 568/531, loss: 0.051621437072753906 2023-01-23 00:30:26.302926: step: 572/531, loss: 0.16896170377731323 2023-01-23 00:30:27.434399: step: 576/531, loss: 0.05746364966034889 2023-01-23 00:30:28.571495: step: 580/531, loss: 0.2391793578863144 2023-01-23 00:30:29.663807: step: 584/531, loss: 0.033048346638679504 2023-01-23 00:30:30.772814: step: 588/531, loss: 0.14591558277606964 2023-01-23 00:30:31.901255: step: 592/531, loss: 0.12477359175682068 2023-01-23 00:30:33.035176: step: 596/531, loss: 0.04654426500201225 2023-01-23 00:30:34.156282: step: 600/531, loss: 0.16546869277954102 2023-01-23 00:30:35.289809: step: 604/531, loss: 0.1897704154253006 2023-01-23 00:30:36.395672: step: 608/531, loss: 0.2908203601837158 2023-01-23 00:30:37.541138: step: 612/531, loss: 0.18500156700611115 2023-01-23 00:30:38.646239: step: 616/531, loss: 0.07350310683250427 2023-01-23 00:30:39.804999: step: 620/531, loss: 0.769061803817749 2023-01-23 00:30:40.924329: step: 624/531, loss: 0.2006874978542328 2023-01-23 00:30:42.040572: step: 628/531, loss: 0.056610964238643646 2023-01-23 00:30:43.175585: step: 632/531, loss: 0.5713087320327759 2023-01-23 00:30:44.314067: step: 636/531, loss: 0.08168573677539825 2023-01-23 00:30:45.431080: step: 640/531, loss: 0.0675649642944336 2023-01-23 00:30:46.571197: step: 644/531, loss: 0.13331395387649536 2023-01-23 00:30:47.682105: step: 648/531, loss: 0.05531329661607742 2023-01-23 00:30:48.783635: step: 652/531, loss: 0.08608045428991318 2023-01-23 00:30:49.911494: step: 656/531, loss: 0.04052705690264702 2023-01-23 00:30:51.005021: step: 660/531, loss: 0.06458362936973572 2023-01-23 00:30:52.130256: step: 664/531, loss: 0.06878576427698135 2023-01-23 00:30:53.236731: step: 668/531, loss: 0.020014381036162376 2023-01-23 00:30:54.352306: step: 672/531, loss: 0.11570100486278534 2023-01-23 00:30:55.463770: step: 676/531, loss: 0.2233683466911316 2023-01-23 00:30:56.622749: step: 680/531, loss: 0.07184639573097229 2023-01-23 00:30:57.742238: step: 684/531, loss: 0.06851339340209961 2023-01-23 00:30:58.860830: step: 688/531, loss: 0.05257387459278107 2023-01-23 00:31:00.005598: step: 692/531, loss: 0.14796286821365356 2023-01-23 00:31:01.157717: step: 696/531, loss: 0.12064874172210693 2023-01-23 00:31:02.311731: step: 700/531, loss: 0.3232024312019348 2023-01-23 00:31:03.444103: step: 704/531, loss: 0.08355827629566193 2023-01-23 00:31:04.555207: step: 708/531, loss: 0.11572246253490448 2023-01-23 00:31:05.656402: step: 712/531, loss: 0.07285787165164948 2023-01-23 00:31:06.776802: step: 716/531, loss: 4.3935956954956055 2023-01-23 00:31:07.891347: step: 720/531, loss: 0.07533235847949982 2023-01-23 00:31:09.024368: step: 724/531, loss: 0.28046339750289917 2023-01-23 00:31:10.156434: step: 728/531, loss: 0.09323997795581818 2023-01-23 00:31:11.250389: step: 732/531, loss: 0.2850017845630646 2023-01-23 00:31:12.398271: step: 736/531, loss: 0.12449341267347336 2023-01-23 00:31:13.526449: step: 740/531, loss: 0.10894966870546341 2023-01-23 00:31:14.639574: step: 744/531, loss: 0.29797396063804626 2023-01-23 00:31:15.777619: step: 748/531, loss: 0.08677225559949875 2023-01-23 00:31:16.896373: step: 752/531, loss: 0.03188638761639595 2023-01-23 00:31:18.028818: step: 756/531, loss: 0.234467551112175 2023-01-23 00:31:19.192521: step: 760/531, loss: 0.15124782919883728 2023-01-23 00:31:20.308318: step: 764/531, loss: 0.060906603932380676 2023-01-23 00:31:21.424972: step: 768/531, loss: 0.2954982817173004 2023-01-23 00:31:22.551481: step: 772/531, loss: 1.5478538274765015 2023-01-23 00:31:23.697418: step: 776/531, loss: 0.14429546892642975 2023-01-23 00:31:24.835140: step: 780/531, loss: 0.13128575682640076 2023-01-23 00:31:25.965045: step: 784/531, loss: 0.06908436119556427 2023-01-23 00:31:27.094199: step: 788/531, loss: 0.055697061121463776 2023-01-23 00:31:28.225920: step: 792/531, loss: 0.14579400420188904 2023-01-23 00:31:29.396612: step: 796/531, loss: 0.3477696478366852 2023-01-23 00:31:30.517527: step: 800/531, loss: 0.21159076690673828 2023-01-23 00:31:31.650078: step: 804/531, loss: 0.09578704833984375 2023-01-23 00:31:32.761710: step: 808/531, loss: 0.006145763210952282 2023-01-23 00:31:33.850328: step: 812/531, loss: 0.0369715690612793 2023-01-23 00:31:34.973173: step: 816/531, loss: 0.224507138133049 2023-01-23 00:31:36.096588: step: 820/531, loss: 0.03605923801660538 2023-01-23 00:31:37.217041: step: 824/531, loss: 0.10636615753173828 2023-01-23 00:31:38.392127: step: 828/531, loss: 0.09716625511646271 2023-01-23 00:31:39.495019: step: 832/531, loss: 0.28910160064697266 2023-01-23 00:31:40.641255: step: 836/531, loss: 0.1595040261745453 2023-01-23 00:31:41.749183: step: 840/531, loss: 0.17244987189769745 2023-01-23 00:31:42.896511: step: 844/531, loss: 0.15272106230258942 2023-01-23 00:31:44.047619: step: 848/531, loss: 0.13193626701831818 2023-01-23 00:31:45.167556: step: 852/531, loss: 0.04718504101037979 2023-01-23 00:31:46.285295: step: 856/531, loss: 0.46101006865501404 2023-01-23 00:31:47.430211: step: 860/531, loss: 0.08861985057592392 2023-01-23 00:31:48.550099: step: 864/531, loss: 0.12921848893165588 2023-01-23 00:31:49.661320: step: 868/531, loss: 0.09247064590454102 2023-01-23 00:31:50.784701: step: 872/531, loss: 0.16542649269104004 2023-01-23 00:31:51.899705: step: 876/531, loss: 0.019962072372436523 2023-01-23 00:31:53.037343: step: 880/531, loss: 0.42561396956443787 2023-01-23 00:31:54.192686: step: 884/531, loss: 0.14377442002296448 2023-01-23 00:31:55.314708: step: 888/531, loss: 0.15072783827781677 2023-01-23 00:31:56.410388: step: 892/531, loss: 0.02132854424417019 2023-01-23 00:31:57.553329: step: 896/531, loss: 0.2323920577764511 2023-01-23 00:31:58.739219: step: 900/531, loss: 0.11164345592260361 2023-01-23 00:31:59.869429: step: 904/531, loss: 0.025049973279237747 2023-01-23 00:32:01.012648: step: 908/531, loss: 0.3214794099330902 2023-01-23 00:32:02.157722: step: 912/531, loss: 0.19006967544555664 2023-01-23 00:32:03.322909: step: 916/531, loss: 0.16841477155685425 2023-01-23 00:32:04.460570: step: 920/531, loss: 0.250888466835022 2023-01-23 00:32:05.590403: step: 924/531, loss: 0.12557382881641388 2023-01-23 00:32:06.706471: step: 928/531, loss: 0.35221004486083984 2023-01-23 00:32:07.803833: step: 932/531, loss: 0.2559264600276947 2023-01-23 00:32:08.948034: step: 936/531, loss: 0.06557230651378632 2023-01-23 00:32:10.075363: step: 940/531, loss: 0.1823142021894455 2023-01-23 00:32:11.217549: step: 944/531, loss: 0.07201795279979706 2023-01-23 00:32:12.372687: step: 948/531, loss: 0.10969076305627823 2023-01-23 00:32:13.479015: step: 952/531, loss: 0.08450870215892792 2023-01-23 00:32:14.585413: step: 956/531, loss: 0.14039382338523865 2023-01-23 00:32:15.703314: step: 960/531, loss: 0.31438225507736206 2023-01-23 00:32:16.844228: step: 964/531, loss: 0.052788637578487396 2023-01-23 00:32:17.988540: step: 968/531, loss: 0.2875572443008423 2023-01-23 00:32:19.108908: step: 972/531, loss: 0.29628047347068787 2023-01-23 00:32:20.228738: step: 976/531, loss: 0.20326100289821625 2023-01-23 00:32:21.367599: step: 980/531, loss: 0.056059934198856354 2023-01-23 00:32:22.514755: step: 984/531, loss: 0.3125864565372467 2023-01-23 00:32:23.635334: step: 988/531, loss: 0.12609940767288208 2023-01-23 00:32:24.738545: step: 992/531, loss: 0.11285920441150665 2023-01-23 00:32:25.865105: step: 996/531, loss: 0.2896064817905426 2023-01-23 00:32:26.993730: step: 1000/531, loss: 0.06711012870073318 2023-01-23 00:32:28.106823: step: 1004/531, loss: 0.09853621572256088 2023-01-23 00:32:29.254801: step: 1008/531, loss: 0.02158946916460991 2023-01-23 00:32:30.355631: step: 1012/531, loss: 0.11627483367919922 2023-01-23 00:32:31.495019: step: 1016/531, loss: 0.10537615418434143 2023-01-23 00:32:32.609517: step: 1020/531, loss: 0.14854831993579865 2023-01-23 00:32:33.748794: step: 1024/531, loss: 0.015804387629032135 2023-01-23 00:32:34.861696: step: 1028/531, loss: 0.015231514349579811 2023-01-23 00:32:35.964374: step: 1032/531, loss: 0.2993220388889313 2023-01-23 00:32:37.084129: step: 1036/531, loss: 0.09406204521656036 2023-01-23 00:32:38.242063: step: 1040/531, loss: 0.11330404877662659 2023-01-23 00:32:39.388130: step: 1044/531, loss: 0.1613851934671402 2023-01-23 00:32:40.512799: step: 1048/531, loss: 0.9376170039176941 2023-01-23 00:32:41.625538: step: 1052/531, loss: 0.11808130890130997 2023-01-23 00:32:42.741697: step: 1056/531, loss: 0.14249935746192932 2023-01-23 00:32:43.884955: step: 1060/531, loss: 0.11317737400531769 2023-01-23 00:32:44.970773: step: 1064/531, loss: 0.3029274046421051 2023-01-23 00:32:46.101055: step: 1068/531, loss: 0.6898200511932373 2023-01-23 00:32:47.234680: step: 1072/531, loss: 0.30431056022644043 2023-01-23 00:32:48.406974: step: 1076/531, loss: 0.4340066611766815 2023-01-23 00:32:49.558321: step: 1080/531, loss: 0.21096879243850708 2023-01-23 00:32:50.677888: step: 1084/531, loss: 0.15492820739746094 2023-01-23 00:32:51.804009: step: 1088/531, loss: 0.004761124029755592 2023-01-23 00:32:52.931945: step: 1092/531, loss: 0.32111504673957825 2023-01-23 00:32:54.052459: step: 1096/531, loss: 0.07185220718383789 2023-01-23 00:32:55.157890: step: 1100/531, loss: 0.10532474517822266 2023-01-23 00:32:56.266074: step: 1104/531, loss: 0.08373747020959854 2023-01-23 00:32:57.382842: step: 1108/531, loss: 0.08203456550836563 2023-01-23 00:32:58.496651: step: 1112/531, loss: 0.2745981216430664 2023-01-23 00:32:59.629496: step: 1116/531, loss: 0.3468250632286072 2023-01-23 00:33:00.751766: step: 1120/531, loss: 0.4775484800338745 2023-01-23 00:33:01.882063: step: 1124/531, loss: 0.06384305655956268 2023-01-23 00:33:03.023589: step: 1128/531, loss: 0.5880611538887024 2023-01-23 00:33:04.176532: step: 1132/531, loss: 0.2526895999908447 2023-01-23 00:33:05.292751: step: 1136/531, loss: 0.05344879627227783 2023-01-23 00:33:06.427646: step: 1140/531, loss: 0.4926597476005554 2023-01-23 00:33:07.548766: step: 1144/531, loss: 0.4103788137435913 2023-01-23 00:33:08.682326: step: 1148/531, loss: 0.19746990501880646 2023-01-23 00:33:09.806189: step: 1152/531, loss: 0.12178003787994385 2023-01-23 00:33:10.926955: step: 1156/531, loss: 0.01918773539364338 2023-01-23 00:33:12.037318: step: 1160/531, loss: 0.12922295928001404 2023-01-23 00:33:13.174183: step: 1164/531, loss: 0.19759425520896912 2023-01-23 00:33:14.342524: step: 1168/531, loss: 0.5998255610466003 2023-01-23 00:33:15.483856: step: 1172/531, loss: 0.15536099672317505 2023-01-23 00:33:16.585069: step: 1176/531, loss: 0.19426585733890533 2023-01-23 00:33:17.678166: step: 1180/531, loss: 0.5076694488525391 2023-01-23 00:33:18.802104: step: 1184/531, loss: 0.21503980457782745 2023-01-23 00:33:19.955621: step: 1188/531, loss: 0.32345110177993774 2023-01-23 00:33:21.069454: step: 1192/531, loss: 0.11697684228420258 2023-01-23 00:33:22.203986: step: 1196/531, loss: 0.030659865587949753 2023-01-23 00:33:23.344227: step: 1200/531, loss: 0.045653585344552994 2023-01-23 00:33:24.474755: step: 1204/531, loss: 0.44869834184646606 2023-01-23 00:33:25.589210: step: 1208/531, loss: 0.16037292778491974 2023-01-23 00:33:26.717315: step: 1212/531, loss: 0.28761324286460876 2023-01-23 00:33:27.848626: step: 1216/531, loss: 0.23707181215286255 2023-01-23 00:33:28.992723: step: 1220/531, loss: 0.2691915035247803 2023-01-23 00:33:30.127145: step: 1224/531, loss: 0.29456740617752075 2023-01-23 00:33:31.261812: step: 1228/531, loss: 0.0876397117972374 2023-01-23 00:33:32.364859: step: 1232/531, loss: 0.04653625562787056 2023-01-23 00:33:33.486525: step: 1236/531, loss: 0.14375333487987518 2023-01-23 00:33:34.640129: step: 1240/531, loss: 0.14601507782936096 2023-01-23 00:33:35.735603: step: 1244/531, loss: 0.15758351981639862 2023-01-23 00:33:36.852143: step: 1248/531, loss: 0.04013080522418022 2023-01-23 00:33:37.963638: step: 1252/531, loss: 0.27631139755249023 2023-01-23 00:33:39.052292: step: 1256/531, loss: 0.17404857277870178 2023-01-23 00:33:40.222264: step: 1260/531, loss: 0.1943032294511795 2023-01-23 00:33:41.341987: step: 1264/531, loss: 0.08693666756153107 2023-01-23 00:33:42.462220: step: 1268/531, loss: 0.13766680657863617 2023-01-23 00:33:43.592977: step: 1272/531, loss: 0.15110287070274353 2023-01-23 00:33:44.722264: step: 1276/531, loss: 0.13016252219676971 2023-01-23 00:33:45.845615: step: 1280/531, loss: 0.14342035353183746 2023-01-23 00:33:46.971399: step: 1284/531, loss: 0.3188708424568176 2023-01-23 00:33:48.090116: step: 1288/531, loss: 0.051337338984012604 2023-01-23 00:33:49.216108: step: 1292/531, loss: 0.05208597332239151 2023-01-23 00:33:50.312809: step: 1296/531, loss: 0.11120089888572693 2023-01-23 00:33:51.440179: step: 1300/531, loss: 0.06132388487458229 2023-01-23 00:33:52.548770: step: 1304/531, loss: 0.07340598106384277 2023-01-23 00:33:53.692956: step: 1308/531, loss: 0.19583159685134888 2023-01-23 00:33:54.812715: step: 1312/531, loss: 0.1412883698940277 2023-01-23 00:33:55.919312: step: 1316/531, loss: 0.9439733028411865 2023-01-23 00:33:57.059256: step: 1320/531, loss: 0.12769585847854614 2023-01-23 00:33:58.177388: step: 1324/531, loss: 0.8803343772888184 2023-01-23 00:33:59.291397: step: 1328/531, loss: 0.05707111209630966 2023-01-23 00:34:00.401132: step: 1332/531, loss: 0.09998345375061035 2023-01-23 00:34:01.535620: step: 1336/531, loss: 0.2742424011230469 2023-01-23 00:34:02.646798: step: 1340/531, loss: 0.05552246421575546 2023-01-23 00:34:03.763017: step: 1344/531, loss: 0.009540557861328125 2023-01-23 00:34:04.864270: step: 1348/531, loss: 0.2689172625541687 2023-01-23 00:34:05.970929: step: 1352/531, loss: 0.7166247367858887 2023-01-23 00:34:07.097263: step: 1356/531, loss: 0.9955601692199707 2023-01-23 00:34:08.207981: step: 1360/531, loss: 0.11669646203517914 2023-01-23 00:34:09.321543: step: 1364/531, loss: 0.06280384212732315 2023-01-23 00:34:10.439368: step: 1368/531, loss: 0.5324352383613586 2023-01-23 00:34:11.578293: step: 1372/531, loss: 0.11991424858570099 2023-01-23 00:34:12.709536: step: 1376/531, loss: 1.7557897567749023 2023-01-23 00:34:13.828238: step: 1380/531, loss: 0.022441577166318893 2023-01-23 00:34:14.948300: step: 1384/531, loss: 0.1754746437072754 2023-01-23 00:34:16.063860: step: 1388/531, loss: 0.12215381115674973 2023-01-23 00:34:17.196218: step: 1392/531, loss: 0.09779481589794159 2023-01-23 00:34:18.312058: step: 1396/531, loss: 4.513215065002441 2023-01-23 00:34:19.424473: step: 1400/531, loss: 0.11455898731946945 2023-01-23 00:34:20.550626: step: 1404/531, loss: 0.15640844404697418 2023-01-23 00:34:21.708963: step: 1408/531, loss: 0.45267730951309204 2023-01-23 00:34:22.847723: step: 1412/531, loss: 0.34884214401245117 2023-01-23 00:34:23.970021: step: 1416/531, loss: 0.1205865889787674 2023-01-23 00:34:25.083052: step: 1420/531, loss: 0.0617460235953331 2023-01-23 00:34:26.191538: step: 1424/531, loss: 0.49670809507369995 2023-01-23 00:34:27.291717: step: 1428/531, loss: 0.06849632412195206 2023-01-23 00:34:28.420307: step: 1432/531, loss: 0.05942802503705025 2023-01-23 00:34:29.563842: step: 1436/531, loss: 0.07851801067590714 2023-01-23 00:34:30.652938: step: 1440/531, loss: 0.1911487579345703 2023-01-23 00:34:31.765460: step: 1444/531, loss: 0.1471824198961258 2023-01-23 00:34:32.886926: step: 1448/531, loss: 0.30402088165283203 2023-01-23 00:34:34.008110: step: 1452/531, loss: 0.25854140520095825 2023-01-23 00:34:35.140808: step: 1456/531, loss: 0.04099883884191513 2023-01-23 00:34:36.261409: step: 1460/531, loss: 0.17804959416389465 2023-01-23 00:34:37.380119: step: 1464/531, loss: 0.14124146103858948 2023-01-23 00:34:38.503007: step: 1468/531, loss: 0.8882207274436951 2023-01-23 00:34:39.643888: step: 1472/531, loss: 0.2734433710575104 2023-01-23 00:34:40.770636: step: 1476/531, loss: 0.13528390228748322 2023-01-23 00:34:41.891930: step: 1480/531, loss: 0.10020236670970917 2023-01-23 00:34:43.032390: step: 1484/531, loss: 0.1824178844690323 2023-01-23 00:34:44.201411: step: 1488/531, loss: 0.3533852994441986 2023-01-23 00:34:45.307846: step: 1492/531, loss: 0.3690573573112488 2023-01-23 00:34:46.438859: step: 1496/531, loss: 0.12082615494728088 2023-01-23 00:34:47.581029: step: 1500/531, loss: 0.12484579533338547 2023-01-23 00:34:48.711201: step: 1504/531, loss: 0.30373719334602356 2023-01-23 00:34:49.833796: step: 1508/531, loss: 0.035973407328128815 2023-01-23 00:34:50.961216: step: 1512/531, loss: 0.03906526416540146 2023-01-23 00:34:52.099712: step: 1516/531, loss: 0.07915530353784561 2023-01-23 00:34:53.228410: step: 1520/531, loss: 0.005004453472793102 2023-01-23 00:34:54.340412: step: 1524/531, loss: 0.10526444762945175 2023-01-23 00:34:55.468544: step: 1528/531, loss: 0.08911371231079102 2023-01-23 00:34:56.625893: step: 1532/531, loss: 0.1121458038687706 2023-01-23 00:34:57.750127: step: 1536/531, loss: 0.19884338974952698 2023-01-23 00:34:58.859789: step: 1540/531, loss: 0.174523264169693 2023-01-23 00:34:59.982657: step: 1544/531, loss: 0.11093469709157944 2023-01-23 00:35:01.105051: step: 1548/531, loss: 0.26886826753616333 2023-01-23 00:35:02.241657: step: 1552/531, loss: 0.08956921100616455 2023-01-23 00:35:03.351366: step: 1556/531, loss: 0.15122194588184357 2023-01-23 00:35:04.485999: step: 1560/531, loss: 0.2928595542907715 2023-01-23 00:35:05.624798: step: 1564/531, loss: 0.18391390144824982 2023-01-23 00:35:06.759872: step: 1568/531, loss: 0.4468950927257538 2023-01-23 00:35:07.889397: step: 1572/531, loss: 0.10531225800514221 2023-01-23 00:35:09.002781: step: 1576/531, loss: 0.10413676500320435 2023-01-23 00:35:10.129553: step: 1580/531, loss: 0.1386483758687973 2023-01-23 00:35:11.245639: step: 1584/531, loss: 0.2914878726005554 2023-01-23 00:35:12.393381: step: 1588/531, loss: 0.29573917388916016 2023-01-23 00:35:13.505062: step: 1592/531, loss: 0.33285731077194214 2023-01-23 00:35:14.624067: step: 1596/531, loss: 0.49965807795524597 2023-01-23 00:35:15.736495: step: 1600/531, loss: 0.07334871590137482 2023-01-23 00:35:16.864498: step: 1604/531, loss: 0.13643741607666016 2023-01-23 00:35:17.988780: step: 1608/531, loss: 0.03537602350115776 2023-01-23 00:35:19.132518: step: 1612/531, loss: 0.30862337350845337 2023-01-23 00:35:20.264895: step: 1616/531, loss: 0.0699569582939148 2023-01-23 00:35:21.438735: step: 1620/531, loss: 0.08563575893640518 2023-01-23 00:35:22.568507: step: 1624/531, loss: 0.09937821328639984 2023-01-23 00:35:23.678438: step: 1628/531, loss: 1.1821191310882568 2023-01-23 00:35:24.799586: step: 1632/531, loss: 0.07876958698034286 2023-01-23 00:35:25.938999: step: 1636/531, loss: 0.34555259346961975 2023-01-23 00:35:27.068940: step: 1640/531, loss: 0.1000639945268631 2023-01-23 00:35:28.186802: step: 1644/531, loss: 0.23995858430862427 2023-01-23 00:35:29.345152: step: 1648/531, loss: 0.15299482643604279 2023-01-23 00:35:30.449659: step: 1652/531, loss: 0.48042032122612 2023-01-23 00:35:31.577753: step: 1656/531, loss: 0.13149146735668182 2023-01-23 00:35:32.721322: step: 1660/531, loss: 0.16275987029075623 2023-01-23 00:35:33.845419: step: 1664/531, loss: 0.6057541966438293 2023-01-23 00:35:34.992393: step: 1668/531, loss: 0.09899254143238068 2023-01-23 00:35:36.144317: step: 1672/531, loss: 0.3080213963985443 2023-01-23 00:35:37.267347: step: 1676/531, loss: 0.08646812289953232 2023-01-23 00:35:38.372593: step: 1680/531, loss: 0.47820383310317993 2023-01-23 00:35:39.488656: step: 1684/531, loss: 0.07515960186719894 2023-01-23 00:35:40.604772: step: 1688/531, loss: 0.14665627479553223 2023-01-23 00:35:41.740964: step: 1692/531, loss: 0.1035272628068924 2023-01-23 00:35:42.878256: step: 1696/531, loss: 0.06674177944660187 2023-01-23 00:35:44.008440: step: 1700/531, loss: 0.23921889066696167 2023-01-23 00:35:45.132047: step: 1704/531, loss: 0.1536807119846344 2023-01-23 00:35:46.265156: step: 1708/531, loss: 0.19392962753772736 2023-01-23 00:35:47.399458: step: 1712/531, loss: 0.07393474876880646 2023-01-23 00:35:48.519103: step: 1716/531, loss: 0.04834197834134102 2023-01-23 00:35:49.658855: step: 1720/531, loss: 0.1221797987818718 2023-01-23 00:35:50.801804: step: 1724/531, loss: 0.27693310379981995 2023-01-23 00:35:51.942892: step: 1728/531, loss: 0.07360410690307617 2023-01-23 00:35:53.061274: step: 1732/531, loss: 0.015581751242280006 2023-01-23 00:35:54.174624: step: 1736/531, loss: 0.18243809044361115 2023-01-23 00:35:55.317893: step: 1740/531, loss: 0.10712333023548126 2023-01-23 00:35:56.440192: step: 1744/531, loss: 0.10772786289453506 2023-01-23 00:35:57.538178: step: 1748/531, loss: 0.09582405537366867 2023-01-23 00:35:58.647837: step: 1752/531, loss: 0.03783788904547691 2023-01-23 00:35:59.756137: step: 1756/531, loss: 0.04986685886979103 2023-01-23 00:36:00.862577: step: 1760/531, loss: 0.14363345503807068 2023-01-23 00:36:01.983533: step: 1764/531, loss: 0.09788118302822113 2023-01-23 00:36:03.115424: step: 1768/531, loss: 0.07006435096263885 2023-01-23 00:36:04.246176: step: 1772/531, loss: 0.053702641278505325 2023-01-23 00:36:05.381783: step: 1776/531, loss: 0.21209116280078888 2023-01-23 00:36:06.518595: step: 1780/531, loss: 0.14365005493164062 2023-01-23 00:36:07.661923: step: 1784/531, loss: 0.11875534057617188 2023-01-23 00:36:08.816981: step: 1788/531, loss: 0.4750362038612366 2023-01-23 00:36:09.940479: step: 1792/531, loss: 0.20035740733146667 2023-01-23 00:36:11.081334: step: 1796/531, loss: 0.0603879950940609 2023-01-23 00:36:12.219385: step: 1800/531, loss: 0.04758801311254501 2023-01-23 00:36:13.371177: step: 1804/531, loss: 0.8994182348251343 2023-01-23 00:36:14.502708: step: 1808/531, loss: 0.3572255074977875 2023-01-23 00:36:15.652998: step: 1812/531, loss: 0.3359192907810211 2023-01-23 00:36:16.798859: step: 1816/531, loss: 0.14851704239845276 2023-01-23 00:36:17.915494: step: 1820/531, loss: 0.0450710766017437 2023-01-23 00:36:19.086174: step: 1824/531, loss: 0.16118574142456055 2023-01-23 00:36:20.210742: step: 1828/531, loss: 0.30910825729370117 2023-01-23 00:36:21.357644: step: 1832/531, loss: 0.05252084881067276 2023-01-23 00:36:22.477346: step: 1836/531, loss: 0.23340129852294922 2023-01-23 00:36:23.622690: step: 1840/531, loss: 0.13735082745552063 2023-01-23 00:36:24.745670: step: 1844/531, loss: 0.532707691192627 2023-01-23 00:36:25.874764: step: 1848/531, loss: 0.04306596517562866 2023-01-23 00:36:27.021417: step: 1852/531, loss: 0.16566598415374756 2023-01-23 00:36:28.172512: step: 1856/531, loss: 0.060712434351444244 2023-01-23 00:36:29.291629: step: 1860/531, loss: 0.4060153067111969 2023-01-23 00:36:30.420504: step: 1864/531, loss: 0.039585210382938385 2023-01-23 00:36:31.537676: step: 1868/531, loss: 0.39462631940841675 2023-01-23 00:36:32.657961: step: 1872/531, loss: 0.15696831047534943 2023-01-23 00:36:33.783064: step: 1876/531, loss: 0.8997449278831482 2023-01-23 00:36:34.906250: step: 1880/531, loss: 0.20953330397605896 2023-01-23 00:36:36.035591: step: 1884/531, loss: 0.0996655523777008 2023-01-23 00:36:37.136593: step: 1888/531, loss: 0.47123146057128906 2023-01-23 00:36:38.260502: step: 1892/531, loss: 0.13718290627002716 2023-01-23 00:36:39.397843: step: 1896/531, loss: 0.19029779732227325 2023-01-23 00:36:40.529332: step: 1900/531, loss: 0.19887980818748474 2023-01-23 00:36:41.626063: step: 1904/531, loss: 0.024227716028690338 2023-01-23 00:36:42.733779: step: 1908/531, loss: 0.13054236769676208 2023-01-23 00:36:43.862535: step: 1912/531, loss: 0.08709659427404404 2023-01-23 00:36:45.038489: step: 1916/531, loss: 0.05153237283229828 2023-01-23 00:36:46.124465: step: 1920/531, loss: 0.040041614323854446 2023-01-23 00:36:47.246365: step: 1924/531, loss: 0.08991570770740509 2023-01-23 00:36:48.404896: step: 1928/531, loss: 0.7397158145904541 2023-01-23 00:36:49.568984: step: 1932/531, loss: 0.03327503055334091 2023-01-23 00:36:50.726326: step: 1936/531, loss: 0.06251826137304306 2023-01-23 00:36:51.861607: step: 1940/531, loss: 0.29227766394615173 2023-01-23 00:36:52.986734: step: 1944/531, loss: 0.24235452711582184 2023-01-23 00:36:54.089940: step: 1948/531, loss: 0.3568245470523834 2023-01-23 00:36:55.212721: step: 1952/531, loss: 0.10946788638830185 2023-01-23 00:36:56.319349: step: 1956/531, loss: 0.17433635890483856 2023-01-23 00:36:57.415155: step: 1960/531, loss: 0.02267162874341011 2023-01-23 00:36:58.557559: step: 1964/531, loss: 0.03350190818309784 2023-01-23 00:36:59.691158: step: 1968/531, loss: 0.06581374257802963 2023-01-23 00:37:00.826525: step: 1972/531, loss: 0.48753586411476135 2023-01-23 00:37:01.962395: step: 1976/531, loss: 0.41141003370285034 2023-01-23 00:37:03.084168: step: 1980/531, loss: 0.14639610052108765 2023-01-23 00:37:04.209209: step: 1984/531, loss: 0.6341049075126648 2023-01-23 00:37:05.336198: step: 1988/531, loss: 0.327711284160614 2023-01-23 00:37:06.453348: step: 1992/531, loss: 0.016730882227420807 2023-01-23 00:37:07.585489: step: 1996/531, loss: 0.07582951337099075 2023-01-23 00:37:08.717648: step: 2000/531, loss: 0.24256345629692078 2023-01-23 00:37:09.857632: step: 2004/531, loss: 0.680566668510437 2023-01-23 00:37:10.958258: step: 2008/531, loss: 0.18523502349853516 2023-01-23 00:37:12.100588: step: 2012/531, loss: 0.1843395233154297 2023-01-23 00:37:13.222893: step: 2016/531, loss: 0.39234840869903564 2023-01-23 00:37:14.360764: step: 2020/531, loss: 0.06970968097448349 2023-01-23 00:37:15.464479: step: 2024/531, loss: 0.45255327224731445 2023-01-23 00:37:16.577928: step: 2028/531, loss: 0.04113595560193062 2023-01-23 00:37:17.701197: step: 2032/531, loss: 0.0591922290623188 2023-01-23 00:37:18.830540: step: 2036/531, loss: 0.4618026614189148 2023-01-23 00:37:19.959763: step: 2040/531, loss: 0.14358139038085938 2023-01-23 00:37:21.070807: step: 2044/531, loss: 0.06845646351575851 2023-01-23 00:37:22.179079: step: 2048/531, loss: 0.13484154641628265 2023-01-23 00:37:23.323454: step: 2052/531, loss: 0.074066162109375 2023-01-23 00:37:24.487093: step: 2056/531, loss: 0.11598539352416992 2023-01-23 00:37:25.616176: step: 2060/531, loss: 0.2781764566898346 2023-01-23 00:37:26.746579: step: 2064/531, loss: 0.13594137132167816 2023-01-23 00:37:27.872912: step: 2068/531, loss: 0.034331224858760834 2023-01-23 00:37:29.004303: step: 2072/531, loss: 0.22666719555854797 2023-01-23 00:37:30.162384: step: 2076/531, loss: 0.1427541822195053 2023-01-23 00:37:31.260503: step: 2080/531, loss: 0.05288839340209961 2023-01-23 00:37:32.375494: step: 2084/531, loss: 0.18077774345874786 2023-01-23 00:37:33.494325: step: 2088/531, loss: 0.0589456707239151 2023-01-23 00:37:34.593872: step: 2092/531, loss: 0.19929802417755127 2023-01-23 00:37:35.722594: step: 2096/531, loss: 0.06978168338537216 2023-01-23 00:37:36.806516: step: 2100/531, loss: 0.0208948515355587 2023-01-23 00:37:37.920998: step: 2104/531, loss: 0.22816124558448792 2023-01-23 00:37:39.052754: step: 2108/531, loss: 0.6041961312294006 2023-01-23 00:37:40.213283: step: 2112/531, loss: 0.277596116065979 2023-01-23 00:37:41.366261: step: 2116/531, loss: 0.20652687549591064 2023-01-23 00:37:42.525251: step: 2120/531, loss: 0.7925464510917664 2023-01-23 00:37:43.649768: step: 2124/531, loss: 0.09546632319688797 ================================================== Loss: 0.220 -------------------- Dev: {'event': {'p': 0.6341463414634146, 'r': 0.7270306258322237, 'f1': 0.6774193548387097}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Test: {'event': {'p': 0.6328794755421079, 'r': 0.7483601669648181, 'f1': 0.6857923497267759}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Chinese: {'event': {'p': 0.5897435897435898, 'r': 0.8518518518518519, 'f1': 0.6969696969696971}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Korean: {'event': {'p': 0.717948717948718, 'r': 0.4444444444444444, 'f1': 0.5490196078431373}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Russian: {'event': {'p': 0.5588235294117647, 'r': 0.5277777777777778, 'f1': 0.5428571428571428}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} New best russian model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.5440900562851783, 'r': 0.7723035952063915, 'f1': 0.6384149697303247}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Korean: {'event': {'p': 0.5672823218997362, 'r': 0.7692307692307693, 'f1': 0.6529992406985574}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Korean: {'event': {'p': 0.6557377049180327, 'r': 0.6349206349206349, 'f1': 0.6451612903225806}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Russian: {'event': {'p': 0.6341463414634146, 'r': 0.7270306258322237, 'f1': 0.6774193548387097}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Eng Test for Russian: {'event': {'p': 0.6328794755421079, 'r': 0.7483601669648181, 'f1': 0.6857923497267759}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'event': {'p': 0.5588235294117647, 'r': 0.5277777777777778, 'f1': 0.5428571428571428}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} ****************************** Epoch: 7 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:38:29.370669: step: 4/531, loss: 0.07043810188770294 2023-01-23 00:38:30.476432: step: 8/531, loss: 0.10553699731826782 2023-01-23 00:38:31.605227: step: 12/531, loss: 0.09284356236457825 2023-01-23 00:38:32.788866: step: 16/531, loss: 0.07346749305725098 2023-01-23 00:38:33.903711: step: 20/531, loss: 0.0006258010980673134 2023-01-23 00:38:35.030929: step: 24/531, loss: 0.09264421463012695 2023-01-23 00:38:36.158413: step: 28/531, loss: 0.13885116577148438 2023-01-23 00:38:37.302328: step: 32/531, loss: 0.1623314619064331 2023-01-23 00:38:38.466012: step: 36/531, loss: 0.06133217737078667 2023-01-23 00:38:39.580574: step: 40/531, loss: 0.025447798892855644 2023-01-23 00:38:40.692988: step: 44/531, loss: 0.11186742782592773 2023-01-23 00:38:41.802680: step: 48/531, loss: 0.02981090545654297 2023-01-23 00:38:42.925279: step: 52/531, loss: 0.029117824509739876 2023-01-23 00:38:44.068142: step: 56/531, loss: 0.4157348871231079 2023-01-23 00:38:45.219181: step: 60/531, loss: 0.7605549097061157 2023-01-23 00:38:46.334467: step: 64/531, loss: 0.05197324603796005 2023-01-23 00:38:47.445833: step: 68/531, loss: 0.13566988706588745 2023-01-23 00:38:48.583413: step: 72/531, loss: 0.25406399369239807 2023-01-23 00:38:49.675416: step: 76/531, loss: 0.10325708240270615 2023-01-23 00:38:50.773115: step: 80/531, loss: 0.1093946024775505 2023-01-23 00:38:51.898311: step: 84/531, loss: 0.21392956376075745 2023-01-23 00:38:52.999706: step: 88/531, loss: 0.07840041816234589 2023-01-23 00:38:54.124486: step: 92/531, loss: 0.16843782365322113 2023-01-23 00:38:55.232972: step: 96/531, loss: 0.05662889778614044 2023-01-23 00:38:56.329593: step: 100/531, loss: 0.0841674879193306 2023-01-23 00:38:57.484458: step: 104/531, loss: 0.12745200097560883 2023-01-23 00:38:58.599948: step: 108/531, loss: 0.18303155899047852 2023-01-23 00:38:59.716695: step: 112/531, loss: 0.18164515495300293 2023-01-23 00:39:00.864963: step: 116/531, loss: 0.19314762949943542 2023-01-23 00:39:01.977617: step: 120/531, loss: 0.004279470071196556 2023-01-23 00:39:03.158897: step: 124/531, loss: 0.13550348579883575 2023-01-23 00:39:04.278611: step: 128/531, loss: 0.9660297632217407 2023-01-23 00:39:05.411867: step: 132/531, loss: 0.2043839544057846 2023-01-23 00:39:06.532697: step: 136/531, loss: 0.10098914802074432 2023-01-23 00:39:07.694895: step: 140/531, loss: 0.11982069164514542 2023-01-23 00:39:08.868883: step: 144/531, loss: 0.02806832268834114 2023-01-23 00:39:09.986960: step: 148/531, loss: 0.11593222618103027 2023-01-23 00:39:11.079758: step: 152/531, loss: 0.07075939327478409 2023-01-23 00:39:12.236949: step: 156/531, loss: 0.13400134444236755 2023-01-23 00:39:13.385751: step: 160/531, loss: 0.06865911185741425 2023-01-23 00:39:14.512306: step: 164/531, loss: 0.02464580535888672 2023-01-23 00:39:15.644753: step: 168/531, loss: 0.205671027302742 2023-01-23 00:39:16.768672: step: 172/531, loss: 0.03251934051513672 2023-01-23 00:39:17.885830: step: 176/531, loss: 0.07147588580846786 2023-01-23 00:39:19.012226: step: 180/531, loss: 0.06803826242685318 2023-01-23 00:39:20.128987: step: 184/531, loss: 0.13635559380054474 2023-01-23 00:39:21.263342: step: 188/531, loss: 0.03813920170068741 2023-01-23 00:39:22.369693: step: 192/531, loss: 0.19532173871994019 2023-01-23 00:39:23.493736: step: 196/531, loss: 0.036008551716804504 2023-01-23 00:39:24.617696: step: 200/531, loss: 0.028285982087254524 2023-01-23 00:39:25.714884: step: 204/531, loss: 0.1155426949262619 2023-01-23 00:39:26.828513: step: 208/531, loss: 0.43132781982421875 2023-01-23 00:39:27.937339: step: 212/531, loss: 0.216070756316185 2023-01-23 00:39:29.068054: step: 216/531, loss: 0.03134775161743164 2023-01-23 00:39:30.205937: step: 220/531, loss: 0.201005756855011 2023-01-23 00:39:31.333582: step: 224/531, loss: 0.1663801074028015 2023-01-23 00:39:32.446164: step: 228/531, loss: 0.20485468208789825 2023-01-23 00:39:33.587053: step: 232/531, loss: 0.05807044729590416 2023-01-23 00:39:34.659658: step: 236/531, loss: 0.03855285793542862 2023-01-23 00:39:35.758202: step: 240/531, loss: 0.3140796720981598 2023-01-23 00:39:36.871490: step: 244/531, loss: 0.07671451568603516 2023-01-23 00:39:37.985562: step: 248/531, loss: 0.13200493156909943 2023-01-23 00:39:39.097036: step: 252/531, loss: 0.14742641150951385 2023-01-23 00:39:40.221860: step: 256/531, loss: 0.6432132720947266 2023-01-23 00:39:41.342973: step: 260/531, loss: 0.3542617857456207 2023-01-23 00:39:42.495287: step: 264/531, loss: 0.052869513630867004 2023-01-23 00:39:43.611168: step: 268/531, loss: 0.19288387894630432 2023-01-23 00:39:44.720768: step: 272/531, loss: 0.06808986514806747 2023-01-23 00:39:45.920980: step: 276/531, loss: 0.18539771437644958 2023-01-23 00:39:47.047102: step: 280/531, loss: 0.09230299293994904 2023-01-23 00:39:48.175424: step: 284/531, loss: 0.25081387162208557 2023-01-23 00:39:49.307333: step: 288/531, loss: 0.034148551523685455 2023-01-23 00:39:50.431974: step: 292/531, loss: 0.31364166736602783 2023-01-23 00:39:51.543248: step: 296/531, loss: 0.06507282704114914 2023-01-23 00:39:52.661699: step: 300/531, loss: 0.09602861106395721 2023-01-23 00:39:53.767767: step: 304/531, loss: 0.03367586433887482 2023-01-23 00:39:54.891097: step: 308/531, loss: 0.057691290974617004 2023-01-23 00:39:55.997812: step: 312/531, loss: 0.12167918682098389 2023-01-23 00:39:57.103449: step: 316/531, loss: 0.07826504856348038 2023-01-23 00:39:58.232190: step: 320/531, loss: 0.07062100619077682 2023-01-23 00:39:59.341101: step: 324/531, loss: 0.0921703353524208 2023-01-23 00:40:00.475994: step: 328/531, loss: 0.15968933701515198 2023-01-23 00:40:01.606542: step: 332/531, loss: 0.06463050842285156 2023-01-23 00:40:02.729519: step: 336/531, loss: 0.035058051347732544 2023-01-23 00:40:03.836983: step: 340/531, loss: 0.06314316391944885 2023-01-23 00:40:04.943378: step: 344/531, loss: 0.07324714958667755 2023-01-23 00:40:06.090245: step: 348/531, loss: 0.3959523141384125 2023-01-23 00:40:07.241696: step: 352/531, loss: 0.11822903156280518 2023-01-23 00:40:08.363690: step: 356/531, loss: 0.062113240361213684 2023-01-23 00:40:09.480172: step: 360/531, loss: 0.12607058882713318 2023-01-23 00:40:10.605634: step: 364/531, loss: 0.1403324156999588 2023-01-23 00:40:11.737271: step: 368/531, loss: 0.007817650213837624 2023-01-23 00:40:12.913346: step: 372/531, loss: 0.33558589220046997 2023-01-23 00:40:14.041963: step: 376/531, loss: 0.0747016966342926 2023-01-23 00:40:15.154006: step: 380/531, loss: 0.12668228149414062 2023-01-23 00:40:16.285413: step: 384/531, loss: 0.19364416599273682 2023-01-23 00:40:17.370358: step: 388/531, loss: 0.02216210402548313 2023-01-23 00:40:18.517001: step: 392/531, loss: 0.019629955291748047 2023-01-23 00:40:19.638224: step: 396/531, loss: 0.03158894553780556 2023-01-23 00:40:20.756495: step: 400/531, loss: 0.06251172721385956 2023-01-23 00:40:21.861512: step: 404/531, loss: 0.1830941140651703 2023-01-23 00:40:22.979839: step: 408/531, loss: 0.18001490831375122 2023-01-23 00:40:24.102950: step: 412/531, loss: 0.09499998390674591 2023-01-23 00:40:25.225834: step: 416/531, loss: 0.26958349347114563 2023-01-23 00:40:26.321084: step: 420/531, loss: 0.01233301218599081 2023-01-23 00:40:27.423605: step: 424/531, loss: 0.15814343094825745 2023-01-23 00:40:28.544364: step: 428/531, loss: 0.1123630553483963 2023-01-23 00:40:29.653609: step: 432/531, loss: 0.10992403328418732 2023-01-23 00:40:30.755775: step: 436/531, loss: 0.12458506226539612 2023-01-23 00:40:31.874923: step: 440/531, loss: 0.12730321288108826 2023-01-23 00:40:33.019267: step: 444/531, loss: 0.05852989852428436 2023-01-23 00:40:34.135757: step: 448/531, loss: 0.0977010726928711 2023-01-23 00:40:35.233842: step: 452/531, loss: 0.0946040228009224 2023-01-23 00:40:36.347318: step: 456/531, loss: 0.059867192059755325 2023-01-23 00:40:37.463590: step: 460/531, loss: 0.03374843671917915 2023-01-23 00:40:38.586511: step: 464/531, loss: 0.14702625572681427 2023-01-23 00:40:39.689426: step: 468/531, loss: 0.3507455885410309 2023-01-23 00:40:40.808522: step: 472/531, loss: 0.5204029083251953 2023-01-23 00:40:41.947399: step: 476/531, loss: 0.13334789872169495 2023-01-23 00:40:43.058795: step: 480/531, loss: 0.04629345238208771 2023-01-23 00:40:44.205116: step: 484/531, loss: 0.008711290545761585 2023-01-23 00:40:45.313480: step: 488/531, loss: 0.1234511211514473 2023-01-23 00:40:46.426462: step: 492/531, loss: 0.046222761273384094 2023-01-23 00:40:47.573881: step: 496/531, loss: 0.5264490842819214 2023-01-23 00:40:48.697415: step: 500/531, loss: 0.08505754172801971 2023-01-23 00:40:49.797076: step: 504/531, loss: 0.12540502846240997 2023-01-23 00:40:50.890928: step: 508/531, loss: 0.04753284528851509 2023-01-23 00:40:52.021350: step: 512/531, loss: 0.1117459237575531 2023-01-23 00:40:53.167961: step: 516/531, loss: 0.14420872926712036 2023-01-23 00:40:54.305998: step: 520/531, loss: 0.06652151048183441 2023-01-23 00:40:55.398501: step: 524/531, loss: 0.009200763888657093 2023-01-23 00:40:56.527330: step: 528/531, loss: 0.02844679355621338 2023-01-23 00:40:57.665021: step: 532/531, loss: 0.08461914956569672 2023-01-23 00:40:58.810650: step: 536/531, loss: 0.04467616230249405 2023-01-23 00:40:59.936045: step: 540/531, loss: 0.05932598188519478 2023-01-23 00:41:01.048879: step: 544/531, loss: 0.17919273674488068 2023-01-23 00:41:02.183020: step: 548/531, loss: 0.12711143493652344 2023-01-23 00:41:03.334564: step: 552/531, loss: 0.0872282087802887 2023-01-23 00:41:04.471530: step: 556/531, loss: 0.3747473955154419 2023-01-23 00:41:05.576561: step: 560/531, loss: 0.23570489883422852 2023-01-23 00:41:06.691511: step: 564/531, loss: 0.1573580652475357 2023-01-23 00:41:07.808613: step: 568/531, loss: 0.01681988313794136 2023-01-23 00:41:08.936756: step: 572/531, loss: 0.14629077911376953 2023-01-23 00:41:10.078556: step: 576/531, loss: 0.12140703201293945 2023-01-23 00:41:11.210798: step: 580/531, loss: 0.22671937942504883 2023-01-23 00:41:12.318987: step: 584/531, loss: 0.21995753049850464 2023-01-23 00:41:13.461371: step: 588/531, loss: 0.07481873035430908 2023-01-23 00:41:14.606072: step: 592/531, loss: 0.040781259536743164 2023-01-23 00:41:15.711075: step: 596/531, loss: 0.13340292870998383 2023-01-23 00:41:16.820281: step: 600/531, loss: 0.05432162433862686 2023-01-23 00:41:17.966365: step: 604/531, loss: 0.08480659127235413 2023-01-23 00:41:19.093837: step: 608/531, loss: 0.14956489205360413 2023-01-23 00:41:20.212388: step: 612/531, loss: 0.1737562119960785 2023-01-23 00:41:21.320236: step: 616/531, loss: 0.04875154793262482 2023-01-23 00:41:22.471990: step: 620/531, loss: 0.2536260485649109 2023-01-23 00:41:23.581919: step: 624/531, loss: 0.15681058168411255 2023-01-23 00:41:24.712422: step: 628/531, loss: 0.09617605805397034 2023-01-23 00:41:25.822022: step: 632/531, loss: 0.16250906884670258 2023-01-23 00:41:26.951359: step: 636/531, loss: 0.04505458101630211 2023-01-23 00:41:28.025699: step: 640/531, loss: 0.14508800208568573 2023-01-23 00:41:29.153827: step: 644/531, loss: 0.1883261352777481 2023-01-23 00:41:30.305138: step: 648/531, loss: 0.027052687481045723 2023-01-23 00:41:31.433251: step: 652/531, loss: 0.1003628522157669 2023-01-23 00:41:32.560120: step: 656/531, loss: 0.057857707142829895 2023-01-23 00:41:33.695185: step: 660/531, loss: 0.10964766144752502 2023-01-23 00:41:34.809889: step: 664/531, loss: 0.20194792747497559 2023-01-23 00:41:35.914141: step: 668/531, loss: 0.25048828125 2023-01-23 00:41:37.023909: step: 672/531, loss: 0.12961697578430176 2023-01-23 00:41:38.172230: step: 676/531, loss: 0.27555981278419495 2023-01-23 00:41:39.317472: step: 680/531, loss: 0.2224808633327484 2023-01-23 00:41:40.432525: step: 684/531, loss: 0.15243959426879883 2023-01-23 00:41:41.551638: step: 688/531, loss: 0.07391514629125595 2023-01-23 00:41:42.671537: step: 692/531, loss: 0.2605249285697937 2023-01-23 00:41:43.812287: step: 696/531, loss: 0.3467138409614563 2023-01-23 00:41:44.937693: step: 700/531, loss: 0.07069382816553116 2023-01-23 00:41:46.068900: step: 704/531, loss: 0.9537258744239807 2023-01-23 00:41:47.193978: step: 708/531, loss: 0.3350968360900879 2023-01-23 00:41:48.309083: step: 712/531, loss: 0.14998823404312134 2023-01-23 00:41:49.425462: step: 716/531, loss: 0.16751570999622345 2023-01-23 00:41:50.556206: step: 720/531, loss: 0.03789644315838814 2023-01-23 00:41:51.681174: step: 724/531, loss: 0.018326472491025925 2023-01-23 00:41:52.781761: step: 728/531, loss: 0.11100774258375168 2023-01-23 00:41:53.888449: step: 732/531, loss: 0.036242008209228516 2023-01-23 00:41:55.029291: step: 736/531, loss: 0.3225898742675781 2023-01-23 00:41:56.152802: step: 740/531, loss: 0.1815796047449112 2023-01-23 00:41:57.300258: step: 744/531, loss: 0.09762974083423615 2023-01-23 00:41:58.427463: step: 748/531, loss: 0.023768378421664238 2023-01-23 00:41:59.556296: step: 752/531, loss: 0.4569108784198761 2023-01-23 00:42:00.673043: step: 756/531, loss: 0.08272562175989151 2023-01-23 00:42:01.803034: step: 760/531, loss: 0.10275106877088547 2023-01-23 00:42:02.924228: step: 764/531, loss: 0.140064999461174 2023-01-23 00:42:04.024277: step: 768/531, loss: 0.10238762199878693 2023-01-23 00:42:05.153047: step: 772/531, loss: 0.07606640458106995 2023-01-23 00:42:06.279170: step: 776/531, loss: 0.22227692604064941 2023-01-23 00:42:07.408003: step: 780/531, loss: 0.48534050583839417 2023-01-23 00:42:08.560775: step: 784/531, loss: 0.03770866617560387 2023-01-23 00:42:09.692066: step: 788/531, loss: 0.13308972120285034 2023-01-23 00:42:10.832583: step: 792/531, loss: 0.06787033379077911 2023-01-23 00:42:11.960379: step: 796/531, loss: 0.020800592377781868 2023-01-23 00:42:13.056941: step: 800/531, loss: 0.013608455657958984 2023-01-23 00:42:14.176289: step: 804/531, loss: 0.04169588163495064 2023-01-23 00:42:15.319923: step: 808/531, loss: 0.10209417343139648 2023-01-23 00:42:16.450339: step: 812/531, loss: 0.10331220924854279 2023-01-23 00:42:17.561151: step: 816/531, loss: 0.18031969666481018 2023-01-23 00:42:18.666277: step: 820/531, loss: 0.2590846121311188 2023-01-23 00:42:19.769719: step: 824/531, loss: 0.1501200795173645 2023-01-23 00:42:20.891485: step: 828/531, loss: 0.23756837844848633 2023-01-23 00:42:22.031761: step: 832/531, loss: 0.04336748272180557 2023-01-23 00:42:23.157340: step: 836/531, loss: 0.0832592025399208 2023-01-23 00:42:24.294647: step: 840/531, loss: 0.11387291550636292 2023-01-23 00:42:25.399164: step: 844/531, loss: 0.09098873287439346 2023-01-23 00:42:26.535835: step: 848/531, loss: 0.12483464181423187 2023-01-23 00:42:27.680591: step: 852/531, loss: 0.1567266285419464 2023-01-23 00:42:28.799685: step: 856/531, loss: 0.07843542098999023 2023-01-23 00:42:29.917469: step: 860/531, loss: 0.045104026794433594 2023-01-23 00:42:31.086002: step: 864/531, loss: 0.3480880558490753 2023-01-23 00:42:32.204070: step: 868/531, loss: 0.05490522459149361 2023-01-23 00:42:33.330274: step: 872/531, loss: 0.024423886090517044 2023-01-23 00:42:34.457969: step: 876/531, loss: 0.0413423553109169 2023-01-23 00:42:35.602541: step: 880/531, loss: 3.462635040283203 2023-01-23 00:42:36.767029: step: 884/531, loss: 0.06214094161987305 2023-01-23 00:42:37.868416: step: 888/531, loss: 0.019264603033661842 2023-01-23 00:42:38.987874: step: 892/531, loss: 0.13383379578590393 2023-01-23 00:42:40.133518: step: 896/531, loss: 1.3504865169525146 2023-01-23 00:42:41.239037: step: 900/531, loss: 0.2580377459526062 2023-01-23 00:42:42.364148: step: 904/531, loss: 0.007694053463637829 2023-01-23 00:42:43.498926: step: 908/531, loss: 0.09733685851097107 2023-01-23 00:42:44.631650: step: 912/531, loss: 0.1629856824874878 2023-01-23 00:42:45.771322: step: 916/531, loss: 0.059734441339969635 2023-01-23 00:42:46.889694: step: 920/531, loss: 0.2142011672258377 2023-01-23 00:42:48.004543: step: 924/531, loss: 0.17765305936336517 2023-01-23 00:42:49.176814: step: 928/531, loss: 0.08439116179943085 2023-01-23 00:42:50.323796: step: 932/531, loss: 0.2529619634151459 2023-01-23 00:42:51.435623: step: 936/531, loss: 0.02510538138449192 2023-01-23 00:42:52.570008: step: 940/531, loss: 0.0638599842786789 2023-01-23 00:42:53.684795: step: 944/531, loss: 0.04697451740503311 2023-01-23 00:42:54.783353: step: 948/531, loss: 0.041563842445611954 2023-01-23 00:42:55.883111: step: 952/531, loss: 0.13343146443367004 2023-01-23 00:42:56.994451: step: 956/531, loss: 0.09967027604579926 2023-01-23 00:42:58.091422: step: 960/531, loss: 0.07713194191455841 2023-01-23 00:42:59.205410: step: 964/531, loss: 0.05081605911254883 2023-01-23 00:43:00.350057: step: 968/531, loss: 0.16429057717323303 2023-01-23 00:43:01.495335: step: 972/531, loss: 0.04790535196661949 2023-01-23 00:43:02.598812: step: 976/531, loss: 0.06761045753955841 2023-01-23 00:43:03.713797: step: 980/531, loss: 0.1260969191789627 2023-01-23 00:43:04.831181: step: 984/531, loss: 0.05767088010907173 2023-01-23 00:43:05.950254: step: 988/531, loss: 0.2321719229221344 2023-01-23 00:43:07.045139: step: 992/531, loss: 0.01792454719543457 2023-01-23 00:43:08.176013: step: 996/531, loss: 0.03353476524353027 2023-01-23 00:43:09.313280: step: 1000/531, loss: 0.08557009696960449 2023-01-23 00:43:10.426377: step: 1004/531, loss: 0.11002512276172638 2023-01-23 00:43:11.534889: step: 1008/531, loss: 0.04827170819044113 2023-01-23 00:43:12.628077: step: 1012/531, loss: 0.13274335861206055 2023-01-23 00:43:13.754526: step: 1016/531, loss: 0.30336493253707886 2023-01-23 00:43:14.903039: step: 1020/531, loss: 0.10287819802761078 2023-01-23 00:43:16.040316: step: 1024/531, loss: 0.1338208168745041 2023-01-23 00:43:17.154841: step: 1028/531, loss: 0.23707932233810425 2023-01-23 00:43:18.263053: step: 1032/531, loss: 0.022287465631961823 2023-01-23 00:43:19.392624: step: 1036/531, loss: 0.10019197314977646 2023-01-23 00:43:20.512885: step: 1040/531, loss: 0.09221439063549042 2023-01-23 00:43:21.663088: step: 1044/531, loss: 0.06489257514476776 2023-01-23 00:43:22.786509: step: 1048/531, loss: 0.214176744222641 2023-01-23 00:43:23.932191: step: 1052/531, loss: 0.07543335109949112 2023-01-23 00:43:25.048463: step: 1056/531, loss: 0.14391061663627625 2023-01-23 00:43:26.198283: step: 1060/531, loss: 0.1280902922153473 2023-01-23 00:43:27.290087: step: 1064/531, loss: 0.04282693564891815 2023-01-23 00:43:28.411034: step: 1068/531, loss: 0.05827150493860245 2023-01-23 00:43:29.555988: step: 1072/531, loss: 0.14233139157295227 2023-01-23 00:43:30.680269: step: 1076/531, loss: 0.07143135368824005 2023-01-23 00:43:31.791746: step: 1080/531, loss: 0.038634397089481354 2023-01-23 00:43:32.930932: step: 1084/531, loss: 0.033639095723629 2023-01-23 00:43:34.036521: step: 1088/531, loss: 0.12870611250400543 2023-01-23 00:43:35.163464: step: 1092/531, loss: 0.13585814833641052 2023-01-23 00:43:36.275037: step: 1096/531, loss: 0.1311245709657669 2023-01-23 00:43:37.408893: step: 1100/531, loss: 0.09645405411720276 2023-01-23 00:43:38.532225: step: 1104/531, loss: 0.3717021346092224 2023-01-23 00:43:39.666777: step: 1108/531, loss: 0.3307401239871979 2023-01-23 00:43:40.762826: step: 1112/531, loss: 0.033965207636356354 2023-01-23 00:43:41.883923: step: 1116/531, loss: 0.21775293350219727 2023-01-23 00:43:43.021680: step: 1120/531, loss: 0.0483977310359478 2023-01-23 00:43:44.148480: step: 1124/531, loss: 0.21006564795970917 2023-01-23 00:43:45.261709: step: 1128/531, loss: 0.13897915184497833 2023-01-23 00:43:46.411741: step: 1132/531, loss: 0.02990732342004776 2023-01-23 00:43:47.531799: step: 1136/531, loss: 0.07611493766307831 2023-01-23 00:43:48.629679: step: 1140/531, loss: 0.16593007743358612 2023-01-23 00:43:49.754223: step: 1144/531, loss: 0.1310376077890396 2023-01-23 00:43:50.862033: step: 1148/531, loss: 0.051835253834724426 2023-01-23 00:43:51.963387: step: 1152/531, loss: 0.07659588009119034 2023-01-23 00:43:53.082764: step: 1156/531, loss: 0.1934429109096527 2023-01-23 00:43:54.212930: step: 1160/531, loss: 0.05769224092364311 2023-01-23 00:43:55.318808: step: 1164/531, loss: 0.10543718934059143 2023-01-23 00:43:56.493641: step: 1168/531, loss: 0.09350114315748215 2023-01-23 00:43:57.598202: step: 1172/531, loss: 0.10899510979652405 2023-01-23 00:43:58.720156: step: 1176/531, loss: 0.04249449074268341 2023-01-23 00:43:59.889955: step: 1180/531, loss: 0.3995322287082672 2023-01-23 00:44:01.054223: step: 1184/531, loss: 0.07186432182788849 2023-01-23 00:44:02.184136: step: 1188/531, loss: 0.0017005919944494963 2023-01-23 00:44:03.355797: step: 1192/531, loss: 0.041445113718509674 2023-01-23 00:44:04.459547: step: 1196/531, loss: 0.12923212349414825 2023-01-23 00:44:05.576673: step: 1200/531, loss: 0.09676117449998856 2023-01-23 00:44:06.682533: step: 1204/531, loss: 0.04722614586353302 2023-01-23 00:44:07.805913: step: 1208/531, loss: 0.05771093815565109 2023-01-23 00:44:08.930815: step: 1212/531, loss: 0.0929323211312294 2023-01-23 00:44:10.088366: step: 1216/531, loss: 0.08821926265954971 2023-01-23 00:44:11.178937: step: 1220/531, loss: 0.19201692938804626 2023-01-23 00:44:12.285816: step: 1224/531, loss: 0.028134608641266823 2023-01-23 00:44:13.433559: step: 1228/531, loss: 0.00859603937715292 2023-01-23 00:44:14.595050: step: 1232/531, loss: 0.043401144444942474 2023-01-23 00:44:15.699846: step: 1236/531, loss: 0.08370713889598846 2023-01-23 00:44:16.841675: step: 1240/531, loss: 0.11315050721168518 2023-01-23 00:44:17.980123: step: 1244/531, loss: 0.04142189025878906 2023-01-23 00:44:19.107628: step: 1248/531, loss: 0.1623091697692871 2023-01-23 00:44:20.258371: step: 1252/531, loss: 0.05393552780151367 2023-01-23 00:44:21.387966: step: 1256/531, loss: 0.010057163424789906 2023-01-23 00:44:22.509118: step: 1260/531, loss: 0.13477346301078796 2023-01-23 00:44:23.639296: step: 1264/531, loss: 0.051331259310245514 2023-01-23 00:44:24.727872: step: 1268/531, loss: 0.0423489585518837 2023-01-23 00:44:25.860571: step: 1272/531, loss: 0.07789783179759979 2023-01-23 00:44:26.986089: step: 1276/531, loss: 0.7017719745635986 2023-01-23 00:44:28.099435: step: 1280/531, loss: 0.09573662281036377 2023-01-23 00:44:29.241855: step: 1284/531, loss: 0.13838711380958557 2023-01-23 00:44:30.368384: step: 1288/531, loss: 0.784483790397644 2023-01-23 00:44:31.497572: step: 1292/531, loss: 0.04157862812280655 2023-01-23 00:44:32.591141: step: 1296/531, loss: 0.571773886680603 2023-01-23 00:44:33.746034: step: 1300/531, loss: 0.01401672326028347 2023-01-23 00:44:34.886675: step: 1304/531, loss: 0.10879745334386826 2023-01-23 00:44:36.025906: step: 1308/531, loss: 0.20757122337818146 2023-01-23 00:44:37.160338: step: 1312/531, loss: 0.47576338052749634 2023-01-23 00:44:38.309431: step: 1316/531, loss: 0.23827214539051056 2023-01-23 00:44:39.445669: step: 1320/531, loss: 0.14400728046894073 2023-01-23 00:44:40.566479: step: 1324/531, loss: 0.24711456894874573 2023-01-23 00:44:41.694197: step: 1328/531, loss: 0.0715951919555664 2023-01-23 00:44:42.815944: step: 1332/531, loss: 0.14545460045337677 2023-01-23 00:44:43.962616: step: 1336/531, loss: 0.09485816955566406 2023-01-23 00:44:45.071361: step: 1340/531, loss: 0.25709590315818787 2023-01-23 00:44:46.203140: step: 1344/531, loss: 0.060201358050107956 2023-01-23 00:44:47.317126: step: 1348/531, loss: 0.10800275951623917 2023-01-23 00:44:48.449482: step: 1352/531, loss: 0.03048262745141983 2023-01-23 00:44:49.613245: step: 1356/531, loss: 0.7740105390548706 2023-01-23 00:44:50.746244: step: 1360/531, loss: 0.8764164447784424 2023-01-23 00:44:51.900302: step: 1364/531, loss: 0.12279796600341797 2023-01-23 00:44:53.028193: step: 1368/531, loss: 0.08460044860839844 2023-01-23 00:44:54.151610: step: 1372/531, loss: 0.057626061141490936 2023-01-23 00:44:55.279649: step: 1376/531, loss: 0.02584528923034668 2023-01-23 00:44:56.403218: step: 1380/531, loss: 0.6731345057487488 2023-01-23 00:44:57.513838: step: 1384/531, loss: 0.1666877269744873 2023-01-23 00:44:58.653177: step: 1388/531, loss: 0.037947557866573334 2023-01-23 00:44:59.761527: step: 1392/531, loss: 0.03781953081488609 2023-01-23 00:45:00.890168: step: 1396/531, loss: 0.49211588501930237 2023-01-23 00:45:02.009260: step: 1400/531, loss: 0.2098522186279297 2023-01-23 00:45:03.151764: step: 1404/531, loss: 0.3870559632778168 2023-01-23 00:45:04.260721: step: 1408/531, loss: 0.08882780373096466 2023-01-23 00:45:05.393968: step: 1412/531, loss: 0.27566489577293396 2023-01-23 00:45:06.512444: step: 1416/531, loss: 0.28264617919921875 2023-01-23 00:45:07.621753: step: 1420/531, loss: 0.08777904510498047 2023-01-23 00:45:08.738560: step: 1424/531, loss: 0.03323392942547798 2023-01-23 00:45:09.872880: step: 1428/531, loss: 0.1422245055437088 2023-01-23 00:45:10.982432: step: 1432/531, loss: 0.06089983135461807 2023-01-23 00:45:12.136988: step: 1436/531, loss: 0.0912291556596756 2023-01-23 00:45:13.270875: step: 1440/531, loss: 0.21680957078933716 2023-01-23 00:45:14.361846: step: 1444/531, loss: 0.025788212195038795 2023-01-23 00:45:15.473503: step: 1448/531, loss: 0.0727960541844368 2023-01-23 00:45:16.568161: step: 1452/531, loss: 0.17060253024101257 2023-01-23 00:45:17.682363: step: 1456/531, loss: 0.1232885867357254 2023-01-23 00:45:18.814901: step: 1460/531, loss: 0.08619175851345062 2023-01-23 00:45:19.988344: step: 1464/531, loss: 0.269447922706604 2023-01-23 00:45:21.128335: step: 1468/531, loss: 0.11458693444728851 2023-01-23 00:45:22.235072: step: 1472/531, loss: 1.254642128944397 2023-01-23 00:45:23.362397: step: 1476/531, loss: 0.10386581718921661 2023-01-23 00:45:24.499056: step: 1480/531, loss: 0.17072373628616333 2023-01-23 00:45:25.641869: step: 1484/531, loss: 0.04292626678943634 2023-01-23 00:45:26.766489: step: 1488/531, loss: 0.403641939163208 2023-01-23 00:45:27.863515: step: 1492/531, loss: 0.050987910479307175 2023-01-23 00:45:29.017758: step: 1496/531, loss: 0.11971607804298401 2023-01-23 00:45:30.135582: step: 1500/531, loss: 0.04373779520392418 2023-01-23 00:45:31.293601: step: 1504/531, loss: 0.22015738487243652 2023-01-23 00:45:32.409265: step: 1508/531, loss: 0.041093263775110245 2023-01-23 00:45:33.561525: step: 1512/531, loss: 0.22739893198013306 2023-01-23 00:45:34.678254: step: 1516/531, loss: 0.023385334759950638 2023-01-23 00:45:35.762946: step: 1520/531, loss: 0.5267195701599121 2023-01-23 00:45:36.892714: step: 1524/531, loss: 0.13907480239868164 2023-01-23 00:45:38.034539: step: 1528/531, loss: 0.12416734546422958 2023-01-23 00:45:39.182328: step: 1532/531, loss: 0.3645566999912262 2023-01-23 00:45:40.319989: step: 1536/531, loss: 0.046117447316646576 2023-01-23 00:45:41.457146: step: 1540/531, loss: 0.08457255363464355 2023-01-23 00:45:42.571500: step: 1544/531, loss: 0.05190284177660942 2023-01-23 00:45:43.721526: step: 1548/531, loss: 0.1138916015625 2023-01-23 00:45:44.856097: step: 1552/531, loss: 0.5177223086357117 2023-01-23 00:45:45.994341: step: 1556/531, loss: 0.5734477639198303 2023-01-23 00:45:47.111694: step: 1560/531, loss: 0.10317230224609375 2023-01-23 00:45:48.237090: step: 1564/531, loss: 0.045778464525938034 2023-01-23 00:45:49.418964: step: 1568/531, loss: 0.049491312354803085 2023-01-23 00:45:50.546131: step: 1572/531, loss: 0.08923673629760742 2023-01-23 00:45:51.673755: step: 1576/531, loss: 0.27718499302864075 2023-01-23 00:45:52.797876: step: 1580/531, loss: 0.16943521797657013 2023-01-23 00:45:53.915059: step: 1584/531, loss: 0.04468240961432457 2023-01-23 00:45:55.052948: step: 1588/531, loss: 0.16840925812721252 2023-01-23 00:45:56.151199: step: 1592/531, loss: 0.0574522502720356 2023-01-23 00:45:57.249945: step: 1596/531, loss: 0.0275300033390522 2023-01-23 00:45:58.388077: step: 1600/531, loss: 0.08415809273719788 2023-01-23 00:45:59.497603: step: 1604/531, loss: 0.3440810739994049 2023-01-23 00:46:00.623357: step: 1608/531, loss: 0.22018957138061523 2023-01-23 00:46:01.754714: step: 1612/531, loss: 0.03315267711877823 2023-01-23 00:46:02.895040: step: 1616/531, loss: 0.1362323760986328 2023-01-23 00:46:04.027625: step: 1620/531, loss: 0.1901501566171646 2023-01-23 00:46:05.188699: step: 1624/531, loss: 0.11003279685974121 2023-01-23 00:46:06.327515: step: 1628/531, loss: 0.06049681082367897 2023-01-23 00:46:07.441826: step: 1632/531, loss: 0.1325400322675705 2023-01-23 00:46:08.581081: step: 1636/531, loss: 0.0879817008972168 2023-01-23 00:46:09.721925: step: 1640/531, loss: 0.04208064079284668 2023-01-23 00:46:10.824271: step: 1644/531, loss: 0.050332210958004 2023-01-23 00:46:11.974134: step: 1648/531, loss: 0.14123983681201935 2023-01-23 00:46:13.100736: step: 1652/531, loss: 0.117760568857193 2023-01-23 00:46:14.241331: step: 1656/531, loss: 0.11904564499855042 2023-01-23 00:46:15.396337: step: 1660/531, loss: 0.02638225629925728 2023-01-23 00:46:16.480570: step: 1664/531, loss: 0.01613588258624077 2023-01-23 00:46:17.592135: step: 1668/531, loss: 0.24884557723999023 2023-01-23 00:46:18.709651: step: 1672/531, loss: 0.06303200870752335 2023-01-23 00:46:19.872939: step: 1676/531, loss: 0.12995930016040802 2023-01-23 00:46:21.004364: step: 1680/531, loss: 0.31771478056907654 2023-01-23 00:46:22.145786: step: 1684/531, loss: 0.1062169075012207 2023-01-23 00:46:23.256178: step: 1688/531, loss: 0.1441192626953125 2023-01-23 00:46:24.362721: step: 1692/531, loss: 0.08201823383569717 2023-01-23 00:46:25.453840: step: 1696/531, loss: 0.2671300768852234 2023-01-23 00:46:26.574060: step: 1700/531, loss: 0.08638972789049149 2023-01-23 00:46:27.704498: step: 1704/531, loss: 0.1728196144104004 2023-01-23 00:46:28.796154: step: 1708/531, loss: 0.029881764203310013 2023-01-23 00:46:29.901334: step: 1712/531, loss: 0.10439501702785492 2023-01-23 00:46:31.015480: step: 1716/531, loss: 0.01975731924176216 2023-01-23 00:46:32.141159: step: 1720/531, loss: 0.048876188695430756 2023-01-23 00:46:33.269398: step: 1724/531, loss: 0.21802587807178497 2023-01-23 00:46:34.440515: step: 1728/531, loss: 0.5060909390449524 2023-01-23 00:46:35.559094: step: 1732/531, loss: 0.6130267977714539 2023-01-23 00:46:36.668657: step: 1736/531, loss: 0.1057361587882042 2023-01-23 00:46:37.761042: step: 1740/531, loss: 0.06724705547094345 2023-01-23 00:46:38.869863: step: 1744/531, loss: 0.08268866688013077 2023-01-23 00:46:39.999900: step: 1748/531, loss: 0.09658577293157578 2023-01-23 00:46:41.134009: step: 1752/531, loss: 0.267011821269989 2023-01-23 00:46:42.259121: step: 1756/531, loss: 0.3914510905742645 2023-01-23 00:46:43.398161: step: 1760/531, loss: 0.09398870915174484 2023-01-23 00:46:44.519724: step: 1764/531, loss: 0.2301032543182373 2023-01-23 00:46:45.612881: step: 1768/531, loss: 0.05576739460229874 2023-01-23 00:46:46.739506: step: 1772/531, loss: 0.11827392876148224 2023-01-23 00:46:47.862518: step: 1776/531, loss: 0.13004884123802185 2023-01-23 00:46:48.973073: step: 1780/531, loss: 0.13228054344654083 2023-01-23 00:46:50.122796: step: 1784/531, loss: 0.15703706443309784 2023-01-23 00:46:51.225173: step: 1788/531, loss: 0.1578660011291504 2023-01-23 00:46:52.331091: step: 1792/531, loss: 0.056783199310302734 2023-01-23 00:46:53.463384: step: 1796/531, loss: 0.06593399494886398 2023-01-23 00:46:54.611391: step: 1800/531, loss: 1.5454472303390503 2023-01-23 00:46:55.742723: step: 1804/531, loss: 0.04158506542444229 2023-01-23 00:46:56.842741: step: 1808/531, loss: 1.0753564834594727 2023-01-23 00:46:57.959430: step: 1812/531, loss: 0.087615966796875 2023-01-23 00:46:59.084167: step: 1816/531, loss: 0.023408317938447 2023-01-23 00:47:00.189805: step: 1820/531, loss: 0.082577183842659 2023-01-23 00:47:01.292599: step: 1824/531, loss: 0.05507383495569229 2023-01-23 00:47:02.432854: step: 1828/531, loss: 0.4281843304634094 2023-01-23 00:47:03.545462: step: 1832/531, loss: 0.07420730590820312 2023-01-23 00:47:04.699769: step: 1836/531, loss: 1.752892255783081 2023-01-23 00:47:05.803650: step: 1840/531, loss: 0.06989984214305878 2023-01-23 00:47:06.896363: step: 1844/531, loss: 0.2567642629146576 2023-01-23 00:47:08.034253: step: 1848/531, loss: 0.057174019515514374 2023-01-23 00:47:09.184156: step: 1852/531, loss: 0.1755599081516266 2023-01-23 00:47:10.315792: step: 1856/531, loss: 0.1232757568359375 2023-01-23 00:47:11.461284: step: 1860/531, loss: 0.12502965331077576 2023-01-23 00:47:12.599165: step: 1864/531, loss: 0.029199469834566116 2023-01-23 00:47:13.726896: step: 1868/531, loss: 0.08130288124084473 2023-01-23 00:47:14.846841: step: 1872/531, loss: 0.1965988278388977 2023-01-23 00:47:15.971313: step: 1876/531, loss: 1.381486177444458 2023-01-23 00:47:17.126434: step: 1880/531, loss: 0.15052194893360138 2023-01-23 00:47:18.250419: step: 1884/531, loss: 0.2976197302341461 2023-01-23 00:47:19.368246: step: 1888/531, loss: 0.15820464491844177 2023-01-23 00:47:20.515796: step: 1892/531, loss: 0.07607245445251465 2023-01-23 00:47:21.616216: step: 1896/531, loss: 0.10969372093677521 2023-01-23 00:47:22.698384: step: 1900/531, loss: 0.2989429533481598 2023-01-23 00:47:23.827744: step: 1904/531, loss: 0.1606767773628235 2023-01-23 00:47:24.979386: step: 1908/531, loss: 0.06513051688671112 2023-01-23 00:47:26.090998: step: 1912/531, loss: 0.08248001337051392 2023-01-23 00:47:27.214961: step: 1916/531, loss: 0.29668235778808594 2023-01-23 00:47:28.344987: step: 1920/531, loss: 0.09794149547815323 2023-01-23 00:47:29.474581: step: 1924/531, loss: 0.12080007791519165 2023-01-23 00:47:30.615443: step: 1928/531, loss: 0.09691813588142395 2023-01-23 00:47:31.749807: step: 1932/531, loss: 0.4276502728462219 2023-01-23 00:47:32.874880: step: 1936/531, loss: 0.40770435333251953 2023-01-23 00:47:34.000166: step: 1940/531, loss: 0.06810712814331055 2023-01-23 00:47:35.095386: step: 1944/531, loss: 0.018558122217655182 2023-01-23 00:47:36.224988: step: 1948/531, loss: 0.2289590835571289 2023-01-23 00:47:37.367553: step: 1952/531, loss: 0.09229016304016113 2023-01-23 00:47:38.483518: step: 1956/531, loss: 0.04901190102100372 2023-01-23 00:47:39.613722: step: 1960/531, loss: 0.1415625512599945 2023-01-23 00:47:40.736831: step: 1964/531, loss: 0.12899285554885864 2023-01-23 00:47:41.879459: step: 1968/531, loss: 0.5322989821434021 2023-01-23 00:47:43.019277: step: 1972/531, loss: 0.30357494950294495 2023-01-23 00:47:44.160079: step: 1976/531, loss: 0.05293073505163193 2023-01-23 00:47:45.320988: step: 1980/531, loss: 0.7628833651542664 2023-01-23 00:47:46.449647: step: 1984/531, loss: 0.1101200133562088 2023-01-23 00:47:47.568578: step: 1988/531, loss: 1.9830515384674072 2023-01-23 00:47:48.697374: step: 1992/531, loss: 0.05994720384478569 2023-01-23 00:47:49.833995: step: 1996/531, loss: 0.12858600914478302 2023-01-23 00:47:50.948633: step: 2000/531, loss: 0.10530281811952591 2023-01-23 00:47:52.079211: step: 2004/531, loss: 0.2252178192138672 2023-01-23 00:47:53.228224: step: 2008/531, loss: 0.11051426082849503 2023-01-23 00:47:54.385599: step: 2012/531, loss: 0.09581919014453888 2023-01-23 00:47:55.489639: step: 2016/531, loss: 0.04288170486688614 2023-01-23 00:47:56.619753: step: 2020/531, loss: 0.18412618339061737 2023-01-23 00:47:57.739691: step: 2024/531, loss: 0.20220623910427094 2023-01-23 00:47:58.881835: step: 2028/531, loss: 0.12490224838256836 2023-01-23 00:48:00.007397: step: 2032/531, loss: 0.20448951423168182 2023-01-23 00:48:01.137113: step: 2036/531, loss: 0.13962887227535248 2023-01-23 00:48:02.306601: step: 2040/531, loss: 0.07309694588184357 2023-01-23 00:48:03.426198: step: 2044/531, loss: 0.08817487210035324 2023-01-23 00:48:04.565876: step: 2048/531, loss: 0.27600231766700745 2023-01-23 00:48:05.693800: step: 2052/531, loss: 0.12943366169929504 2023-01-23 00:48:06.817993: step: 2056/531, loss: 0.1355927437543869 2023-01-23 00:48:07.940872: step: 2060/531, loss: 0.058237623423337936 2023-01-23 00:48:09.067929: step: 2064/531, loss: 0.1104951724410057 2023-01-23 00:48:10.217526: step: 2068/531, loss: 0.10589724034070969 2023-01-23 00:48:11.333054: step: 2072/531, loss: 0.042347051203250885 2023-01-23 00:48:12.491997: step: 2076/531, loss: 0.1506047248840332 2023-01-23 00:48:13.610624: step: 2080/531, loss: 0.11841832101345062 2023-01-23 00:48:14.749692: step: 2084/531, loss: 0.11827349662780762 2023-01-23 00:48:15.892260: step: 2088/531, loss: 0.13589878380298615 2023-01-23 00:48:17.018470: step: 2092/531, loss: 0.02547468990087509 2023-01-23 00:48:18.169170: step: 2096/531, loss: 0.05758802965283394 2023-01-23 00:48:19.295078: step: 2100/531, loss: 0.07847519218921661 2023-01-23 00:48:20.411343: step: 2104/531, loss: 0.05289927124977112 2023-01-23 00:48:21.544185: step: 2108/531, loss: 0.10537058115005493 2023-01-23 00:48:22.661653: step: 2112/531, loss: 0.03232002258300781 2023-01-23 00:48:23.784986: step: 2116/531, loss: 0.11319885402917862 2023-01-23 00:48:24.877061: step: 2120/531, loss: 0.02126908302307129 2023-01-23 00:48:25.988664: step: 2124/531, loss: 0.05684996023774147 ================================================== Loss: 0.167 -------------------- Dev: {'event': {'p': 0.5606773283160865, 'r': 0.7936085219707057, 'f1': 0.6571113561190738}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Test: {'event': {'p': 0.6227316141356256, 'r': 0.7775790101371497, 'f1': 0.6915937417130734}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Chinese: {'event': {'p': 0.5, 'r': 0.8333333333333334, 'f1': 0.625}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Korean: {'event': {'p': 0.576271186440678, 'r': 0.5396825396825397, 'f1': 0.5573770491803278}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Russian: {'event': {'p': 0.48936170212765956, 'r': 0.6388888888888888, 'f1': 0.5542168674698795}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.5440900562851783, 'r': 0.7723035952063915, 'f1': 0.6384149697303247}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Korean: {'event': {'p': 0.5672823218997362, 'r': 0.7692307692307693, 'f1': 0.6529992406985574}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Korean: {'event': {'p': 0.6557377049180327, 'r': 0.6349206349206349, 'f1': 0.6451612903225806}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Russian: {'event': {'p': 0.6341463414634146, 'r': 0.7270306258322237, 'f1': 0.6774193548387097}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Eng Test for Russian: {'event': {'p': 0.6328794755421079, 'r': 0.7483601669648181, 'f1': 0.6857923497267759}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'event': {'p': 0.5588235294117647, 'r': 0.5277777777777778, 'f1': 0.5428571428571428}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} ****************************** Epoch: 8 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:49:06.032142: step: 4/531, loss: 0.24521350860595703 2023-01-23 00:49:07.174417: step: 8/531, loss: 0.0637439712882042 2023-01-23 00:49:08.323125: step: 12/531, loss: 0.061720848083496094 2023-01-23 00:49:09.435960: step: 16/531, loss: 0.06758013367652893 2023-01-23 00:49:10.585584: step: 20/531, loss: 0.034948062151670456 2023-01-23 00:49:11.699903: step: 24/531, loss: 0.23340940475463867 2023-01-23 00:49:12.832565: step: 28/531, loss: 0.7450456619262695 2023-01-23 00:49:13.973039: step: 32/531, loss: 0.042577553540468216 2023-01-23 00:49:15.109551: step: 36/531, loss: 0.43093380331993103 2023-01-23 00:49:16.242110: step: 40/531, loss: 0.09589161723852158 2023-01-23 00:49:17.339184: step: 44/531, loss: 0.040903665125370026 2023-01-23 00:49:18.453475: step: 48/531, loss: 0.03664741665124893 2023-01-23 00:49:19.628558: step: 52/531, loss: 0.10274062305688858 2023-01-23 00:49:20.750235: step: 56/531, loss: 0.050665855407714844 2023-01-23 00:49:21.871672: step: 60/531, loss: 0.0111229894682765 2023-01-23 00:49:22.971391: step: 64/531, loss: 0.017444707453250885 2023-01-23 00:49:24.078522: step: 68/531, loss: 0.05859022215008736 2023-01-23 00:49:25.216609: step: 72/531, loss: 0.35822564363479614 2023-01-23 00:49:26.354520: step: 76/531, loss: 0.04259061813354492 2023-01-23 00:49:27.463076: step: 80/531, loss: 0.18119025230407715 2023-01-23 00:49:28.590526: step: 84/531, loss: 0.2853566110134125 2023-01-23 00:49:29.689602: step: 88/531, loss: 0.036359503865242004 2023-01-23 00:49:30.821126: step: 92/531, loss: 0.17923030257225037 2023-01-23 00:49:31.937953: step: 96/531, loss: 0.06044473499059677 2023-01-23 00:49:33.052228: step: 100/531, loss: 0.14107543230056763 2023-01-23 00:49:34.169793: step: 104/531, loss: 0.06166019290685654 2023-01-23 00:49:35.286596: step: 108/531, loss: 0.05762805789709091 2023-01-23 00:49:36.424128: step: 112/531, loss: 0.04909314960241318 2023-01-23 00:49:37.546524: step: 116/531, loss: 0.06578508019447327 2023-01-23 00:49:38.697988: step: 120/531, loss: 0.11613292992115021 2023-01-23 00:49:39.813208: step: 124/531, loss: 0.10140958428382874 2023-01-23 00:49:40.947829: step: 128/531, loss: 0.23020735383033752 2023-01-23 00:49:42.056010: step: 132/531, loss: 0.01379852369427681 2023-01-23 00:49:43.153720: step: 136/531, loss: 0.071214959025383 2023-01-23 00:49:44.288371: step: 140/531, loss: 0.4613441526889801 2023-01-23 00:49:45.405264: step: 144/531, loss: 0.023187827318906784 2023-01-23 00:49:46.535311: step: 148/531, loss: 0.047972872853279114 2023-01-23 00:49:47.646073: step: 152/531, loss: 0.02971353381872177 2023-01-23 00:49:48.752377: step: 156/531, loss: 0.06931009143590927 2023-01-23 00:49:49.891500: step: 160/531, loss: 0.12905187904834747 2023-01-23 00:49:51.012158: step: 164/531, loss: 0.028540946543216705 2023-01-23 00:49:52.145163: step: 168/531, loss: 0.07333078980445862 2023-01-23 00:49:53.276570: step: 172/531, loss: 0.08187265694141388 2023-01-23 00:49:54.397516: step: 176/531, loss: 0.11157284677028656 2023-01-23 00:49:55.550065: step: 180/531, loss: 0.4296124279499054 2023-01-23 00:49:56.680071: step: 184/531, loss: 0.23175391554832458 2023-01-23 00:49:57.836318: step: 188/531, loss: 0.2413499802350998 2023-01-23 00:49:58.972235: step: 192/531, loss: 0.082981638610363 2023-01-23 00:50:00.128552: step: 196/531, loss: 0.024634171277284622 2023-01-23 00:50:01.239699: step: 200/531, loss: 0.06192101538181305 2023-01-23 00:50:02.379430: step: 204/531, loss: 0.07577943801879883 2023-01-23 00:50:03.491612: step: 208/531, loss: 0.18514639139175415 2023-01-23 00:50:04.592554: step: 212/531, loss: 0.059965040534734726 2023-01-23 00:50:05.719073: step: 216/531, loss: 0.0421847328543663 2023-01-23 00:50:06.853701: step: 220/531, loss: 0.08330269157886505 2023-01-23 00:50:07.965080: step: 224/531, loss: 1.1466501951217651 2023-01-23 00:50:09.088287: step: 228/531, loss: 0.41923987865448 2023-01-23 00:50:10.207556: step: 232/531, loss: 0.1362285614013672 2023-01-23 00:50:11.382903: step: 236/531, loss: 0.17704887688159943 2023-01-23 00:50:12.510910: step: 240/531, loss: 0.2890928387641907 2023-01-23 00:50:13.621369: step: 244/531, loss: 0.029705429449677467 2023-01-23 00:50:14.775850: step: 248/531, loss: 0.07625313103199005 2023-01-23 00:50:15.903821: step: 252/531, loss: 0.13213196396827698 2023-01-23 00:50:17.046365: step: 256/531, loss: 0.06964436173439026 2023-01-23 00:50:18.170616: step: 260/531, loss: 0.13338060677051544 2023-01-23 00:50:19.288734: step: 264/531, loss: 0.12334012985229492 2023-01-23 00:50:20.414343: step: 268/531, loss: 0.0038129331078380346 2023-01-23 00:50:21.546677: step: 272/531, loss: 0.10957928001880646 2023-01-23 00:50:22.672381: step: 276/531, loss: 0.07325001060962677 2023-01-23 00:50:23.814274: step: 280/531, loss: 0.07740020751953125 2023-01-23 00:50:24.945588: step: 284/531, loss: 0.030539896339178085 2023-01-23 00:50:26.070509: step: 288/531, loss: 0.013364791870117188 2023-01-23 00:50:27.217277: step: 292/531, loss: 0.06818647682666779 2023-01-23 00:50:28.326387: step: 296/531, loss: 0.07460494339466095 2023-01-23 00:50:29.455718: step: 300/531, loss: 0.04049735143780708 2023-01-23 00:50:30.568420: step: 304/531, loss: 0.05165739357471466 2023-01-23 00:50:31.690652: step: 308/531, loss: 0.07476544380187988 2023-01-23 00:50:32.830762: step: 312/531, loss: 0.11238288879394531 2023-01-23 00:50:33.978741: step: 316/531, loss: 0.13798531889915466 2023-01-23 00:50:35.099558: step: 320/531, loss: 0.002304124878719449 2023-01-23 00:50:36.238126: step: 324/531, loss: 0.023406410589814186 2023-01-23 00:50:37.400037: step: 328/531, loss: 0.09620776772499084 2023-01-23 00:50:38.529491: step: 332/531, loss: 0.41692429780960083 2023-01-23 00:50:39.653794: step: 336/531, loss: 0.3007393479347229 2023-01-23 00:50:40.790140: step: 340/531, loss: 0.03148365020751953 2023-01-23 00:50:41.928557: step: 344/531, loss: 0.023790931329131126 2023-01-23 00:50:43.043874: step: 348/531, loss: 0.02186756208539009 2023-01-23 00:50:44.172023: step: 352/531, loss: 0.500514805316925 2023-01-23 00:50:45.291272: step: 356/531, loss: 0.4334770441055298 2023-01-23 00:50:46.390634: step: 360/531, loss: 0.043232060968875885 2023-01-23 00:50:47.497020: step: 364/531, loss: 0.2623669505119324 2023-01-23 00:50:48.618679: step: 368/531, loss: 0.09848713874816895 2023-01-23 00:50:49.768087: step: 372/531, loss: 0.12542200088500977 2023-01-23 00:50:50.898051: step: 376/531, loss: 0.0418303981423378 2023-01-23 00:50:52.037322: step: 380/531, loss: 0.04276122897863388 2023-01-23 00:50:53.175999: step: 384/531, loss: 0.07473735511302948 2023-01-23 00:50:54.319212: step: 388/531, loss: 0.12753501534461975 2023-01-23 00:50:55.448964: step: 392/531, loss: 0.07406473159790039 2023-01-23 00:50:56.585390: step: 396/531, loss: 0.17697182297706604 2023-01-23 00:50:57.686914: step: 400/531, loss: 0.07076764106750488 2023-01-23 00:50:58.805367: step: 404/531, loss: 0.08918514847755432 2023-01-23 00:50:59.899479: step: 408/531, loss: 0.030029643326997757 2023-01-23 00:51:01.051818: step: 412/531, loss: 0.054741814732551575 2023-01-23 00:51:02.182780: step: 416/531, loss: 0.052515413612127304 2023-01-23 00:51:03.325626: step: 420/531, loss: 0.051069118082523346 2023-01-23 00:51:04.463437: step: 424/531, loss: 0.1179811954498291 2023-01-23 00:51:05.606139: step: 428/531, loss: 0.023988008499145508 2023-01-23 00:51:06.735246: step: 432/531, loss: 0.11671815067529678 2023-01-23 00:51:07.853688: step: 436/531, loss: 0.21174821257591248 2023-01-23 00:51:08.985642: step: 440/531, loss: 0.01924161985516548 2023-01-23 00:51:10.100454: step: 444/531, loss: 0.03493242338299751 2023-01-23 00:51:11.243744: step: 448/531, loss: 0.11870650947093964 2023-01-23 00:51:12.390344: step: 452/531, loss: 0.11427745968103409 2023-01-23 00:51:13.507633: step: 456/531, loss: 0.14556297659873962 2023-01-23 00:51:14.653795: step: 460/531, loss: 0.5253650546073914 2023-01-23 00:51:15.773003: step: 464/531, loss: 0.09302349388599396 2023-01-23 00:51:16.917963: step: 468/531, loss: 0.028496742248535156 2023-01-23 00:51:18.034857: step: 472/531, loss: 0.1322811096906662 2023-01-23 00:51:19.189202: step: 476/531, loss: 0.1962333619594574 2023-01-23 00:51:20.308735: step: 480/531, loss: 0.10265693068504333 2023-01-23 00:51:21.464495: step: 484/531, loss: 0.09085159748792648 2023-01-23 00:51:22.568589: step: 488/531, loss: 0.12242496013641357 2023-01-23 00:51:23.718424: step: 492/531, loss: 1.0959402322769165 2023-01-23 00:51:24.848169: step: 496/531, loss: 0.042792320251464844 2023-01-23 00:51:25.969801: step: 500/531, loss: 0.13432160019874573 2023-01-23 00:51:27.089613: step: 504/531, loss: 0.13506154716014862 2023-01-23 00:51:28.224823: step: 508/531, loss: 0.07583389431238174 2023-01-23 00:51:29.359003: step: 512/531, loss: 0.05381155386567116 2023-01-23 00:51:30.478942: step: 516/531, loss: 0.23388594388961792 2023-01-23 00:51:31.606512: step: 520/531, loss: 0.16427969932556152 2023-01-23 00:51:32.720332: step: 524/531, loss: 0.05261101573705673 2023-01-23 00:51:33.823621: step: 528/531, loss: 0.0637696236371994 2023-01-23 00:51:34.950390: step: 532/531, loss: 0.019101906567811966 2023-01-23 00:51:36.061086: step: 536/531, loss: 0.16299466788768768 2023-01-23 00:51:37.159950: step: 540/531, loss: 0.03241760656237602 2023-01-23 00:51:38.287915: step: 544/531, loss: 0.08427772670984268 2023-01-23 00:51:39.422427: step: 548/531, loss: 0.07498989254236221 2023-01-23 00:51:40.543907: step: 552/531, loss: 0.14200668036937714 2023-01-23 00:51:41.663045: step: 556/531, loss: 0.04064350202679634 2023-01-23 00:51:42.778298: step: 560/531, loss: 0.03512401506304741 2023-01-23 00:51:43.963010: step: 564/531, loss: 0.10931988060474396 2023-01-23 00:51:45.106523: step: 568/531, loss: 1.8063678741455078 2023-01-23 00:51:46.237167: step: 572/531, loss: 0.03486442565917969 2023-01-23 00:51:47.359243: step: 576/531, loss: 0.06649918854236603 2023-01-23 00:51:48.479881: step: 580/531, loss: 0.011412239633500576 2023-01-23 00:51:49.596844: step: 584/531, loss: 0.07249526679515839 2023-01-23 00:51:50.692831: step: 588/531, loss: 0.09728346019983292 2023-01-23 00:51:51.797854: step: 592/531, loss: 0.12365636974573135 2023-01-23 00:51:52.940389: step: 596/531, loss: 0.051905252039432526 2023-01-23 00:51:54.098345: step: 600/531, loss: 0.27257871627807617 2023-01-23 00:51:55.280428: step: 604/531, loss: 0.2695941925048828 2023-01-23 00:51:56.381510: step: 608/531, loss: 0.06744766235351562 2023-01-23 00:51:57.505554: step: 612/531, loss: 0.1624092161655426 2023-01-23 00:51:58.630657: step: 616/531, loss: 0.10697192698717117 2023-01-23 00:51:59.742160: step: 620/531, loss: 0.013153934851288795 2023-01-23 00:52:00.850837: step: 624/531, loss: 0.07322165369987488 2023-01-23 00:52:01.965502: step: 628/531, loss: 0.10920357704162598 2023-01-23 00:52:03.101387: step: 632/531, loss: 0.04836883395910263 2023-01-23 00:52:04.232271: step: 636/531, loss: 0.018249941989779472 2023-01-23 00:52:05.361445: step: 640/531, loss: 0.10932368040084839 2023-01-23 00:52:06.492911: step: 644/531, loss: 0.072850801050663 2023-01-23 00:52:07.597448: step: 648/531, loss: 0.18117637932300568 2023-01-23 00:52:08.723840: step: 652/531, loss: 0.01232462003827095 2023-01-23 00:52:09.839026: step: 656/531, loss: 0.01563425175845623 2023-01-23 00:52:10.954140: step: 660/531, loss: 0.0649867057800293 2023-01-23 00:52:12.130650: step: 664/531, loss: 0.0731443390250206 2023-01-23 00:52:13.250479: step: 668/531, loss: 0.15695782005786896 2023-01-23 00:52:14.413851: step: 672/531, loss: 0.04744892194867134 2023-01-23 00:52:15.544911: step: 676/531, loss: 0.06414695084095001 2023-01-23 00:52:16.678298: step: 680/531, loss: 0.05328016355633736 2023-01-23 00:52:17.819234: step: 684/531, loss: 0.08182373642921448 2023-01-23 00:52:18.976709: step: 688/531, loss: 0.040016986429691315 2023-01-23 00:52:20.082547: step: 692/531, loss: 0.0013580322265625 2023-01-23 00:52:21.196153: step: 696/531, loss: 0.15640011429786682 2023-01-23 00:52:22.313580: step: 700/531, loss: 0.08039741218090057 2023-01-23 00:52:23.422661: step: 704/531, loss: 0.07359619438648224 2023-01-23 00:52:24.577467: step: 708/531, loss: 0.1425948143005371 2023-01-23 00:52:25.687472: step: 712/531, loss: 0.03512106090784073 2023-01-23 00:52:26.800124: step: 716/531, loss: 0.05033493414521217 2023-01-23 00:52:27.941428: step: 720/531, loss: 0.2445165514945984 2023-01-23 00:52:29.066486: step: 724/531, loss: 0.7185272574424744 2023-01-23 00:52:30.218736: step: 728/531, loss: 0.08073179423809052 2023-01-23 00:52:31.351094: step: 732/531, loss: 0.04225664213299751 2023-01-23 00:52:32.445971: step: 736/531, loss: 0.08425255119800568 2023-01-23 00:52:33.596313: step: 740/531, loss: 0.030803300440311432 2023-01-23 00:52:34.728205: step: 744/531, loss: 0.0819791778922081 2023-01-23 00:52:35.867299: step: 748/531, loss: 0.20876997709274292 2023-01-23 00:52:36.975064: step: 752/531, loss: 0.02880225144326687 2023-01-23 00:52:38.097242: step: 756/531, loss: 0.016536330804228783 2023-01-23 00:52:39.205714: step: 760/531, loss: 0.06827898323535919 2023-01-23 00:52:40.357150: step: 764/531, loss: 0.20995837450027466 2023-01-23 00:52:41.488692: step: 768/531, loss: 0.056046295911073685 2023-01-23 00:52:42.639864: step: 772/531, loss: 0.08739929646253586 2023-01-23 00:52:43.763447: step: 776/531, loss: 0.12965135276317596 2023-01-23 00:52:44.889884: step: 780/531, loss: 0.0807647705078125 2023-01-23 00:52:46.009179: step: 784/531, loss: 0.16201750934123993 2023-01-23 00:52:47.129211: step: 788/531, loss: 0.13932104408740997 2023-01-23 00:52:48.252050: step: 792/531, loss: 0.3195396363735199 2023-01-23 00:52:49.384613: step: 796/531, loss: 0.08106646686792374 2023-01-23 00:52:50.482704: step: 800/531, loss: 0.09317875653505325 2023-01-23 00:52:51.640567: step: 804/531, loss: 0.22033005952835083 2023-01-23 00:52:52.773738: step: 808/531, loss: 0.00297031388618052 2023-01-23 00:52:53.874436: step: 812/531, loss: 0.14223118126392365 2023-01-23 00:52:55.005750: step: 816/531, loss: 0.07982216030359268 2023-01-23 00:52:56.126449: step: 820/531, loss: 0.04033965989947319 2023-01-23 00:52:57.266589: step: 824/531, loss: 0.5727187395095825 2023-01-23 00:52:58.367655: step: 828/531, loss: 0.1576448380947113 2023-01-23 00:52:59.488632: step: 832/531, loss: 0.0631125420331955 2023-01-23 00:53:00.583947: step: 836/531, loss: 0.02203233726322651 2023-01-23 00:53:01.699740: step: 840/531, loss: 0.022807979956269264 2023-01-23 00:53:02.798471: step: 844/531, loss: 0.08285131305456161 2023-01-23 00:53:03.901444: step: 848/531, loss: 0.1330675184726715 2023-01-23 00:53:05.027553: step: 852/531, loss: 0.3412402272224426 2023-01-23 00:53:06.129671: step: 856/531, loss: 0.05455436930060387 2023-01-23 00:53:07.211813: step: 860/531, loss: 0.08719502389431 2023-01-23 00:53:08.370242: step: 864/531, loss: 0.15049421787261963 2023-01-23 00:53:09.491818: step: 868/531, loss: 0.6061941385269165 2023-01-23 00:53:10.600317: step: 872/531, loss: 0.03395424038171768 2023-01-23 00:53:11.732012: step: 876/531, loss: 0.08362732082605362 2023-01-23 00:53:12.893784: step: 880/531, loss: 0.4415343403816223 2023-01-23 00:53:14.022696: step: 884/531, loss: 0.4605218768119812 2023-01-23 00:53:15.144365: step: 888/531, loss: 0.15848055481910706 2023-01-23 00:53:16.286950: step: 892/531, loss: 0.1300487518310547 2023-01-23 00:53:17.389883: step: 896/531, loss: 0.16720028221607208 2023-01-23 00:53:18.488067: step: 900/531, loss: 0.10289835929870605 2023-01-23 00:53:19.601956: step: 904/531, loss: 0.05237159878015518 2023-01-23 00:53:20.728200: step: 908/531, loss: 0.035078905522823334 2023-01-23 00:53:21.865530: step: 912/531, loss: 0.2856786847114563 2023-01-23 00:53:22.976364: step: 916/531, loss: 0.05845584720373154 2023-01-23 00:53:24.109067: step: 920/531, loss: 0.02715129777789116 2023-01-23 00:53:25.253392: step: 924/531, loss: 0.06343994289636612 2023-01-23 00:53:26.384740: step: 928/531, loss: 0.021636009216308594 2023-01-23 00:53:27.499114: step: 932/531, loss: 0.2872146666049957 2023-01-23 00:53:28.661003: step: 936/531, loss: 0.799748420715332 2023-01-23 00:53:29.764957: step: 940/531, loss: 0.028060341253876686 2023-01-23 00:53:30.892673: step: 944/531, loss: 0.07654837518930435 2023-01-23 00:53:32.023053: step: 948/531, loss: 0.2365356981754303 2023-01-23 00:53:33.154196: step: 952/531, loss: 0.11675992608070374 2023-01-23 00:53:34.287939: step: 956/531, loss: 0.18697862327098846 2023-01-23 00:53:35.427179: step: 960/531, loss: 0.06001434475183487 2023-01-23 00:53:36.526030: step: 964/531, loss: 0.02468586154282093 2023-01-23 00:53:37.653625: step: 968/531, loss: 0.13695946335792542 2023-01-23 00:53:38.763677: step: 972/531, loss: 0.07833647727966309 2023-01-23 00:53:39.880915: step: 976/531, loss: 0.1112087219953537 2023-01-23 00:53:40.981486: step: 980/531, loss: 0.1378290206193924 2023-01-23 00:53:42.115287: step: 984/531, loss: 0.12337536364793777 2023-01-23 00:53:43.248624: step: 988/531, loss: 0.1059502586722374 2023-01-23 00:53:44.357262: step: 992/531, loss: 0.04917726665735245 2023-01-23 00:53:45.480559: step: 996/531, loss: 0.10076946020126343 2023-01-23 00:53:46.599095: step: 1000/531, loss: 0.11674318462610245 2023-01-23 00:53:47.744877: step: 1004/531, loss: 0.31445178389549255 2023-01-23 00:53:48.848674: step: 1008/531, loss: 0.11476679146289825 2023-01-23 00:53:49.965356: step: 1012/531, loss: 0.6012662649154663 2023-01-23 00:53:51.128509: step: 1016/531, loss: 0.2123938649892807 2023-01-23 00:53:52.225391: step: 1020/531, loss: 0.07088909298181534 2023-01-23 00:53:53.368767: step: 1024/531, loss: 0.0908966064453125 2023-01-23 00:53:54.486489: step: 1028/531, loss: 0.07293333858251572 2023-01-23 00:53:55.609338: step: 1032/531, loss: 0.0146652702242136 2023-01-23 00:53:56.713033: step: 1036/531, loss: 0.06168394535779953 2023-01-23 00:53:57.844205: step: 1040/531, loss: 0.10019131004810333 2023-01-23 00:53:58.957305: step: 1044/531, loss: 0.1352185755968094 2023-01-23 00:54:00.063419: step: 1048/531, loss: 0.04007406532764435 2023-01-23 00:54:01.173734: step: 1052/531, loss: 0.12325391918420792 2023-01-23 00:54:02.320329: step: 1056/531, loss: 0.19440516829490662 2023-01-23 00:54:03.450793: step: 1060/531, loss: 0.07504969090223312 2023-01-23 00:54:04.586957: step: 1064/531, loss: 0.575652003288269 2023-01-23 00:54:05.727758: step: 1068/531, loss: 0.05950909107923508 2023-01-23 00:54:06.840872: step: 1072/531, loss: 0.08521442115306854 2023-01-23 00:54:07.961344: step: 1076/531, loss: 0.08552151173353195 2023-01-23 00:54:09.113643: step: 1080/531, loss: 0.13359637558460236 2023-01-23 00:54:10.224755: step: 1084/531, loss: 0.04953209310770035 2023-01-23 00:54:11.355602: step: 1088/531, loss: 0.04551658779382706 2023-01-23 00:54:12.482009: step: 1092/531, loss: 0.14216624200344086 2023-01-23 00:54:13.599161: step: 1096/531, loss: 0.016649913042783737 2023-01-23 00:54:14.711817: step: 1100/531, loss: 0.04294843599200249 2023-01-23 00:54:15.787910: step: 1104/531, loss: 0.09906189888715744 2023-01-23 00:54:16.905591: step: 1108/531, loss: 0.024163054302334785 2023-01-23 00:54:18.033050: step: 1112/531, loss: 1.226098656654358 2023-01-23 00:54:19.157072: step: 1116/531, loss: 0.05841789394617081 2023-01-23 00:54:20.266454: step: 1120/531, loss: 0.2756267488002777 2023-01-23 00:54:21.392338: step: 1124/531, loss: 0.28697648644447327 2023-01-23 00:54:22.528195: step: 1128/531, loss: 0.05700264126062393 2023-01-23 00:54:23.659077: step: 1132/531, loss: 0.0170589666813612 2023-01-23 00:54:24.773997: step: 1136/531, loss: 0.0415375679731369 2023-01-23 00:54:25.901435: step: 1140/531, loss: 0.08867931365966797 2023-01-23 00:54:27.001905: step: 1144/531, loss: 0.05796985328197479 2023-01-23 00:54:28.114402: step: 1148/531, loss: 0.10943002998828888 2023-01-23 00:54:29.222416: step: 1152/531, loss: 0.04627714306116104 2023-01-23 00:54:30.330860: step: 1156/531, loss: 0.1122528612613678 2023-01-23 00:54:31.456892: step: 1160/531, loss: 0.4377952516078949 2023-01-23 00:54:32.558270: step: 1164/531, loss: 0.08515045791864395 2023-01-23 00:54:33.669389: step: 1168/531, loss: 0.07734423130750656 2023-01-23 00:54:34.795126: step: 1172/531, loss: 0.0968279168009758 2023-01-23 00:54:35.930066: step: 1176/531, loss: 0.3050578236579895 2023-01-23 00:54:37.049563: step: 1180/531, loss: 0.19285497069358826 2023-01-23 00:54:38.180384: step: 1184/531, loss: 0.14222678542137146 2023-01-23 00:54:39.311966: step: 1188/531, loss: 0.01756925694644451 2023-01-23 00:54:40.429356: step: 1192/531, loss: 0.05800233036279678 2023-01-23 00:54:41.549109: step: 1196/531, loss: 0.2442062348127365 2023-01-23 00:54:42.659292: step: 1200/531, loss: 0.21531029045581818 2023-01-23 00:54:43.770118: step: 1204/531, loss: 0.06375684589147568 2023-01-23 00:54:44.931966: step: 1208/531, loss: 0.10060425102710724 2023-01-23 00:54:46.057204: step: 1212/531, loss: 0.3734360635280609 2023-01-23 00:54:47.164811: step: 1216/531, loss: 0.1496995985507965 2023-01-23 00:54:48.280275: step: 1220/531, loss: 0.268998920917511 2023-01-23 00:54:49.413730: step: 1224/531, loss: 0.12165851145982742 2023-01-23 00:54:50.559395: step: 1228/531, loss: 0.26144829392433167 2023-01-23 00:54:51.668992: step: 1232/531, loss: 0.12758643925189972 2023-01-23 00:54:52.787232: step: 1236/531, loss: 0.8067438006401062 2023-01-23 00:54:53.933204: step: 1240/531, loss: 0.10680641978979111 2023-01-23 00:54:55.067942: step: 1244/531, loss: 0.08535700291395187 2023-01-23 00:54:56.199229: step: 1248/531, loss: 0.0566440112888813 2023-01-23 00:54:57.314697: step: 1252/531, loss: 0.10076337307691574 2023-01-23 00:54:58.461529: step: 1256/531, loss: 0.4221707284450531 2023-01-23 00:54:59.539477: step: 1260/531, loss: 0.01611499860882759 2023-01-23 00:55:00.672995: step: 1264/531, loss: 0.27151861786842346 2023-01-23 00:55:01.800998: step: 1268/531, loss: 0.06830034404993057 2023-01-23 00:55:02.915533: step: 1272/531, loss: 0.07658328860998154 2023-01-23 00:55:04.019382: step: 1276/531, loss: 0.07827463746070862 2023-01-23 00:55:05.101890: step: 1280/531, loss: 0.11044149100780487 2023-01-23 00:55:06.213265: step: 1284/531, loss: 0.10964183509349823 2023-01-23 00:55:07.362701: step: 1288/531, loss: 0.10096164047718048 2023-01-23 00:55:08.458236: step: 1292/531, loss: 0.06555338203907013 2023-01-23 00:55:09.565083: step: 1296/531, loss: 0.03680391609668732 2023-01-23 00:55:10.697036: step: 1300/531, loss: 0.09422969818115234 2023-01-23 00:55:11.813220: step: 1304/531, loss: 0.07193374633789062 2023-01-23 00:55:12.941341: step: 1308/531, loss: 0.05417776107788086 2023-01-23 00:55:14.054368: step: 1312/531, loss: 0.1379416584968567 2023-01-23 00:55:15.199368: step: 1316/531, loss: 0.05674038082361221 2023-01-23 00:55:16.339612: step: 1320/531, loss: 0.1321527510881424 2023-01-23 00:55:17.494740: step: 1324/531, loss: 0.004388761706650257 2023-01-23 00:55:18.630630: step: 1328/531, loss: 0.04815931245684624 2023-01-23 00:55:19.773451: step: 1332/531, loss: 0.11551463603973389 2023-01-23 00:55:20.883151: step: 1336/531, loss: 0.21281471848487854 2023-01-23 00:55:21.970418: step: 1340/531, loss: 0.017727375030517578 2023-01-23 00:55:23.120595: step: 1344/531, loss: 0.0568358451128006 2023-01-23 00:55:24.215574: step: 1348/531, loss: 0.09832391142845154 2023-01-23 00:55:25.327072: step: 1352/531, loss: 0.1027216762304306 2023-01-23 00:55:26.441351: step: 1356/531, loss: 0.17875251173973083 2023-01-23 00:55:27.566388: step: 1360/531, loss: 0.16329073905944824 2023-01-23 00:55:28.691017: step: 1364/531, loss: 0.3278714716434479 2023-01-23 00:55:29.846824: step: 1368/531, loss: 0.05779998376965523 2023-01-23 00:55:30.929606: step: 1372/531, loss: 0.12646451592445374 2023-01-23 00:55:32.034603: step: 1376/531, loss: 0.12449002265930176 2023-01-23 00:55:33.153956: step: 1380/531, loss: 0.020156146958470345 2023-01-23 00:55:34.277585: step: 1384/531, loss: 0.0186938289552927 2023-01-23 00:55:35.381527: step: 1388/531, loss: 0.02045416831970215 2023-01-23 00:55:36.514182: step: 1392/531, loss: 0.07984685897827148 2023-01-23 00:55:37.659131: step: 1396/531, loss: 0.10682721436023712 2023-01-23 00:55:38.781828: step: 1400/531, loss: 0.42404088377952576 2023-01-23 00:55:39.922842: step: 1404/531, loss: 0.12215514481067657 2023-01-23 00:55:41.069892: step: 1408/531, loss: 0.09165388345718384 2023-01-23 00:55:42.215129: step: 1412/531, loss: 0.002543376525864005 2023-01-23 00:55:43.352552: step: 1416/531, loss: 0.5665777325630188 2023-01-23 00:55:44.479405: step: 1420/531, loss: 0.0015269280411303043 2023-01-23 00:55:45.595285: step: 1424/531, loss: 0.1153249740600586 2023-01-23 00:55:46.721910: step: 1428/531, loss: 0.0989016517996788 2023-01-23 00:55:47.822416: step: 1432/531, loss: 0.033327486366033554 2023-01-23 00:55:48.929202: step: 1436/531, loss: 0.05953827127814293 2023-01-23 00:55:50.021115: step: 1440/531, loss: 0.2497701644897461 2023-01-23 00:55:51.159665: step: 1444/531, loss: 0.0793834701180458 2023-01-23 00:55:52.303897: step: 1448/531, loss: 0.05613536760210991 2023-01-23 00:55:53.439977: step: 1452/531, loss: 0.25240668654441833 2023-01-23 00:55:54.574289: step: 1456/531, loss: 0.017452144995331764 2023-01-23 00:55:55.674664: step: 1460/531, loss: 0.17958489060401917 2023-01-23 00:55:56.803468: step: 1464/531, loss: 0.10113926231861115 2023-01-23 00:55:57.915430: step: 1468/531, loss: 0.13999024033546448 2023-01-23 00:55:59.031181: step: 1472/531, loss: 0.0996173843741417 2023-01-23 00:56:00.133833: step: 1476/531, loss: 0.07060623168945312 2023-01-23 00:56:01.282102: step: 1480/531, loss: 0.06714344024658203 2023-01-23 00:56:02.426005: step: 1484/531, loss: 0.08549261838197708 2023-01-23 00:56:03.537318: step: 1488/531, loss: 0.13169966638088226 2023-01-23 00:56:04.652421: step: 1492/531, loss: 0.13795089721679688 2023-01-23 00:56:05.794868: step: 1496/531, loss: 0.08925362676382065 2023-01-23 00:56:06.929610: step: 1500/531, loss: 0.7767423391342163 2023-01-23 00:56:08.050246: step: 1504/531, loss: 0.2303818166255951 2023-01-23 00:56:09.171070: step: 1508/531, loss: 0.00633630808442831 2023-01-23 00:56:10.306776: step: 1512/531, loss: 0.16380244493484497 2023-01-23 00:56:11.410836: step: 1516/531, loss: 0.021251963451504707 2023-01-23 00:56:12.522865: step: 1520/531, loss: 0.06433776766061783 2023-01-23 00:56:13.654129: step: 1524/531, loss: 0.02528095245361328 2023-01-23 00:56:14.763944: step: 1528/531, loss: 0.2898082733154297 2023-01-23 00:56:15.883802: step: 1532/531, loss: 0.019547272473573685 2023-01-23 00:56:17.000453: step: 1536/531, loss: 0.031021595001220703 2023-01-23 00:56:18.150319: step: 1540/531, loss: 0.04787087440490723 2023-01-23 00:56:19.308481: step: 1544/531, loss: 0.04735222086310387 2023-01-23 00:56:20.428260: step: 1548/531, loss: 0.20263047516345978 2023-01-23 00:56:21.576201: step: 1552/531, loss: 0.685335099697113 2023-01-23 00:56:22.710697: step: 1556/531, loss: 0.1388796865940094 2023-01-23 00:56:23.843915: step: 1560/531, loss: 0.2668720483779907 2023-01-23 00:56:24.977014: step: 1564/531, loss: 0.12971554696559906 2023-01-23 00:56:26.171664: step: 1568/531, loss: 0.27143239974975586 2023-01-23 00:56:27.293303: step: 1572/531, loss: 0.1479652374982834 2023-01-23 00:56:28.381331: step: 1576/531, loss: 0.048185061663389206 2023-01-23 00:56:29.507100: step: 1580/531, loss: 0.07632150501012802 2023-01-23 00:56:30.612792: step: 1584/531, loss: 0.02477874793112278 2023-01-23 00:56:31.785265: step: 1588/531, loss: 0.13197068870067596 2023-01-23 00:56:32.883913: step: 1592/531, loss: 0.003121018409729004 2023-01-23 00:56:34.011840: step: 1596/531, loss: 0.07607078552246094 2023-01-23 00:56:35.119261: step: 1600/531, loss: 0.4999350905418396 2023-01-23 00:56:36.220077: step: 1604/531, loss: 0.12806305289268494 2023-01-23 00:56:37.333132: step: 1608/531, loss: 0.05977220833301544 2023-01-23 00:56:38.484538: step: 1612/531, loss: 0.07361612468957901 2023-01-23 00:56:39.597255: step: 1616/531, loss: 0.1490984410047531 2023-01-23 00:56:40.699678: step: 1620/531, loss: 0.012660825625061989 2023-01-23 00:56:41.874499: step: 1624/531, loss: 0.15770864486694336 2023-01-23 00:56:43.000636: step: 1628/531, loss: 0.01678595505654812 2023-01-23 00:56:44.138042: step: 1632/531, loss: 0.48617449402809143 2023-01-23 00:56:45.254790: step: 1636/531, loss: 0.15545912086963654 2023-01-23 00:56:46.378236: step: 1640/531, loss: 0.17079493403434753 2023-01-23 00:56:47.536551: step: 1644/531, loss: 0.13841553032398224 2023-01-23 00:56:48.691063: step: 1648/531, loss: 0.15446797013282776 2023-01-23 00:56:49.839095: step: 1652/531, loss: 0.07637205719947815 2023-01-23 00:56:50.951133: step: 1656/531, loss: 0.009453678503632545 2023-01-23 00:56:52.052365: step: 1660/531, loss: 0.28441524505615234 2023-01-23 00:56:53.168808: step: 1664/531, loss: 0.04133646562695503 2023-01-23 00:56:54.304776: step: 1668/531, loss: 0.07641124725341797 2023-01-23 00:56:55.429462: step: 1672/531, loss: 0.06717157363891602 2023-01-23 00:56:56.539543: step: 1676/531, loss: 0.14095020294189453 2023-01-23 00:56:57.705876: step: 1680/531, loss: 0.33387327194213867 2023-01-23 00:56:58.818682: step: 1684/531, loss: 0.09981915354728699 2023-01-23 00:56:59.949053: step: 1688/531, loss: 1.1915531158447266 2023-01-23 00:57:01.065107: step: 1692/531, loss: 0.019327307119965553 2023-01-23 00:57:02.200446: step: 1696/531, loss: 0.02034783363342285 2023-01-23 00:57:03.323681: step: 1700/531, loss: 0.006193351931869984 2023-01-23 00:57:04.457658: step: 1704/531, loss: 0.5670868158340454 2023-01-23 00:57:05.599187: step: 1708/531, loss: 2.2158162593841553 2023-01-23 00:57:06.713555: step: 1712/531, loss: 0.10447955131530762 2023-01-23 00:57:07.819675: step: 1716/531, loss: 0.04610328748822212 2023-01-23 00:57:08.939868: step: 1720/531, loss: 0.09350728988647461 2023-01-23 00:57:10.097825: step: 1724/531, loss: 0.11373177915811539 2023-01-23 00:57:11.249915: step: 1728/531, loss: 0.10574197769165039 2023-01-23 00:57:12.415353: step: 1732/531, loss: 0.14384347200393677 2023-01-23 00:57:13.592328: step: 1736/531, loss: 0.14875225722789764 2023-01-23 00:57:14.712653: step: 1740/531, loss: 0.11043176054954529 2023-01-23 00:57:15.823412: step: 1744/531, loss: 0.0686761885881424 2023-01-23 00:57:16.970665: step: 1748/531, loss: 0.14543715119361877 2023-01-23 00:57:18.078757: step: 1752/531, loss: 0.23023854196071625 2023-01-23 00:57:19.216803: step: 1756/531, loss: 0.06384191662073135 2023-01-23 00:57:20.359889: step: 1760/531, loss: 0.0273863784968853 2023-01-23 00:57:21.496797: step: 1764/531, loss: 0.031943678855895996 2023-01-23 00:57:22.602776: step: 1768/531, loss: 0.06780986487865448 2023-01-23 00:57:23.718927: step: 1772/531, loss: 0.059722043573856354 2023-01-23 00:57:24.846217: step: 1776/531, loss: 0.09308472275733948 2023-01-23 00:57:25.977173: step: 1780/531, loss: 0.06750917434692383 2023-01-23 00:57:27.091362: step: 1784/531, loss: 0.08478861302137375 2023-01-23 00:57:28.198859: step: 1788/531, loss: 0.010016251355409622 2023-01-23 00:57:29.320533: step: 1792/531, loss: 0.07060471177101135 2023-01-23 00:57:30.418868: step: 1796/531, loss: 0.1633385717868805 2023-01-23 00:57:31.548797: step: 1800/531, loss: 0.008719349279999733 2023-01-23 00:57:32.690829: step: 1804/531, loss: 0.0678853988647461 2023-01-23 00:57:33.795427: step: 1808/531, loss: 0.5351272225379944 2023-01-23 00:57:34.874392: step: 1812/531, loss: 0.08534260094165802 2023-01-23 00:57:35.995784: step: 1816/531, loss: 0.15985670685768127 2023-01-23 00:57:37.130005: step: 1820/531, loss: 0.08133211731910706 2023-01-23 00:57:38.270191: step: 1824/531, loss: 0.026988983154296875 2023-01-23 00:57:39.410762: step: 1828/531, loss: 0.10074234008789062 2023-01-23 00:57:40.551373: step: 1832/531, loss: 0.1154978796839714 2023-01-23 00:57:41.648010: step: 1836/531, loss: 0.07311529666185379 2023-01-23 00:57:42.829744: step: 1840/531, loss: 0.0812930092215538 2023-01-23 00:57:43.957801: step: 1844/531, loss: 0.07065191864967346 2023-01-23 00:57:45.063571: step: 1848/531, loss: 0.03830909729003906 2023-01-23 00:57:46.194653: step: 1852/531, loss: 0.26345059275627136 2023-01-23 00:57:47.323240: step: 1856/531, loss: 0.2540784776210785 2023-01-23 00:57:48.421455: step: 1860/531, loss: 0.3297414779663086 2023-01-23 00:57:49.557163: step: 1864/531, loss: 0.29000750184059143 2023-01-23 00:57:50.740258: step: 1868/531, loss: 0.9680474996566772 2023-01-23 00:57:51.861077: step: 1872/531, loss: 0.05043964087963104 2023-01-23 00:57:53.007714: step: 1876/531, loss: 0.21940383315086365 2023-01-23 00:57:54.128590: step: 1880/531, loss: 0.07403242588043213 2023-01-23 00:57:55.251955: step: 1884/531, loss: 0.08945588767528534 2023-01-23 00:57:56.379905: step: 1888/531, loss: 0.08835281431674957 2023-01-23 00:57:57.531347: step: 1892/531, loss: 0.0068323370069265366 2023-01-23 00:57:58.666469: step: 1896/531, loss: 0.019809197634458542 2023-01-23 00:57:59.773034: step: 1900/531, loss: 0.059524111449718475 2023-01-23 00:58:00.874536: step: 1904/531, loss: 0.08691282570362091 2023-01-23 00:58:01.992491: step: 1908/531, loss: 0.3050805926322937 2023-01-23 00:58:03.111129: step: 1912/531, loss: 0.1144617572426796 2023-01-23 00:58:04.231158: step: 1916/531, loss: 0.15250836312770844 2023-01-23 00:58:05.327708: step: 1920/531, loss: 0.05736827850341797 2023-01-23 00:58:06.457523: step: 1924/531, loss: 0.11525671184062958 2023-01-23 00:58:07.558015: step: 1928/531, loss: 0.1003013551235199 2023-01-23 00:58:08.701242: step: 1932/531, loss: 0.06514883041381836 2023-01-23 00:58:09.823350: step: 1936/531, loss: 0.0098114013671875 2023-01-23 00:58:10.959290: step: 1940/531, loss: 0.07660255581140518 2023-01-23 00:58:12.108498: step: 1944/531, loss: 0.1701059341430664 2023-01-23 00:58:13.201301: step: 1948/531, loss: 0.1081278845667839 2023-01-23 00:58:14.309056: step: 1952/531, loss: 0.18591547012329102 2023-01-23 00:58:15.415273: step: 1956/531, loss: 0.052460767328739166 2023-01-23 00:58:16.537793: step: 1960/531, loss: 0.14924316108226776 2023-01-23 00:58:17.679222: step: 1964/531, loss: 0.22551065683364868 2023-01-23 00:58:18.807935: step: 1968/531, loss: 0.01474838238209486 2023-01-23 00:58:19.908241: step: 1972/531, loss: 0.08996324986219406 2023-01-23 00:58:21.021032: step: 1976/531, loss: 0.07870092988014221 2023-01-23 00:58:22.122417: step: 1980/531, loss: 0.12241601943969727 2023-01-23 00:58:23.250161: step: 1984/531, loss: 0.10796394944190979 2023-01-23 00:58:24.361122: step: 1988/531, loss: 0.02081594616174698 2023-01-23 00:58:25.506334: step: 1992/531, loss: 0.10027255862951279 2023-01-23 00:58:26.632060: step: 1996/531, loss: 0.08087669312953949 2023-01-23 00:58:27.757101: step: 2000/531, loss: 0.10377130657434464 2023-01-23 00:58:28.871190: step: 2004/531, loss: 0.12663955986499786 2023-01-23 00:58:29.992049: step: 2008/531, loss: 0.06766510009765625 2023-01-23 00:58:31.108905: step: 2012/531, loss: 0.0170135498046875 2023-01-23 00:58:32.231146: step: 2016/531, loss: 0.17701482772827148 2023-01-23 00:58:33.345042: step: 2020/531, loss: 0.14005009829998016 2023-01-23 00:58:34.446568: step: 2024/531, loss: 0.06755342334508896 2023-01-23 00:58:35.603313: step: 2028/531, loss: 0.09678593277931213 2023-01-23 00:58:36.718505: step: 2032/531, loss: 0.003017520997673273 2023-01-23 00:58:37.839929: step: 2036/531, loss: 0.09307806193828583 2023-01-23 00:58:38.978706: step: 2040/531, loss: 0.365352988243103 2023-01-23 00:58:40.077196: step: 2044/531, loss: 0.0351262092590332 2023-01-23 00:58:41.192639: step: 2048/531, loss: 0.1755780279636383 2023-01-23 00:58:42.288835: step: 2052/531, loss: 0.3529370427131653 2023-01-23 00:58:43.403781: step: 2056/531, loss: 0.09701728820800781 2023-01-23 00:58:44.561551: step: 2060/531, loss: 0.1922387182712555 2023-01-23 00:58:45.709437: step: 2064/531, loss: 0.10014934837818146 2023-01-23 00:58:46.807097: step: 2068/531, loss: 0.3301507234573364 2023-01-23 00:58:47.927464: step: 2072/531, loss: 0.04248299449682236 2023-01-23 00:58:49.052877: step: 2076/531, loss: 0.30544883012771606 2023-01-23 00:58:50.157427: step: 2080/531, loss: 0.23375114798545837 2023-01-23 00:58:51.289769: step: 2084/531, loss: 0.04997606575489044 2023-01-23 00:58:52.403678: step: 2088/531, loss: 0.12072034180164337 2023-01-23 00:58:53.563695: step: 2092/531, loss: 0.19212289154529572 2023-01-23 00:58:54.680652: step: 2096/531, loss: 0.07236509025096893 2023-01-23 00:58:55.804606: step: 2100/531, loss: 0.10623031854629517 2023-01-23 00:58:56.904421: step: 2104/531, loss: 0.0291458610445261 2023-01-23 00:58:58.045157: step: 2108/531, loss: 0.13434329628944397 2023-01-23 00:58:59.193135: step: 2112/531, loss: 0.07110749185085297 2023-01-23 00:59:00.321021: step: 2116/531, loss: 0.1486809253692627 2023-01-23 00:59:01.476601: step: 2120/531, loss: 0.16819463670253754 2023-01-23 00:59:02.585720: step: 2124/531, loss: 0.10602817684412003 ================================================== Loss: 0.144 -------------------- Dev: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Test: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Chinese: {'event': {'p': 0.5813953488372093, 'r': 0.9259259259259259, 'f1': 0.7142857142857142}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Korean: {'event': {'p': 0.5942028985507246, 'r': 0.6507936507936508, 'f1': 0.6212121212121213}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} New best russian model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.5440900562851783, 'r': 0.7723035952063915, 'f1': 0.6384149697303247}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Korean: {'event': {'p': 0.5672823218997362, 'r': 0.7692307692307693, 'f1': 0.6529992406985574}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Korean: {'event': {'p': 0.6557377049180327, 'r': 0.6349206349206349, 'f1': 0.6451612903225806}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 9 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:59:51.748474: step: 4/531, loss: 0.31909045577049255 2023-01-23 00:59:52.856182: step: 8/531, loss: 0.037078142166137695 2023-01-23 00:59:53.991017: step: 12/531, loss: 0.0841670036315918 2023-01-23 00:59:55.136590: step: 16/531, loss: 0.12582865357398987 2023-01-23 00:59:56.300074: step: 20/531, loss: 0.17043820023536682 2023-01-23 00:59:57.445902: step: 24/531, loss: 0.12123699486255646 2023-01-23 00:59:58.598311: step: 28/531, loss: 0.039379313588142395 2023-01-23 00:59:59.722853: step: 32/531, loss: 0.09817925095558167 2023-01-23 01:00:00.858588: step: 36/531, loss: 0.07535353302955627 2023-01-23 01:00:01.991090: step: 40/531, loss: 0.10911738872528076 2023-01-23 01:00:03.112730: step: 44/531, loss: 0.05755939334630966 2023-01-23 01:00:04.219586: step: 48/531, loss: 0.07273159176111221 2023-01-23 01:00:05.354223: step: 52/531, loss: 0.07716216892004013 2023-01-23 01:00:06.448535: step: 56/531, loss: 0.804498553276062 2023-01-23 01:00:07.554336: step: 60/531, loss: 0.049272045493125916 2023-01-23 01:00:08.698680: step: 64/531, loss: 0.018027782440185547 2023-01-23 01:00:09.865547: step: 68/531, loss: 0.12482643872499466 2023-01-23 01:00:11.032064: step: 72/531, loss: 0.24122542142868042 2023-01-23 01:00:12.140556: step: 76/531, loss: 0.3045158386230469 2023-01-23 01:00:13.245724: step: 80/531, loss: 0.03714761883020401 2023-01-23 01:00:14.405379: step: 84/531, loss: 0.18369553983211517 2023-01-23 01:00:15.536404: step: 88/531, loss: 0.21868562698364258 2023-01-23 01:00:16.661545: step: 92/531, loss: 0.05772380903363228 2023-01-23 01:00:17.811082: step: 96/531, loss: 5.584299087524414 2023-01-23 01:00:18.964411: step: 100/531, loss: 0.027874279767274857 2023-01-23 01:00:20.085154: step: 104/531, loss: 0.01469721831381321 2023-01-23 01:00:21.204987: step: 108/531, loss: 1.1670235395431519 2023-01-23 01:00:22.310887: step: 112/531, loss: 0.18922977149486542 2023-01-23 01:00:23.413496: step: 116/531, loss: 0.08198051154613495 2023-01-23 01:00:24.570468: step: 120/531, loss: 0.1001686081290245 2023-01-23 01:00:25.715231: step: 124/531, loss: 0.06911039352416992 2023-01-23 01:00:26.870299: step: 128/531, loss: 0.09733366966247559 2023-01-23 01:00:27.983531: step: 132/531, loss: 0.016380691900849342 2023-01-23 01:00:29.101167: step: 136/531, loss: 0.10049648582935333 2023-01-23 01:00:30.269786: step: 140/531, loss: 0.11149883270263672 2023-01-23 01:00:31.426492: step: 144/531, loss: 0.23117217421531677 2023-01-23 01:00:32.559899: step: 148/531, loss: 0.05333299562335014 2023-01-23 01:00:33.686630: step: 152/531, loss: 0.037677764892578125 2023-01-23 01:00:34.822207: step: 156/531, loss: 0.04156494140625 2023-01-23 01:00:35.974818: step: 160/531, loss: 0.04186396673321724 2023-01-23 01:00:37.121269: step: 164/531, loss: 0.06371164321899414 2023-01-23 01:00:38.223533: step: 168/531, loss: 0.2132912129163742 2023-01-23 01:00:39.358264: step: 172/531, loss: 0.034682463854551315 2023-01-23 01:00:40.471291: step: 176/531, loss: 0.028105830773711205 2023-01-23 01:00:41.586607: step: 180/531, loss: 0.06054316833615303 2023-01-23 01:00:42.703320: step: 184/531, loss: 0.10442256927490234 2023-01-23 01:00:43.824369: step: 188/531, loss: 0.2509746551513672 2023-01-23 01:00:44.917647: step: 192/531, loss: 0.03860321268439293 2023-01-23 01:00:46.065318: step: 196/531, loss: 0.12578296661376953 2023-01-23 01:00:47.205486: step: 200/531, loss: 0.07383518666028976 2023-01-23 01:00:48.357963: step: 204/531, loss: 0.6116253137588501 2023-01-23 01:00:49.466609: step: 208/531, loss: 0.14551010727882385 2023-01-23 01:00:50.595423: step: 212/531, loss: 0.04541236162185669 2023-01-23 01:00:51.706185: step: 216/531, loss: 0.125435933470726 2023-01-23 01:00:52.804863: step: 220/531, loss: 0.03750262036919594 2023-01-23 01:00:53.918626: step: 224/531, loss: 0.04600963741540909 2023-01-23 01:00:55.055490: step: 228/531, loss: 0.1304098218679428 2023-01-23 01:00:56.158590: step: 232/531, loss: 0.07334728538990021 2023-01-23 01:00:57.270596: step: 236/531, loss: 0.13074293732643127 2023-01-23 01:00:58.407880: step: 240/531, loss: 0.10463714599609375 2023-01-23 01:00:59.540639: step: 244/531, loss: 0.15595942735671997 2023-01-23 01:01:00.664354: step: 248/531, loss: 0.14452801644802094 2023-01-23 01:01:01.808640: step: 252/531, loss: 0.047496773302555084 2023-01-23 01:01:02.939123: step: 256/531, loss: 0.21338015794754028 2023-01-23 01:01:04.032506: step: 260/531, loss: 0.031382083892822266 2023-01-23 01:01:05.172221: step: 264/531, loss: 0.04427752271294594 2023-01-23 01:01:06.278248: step: 268/531, loss: 0.11613781750202179 2023-01-23 01:01:07.413075: step: 272/531, loss: 0.09052430093288422 2023-01-23 01:01:08.521838: step: 276/531, loss: 0.029305458068847656 2023-01-23 01:01:09.628112: step: 280/531, loss: 0.01891002617776394 2023-01-23 01:01:10.723711: step: 284/531, loss: 0.04548092186450958 2023-01-23 01:01:11.828799: step: 288/531, loss: 0.016316795721650124 2023-01-23 01:01:12.960772: step: 292/531, loss: 0.024338459596037865 2023-01-23 01:01:14.060173: step: 296/531, loss: 0.13635587692260742 2023-01-23 01:01:15.153314: step: 300/531, loss: 0.0565369613468647 2023-01-23 01:01:16.291933: step: 304/531, loss: 0.07782649993896484 2023-01-23 01:01:17.422305: step: 308/531, loss: 0.02421417273581028 2023-01-23 01:01:18.583286: step: 312/531, loss: 0.48803621530532837 2023-01-23 01:01:19.696274: step: 316/531, loss: 0.021121645346283913 2023-01-23 01:01:20.814268: step: 320/531, loss: 0.03810405731201172 2023-01-23 01:01:21.938663: step: 324/531, loss: 0.06846747547388077 2023-01-23 01:01:23.060967: step: 328/531, loss: 0.06744422763586044 2023-01-23 01:01:24.196434: step: 332/531, loss: 0.051459599286317825 2023-01-23 01:01:25.323507: step: 336/531, loss: 0.07121706008911133 2023-01-23 01:01:26.437003: step: 340/531, loss: 0.1775798797607422 2023-01-23 01:01:27.546449: step: 344/531, loss: 0.46801018714904785 2023-01-23 01:01:28.644488: step: 348/531, loss: 0.07138261944055557 2023-01-23 01:01:29.754636: step: 352/531, loss: 0.058658890426158905 2023-01-23 01:01:30.856604: step: 356/531, loss: 0.036805249750614166 2023-01-23 01:01:32.004577: step: 360/531, loss: 0.06067600101232529 2023-01-23 01:01:33.132764: step: 364/531, loss: 0.05421276018023491 2023-01-23 01:01:34.263320: step: 368/531, loss: 0.08465640246868134 2023-01-23 01:01:35.392717: step: 372/531, loss: 0.020247556269168854 2023-01-23 01:01:36.495905: step: 376/531, loss: 0.09053757786750793 2023-01-23 01:01:37.592121: step: 380/531, loss: 0.021294688805937767 2023-01-23 01:01:38.743045: step: 384/531, loss: 0.06727886199951172 2023-01-23 01:01:39.916031: step: 388/531, loss: 0.02019805833697319 2023-01-23 01:01:41.049891: step: 392/531, loss: 0.07179603725671768 2023-01-23 01:01:42.191000: step: 396/531, loss: 0.03279180824756622 2023-01-23 01:01:43.316371: step: 400/531, loss: 0.2440163642168045 2023-01-23 01:01:44.474967: step: 404/531, loss: 0.12595805525779724 2023-01-23 01:01:45.645848: step: 408/531, loss: 0.2527076005935669 2023-01-23 01:01:46.756258: step: 412/531, loss: 0.08531894534826279 2023-01-23 01:01:47.888191: step: 416/531, loss: 0.003537422278895974 2023-01-23 01:01:49.009555: step: 420/531, loss: 0.25832539796829224 2023-01-23 01:01:50.167260: step: 424/531, loss: 0.181183859705925 2023-01-23 01:01:51.315314: step: 428/531, loss: 0.732094407081604 2023-01-23 01:01:52.452054: step: 432/531, loss: 0.08710232377052307 2023-01-23 01:01:53.588115: step: 436/531, loss: 0.15557976067066193 2023-01-23 01:01:54.742756: step: 440/531, loss: 0.07858042418956757 2023-01-23 01:01:55.854891: step: 444/531, loss: 0.022815801203250885 2023-01-23 01:01:56.991056: step: 448/531, loss: 0.32257080078125 2023-01-23 01:01:58.111281: step: 452/531, loss: 0.1284807175397873 2023-01-23 01:01:59.237134: step: 456/531, loss: 0.03366794437170029 2023-01-23 01:02:00.367086: step: 460/531, loss: 0.08315540850162506 2023-01-23 01:02:01.496595: step: 464/531, loss: 0.45114973187446594 2023-01-23 01:02:02.598951: step: 468/531, loss: 0.07650595158338547 2023-01-23 01:02:03.710607: step: 472/531, loss: 0.0158828254789114 2023-01-23 01:02:04.817787: step: 476/531, loss: 0.011155223473906517 2023-01-23 01:02:05.936014: step: 480/531, loss: 0.07284365594387054 2023-01-23 01:02:07.063211: step: 484/531, loss: 0.6783890724182129 2023-01-23 01:02:08.209937: step: 488/531, loss: 1.201550006866455 2023-01-23 01:02:09.326447: step: 492/531, loss: 0.5268360376358032 2023-01-23 01:02:10.454124: step: 496/531, loss: 0.12687844038009644 2023-01-23 01:02:11.579977: step: 500/531, loss: 0.06399841606616974 2023-01-23 01:02:12.719125: step: 504/531, loss: 0.022037316113710403 2023-01-23 01:02:13.838478: step: 508/531, loss: 0.06802206486463547 2023-01-23 01:02:14.972014: step: 512/531, loss: 0.10186205059289932 2023-01-23 01:02:16.084167: step: 516/531, loss: 0.13489818572998047 2023-01-23 01:02:17.200316: step: 520/531, loss: 0.13723459839820862 2023-01-23 01:02:18.336148: step: 524/531, loss: 0.05866193771362305 2023-01-23 01:02:19.441278: step: 528/531, loss: 0.289966881275177 2023-01-23 01:02:20.554970: step: 532/531, loss: 0.03711710125207901 2023-01-23 01:02:21.677193: step: 536/531, loss: 0.03171816095709801 2023-01-23 01:02:22.791192: step: 540/531, loss: 0.11456423252820969 2023-01-23 01:02:23.926101: step: 544/531, loss: 0.05799246206879616 2023-01-23 01:02:25.039924: step: 548/531, loss: 0.0937773734331131 2023-01-23 01:02:26.187329: step: 552/531, loss: 0.06285877525806427 2023-01-23 01:02:27.298359: step: 556/531, loss: 0.021840382367372513 2023-01-23 01:02:28.432261: step: 560/531, loss: 0.06548366695642471 2023-01-23 01:02:29.535231: step: 564/531, loss: 0.1856127828359604 2023-01-23 01:02:30.630002: step: 568/531, loss: 0.1117589920759201 2023-01-23 01:02:31.762566: step: 572/531, loss: 0.07032656669616699 2023-01-23 01:02:32.873094: step: 576/531, loss: 0.042340949177742004 2023-01-23 01:02:34.007160: step: 580/531, loss: 0.062026023864746094 2023-01-23 01:02:35.142216: step: 584/531, loss: 0.01966714859008789 2023-01-23 01:02:36.241852: step: 588/531, loss: 0.02689497545361519 2023-01-23 01:02:37.375403: step: 592/531, loss: 0.03945789486169815 2023-01-23 01:02:38.506654: step: 596/531, loss: 0.04961257055401802 2023-01-23 01:02:39.618616: step: 600/531, loss: 0.043687380850315094 2023-01-23 01:02:40.743597: step: 604/531, loss: 0.44286900758743286 2023-01-23 01:02:41.888341: step: 608/531, loss: 0.08942585438489914 2023-01-23 01:02:42.984766: step: 612/531, loss: 0.030391883105039597 2023-01-23 01:02:44.128520: step: 616/531, loss: 0.18536902964115143 2023-01-23 01:02:45.249187: step: 620/531, loss: 0.07486476749181747 2023-01-23 01:02:46.381494: step: 624/531, loss: 0.09050474315881729 2023-01-23 01:02:47.502191: step: 628/531, loss: 0.021355438977479935 2023-01-23 01:02:48.647274: step: 632/531, loss: 0.022111916914582253 2023-01-23 01:02:49.777144: step: 636/531, loss: 0.11121144145727158 2023-01-23 01:02:50.937598: step: 640/531, loss: 0.07157446444034576 2023-01-23 01:02:52.022911: step: 644/531, loss: 0.04613058641552925 2023-01-23 01:02:53.156943: step: 648/531, loss: 0.06300268322229385 2023-01-23 01:02:54.245480: step: 652/531, loss: 0.0605621337890625 2023-01-23 01:02:55.359307: step: 656/531, loss: 0.04886054992675781 2023-01-23 01:02:56.503066: step: 660/531, loss: 0.05051088333129883 2023-01-23 01:02:57.618854: step: 664/531, loss: 0.1045856699347496 2023-01-23 01:02:58.751163: step: 668/531, loss: 0.029742669314146042 2023-01-23 01:02:59.874204: step: 672/531, loss: 0.04398756101727486 2023-01-23 01:03:00.995997: step: 676/531, loss: 0.005153656005859375 2023-01-23 01:03:02.129007: step: 680/531, loss: 0.05403862148523331 2023-01-23 01:03:03.277256: step: 684/531, loss: 0.1767353117465973 2023-01-23 01:03:04.387079: step: 688/531, loss: 0.12196855992078781 2023-01-23 01:03:05.514827: step: 692/531, loss: 0.035198405385017395 2023-01-23 01:03:06.644210: step: 696/531, loss: 0.05893202871084213 2023-01-23 01:03:07.767549: step: 700/531, loss: 0.06709900498390198 2023-01-23 01:03:08.885377: step: 704/531, loss: 0.06087055057287216 2023-01-23 01:03:10.018003: step: 708/531, loss: 0.07497959583997726 2023-01-23 01:03:11.167317: step: 712/531, loss: 0.49636325240135193 2023-01-23 01:03:12.304388: step: 716/531, loss: 0.012936878018081188 2023-01-23 01:03:13.408563: step: 720/531, loss: 0.11730575561523438 2023-01-23 01:03:14.541079: step: 724/531, loss: 0.0354369655251503 2023-01-23 01:03:15.642791: step: 728/531, loss: 0.056232403963804245 2023-01-23 01:03:16.740154: step: 732/531, loss: 0.07861710339784622 2023-01-23 01:03:17.894919: step: 736/531, loss: 0.48582953214645386 2023-01-23 01:03:19.014208: step: 740/531, loss: 0.058643341064453125 2023-01-23 01:03:20.123057: step: 744/531, loss: 0.06756439059972763 2023-01-23 01:03:21.236024: step: 748/531, loss: 0.07690195739269257 2023-01-23 01:03:22.364278: step: 752/531, loss: 0.08358307182788849 2023-01-23 01:03:23.537108: step: 756/531, loss: 0.06763915717601776 2023-01-23 01:03:24.643245: step: 760/531, loss: 0.027800749987363815 2023-01-23 01:03:25.779134: step: 764/531, loss: 0.14589671790599823 2023-01-23 01:03:26.901047: step: 768/531, loss: 0.11147131770849228 2023-01-23 01:03:28.040765: step: 772/531, loss: 0.1203577071428299 2023-01-23 01:03:29.171873: step: 776/531, loss: 0.08193574100732803 2023-01-23 01:03:30.284925: step: 780/531, loss: 0.08551311492919922 2023-01-23 01:03:31.420781: step: 784/531, loss: 0.0034237862564623356 2023-01-23 01:03:32.516124: step: 788/531, loss: 0.06019439548254013 2023-01-23 01:03:33.621531: step: 792/531, loss: 0.18407364189624786 2023-01-23 01:03:34.724901: step: 796/531, loss: 0.030537035316228867 2023-01-23 01:03:35.824977: step: 800/531, loss: 0.04573202133178711 2023-01-23 01:03:36.929331: step: 804/531, loss: 0.031029606238007545 2023-01-23 01:03:38.054406: step: 808/531, loss: 0.03390789031982422 2023-01-23 01:03:39.193680: step: 812/531, loss: 0.0703950896859169 2023-01-23 01:03:40.301602: step: 816/531, loss: 0.07148733735084534 2023-01-23 01:03:41.412247: step: 820/531, loss: 0.332615464925766 2023-01-23 01:03:42.535214: step: 824/531, loss: 0.006299400236457586 2023-01-23 01:03:43.680724: step: 828/531, loss: 0.12487068772315979 2023-01-23 01:03:44.814555: step: 832/531, loss: 0.06828222423791885 2023-01-23 01:03:45.936120: step: 836/531, loss: 0.11381582170724869 2023-01-23 01:03:47.067462: step: 840/531, loss: 0.057506464421749115 2023-01-23 01:03:48.168764: step: 844/531, loss: 0.05408325418829918 2023-01-23 01:03:49.286310: step: 848/531, loss: 0.012376022525131702 2023-01-23 01:03:50.416813: step: 852/531, loss: 0.026075365021824837 2023-01-23 01:03:51.551865: step: 856/531, loss: 0.05112367123365402 2023-01-23 01:03:52.678650: step: 860/531, loss: 0.05298871919512749 2023-01-23 01:03:53.812065: step: 864/531, loss: 0.03767933323979378 2023-01-23 01:03:54.960854: step: 868/531, loss: 0.08618870377540588 2023-01-23 01:03:56.091886: step: 872/531, loss: 0.0011233807308599353 2023-01-23 01:03:57.188279: step: 876/531, loss: 0.013519859872758389 2023-01-23 01:03:58.340946: step: 880/531, loss: 0.010107326321303844 2023-01-23 01:03:59.478708: step: 884/531, loss: 0.4964072108268738 2023-01-23 01:04:00.621455: step: 888/531, loss: 0.07998915016651154 2023-01-23 01:04:01.762856: step: 892/531, loss: 0.16665571928024292 2023-01-23 01:04:02.879280: step: 896/531, loss: 0.0017509461613371968 2023-01-23 01:04:04.024971: step: 900/531, loss: 0.09706497192382812 2023-01-23 01:04:05.103067: step: 904/531, loss: 0.0925418883562088 2023-01-23 01:04:06.235533: step: 908/531, loss: 0.17431049048900604 2023-01-23 01:04:07.367667: step: 912/531, loss: 0.05579571798443794 2023-01-23 01:04:08.477685: step: 916/531, loss: 0.035646677017211914 2023-01-23 01:04:09.610256: step: 920/531, loss: 0.07259368896484375 2023-01-23 01:04:10.770755: step: 924/531, loss: 0.09151449054479599 2023-01-23 01:04:11.927286: step: 928/531, loss: 0.07399015128612518 2023-01-23 01:04:13.038939: step: 932/531, loss: 0.03648786619305611 2023-01-23 01:04:14.165397: step: 936/531, loss: 0.09260229766368866 2023-01-23 01:04:15.285919: step: 940/531, loss: 0.02603902854025364 2023-01-23 01:04:16.421538: step: 944/531, loss: 0.06427974998950958 2023-01-23 01:04:17.538202: step: 948/531, loss: 0.016600418835878372 2023-01-23 01:04:18.685291: step: 952/531, loss: 0.09395341575145721 2023-01-23 01:04:19.808747: step: 956/531, loss: 0.032543376088142395 2023-01-23 01:04:20.944967: step: 960/531, loss: 0.12543974816799164 2023-01-23 01:04:22.055260: step: 964/531, loss: 0.03914222866296768 2023-01-23 01:04:23.147918: step: 968/531, loss: 0.11849412322044373 2023-01-23 01:04:24.248073: step: 972/531, loss: 0.06281500309705734 2023-01-23 01:04:25.403462: step: 976/531, loss: 0.08752767741680145 2023-01-23 01:04:26.565616: step: 980/531, loss: 0.06272812187671661 2023-01-23 01:04:27.695244: step: 984/531, loss: 0.027181481942534447 2023-01-23 01:04:28.793374: step: 988/531, loss: 0.338143527507782 2023-01-23 01:04:29.928966: step: 992/531, loss: 0.09397459030151367 2023-01-23 01:04:31.034870: step: 996/531, loss: 0.12037691473960876 2023-01-23 01:04:32.146575: step: 1000/531, loss: 0.053514860570430756 2023-01-23 01:04:33.270020: step: 1004/531, loss: 0.009878921322524548 2023-01-23 01:04:34.402661: step: 1008/531, loss: 0.016734063625335693 2023-01-23 01:04:35.513903: step: 1012/531, loss: 0.39299219846725464 2023-01-23 01:04:36.656246: step: 1016/531, loss: 0.1138072982430458 2023-01-23 01:04:37.807668: step: 1020/531, loss: 0.1508607417345047 2023-01-23 01:04:38.917940: step: 1024/531, loss: 0.04865751415491104 2023-01-23 01:04:40.032533: step: 1028/531, loss: 0.36741331219673157 2023-01-23 01:04:41.177945: step: 1032/531, loss: 0.08409996330738068 2023-01-23 01:04:42.257393: step: 1036/531, loss: 0.037766218185424805 2023-01-23 01:04:43.379405: step: 1040/531, loss: 0.0341833122074604 2023-01-23 01:04:44.506650: step: 1044/531, loss: 0.2561612129211426 2023-01-23 01:04:45.625202: step: 1048/531, loss: 0.057497598230838776 2023-01-23 01:04:46.766391: step: 1052/531, loss: 0.03621196746826172 2023-01-23 01:04:47.933142: step: 1056/531, loss: 0.09418167918920517 2023-01-23 01:04:49.096674: step: 1060/531, loss: 0.13171425461769104 2023-01-23 01:04:50.239587: step: 1064/531, loss: 0.05039277300238609 2023-01-23 01:04:51.371943: step: 1068/531, loss: 0.007500076200813055 2023-01-23 01:04:52.481664: step: 1072/531, loss: 0.22494734823703766 2023-01-23 01:04:53.599350: step: 1076/531, loss: 0.010428810492157936 2023-01-23 01:04:54.731706: step: 1080/531, loss: 0.1318804770708084 2023-01-23 01:04:55.852461: step: 1084/531, loss: 0.07227544486522675 2023-01-23 01:04:56.968841: step: 1088/531, loss: 0.024955179542303085 2023-01-23 01:04:58.094038: step: 1092/531, loss: 0.04012352228164673 2023-01-23 01:04:59.212580: step: 1096/531, loss: 0.07499256730079651 2023-01-23 01:05:00.310972: step: 1100/531, loss: 0.06900925934314728 2023-01-23 01:05:01.429449: step: 1104/531, loss: 0.1402699500322342 2023-01-23 01:05:02.536038: step: 1108/531, loss: 0.08230245113372803 2023-01-23 01:05:03.657386: step: 1112/531, loss: 0.2658105790615082 2023-01-23 01:05:04.743630: step: 1116/531, loss: 0.10187321156263351 2023-01-23 01:05:05.940788: step: 1120/531, loss: 0.04635109752416611 2023-01-23 01:05:07.056572: step: 1124/531, loss: 0.06519933044910431 2023-01-23 01:05:08.177445: step: 1128/531, loss: 0.1938866227865219 2023-01-23 01:05:09.307561: step: 1132/531, loss: 0.060333251953125 2023-01-23 01:05:10.422330: step: 1136/531, loss: 0.030127622187137604 2023-01-23 01:05:11.558937: step: 1140/531, loss: 0.18982361257076263 2023-01-23 01:05:12.694082: step: 1144/531, loss: 0.04920043796300888 2023-01-23 01:05:13.815176: step: 1148/531, loss: 0.06450013816356659 2023-01-23 01:05:14.963849: step: 1152/531, loss: 0.05666818842291832 2023-01-23 01:05:16.085214: step: 1156/531, loss: 0.03358716890215874 2023-01-23 01:05:17.230125: step: 1160/531, loss: 1.0521191358566284 2023-01-23 01:05:18.392470: step: 1164/531, loss: 0.12741927802562714 2023-01-23 01:05:19.548349: step: 1168/531, loss: 0.054516952484846115 2023-01-23 01:05:20.662090: step: 1172/531, loss: 0.06138163059949875 2023-01-23 01:05:21.773276: step: 1176/531, loss: 0.2393409013748169 2023-01-23 01:05:22.889913: step: 1180/531, loss: 0.10070428997278214 2023-01-23 01:05:24.009322: step: 1184/531, loss: 0.003009033389389515 2023-01-23 01:05:25.131393: step: 1188/531, loss: 0.1621139496564865 2023-01-23 01:05:26.268439: step: 1192/531, loss: 0.1881113201379776 2023-01-23 01:05:27.361249: step: 1196/531, loss: 0.03025512769818306 2023-01-23 01:05:28.465066: step: 1200/531, loss: 0.08049193024635315 2023-01-23 01:05:29.550035: step: 1204/531, loss: 0.011544609442353249 2023-01-23 01:05:30.684311: step: 1208/531, loss: 0.3903922140598297 2023-01-23 01:05:31.826108: step: 1212/531, loss: 0.02210693433880806 2023-01-23 01:05:32.985921: step: 1216/531, loss: 0.10994205623865128 2023-01-23 01:05:34.121440: step: 1220/531, loss: 0.17045241594314575 2023-01-23 01:05:35.243281: step: 1224/531, loss: 0.10896234214305878 2023-01-23 01:05:36.360148: step: 1228/531, loss: 0.044367074966430664 2023-01-23 01:05:37.499210: step: 1232/531, loss: 0.03995375707745552 2023-01-23 01:05:38.620947: step: 1236/531, loss: 0.11155105382204056 2023-01-23 01:05:39.774151: step: 1240/531, loss: 0.041585251688957214 2023-01-23 01:05:40.864873: step: 1244/531, loss: 0.1555168330669403 2023-01-23 01:05:41.992665: step: 1248/531, loss: 0.05432291328907013 2023-01-23 01:05:43.146709: step: 1252/531, loss: 0.06665945053100586 2023-01-23 01:05:44.274857: step: 1256/531, loss: 0.3027457296848297 2023-01-23 01:05:45.393991: step: 1260/531, loss: 0.03419108688831329 2023-01-23 01:05:46.512063: step: 1264/531, loss: 0.03092327155172825 2023-01-23 01:05:47.636319: step: 1268/531, loss: 0.09152260422706604 2023-01-23 01:05:48.768263: step: 1272/531, loss: 0.05294923856854439 2023-01-23 01:05:49.882536: step: 1276/531, loss: 0.1418066918849945 2023-01-23 01:05:51.031186: step: 1280/531, loss: 0.045946039259433746 2023-01-23 01:05:52.138292: step: 1284/531, loss: 0.04810695722699165 2023-01-23 01:05:53.277509: step: 1288/531, loss: 0.09096412360668182 2023-01-23 01:05:54.381451: step: 1292/531, loss: 0.09424886852502823 2023-01-23 01:05:55.508353: step: 1296/531, loss: 0.1724928915500641 2023-01-23 01:05:56.668043: step: 1300/531, loss: 0.3312043249607086 2023-01-23 01:05:57.784087: step: 1304/531, loss: 0.10930080711841583 2023-01-23 01:05:58.918700: step: 1308/531, loss: 0.018151475116610527 2023-01-23 01:06:00.038375: step: 1312/531, loss: 0.23641586303710938 2023-01-23 01:06:01.153432: step: 1316/531, loss: 0.02094106748700142 2023-01-23 01:06:02.281733: step: 1320/531, loss: 0.07753334194421768 2023-01-23 01:06:03.419928: step: 1324/531, loss: 0.10565080493688583 2023-01-23 01:06:04.564252: step: 1328/531, loss: 0.1561090499162674 2023-01-23 01:06:05.676546: step: 1332/531, loss: 0.06907806545495987 2023-01-23 01:06:06.805605: step: 1336/531, loss: 0.14385661482810974 2023-01-23 01:06:07.940280: step: 1340/531, loss: 0.09367132186889648 2023-01-23 01:06:09.029700: step: 1344/531, loss: 0.06509292125701904 2023-01-23 01:06:10.134668: step: 1348/531, loss: 0.05485076829791069 2023-01-23 01:06:11.293207: step: 1352/531, loss: 0.2639181911945343 2023-01-23 01:06:12.420579: step: 1356/531, loss: 0.07145442813634872 2023-01-23 01:06:13.545125: step: 1360/531, loss: 0.06612322479486465 2023-01-23 01:06:14.658824: step: 1364/531, loss: 0.15043020248413086 2023-01-23 01:06:15.762066: step: 1368/531, loss: 0.468067467212677 2023-01-23 01:06:16.876291: step: 1372/531, loss: 0.04793062061071396 2023-01-23 01:06:17.981685: step: 1376/531, loss: 0.022897720336914062 2023-01-23 01:06:19.087971: step: 1380/531, loss: 0.061315443366765976 2023-01-23 01:06:20.211950: step: 1384/531, loss: 0.01979074440896511 2023-01-23 01:06:21.350632: step: 1388/531, loss: 0.0639532059431076 2023-01-23 01:06:22.485329: step: 1392/531, loss: 0.15867909789085388 2023-01-23 01:06:23.605470: step: 1396/531, loss: 0.11509492993354797 2023-01-23 01:06:24.766296: step: 1400/531, loss: 0.3212408125400543 2023-01-23 01:06:25.891911: step: 1404/531, loss: 0.19735059142112732 2023-01-23 01:06:27.014601: step: 1408/531, loss: 0.03058490715920925 2023-01-23 01:06:28.147764: step: 1412/531, loss: 0.08253774791955948 2023-01-23 01:06:29.274657: step: 1416/531, loss: 0.22068509459495544 2023-01-23 01:06:30.411629: step: 1420/531, loss: 0.06424064934253693 2023-01-23 01:06:31.512030: step: 1424/531, loss: 0.04112853854894638 2023-01-23 01:06:32.630804: step: 1428/531, loss: 0.12625522911548615 2023-01-23 01:06:33.776881: step: 1432/531, loss: 0.049492742866277695 2023-01-23 01:06:34.915604: step: 1436/531, loss: 0.09311608970165253 2023-01-23 01:06:36.038456: step: 1440/531, loss: 0.13868045806884766 2023-01-23 01:06:37.140379: step: 1444/531, loss: 0.0742088332772255 2023-01-23 01:06:38.269528: step: 1448/531, loss: 0.04856519773602486 2023-01-23 01:06:39.413419: step: 1452/531, loss: 0.02445363998413086 2023-01-23 01:06:40.537811: step: 1456/531, loss: 0.03946185111999512 2023-01-23 01:06:41.666474: step: 1460/531, loss: 0.13403233885765076 2023-01-23 01:06:42.781471: step: 1464/531, loss: 0.12843599915504456 2023-01-23 01:06:43.894200: step: 1468/531, loss: 0.13778942823410034 2023-01-23 01:06:45.013628: step: 1472/531, loss: 0.032407306134700775 2023-01-23 01:06:46.147465: step: 1476/531, loss: 0.016209697350859642 2023-01-23 01:06:47.279750: step: 1480/531, loss: 0.05104951933026314 2023-01-23 01:06:48.396806: step: 1484/531, loss: 0.04848307743668556 2023-01-23 01:06:49.521944: step: 1488/531, loss: 0.08278913795948029 2023-01-23 01:06:50.647456: step: 1492/531, loss: 0.11328534781932831 2023-01-23 01:06:51.751584: step: 1496/531, loss: 0.06588558852672577 2023-01-23 01:06:52.900604: step: 1500/531, loss: 0.12327533215284348 2023-01-23 01:06:54.020098: step: 1504/531, loss: 0.10412827134132385 2023-01-23 01:06:55.135203: step: 1508/531, loss: 0.04282745346426964 2023-01-23 01:06:56.226910: step: 1512/531, loss: 0.08965437859296799 2023-01-23 01:06:57.352378: step: 1516/531, loss: 0.05842337757349014 2023-01-23 01:06:58.458059: step: 1520/531, loss: 0.02496948279440403 2023-01-23 01:06:59.538411: step: 1524/531, loss: 0.04317150264978409 2023-01-23 01:07:00.651116: step: 1528/531, loss: 0.17738455533981323 2023-01-23 01:07:01.756564: step: 1532/531, loss: 0.0683690533041954 2023-01-23 01:07:02.871895: step: 1536/531, loss: 0.11062216758728027 2023-01-23 01:07:03.992538: step: 1540/531, loss: 0.06784573197364807 2023-01-23 01:07:05.105905: step: 1544/531, loss: 0.03943901136517525 2023-01-23 01:07:06.233790: step: 1548/531, loss: 0.10623788833618164 2023-01-23 01:07:07.371308: step: 1552/531, loss: 0.13369427621364594 2023-01-23 01:07:08.503833: step: 1556/531, loss: 0.12765559554100037 2023-01-23 01:07:09.646430: step: 1560/531, loss: 0.10928305983543396 2023-01-23 01:07:10.756596: step: 1564/531, loss: 0.08626241981983185 2023-01-23 01:07:11.940439: step: 1568/531, loss: 0.07066144794225693 2023-01-23 01:07:13.066259: step: 1572/531, loss: 0.15028266608715057 2023-01-23 01:07:14.188714: step: 1576/531, loss: 0.13189373910427094 2023-01-23 01:07:15.285080: step: 1580/531, loss: 0.040720224380493164 2023-01-23 01:07:16.434630: step: 1584/531, loss: 0.10490189492702484 2023-01-23 01:07:17.540289: step: 1588/531, loss: 0.11022691428661346 2023-01-23 01:07:18.658328: step: 1592/531, loss: 0.06727757304906845 2023-01-23 01:07:19.778401: step: 1596/531, loss: 0.21008263528347015 2023-01-23 01:07:20.903023: step: 1600/531, loss: 0.110480397939682 2023-01-23 01:07:22.019014: step: 1604/531, loss: 0.038848876953125 2023-01-23 01:07:23.162997: step: 1608/531, loss: 0.14333534240722656 2023-01-23 01:07:24.295208: step: 1612/531, loss: 0.012298773974180222 2023-01-23 01:07:25.418041: step: 1616/531, loss: 0.09040911495685577 2023-01-23 01:07:26.557252: step: 1620/531, loss: 0.36637288331985474 2023-01-23 01:07:27.660355: step: 1624/531, loss: 0.03619823604822159 2023-01-23 01:07:28.745222: step: 1628/531, loss: 0.12785176932811737 2023-01-23 01:07:29.915157: step: 1632/531, loss: 0.10669832676649094 2023-01-23 01:07:31.040611: step: 1636/531, loss: 0.017387963831424713 2023-01-23 01:07:32.163672: step: 1640/531, loss: 0.05115075409412384 2023-01-23 01:07:33.263258: step: 1644/531, loss: 0.025862883776426315 2023-01-23 01:07:34.402532: step: 1648/531, loss: 0.052378181368112564 2023-01-23 01:07:35.534539: step: 1652/531, loss: 0.10733203589916229 2023-01-23 01:07:36.660441: step: 1656/531, loss: 0.039488885551691055 2023-01-23 01:07:37.797056: step: 1660/531, loss: 0.23800000548362732 2023-01-23 01:07:38.925148: step: 1664/531, loss: 0.11620549857616425 2023-01-23 01:07:40.033687: step: 1668/531, loss: 0.04421696439385414 2023-01-23 01:07:41.169959: step: 1672/531, loss: 0.037122488021850586 2023-01-23 01:07:42.304022: step: 1676/531, loss: 0.157939612865448 2023-01-23 01:07:43.430141: step: 1680/531, loss: 0.13577966392040253 2023-01-23 01:07:44.562516: step: 1684/531, loss: 0.0504489429295063 2023-01-23 01:07:45.670179: step: 1688/531, loss: 0.07392968982458115 2023-01-23 01:07:46.786464: step: 1692/531, loss: 0.08688240498304367 2023-01-23 01:07:47.911988: step: 1696/531, loss: 0.0782146006822586 2023-01-23 01:07:48.995612: step: 1700/531, loss: 0.002805781550705433 2023-01-23 01:07:50.111989: step: 1704/531, loss: 0.05476422235369682 2023-01-23 01:07:51.227136: step: 1708/531, loss: 0.15776805579662323 2023-01-23 01:07:52.370673: step: 1712/531, loss: 0.06970424950122833 2023-01-23 01:07:53.491747: step: 1716/531, loss: 0.06293182820081711 2023-01-23 01:07:54.615690: step: 1720/531, loss: 0.11307448893785477 2023-01-23 01:07:55.718068: step: 1724/531, loss: 0.06442832946777344 2023-01-23 01:07:56.824219: step: 1728/531, loss: 0.13848206400871277 2023-01-23 01:07:57.938720: step: 1732/531, loss: 0.06902675330638885 2023-01-23 01:07:59.088103: step: 1736/531, loss: 0.0935390517115593 2023-01-23 01:08:00.221007: step: 1740/531, loss: 0.03362732008099556 2023-01-23 01:08:01.359052: step: 1744/531, loss: 0.07098159193992615 2023-01-23 01:08:02.474270: step: 1748/531, loss: 0.09417543560266495 2023-01-23 01:08:03.596907: step: 1752/531, loss: 0.09569878876209259 2023-01-23 01:08:04.725009: step: 1756/531, loss: 0.0673101395368576 2023-01-23 01:08:05.848864: step: 1760/531, loss: 0.04576165974140167 2023-01-23 01:08:06.964876: step: 1764/531, loss: 0.06733598560094833 2023-01-23 01:08:08.076201: step: 1768/531, loss: 0.10334434360265732 2023-01-23 01:08:09.189669: step: 1772/531, loss: 0.04893012344837189 2023-01-23 01:08:10.330545: step: 1776/531, loss: 0.4683903455734253 2023-01-23 01:08:11.510289: step: 1780/531, loss: 0.12383831292390823 2023-01-23 01:08:12.643590: step: 1784/531, loss: 0.1388513594865799 2023-01-23 01:08:13.781272: step: 1788/531, loss: 0.05231037363409996 2023-01-23 01:08:14.909469: step: 1792/531, loss: 0.25601157546043396 2023-01-23 01:08:16.042375: step: 1796/531, loss: 0.029924677684903145 2023-01-23 01:08:17.139276: step: 1800/531, loss: 0.05668769031763077 2023-01-23 01:08:18.255891: step: 1804/531, loss: 0.02938680723309517 2023-01-23 01:08:19.405349: step: 1808/531, loss: 0.07446356117725372 2023-01-23 01:08:20.538990: step: 1812/531, loss: 0.014715051278471947 2023-01-23 01:08:21.667867: step: 1816/531, loss: 0.1153407096862793 2023-01-23 01:08:22.819644: step: 1820/531, loss: 0.06602511554956436 2023-01-23 01:08:23.953663: step: 1824/531, loss: 0.1012735366821289 2023-01-23 01:08:25.118374: step: 1828/531, loss: 0.16131165623664856 2023-01-23 01:08:26.291619: step: 1832/531, loss: 0.21821995079517365 2023-01-23 01:08:27.415464: step: 1836/531, loss: 0.05337848886847496 2023-01-23 01:08:28.533046: step: 1840/531, loss: 0.03549156337976456 2023-01-23 01:08:29.652661: step: 1844/531, loss: 0.050888631492853165 2023-01-23 01:08:30.790089: step: 1848/531, loss: 0.13226155936717987 2023-01-23 01:08:31.928592: step: 1852/531, loss: 0.038954734802246094 2023-01-23 01:08:33.072068: step: 1856/531, loss: 0.10684119164943695 2023-01-23 01:08:34.229176: step: 1860/531, loss: 0.04459972679615021 2023-01-23 01:08:35.373000: step: 1864/531, loss: 0.026985742151737213 2023-01-23 01:08:36.483639: step: 1868/531, loss: 0.06536979228258133 2023-01-23 01:08:37.590528: step: 1872/531, loss: 0.0018253803718835115 2023-01-23 01:08:38.748160: step: 1876/531, loss: 0.18263044953346252 2023-01-23 01:08:39.854642: step: 1880/531, loss: 0.1416151076555252 2023-01-23 01:08:40.974016: step: 1884/531, loss: 0.16252461075782776 2023-01-23 01:08:42.104710: step: 1888/531, loss: 0.0649823397397995 2023-01-23 01:08:43.241044: step: 1892/531, loss: 1.1305994987487793 2023-01-23 01:08:44.407715: step: 1896/531, loss: 0.035987094044685364 2023-01-23 01:08:45.523439: step: 1900/531, loss: 0.4749086797237396 2023-01-23 01:08:46.647400: step: 1904/531, loss: 0.050026897341012955 2023-01-23 01:08:47.733168: step: 1908/531, loss: 0.03537178039550781 2023-01-23 01:08:48.872261: step: 1912/531, loss: 0.04737458378076553 2023-01-23 01:08:49.992037: step: 1916/531, loss: 0.13724027574062347 2023-01-23 01:08:51.130449: step: 1920/531, loss: 0.056358207017183304 2023-01-23 01:08:52.250718: step: 1924/531, loss: 0.05533742904663086 2023-01-23 01:08:53.366360: step: 1928/531, loss: 0.016994094476103783 2023-01-23 01:08:54.478027: step: 1932/531, loss: 0.06053461879491806 2023-01-23 01:08:55.605452: step: 1936/531, loss: 0.01579570770263672 2023-01-23 01:08:56.720170: step: 1940/531, loss: 0.004652261734008789 2023-01-23 01:08:57.824853: step: 1944/531, loss: 0.062317658215761185 2023-01-23 01:08:58.920060: step: 1948/531, loss: 0.16865481436252594 2023-01-23 01:09:00.041071: step: 1952/531, loss: 0.11037836223840714 2023-01-23 01:09:01.162106: step: 1956/531, loss: 0.09929104149341583 2023-01-23 01:09:02.272155: step: 1960/531, loss: 0.029140328988432884 2023-01-23 01:09:03.393840: step: 1964/531, loss: 0.47770261764526367 2023-01-23 01:09:04.515705: step: 1968/531, loss: 0.039046622812747955 2023-01-23 01:09:05.640035: step: 1972/531, loss: 0.05972623825073242 2023-01-23 01:09:06.790053: step: 1976/531, loss: 0.07596192508935928 2023-01-23 01:09:07.900502: step: 1980/531, loss: 0.04300356283783913 2023-01-23 01:09:09.017240: step: 1984/531, loss: 0.19802507758140564 2023-01-23 01:09:10.155408: step: 1988/531, loss: 0.3206310272216797 2023-01-23 01:09:11.302854: step: 1992/531, loss: 0.015581679530441761 2023-01-23 01:09:12.400988: step: 1996/531, loss: 0.13693447411060333 2023-01-23 01:09:13.525869: step: 2000/531, loss: 0.10448551177978516 2023-01-23 01:09:14.651688: step: 2004/531, loss: 0.03738908842206001 2023-01-23 01:09:15.758053: step: 2008/531, loss: 0.022568179294466972 2023-01-23 01:09:16.877868: step: 2012/531, loss: 0.071390300989151 2023-01-23 01:09:18.017612: step: 2016/531, loss: 0.1915409117937088 2023-01-23 01:09:19.159575: step: 2020/531, loss: 0.04772500693798065 2023-01-23 01:09:20.305004: step: 2024/531, loss: 0.19542759656906128 2023-01-23 01:09:21.416155: step: 2028/531, loss: 0.04015187919139862 2023-01-23 01:09:22.531713: step: 2032/531, loss: 0.017212580889463425 2023-01-23 01:09:23.681630: step: 2036/531, loss: 0.06971025466918945 2023-01-23 01:09:24.799117: step: 2040/531, loss: 0.03179655224084854 2023-01-23 01:09:25.922083: step: 2044/531, loss: 0.09848861396312714 2023-01-23 01:09:27.053646: step: 2048/531, loss: 0.05716719850897789 2023-01-23 01:09:28.204320: step: 2052/531, loss: 0.06620045006275177 2023-01-23 01:09:29.325066: step: 2056/531, loss: 0.12890753149986267 2023-01-23 01:09:30.454406: step: 2060/531, loss: 0.05382375419139862 2023-01-23 01:09:31.558531: step: 2064/531, loss: 0.33697694540023804 2023-01-23 01:09:32.690522: step: 2068/531, loss: 0.03177852928638458 2023-01-23 01:09:33.816165: step: 2072/531, loss: 0.024903679266572 2023-01-23 01:09:34.919853: step: 2076/531, loss: 0.09133139252662659 2023-01-23 01:09:36.040151: step: 2080/531, loss: 0.018282651901245117 2023-01-23 01:09:37.175195: step: 2084/531, loss: 0.0645199790596962 2023-01-23 01:09:38.289692: step: 2088/531, loss: 0.06097374111413956 2023-01-23 01:09:39.403067: step: 2092/531, loss: 0.011859512887895107 2023-01-23 01:09:40.512009: step: 2096/531, loss: 0.06787758320569992 2023-01-23 01:09:41.616458: step: 2100/531, loss: 0.07847819477319717 2023-01-23 01:09:42.738828: step: 2104/531, loss: 0.05095729976892471 2023-01-23 01:09:43.854802: step: 2108/531, loss: 0.10668793320655823 2023-01-23 01:09:44.957453: step: 2112/531, loss: 0.06331310421228409 2023-01-23 01:09:46.101512: step: 2116/531, loss: 0.10046424716711044 2023-01-23 01:09:47.250586: step: 2120/531, loss: 0.07530622184276581 2023-01-23 01:09:48.369576: step: 2124/531, loss: 0.14874349534511566 ================================================== Loss: 0.120 -------------------- Dev: {'event': {'p': 0.5725264169068204, 'r': 0.7936085219707057, 'f1': 0.6651785714285715}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Test: {'event': {'p': 0.6113918236104732, 'r': 0.7936791890280263, 'f1': 0.6907109496626881}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Chinese: {'event': {'p': 0.569620253164557, 'r': 0.8333333333333334, 'f1': 0.6766917293233082}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Korean: {'event': {'p': 0.65625, 'r': 0.6666666666666666, 'f1': 0.6614173228346457}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Russian: {'event': {'p': 0.42857142857142855, 'r': 0.5833333333333334, 'f1': 0.49411764705882355}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} New best korean model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.5725264169068204, 'r': 0.7936085219707057, 'f1': 0.6651785714285715}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Eng Test for Korean: {'event': {'p': 0.6113918236104732, 'r': 0.7936791890280263, 'f1': 0.6907109496626881}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Korean: {'event': {'p': 0.65625, 'r': 0.6666666666666666, 'f1': 0.6614173228346457}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 10 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:10:36.876176: step: 4/531, loss: 0.21623441576957703 2023-01-23 01:10:37.991566: step: 8/531, loss: 0.020121287554502487 2023-01-23 01:10:39.110145: step: 12/531, loss: 0.08715877681970596 2023-01-23 01:10:40.243938: step: 16/531, loss: 0.026087380945682526 2023-01-23 01:10:41.353273: step: 20/531, loss: 0.051083993166685104 2023-01-23 01:10:42.508335: step: 24/531, loss: 0.028424646705389023 2023-01-23 01:10:43.659635: step: 28/531, loss: 0.008292818441987038 2023-01-23 01:10:44.804787: step: 32/531, loss: 0.04293462634086609 2023-01-23 01:10:45.903513: step: 36/531, loss: 0.05734968185424805 2023-01-23 01:10:47.009683: step: 40/531, loss: 0.026438143104314804 2023-01-23 01:10:48.133558: step: 44/531, loss: 0.04494807869195938 2023-01-23 01:10:49.282661: step: 48/531, loss: 0.18213118612766266 2023-01-23 01:10:50.382076: step: 52/531, loss: 0.04565410688519478 2023-01-23 01:10:51.509544: step: 56/531, loss: 0.05773639678955078 2023-01-23 01:10:52.628366: step: 60/531, loss: 0.052641965448856354 2023-01-23 01:10:53.736572: step: 64/531, loss: 0.0018669129349291325 2023-01-23 01:10:54.855658: step: 68/531, loss: 0.29329538345336914 2023-01-23 01:10:55.985927: step: 72/531, loss: 0.0014976500533521175 2023-01-23 01:10:57.085731: step: 76/531, loss: 0.013664532452821732 2023-01-23 01:10:58.218670: step: 80/531, loss: 0.07327013462781906 2023-01-23 01:10:59.346124: step: 84/531, loss: 0.07483325153589249 2023-01-23 01:11:00.526772: step: 88/531, loss: 0.08705577254295349 2023-01-23 01:11:01.644355: step: 92/531, loss: 0.018394364044070244 2023-01-23 01:11:02.752234: step: 96/531, loss: 0.03158698230981827 2023-01-23 01:11:03.864867: step: 100/531, loss: 0.1435195505619049 2023-01-23 01:11:04.985962: step: 104/531, loss: 0.03944563865661621 2023-01-23 01:11:06.122837: step: 108/531, loss: 0.3374462127685547 2023-01-23 01:11:07.233997: step: 112/531, loss: 0.3695562481880188 2023-01-23 01:11:08.379407: step: 116/531, loss: 0.029473688453435898 2023-01-23 01:11:09.510451: step: 120/531, loss: 0.09563598036766052 2023-01-23 01:11:10.639996: step: 124/531, loss: 0.07941122353076935 2023-01-23 01:11:11.782393: step: 128/531, loss: 0.34564247727394104 2023-01-23 01:11:12.939824: step: 132/531, loss: 0.07962484657764435 2023-01-23 01:11:14.040259: step: 136/531, loss: 0.006733751390129328 2023-01-23 01:11:15.198022: step: 140/531, loss: 0.0843932181596756 2023-01-23 01:11:16.325215: step: 144/531, loss: 0.056038569658994675 2023-01-23 01:11:17.430912: step: 148/531, loss: 0.012675905600190163 2023-01-23 01:11:18.577841: step: 152/531, loss: 0.039282940328121185 2023-01-23 01:11:19.683211: step: 156/531, loss: 0.02194995991885662 2023-01-23 01:11:20.791191: step: 160/531, loss: 0.08320216834545135 2023-01-23 01:11:21.886730: step: 164/531, loss: 0.04401605203747749 2023-01-23 01:11:23.024537: step: 168/531, loss: 0.0696466863155365 2023-01-23 01:11:24.144612: step: 172/531, loss: 0.03185214847326279 2023-01-23 01:11:25.266890: step: 176/531, loss: 0.02067866548895836 2023-01-23 01:11:26.411987: step: 180/531, loss: 0.05394601821899414 2023-01-23 01:11:27.560659: step: 184/531, loss: 0.04944973066449165 2023-01-23 01:11:28.694443: step: 188/531, loss: 0.06316594779491425 2023-01-23 01:11:29.813344: step: 192/531, loss: 0.3765878677368164 2023-01-23 01:11:30.934512: step: 196/531, loss: 0.009817123413085938 2023-01-23 01:11:32.087281: step: 200/531, loss: 0.09896345436573029 2023-01-23 01:11:33.213187: step: 204/531, loss: 0.33717355132102966 2023-01-23 01:11:34.352315: step: 208/531, loss: 0.06959133595228195 2023-01-23 01:11:35.464379: step: 212/531, loss: 0.08850102871656418 2023-01-23 01:11:36.565922: step: 216/531, loss: 0.016798116266727448 2023-01-23 01:11:37.718539: step: 220/531, loss: 0.04097968339920044 2023-01-23 01:11:38.836895: step: 224/531, loss: 0.13019585609436035 2023-01-23 01:11:39.963869: step: 228/531, loss: 0.05013582855463028 2023-01-23 01:11:41.067819: step: 232/531, loss: 0.2794569134712219 2023-01-23 01:11:42.226894: step: 236/531, loss: 0.029644489288330078 2023-01-23 01:11:43.345387: step: 240/531, loss: 0.07343072444200516 2023-01-23 01:11:44.453618: step: 244/531, loss: 0.0627521499991417 2023-01-23 01:11:45.589942: step: 248/531, loss: 0.010606097988784313 2023-01-23 01:11:46.699927: step: 252/531, loss: 0.048505548387765884 2023-01-23 01:11:47.799510: step: 256/531, loss: 0.07152329385280609 2023-01-23 01:11:48.933342: step: 260/531, loss: 0.058820534497499466 2023-01-23 01:11:50.058345: step: 264/531, loss: 0.0063720704056322575 2023-01-23 01:11:51.189498: step: 268/531, loss: 0.013552665710449219 2023-01-23 01:11:52.315584: step: 272/531, loss: 0.36703377962112427 2023-01-23 01:11:53.438897: step: 276/531, loss: 0.19733428955078125 2023-01-23 01:11:54.567870: step: 280/531, loss: 0.038312721997499466 2023-01-23 01:11:55.735178: step: 284/531, loss: 0.04330892488360405 2023-01-23 01:11:56.851195: step: 288/531, loss: 0.14655809104442596 2023-01-23 01:11:57.994559: step: 292/531, loss: 0.09843883663415909 2023-01-23 01:11:59.126078: step: 296/531, loss: 0.26909342408180237 2023-01-23 01:12:00.266080: step: 300/531, loss: 0.03592033311724663 2023-01-23 01:12:01.385934: step: 304/531, loss: 0.0702594742178917 2023-01-23 01:12:02.520677: step: 308/531, loss: 0.09555979073047638 2023-01-23 01:12:03.627076: step: 312/531, loss: 0.4399285316467285 2023-01-23 01:12:04.766749: step: 316/531, loss: 0.03937721252441406 2023-01-23 01:12:05.859108: step: 320/531, loss: 0.02788022719323635 2023-01-23 01:12:06.989708: step: 324/531, loss: 0.10528340190649033 2023-01-23 01:12:08.148441: step: 328/531, loss: 0.041455648839473724 2023-01-23 01:12:09.278716: step: 332/531, loss: 0.15865746140480042 2023-01-23 01:12:10.398521: step: 336/531, loss: 0.04110288619995117 2023-01-23 01:12:11.533432: step: 340/531, loss: 0.005844688508659601 2023-01-23 01:12:12.645260: step: 344/531, loss: 0.10154610127210617 2023-01-23 01:12:13.773063: step: 348/531, loss: 0.09441948682069778 2023-01-23 01:12:14.913833: step: 352/531, loss: 0.09893741458654404 2023-01-23 01:12:16.047343: step: 356/531, loss: 0.023644447326660156 2023-01-23 01:12:17.189819: step: 360/531, loss: 0.05272217094898224 2023-01-23 01:12:18.330003: step: 364/531, loss: 0.10865690559148788 2023-01-23 01:12:19.443229: step: 368/531, loss: 0.07566137611865997 2023-01-23 01:12:20.561345: step: 372/531, loss: 0.05899462848901749 2023-01-23 01:12:21.689375: step: 376/531, loss: 0.09528933465480804 2023-01-23 01:12:22.789824: step: 380/531, loss: 0.005741882137954235 2023-01-23 01:12:23.917089: step: 384/531, loss: 0.0318605899810791 2023-01-23 01:12:25.043308: step: 388/531, loss: 0.1636379212141037 2023-01-23 01:12:26.177356: step: 392/531, loss: 0.04427357017993927 2023-01-23 01:12:27.312113: step: 396/531, loss: 0.13655900955200195 2023-01-23 01:12:28.454956: step: 400/531, loss: 0.09229107201099396 2023-01-23 01:12:29.588150: step: 404/531, loss: 0.04023800045251846 2023-01-23 01:12:30.696412: step: 408/531, loss: 0.011039048433303833 2023-01-23 01:12:31.814764: step: 412/531, loss: 0.06464796513319016 2023-01-23 01:12:32.963808: step: 416/531, loss: 0.3171292841434479 2023-01-23 01:12:34.079640: step: 420/531, loss: 0.012929152697324753 2023-01-23 01:12:35.180867: step: 424/531, loss: 0.03202877193689346 2023-01-23 01:12:36.290051: step: 428/531, loss: 0.05081872642040253 2023-01-23 01:12:37.403299: step: 432/531, loss: 0.07394762337207794 2023-01-23 01:12:38.520160: step: 436/531, loss: 0.0629698783159256 2023-01-23 01:12:39.639562: step: 440/531, loss: 0.09848938137292862 2023-01-23 01:12:40.786558: step: 444/531, loss: 0.10481653362512589 2023-01-23 01:12:41.935912: step: 448/531, loss: 0.09322714805603027 2023-01-23 01:12:43.050142: step: 452/531, loss: 0.10081973671913147 2023-01-23 01:12:44.155822: step: 456/531, loss: 0.038343336433172226 2023-01-23 01:12:45.291213: step: 460/531, loss: 0.027730178087949753 2023-01-23 01:12:46.408212: step: 464/531, loss: 0.10366799682378769 2023-01-23 01:12:47.512470: step: 468/531, loss: 0.16739359498023987 2023-01-23 01:12:48.621135: step: 472/531, loss: 0.26914986968040466 2023-01-23 01:12:49.755370: step: 476/531, loss: 0.031618881970644 2023-01-23 01:12:50.899373: step: 480/531, loss: 0.022040940821170807 2023-01-23 01:12:52.009651: step: 484/531, loss: 0.07305736839771271 2023-01-23 01:12:53.150587: step: 488/531, loss: 0.6566334366798401 2023-01-23 01:12:54.264682: step: 492/531, loss: 0.14733240008354187 2023-01-23 01:12:55.364159: step: 496/531, loss: 0.03914584964513779 2023-01-23 01:12:56.498284: step: 500/531, loss: 0.0702170878648758 2023-01-23 01:12:57.597383: step: 504/531, loss: 0.11220322549343109 2023-01-23 01:12:58.743433: step: 508/531, loss: 0.06979923695325851 2023-01-23 01:12:59.861139: step: 512/531, loss: 0.05417288839817047 2023-01-23 01:13:00.970088: step: 516/531, loss: 0.03533516079187393 2023-01-23 01:13:02.105189: step: 520/531, loss: 0.03577156364917755 2023-01-23 01:13:03.220206: step: 524/531, loss: 0.13439102470874786 2023-01-23 01:13:04.339552: step: 528/531, loss: 0.06326007843017578 2023-01-23 01:13:05.463243: step: 532/531, loss: 0.023912906646728516 2023-01-23 01:13:06.585573: step: 536/531, loss: 0.05921498313546181 2023-01-23 01:13:07.743769: step: 540/531, loss: 0.018293287605047226 2023-01-23 01:13:08.855306: step: 544/531, loss: 0.012829208746552467 2023-01-23 01:13:09.979445: step: 548/531, loss: 0.3172028660774231 2023-01-23 01:13:11.130262: step: 552/531, loss: 0.004756164271384478 2023-01-23 01:13:12.269697: step: 556/531, loss: 0.01308374386280775 2023-01-23 01:13:13.390607: step: 560/531, loss: 0.046912387013435364 2023-01-23 01:13:14.537523: step: 564/531, loss: 0.005837440490722656 2023-01-23 01:13:15.676519: step: 568/531, loss: 0.02226100116968155 2023-01-23 01:13:16.794192: step: 572/531, loss: 0.007289028260856867 2023-01-23 01:13:17.913339: step: 576/531, loss: 0.0626005157828331 2023-01-23 01:13:19.032602: step: 580/531, loss: 0.11476173996925354 2023-01-23 01:13:20.137324: step: 584/531, loss: 0.021791744977235794 2023-01-23 01:13:21.231547: step: 588/531, loss: 0.0356481559574604 2023-01-23 01:13:22.333057: step: 592/531, loss: 0.06095151975750923 2023-01-23 01:13:23.449838: step: 596/531, loss: 0.14686232805252075 2023-01-23 01:13:24.572510: step: 600/531, loss: 0.10427875816822052 2023-01-23 01:13:25.713431: step: 604/531, loss: 0.005172777455300093 2023-01-23 01:13:26.837456: step: 608/531, loss: 0.0070788380689918995 2023-01-23 01:13:27.961323: step: 612/531, loss: 0.07341833412647247 2023-01-23 01:13:29.095100: step: 616/531, loss: 0.11404337733983994 2023-01-23 01:13:30.199355: step: 620/531, loss: 0.11168313026428223 2023-01-23 01:13:31.328611: step: 624/531, loss: 0.03533286973834038 2023-01-23 01:13:32.442031: step: 628/531, loss: 0.16926708817481995 2023-01-23 01:13:33.581161: step: 632/531, loss: 0.02252807654440403 2023-01-23 01:13:34.715472: step: 636/531, loss: 0.09222564101219177 2023-01-23 01:13:35.825628: step: 640/531, loss: 0.05632071569561958 2023-01-23 01:13:36.929144: step: 644/531, loss: 0.09267549216747284 2023-01-23 01:13:38.066749: step: 648/531, loss: 0.016103744506835938 2023-01-23 01:13:39.191077: step: 652/531, loss: 0.00413855304941535 2023-01-23 01:13:40.298915: step: 656/531, loss: 0.23023460805416107 2023-01-23 01:13:41.436743: step: 660/531, loss: 0.04975175857543945 2023-01-23 01:13:42.564844: step: 664/531, loss: 0.1383899748325348 2023-01-23 01:13:43.648206: step: 668/531, loss: 0.08326377719640732 2023-01-23 01:13:44.762053: step: 672/531, loss: 0.01661386527121067 2023-01-23 01:13:45.888763: step: 676/531, loss: 0.02125072479248047 2023-01-23 01:13:47.021559: step: 680/531, loss: 0.006105518434196711 2023-01-23 01:13:48.169379: step: 684/531, loss: 0.36228635907173157 2023-01-23 01:13:49.324072: step: 688/531, loss: 0.1943696141242981 2023-01-23 01:13:50.445818: step: 692/531, loss: 0.00706901540979743 2023-01-23 01:13:51.578220: step: 696/531, loss: 0.025682950392365456 2023-01-23 01:13:52.692302: step: 700/531, loss: 0.08898001164197922 2023-01-23 01:13:53.864373: step: 704/531, loss: 0.003070497652515769 2023-01-23 01:13:54.965997: step: 708/531, loss: 0.03321390226483345 2023-01-23 01:13:56.117844: step: 712/531, loss: 0.2003200650215149 2023-01-23 01:13:57.246450: step: 716/531, loss: 0.11334299296140671 2023-01-23 01:13:58.355428: step: 720/531, loss: 0.07198648899793625 2023-01-23 01:13:59.479226: step: 724/531, loss: 0.07582226395606995 2023-01-23 01:14:00.592964: step: 728/531, loss: 0.059401609003543854 2023-01-23 01:14:01.717010: step: 732/531, loss: 0.13553687930107117 2023-01-23 01:14:02.814709: step: 736/531, loss: 0.00605697650462389 2023-01-23 01:14:03.924258: step: 740/531, loss: 0.06719312816858292 2023-01-23 01:14:05.030723: step: 744/531, loss: 0.045046616345644 2023-01-23 01:14:06.145626: step: 748/531, loss: 0.1551249623298645 2023-01-23 01:14:07.287313: step: 752/531, loss: 0.051241420209407806 2023-01-23 01:14:08.405094: step: 756/531, loss: 0.03444976732134819 2023-01-23 01:14:09.528147: step: 760/531, loss: 0.005519390106201172 2023-01-23 01:14:10.658007: step: 764/531, loss: 0.5704889297485352 2023-01-23 01:14:11.768507: step: 768/531, loss: 0.02174239233136177 2023-01-23 01:14:12.900933: step: 772/531, loss: 0.0718604102730751 2023-01-23 01:14:14.011533: step: 776/531, loss: 0.10223827511072159 2023-01-23 01:14:15.136573: step: 780/531, loss: 0.07152089476585388 2023-01-23 01:14:16.256514: step: 784/531, loss: 0.17063112556934357 2023-01-23 01:14:17.370450: step: 788/531, loss: 0.03548021242022514 2023-01-23 01:14:18.509739: step: 792/531, loss: 0.0362551212310791 2023-01-23 01:14:19.642953: step: 796/531, loss: 0.01795806922018528 2023-01-23 01:14:20.737530: step: 800/531, loss: 0.06009354442358017 2023-01-23 01:14:21.858138: step: 804/531, loss: 0.5600084066390991 2023-01-23 01:14:22.972754: step: 808/531, loss: 0.05992145463824272 2023-01-23 01:14:24.079072: step: 812/531, loss: 0.11755962669849396 2023-01-23 01:14:25.182526: step: 816/531, loss: 0.2835141122341156 2023-01-23 01:14:26.276406: step: 820/531, loss: 0.0030007362365722656 2023-01-23 01:14:27.419564: step: 824/531, loss: 0.0632781982421875 2023-01-23 01:14:28.542002: step: 828/531, loss: 0.15143266320228577 2023-01-23 01:14:29.633458: step: 832/531, loss: 0.03235621377825737 2023-01-23 01:14:30.770603: step: 836/531, loss: 0.16603946685791016 2023-01-23 01:14:31.923476: step: 840/531, loss: 0.015852833166718483 2023-01-23 01:14:33.073646: step: 844/531, loss: 0.09591273963451385 2023-01-23 01:14:34.214869: step: 848/531, loss: 0.07614460587501526 2023-01-23 01:14:35.355116: step: 852/531, loss: 0.017177201807498932 2023-01-23 01:14:36.515759: step: 856/531, loss: 0.15117035806179047 2023-01-23 01:14:37.605122: step: 860/531, loss: 0.11116638779640198 2023-01-23 01:14:38.738427: step: 864/531, loss: 0.09981352090835571 2023-01-23 01:14:39.893623: step: 868/531, loss: 6.600597381591797 2023-01-23 01:14:41.035650: step: 872/531, loss: 0.23515033721923828 2023-01-23 01:14:42.156290: step: 876/531, loss: 0.04414348676800728 2023-01-23 01:14:43.304640: step: 880/531, loss: 0.21615906059741974 2023-01-23 01:14:44.413361: step: 884/531, loss: 0.01897592470049858 2023-01-23 01:14:45.566161: step: 888/531, loss: 0.06870318204164505 2023-01-23 01:14:46.667695: step: 892/531, loss: 0.12570691108703613 2023-01-23 01:14:47.770813: step: 896/531, loss: 0.010081482119858265 2023-01-23 01:14:48.877413: step: 900/531, loss: 0.009365749545395374 2023-01-23 01:14:50.000062: step: 904/531, loss: 0.0889807790517807 2023-01-23 01:14:51.164598: step: 908/531, loss: 0.3951340913772583 2023-01-23 01:14:52.302812: step: 912/531, loss: 0.2790369391441345 2023-01-23 01:14:53.409399: step: 916/531, loss: 0.023556184023618698 2023-01-23 01:14:54.530972: step: 920/531, loss: 0.08003024756908417 2023-01-23 01:14:55.650768: step: 924/531, loss: 0.018730737268924713 2023-01-23 01:14:56.752046: step: 928/531, loss: 0.04774751514196396 2023-01-23 01:14:57.848675: step: 932/531, loss: 0.10205135494470596 2023-01-23 01:14:58.993256: step: 936/531, loss: 0.7415167689323425 2023-01-23 01:15:00.089625: step: 940/531, loss: 0.06834926456212997 2023-01-23 01:15:01.232254: step: 944/531, loss: 0.12860898673534393 2023-01-23 01:15:02.343597: step: 948/531, loss: 0.47881144285202026 2023-01-23 01:15:03.445660: step: 952/531, loss: 0.09848156571388245 2023-01-23 01:15:04.588733: step: 956/531, loss: 0.06895532459020615 2023-01-23 01:15:05.739136: step: 960/531, loss: 0.04453601688146591 2023-01-23 01:15:06.873938: step: 964/531, loss: 0.013002299703657627 2023-01-23 01:15:08.025340: step: 968/531, loss: 0.06620798259973526 2023-01-23 01:15:09.148795: step: 972/531, loss: 0.03886881098151207 2023-01-23 01:15:10.305142: step: 976/531, loss: 0.021335698664188385 2023-01-23 01:15:11.429639: step: 980/531, loss: 0.0800880491733551 2023-01-23 01:15:12.565922: step: 984/531, loss: 0.08799200505018234 2023-01-23 01:15:13.671774: step: 988/531, loss: 0.38767823576927185 2023-01-23 01:15:14.799015: step: 992/531, loss: 0.04175548627972603 2023-01-23 01:15:15.887399: step: 996/531, loss: 0.023601436987519264 2023-01-23 01:15:16.991773: step: 1000/531, loss: 0.04929669201374054 2023-01-23 01:15:18.140042: step: 1004/531, loss: 0.11158613860607147 2023-01-23 01:15:19.263439: step: 1008/531, loss: 0.08201932907104492 2023-01-23 01:15:20.384590: step: 1012/531, loss: 0.18200074136257172 2023-01-23 01:15:21.535577: step: 1016/531, loss: 0.0829593688249588 2023-01-23 01:15:22.647098: step: 1020/531, loss: 0.017726518213748932 2023-01-23 01:15:23.768206: step: 1024/531, loss: 0.04975710064172745 2023-01-23 01:15:24.883622: step: 1028/531, loss: 0.07877931743860245 2023-01-23 01:15:26.002245: step: 1032/531, loss: 0.0431065559387207 2023-01-23 01:15:27.122266: step: 1036/531, loss: 0.05532970279455185 2023-01-23 01:15:28.259085: step: 1040/531, loss: 0.04636359214782715 2023-01-23 01:15:29.368644: step: 1044/531, loss: 0.09782705456018448 2023-01-23 01:15:30.462841: step: 1048/531, loss: 0.08972759544849396 2023-01-23 01:15:31.576348: step: 1052/531, loss: 0.07853545993566513 2023-01-23 01:15:32.709489: step: 1056/531, loss: 0.12326722592115402 2023-01-23 01:15:33.851534: step: 1060/531, loss: 0.023280715569853783 2023-01-23 01:15:34.970897: step: 1064/531, loss: 0.04248838499188423 2023-01-23 01:15:36.102453: step: 1068/531, loss: 0.0165773406624794 2023-01-23 01:15:37.199128: step: 1072/531, loss: 0.011294294148683548 2023-01-23 01:15:38.333590: step: 1076/531, loss: 0.17084245383739471 2023-01-23 01:15:39.460416: step: 1080/531, loss: 0.09570303559303284 2023-01-23 01:15:40.569610: step: 1084/531, loss: 0.014852046966552734 2023-01-23 01:15:41.699326: step: 1088/531, loss: 0.022930670529603958 2023-01-23 01:15:42.833491: step: 1092/531, loss: 0.16185075044631958 2023-01-23 01:15:43.969574: step: 1096/531, loss: 0.1440170258283615 2023-01-23 01:15:45.096092: step: 1100/531, loss: 0.03081207349896431 2023-01-23 01:15:46.228184: step: 1104/531, loss: 0.06661882251501083 2023-01-23 01:15:47.336450: step: 1108/531, loss: 0.011142825707793236 2023-01-23 01:15:48.454659: step: 1112/531, loss: 0.054775189608335495 2023-01-23 01:15:49.567939: step: 1116/531, loss: 0.037755679339170456 2023-01-23 01:15:50.676507: step: 1120/531, loss: 0.035413362085819244 2023-01-23 01:15:51.781408: step: 1124/531, loss: 0.028282545506954193 2023-01-23 01:15:52.891269: step: 1128/531, loss: 0.361624538898468 2023-01-23 01:15:54.008932: step: 1132/531, loss: 0.16452975571155548 2023-01-23 01:15:55.113227: step: 1136/531, loss: 0.058150291442871094 2023-01-23 01:15:56.217181: step: 1140/531, loss: 0.1460392028093338 2023-01-23 01:15:57.329515: step: 1144/531, loss: 0.015816306695342064 2023-01-23 01:15:58.451963: step: 1148/531, loss: 0.08659800887107849 2023-01-23 01:15:59.583335: step: 1152/531, loss: 0.16230088472366333 2023-01-23 01:16:00.716907: step: 1156/531, loss: 0.007613944821059704 2023-01-23 01:16:01.866863: step: 1160/531, loss: 0.06964094936847687 2023-01-23 01:16:02.997183: step: 1164/531, loss: 0.1682472825050354 2023-01-23 01:16:04.129715: step: 1168/531, loss: 0.013976383954286575 2023-01-23 01:16:05.258106: step: 1172/531, loss: 0.014623391442000866 2023-01-23 01:16:06.377238: step: 1176/531, loss: 0.08265195041894913 2023-01-23 01:16:07.496923: step: 1180/531, loss: 0.391972154378891 2023-01-23 01:16:08.627100: step: 1184/531, loss: 0.1276385337114334 2023-01-23 01:16:09.742057: step: 1188/531, loss: 0.5758189558982849 2023-01-23 01:16:10.862661: step: 1192/531, loss: 0.011744881048798561 2023-01-23 01:16:12.018407: step: 1196/531, loss: 0.010767650790512562 2023-01-23 01:16:13.151857: step: 1200/531, loss: 0.062334444373846054 2023-01-23 01:16:14.284960: step: 1204/531, loss: 0.21208438277244568 2023-01-23 01:16:15.423880: step: 1208/531, loss: 0.06007290259003639 2023-01-23 01:16:16.545869: step: 1212/531, loss: 0.05585651472210884 2023-01-23 01:16:17.672685: step: 1216/531, loss: 0.05054893344640732 2023-01-23 01:16:18.797374: step: 1220/531, loss: 0.08229760825634003 2023-01-23 01:16:19.914368: step: 1224/531, loss: 0.012652969919145107 2023-01-23 01:16:21.062464: step: 1228/531, loss: 0.041400909423828125 2023-01-23 01:16:22.190797: step: 1232/531, loss: 0.08686723560094833 2023-01-23 01:16:23.325407: step: 1236/531, loss: 0.03182416036725044 2023-01-23 01:16:24.468982: step: 1240/531, loss: 0.12221205234527588 2023-01-23 01:16:25.628406: step: 1244/531, loss: 0.011512613855302334 2023-01-23 01:16:26.761537: step: 1248/531, loss: 0.00769419688731432 2023-01-23 01:16:27.866211: step: 1252/531, loss: 0.04380970075726509 2023-01-23 01:16:29.018067: step: 1256/531, loss: 0.10821003466844559 2023-01-23 01:16:30.158714: step: 1260/531, loss: 0.09529123455286026 2023-01-23 01:16:31.263426: step: 1264/531, loss: 0.0686555877327919 2023-01-23 01:16:32.364290: step: 1268/531, loss: 0.016816092655062675 2023-01-23 01:16:33.498145: step: 1272/531, loss: 0.005986523814499378 2023-01-23 01:16:34.644450: step: 1276/531, loss: 0.49938222765922546 2023-01-23 01:16:35.767042: step: 1280/531, loss: 0.05700397491455078 2023-01-23 01:16:36.923748: step: 1284/531, loss: 0.09781436622142792 2023-01-23 01:16:38.065098: step: 1288/531, loss: 0.19268473982810974 2023-01-23 01:16:39.187524: step: 1292/531, loss: 0.158880814909935 2023-01-23 01:16:40.307901: step: 1296/531, loss: 0.17393112182617188 2023-01-23 01:16:41.443065: step: 1300/531, loss: 0.17259693145751953 2023-01-23 01:16:42.639577: step: 1304/531, loss: 0.0538850761950016 2023-01-23 01:16:43.757189: step: 1308/531, loss: 0.05626258626580238 2023-01-23 01:16:44.876268: step: 1312/531, loss: 0.07283325493335724 2023-01-23 01:16:45.987677: step: 1316/531, loss: 0.261345773935318 2023-01-23 01:16:47.135664: step: 1320/531, loss: 0.023939799517393112 2023-01-23 01:16:48.243394: step: 1324/531, loss: 0.07654476165771484 2023-01-23 01:16:49.365410: step: 1328/531, loss: 0.032021380960941315 2023-01-23 01:16:50.504069: step: 1332/531, loss: 0.04924974590539932 2023-01-23 01:16:51.607533: step: 1336/531, loss: 0.33833616971969604 2023-01-23 01:16:52.699069: step: 1340/531, loss: 0.11932249367237091 2023-01-23 01:16:53.802285: step: 1344/531, loss: 0.043663978576660156 2023-01-23 01:16:54.923659: step: 1348/531, loss: 0.08932981640100479 2023-01-23 01:16:56.034199: step: 1352/531, loss: 0.07595663517713547 2023-01-23 01:16:57.174824: step: 1356/531, loss: 0.1263381689786911 2023-01-23 01:16:58.291446: step: 1360/531, loss: 0.1303871124982834 2023-01-23 01:16:59.409302: step: 1364/531, loss: 0.4046003222465515 2023-01-23 01:17:00.555975: step: 1368/531, loss: 0.07024087756872177 2023-01-23 01:17:01.693972: step: 1372/531, loss: 0.04590454325079918 2023-01-23 01:17:02.820074: step: 1376/531, loss: 0.05047474056482315 2023-01-23 01:17:03.948253: step: 1380/531, loss: 0.062482595443725586 2023-01-23 01:17:05.099843: step: 1384/531, loss: 0.08354830741882324 2023-01-23 01:17:06.199263: step: 1388/531, loss: 0.04470429569482803 2023-01-23 01:17:07.305821: step: 1392/531, loss: 0.016810130327939987 2023-01-23 01:17:08.437737: step: 1396/531, loss: 0.05169057846069336 2023-01-23 01:17:09.545097: step: 1400/531, loss: 0.058431342244148254 2023-01-23 01:17:10.665849: step: 1404/531, loss: 0.06417732685804367 2023-01-23 01:17:11.808500: step: 1408/531, loss: 0.005791378207504749 2023-01-23 01:17:12.956712: step: 1412/531, loss: 0.050724029541015625 2023-01-23 01:17:14.069266: step: 1416/531, loss: 0.100092314183712 2023-01-23 01:17:15.214870: step: 1420/531, loss: 0.05062408745288849 2023-01-23 01:17:16.313527: step: 1424/531, loss: 0.033126164227724075 2023-01-23 01:17:17.478935: step: 1428/531, loss: 0.0731164962053299 2023-01-23 01:17:18.613964: step: 1432/531, loss: 0.047882940620183945 2023-01-23 01:17:19.729191: step: 1436/531, loss: 0.05545955151319504 2023-01-23 01:17:20.865938: step: 1440/531, loss: 0.04085004702210426 2023-01-23 01:17:22.018934: step: 1444/531, loss: 0.10405702888965607 2023-01-23 01:17:23.171089: step: 1448/531, loss: 0.09285449981689453 2023-01-23 01:17:24.275404: step: 1452/531, loss: 0.041260432451963425 2023-01-23 01:17:25.400795: step: 1456/531, loss: 0.01364598236978054 2023-01-23 01:17:26.529163: step: 1460/531, loss: 0.027989627793431282 2023-01-23 01:17:27.665992: step: 1464/531, loss: 0.07774285972118378 2023-01-23 01:17:28.788756: step: 1468/531, loss: 0.02292471006512642 2023-01-23 01:17:29.924255: step: 1472/531, loss: 0.17965182662010193 2023-01-23 01:17:31.041583: step: 1476/531, loss: 0.08426199108362198 2023-01-23 01:17:32.143577: step: 1480/531, loss: 0.0807638168334961 2023-01-23 01:17:33.272489: step: 1484/531, loss: 0.10578146576881409 2023-01-23 01:17:34.398519: step: 1488/531, loss: 0.12574782967567444 2023-01-23 01:17:35.506827: step: 1492/531, loss: 0.026172496378421783 2023-01-23 01:17:36.640144: step: 1496/531, loss: 0.1960858404636383 2023-01-23 01:17:37.773505: step: 1500/531, loss: 0.0625336617231369 2023-01-23 01:17:38.904633: step: 1504/531, loss: 0.03143353760242462 2023-01-23 01:17:40.037825: step: 1508/531, loss: 0.1215442568063736 2023-01-23 01:17:41.162549: step: 1512/531, loss: 0.2620088458061218 2023-01-23 01:17:42.300025: step: 1516/531, loss: 0.08306656032800674 2023-01-23 01:17:43.435391: step: 1520/531, loss: 0.0044418335892260075 2023-01-23 01:17:44.561195: step: 1524/531, loss: 0.026612281799316406 2023-01-23 01:17:45.689367: step: 1528/531, loss: 0.10181407630443573 2023-01-23 01:17:46.818185: step: 1532/531, loss: 0.09413104504346848 2023-01-23 01:17:47.959909: step: 1536/531, loss: 0.03781495243310928 2023-01-23 01:17:49.078040: step: 1540/531, loss: 0.003916787914931774 2023-01-23 01:17:50.186957: step: 1544/531, loss: 0.1770172119140625 2023-01-23 01:17:51.295957: step: 1548/531, loss: 0.006284570321440697 2023-01-23 01:17:52.443163: step: 1552/531, loss: 0.08632194995880127 2023-01-23 01:17:53.579309: step: 1556/531, loss: 0.022620487958192825 2023-01-23 01:17:54.720403: step: 1560/531, loss: 0.04532375559210777 2023-01-23 01:17:55.901613: step: 1564/531, loss: 0.04324665293097496 2023-01-23 01:17:57.085124: step: 1568/531, loss: 0.016980551183223724 2023-01-23 01:17:58.218309: step: 1572/531, loss: 0.06600189208984375 2023-01-23 01:17:59.326416: step: 1576/531, loss: 0.12555399537086487 2023-01-23 01:18:00.430719: step: 1580/531, loss: 0.14547604322433472 2023-01-23 01:18:01.546783: step: 1584/531, loss: 0.08104319870471954 2023-01-23 01:18:02.648292: step: 1588/531, loss: 0.047277260571718216 2023-01-23 01:18:03.748757: step: 1592/531, loss: 0.02387695387005806 2023-01-23 01:18:04.912720: step: 1596/531, loss: 0.0806453749537468 2023-01-23 01:18:06.061569: step: 1600/531, loss: 0.04324378818273544 2023-01-23 01:18:07.178795: step: 1604/531, loss: 0.0976925790309906 2023-01-23 01:18:08.271412: step: 1608/531, loss: 0.003158140229061246 2023-01-23 01:18:09.385124: step: 1612/531, loss: 0.051337577402591705 2023-01-23 01:18:10.521741: step: 1616/531, loss: 0.24263755977153778 2023-01-23 01:18:11.656581: step: 1620/531, loss: 0.05816173925995827 2023-01-23 01:18:12.767787: step: 1624/531, loss: 0.04355583339929581 2023-01-23 01:18:13.895089: step: 1628/531, loss: 0.14264144003391266 2023-01-23 01:18:15.015955: step: 1632/531, loss: 0.06893301010131836 2023-01-23 01:18:16.135390: step: 1636/531, loss: 0.013486465439200401 2023-01-23 01:18:17.247032: step: 1640/531, loss: 0.19555817544460297 2023-01-23 01:18:18.373124: step: 1644/531, loss: 0.03461914137005806 2023-01-23 01:18:19.485967: step: 1648/531, loss: 0.0044204238802194595 2023-01-23 01:18:20.625483: step: 1652/531, loss: 0.01669464074075222 2023-01-23 01:18:21.772179: step: 1656/531, loss: 0.038656093180179596 2023-01-23 01:18:22.938248: step: 1660/531, loss: 0.04943694919347763 2023-01-23 01:18:24.042991: step: 1664/531, loss: 0.05698385462164879 2023-01-23 01:18:25.176908: step: 1668/531, loss: 0.05592689663171768 2023-01-23 01:18:26.316334: step: 1672/531, loss: 0.05145673453807831 2023-01-23 01:18:27.446114: step: 1676/531, loss: 0.044964030385017395 2023-01-23 01:18:28.537455: step: 1680/531, loss: 0.04249439388513565 2023-01-23 01:18:29.658806: step: 1684/531, loss: 0.17572708427906036 2023-01-23 01:18:30.758468: step: 1688/531, loss: 0.041205596178770065 2023-01-23 01:18:31.875360: step: 1692/531, loss: 0.1240943968296051 2023-01-23 01:18:33.009535: step: 1696/531, loss: 0.5286551117897034 2023-01-23 01:18:34.111915: step: 1700/531, loss: 0.011068916879594326 2023-01-23 01:18:35.266822: step: 1704/531, loss: 0.126966655254364 2023-01-23 01:18:36.407389: step: 1708/531, loss: 0.06042356789112091 2023-01-23 01:18:37.500984: step: 1712/531, loss: 0.08028201758861542 2023-01-23 01:18:38.597744: step: 1716/531, loss: 0.019240237772464752 2023-01-23 01:18:39.717407: step: 1720/531, loss: 0.058541107922792435 2023-01-23 01:18:40.843741: step: 1724/531, loss: 0.388589084148407 2023-01-23 01:18:41.966141: step: 1728/531, loss: 0.016455747187137604 2023-01-23 01:18:43.099283: step: 1732/531, loss: 0.11167659610509872 2023-01-23 01:18:44.236814: step: 1736/531, loss: 0.19166278839111328 2023-01-23 01:18:45.410352: step: 1740/531, loss: 0.07711277157068253 2023-01-23 01:18:46.546256: step: 1744/531, loss: 0.06050138175487518 2023-01-23 01:18:47.726136: step: 1748/531, loss: 0.010099506005644798 2023-01-23 01:18:48.842759: step: 1752/531, loss: 0.7771268486976624 2023-01-23 01:18:49.975008: step: 1756/531, loss: 0.08279304206371307 2023-01-23 01:18:51.077822: step: 1760/531, loss: 0.038699328899383545 2023-01-23 01:18:52.192010: step: 1764/531, loss: 0.028452303260564804 2023-01-23 01:18:53.343168: step: 1768/531, loss: 0.0399787463247776 2023-01-23 01:18:54.455843: step: 1772/531, loss: 0.11855180561542511 2023-01-23 01:18:55.595716: step: 1776/531, loss: 0.05953254923224449 2023-01-23 01:18:56.703152: step: 1780/531, loss: 0.03719845041632652 2023-01-23 01:18:57.823176: step: 1784/531, loss: 0.08078131824731827 2023-01-23 01:18:58.968338: step: 1788/531, loss: 0.05043911933898926 2023-01-23 01:19:00.069980: step: 1792/531, loss: 0.09735693782567978 2023-01-23 01:19:01.187613: step: 1796/531, loss: 0.12132323533296585 2023-01-23 01:19:02.302410: step: 1800/531, loss: 0.08893604576587677 2023-01-23 01:19:03.405056: step: 1804/531, loss: 0.05169811472296715 2023-01-23 01:19:04.516870: step: 1808/531, loss: 0.11936827003955841 2023-01-23 01:19:05.666911: step: 1812/531, loss: 0.032808687537908554 2023-01-23 01:19:06.775369: step: 1816/531, loss: 0.002257442567497492 2023-01-23 01:19:07.887434: step: 1820/531, loss: 0.041516877710819244 2023-01-23 01:19:08.983492: step: 1824/531, loss: 0.08992882072925568 2023-01-23 01:19:10.111884: step: 1828/531, loss: 0.04004926607012749 2023-01-23 01:19:11.267545: step: 1832/531, loss: 0.15225887298583984 2023-01-23 01:19:12.424868: step: 1836/531, loss: 0.05785880237817764 2023-01-23 01:19:13.554617: step: 1840/531, loss: 0.03980877250432968 2023-01-23 01:19:14.723429: step: 1844/531, loss: 1.4729385375976562 2023-01-23 01:19:15.872355: step: 1848/531, loss: 0.11329560726881027 2023-01-23 01:19:16.991704: step: 1852/531, loss: 0.030247116461396217 2023-01-23 01:19:18.132300: step: 1856/531, loss: 0.061458565294742584 2023-01-23 01:19:19.246797: step: 1860/531, loss: 0.05626792833209038 2023-01-23 01:19:20.349792: step: 1864/531, loss: 0.04756994545459747 2023-01-23 01:19:21.461825: step: 1868/531, loss: 0.13331520557403564 2023-01-23 01:19:22.607365: step: 1872/531, loss: 0.022486496716737747 2023-01-23 01:19:23.745560: step: 1876/531, loss: 0.017060469835996628 2023-01-23 01:19:24.902886: step: 1880/531, loss: 0.0424840934574604 2023-01-23 01:19:26.004415: step: 1884/531, loss: 0.050362586975097656 2023-01-23 01:19:27.115457: step: 1888/531, loss: 0.14166298508644104 2023-01-23 01:19:28.255273: step: 1892/531, loss: 0.033858489245176315 2023-01-23 01:19:29.377744: step: 1896/531, loss: 0.10135313868522644 2023-01-23 01:19:30.514879: step: 1900/531, loss: 0.08659668266773224 2023-01-23 01:19:31.643211: step: 1904/531, loss: 0.036715030670166016 2023-01-23 01:19:32.772602: step: 1908/531, loss: 0.110712431371212 2023-01-23 01:19:33.872755: step: 1912/531, loss: 0.027908803895115852 2023-01-23 01:19:34.992850: step: 1916/531, loss: 0.035930585116147995 2023-01-23 01:19:36.092514: step: 1920/531, loss: 0.08405759930610657 2023-01-23 01:19:37.197255: step: 1924/531, loss: 0.026928137987852097 2023-01-23 01:19:38.324599: step: 1928/531, loss: 0.15556088089942932 2023-01-23 01:19:39.478013: step: 1932/531, loss: 0.11883096396923065 2023-01-23 01:19:40.604176: step: 1936/531, loss: 0.1011347770690918 2023-01-23 01:19:41.755010: step: 1940/531, loss: 0.12977942824363708 2023-01-23 01:19:42.904698: step: 1944/531, loss: 0.04790253937244415 2023-01-23 01:19:44.061671: step: 1948/531, loss: 0.05401799827814102 2023-01-23 01:19:45.198462: step: 1952/531, loss: 0.02795390971004963 2023-01-23 01:19:46.300203: step: 1956/531, loss: 0.1771896332502365 2023-01-23 01:19:47.402762: step: 1960/531, loss: 0.08870792388916016 2023-01-23 01:19:48.504509: step: 1964/531, loss: 0.3486073315143585 2023-01-23 01:19:49.658464: step: 1968/531, loss: 0.07131248712539673 2023-01-23 01:19:50.795822: step: 1972/531, loss: 0.03205299377441406 2023-01-23 01:19:51.911507: step: 1976/531, loss: 0.13303136825561523 2023-01-23 01:19:53.052374: step: 1980/531, loss: 0.10535793751478195 2023-01-23 01:19:54.207470: step: 1984/531, loss: 0.024078847840428352 2023-01-23 01:19:55.325995: step: 1988/531, loss: 1.4425712823867798 2023-01-23 01:19:56.439988: step: 1992/531, loss: 1.1072731018066406 2023-01-23 01:19:57.580209: step: 1996/531, loss: 0.03151216730475426 2023-01-23 01:19:58.719562: step: 2000/531, loss: 0.04063913971185684 2023-01-23 01:19:59.807526: step: 2004/531, loss: 0.028385544195771217 2023-01-23 01:20:00.923643: step: 2008/531, loss: 0.0454859733581543 2023-01-23 01:20:02.036686: step: 2012/531, loss: 0.08616266399621964 2023-01-23 01:20:03.161463: step: 2016/531, loss: 0.0756472572684288 2023-01-23 01:20:04.302085: step: 2020/531, loss: 0.1179908812046051 2023-01-23 01:20:05.460265: step: 2024/531, loss: 0.03920431062579155 2023-01-23 01:20:06.585740: step: 2028/531, loss: 0.11259995400905609 2023-01-23 01:20:07.738214: step: 2032/531, loss: 0.07037858664989471 2023-01-23 01:20:08.871499: step: 2036/531, loss: 0.02563338354229927 2023-01-23 01:20:10.001435: step: 2040/531, loss: 0.029477596282958984 2023-01-23 01:20:11.121735: step: 2044/531, loss: 0.134347602725029 2023-01-23 01:20:12.252005: step: 2048/531, loss: 0.28497275710105896 2023-01-23 01:20:13.380113: step: 2052/531, loss: 0.06466750800609589 2023-01-23 01:20:14.546850: step: 2056/531, loss: 0.04205689579248428 2023-01-23 01:20:15.659042: step: 2060/531, loss: 0.08378100395202637 2023-01-23 01:20:16.806509: step: 2064/531, loss: 0.1108739897608757 2023-01-23 01:20:17.959722: step: 2068/531, loss: 0.035460758954286575 2023-01-23 01:20:19.093054: step: 2072/531, loss: 0.06382570415735245 2023-01-23 01:20:20.242465: step: 2076/531, loss: 0.036687564104795456 2023-01-23 01:20:21.350694: step: 2080/531, loss: 0.05562110245227814 2023-01-23 01:20:22.491437: step: 2084/531, loss: 0.0508517287671566 2023-01-23 01:20:23.624986: step: 2088/531, loss: 0.021392060443758965 2023-01-23 01:20:24.727197: step: 2092/531, loss: 0.039246369153261185 2023-01-23 01:20:25.852217: step: 2096/531, loss: 0.10869696736335754 2023-01-23 01:20:26.974513: step: 2100/531, loss: 0.05337419733405113 2023-01-23 01:20:28.074913: step: 2104/531, loss: 0.0476841926574707 2023-01-23 01:20:29.206663: step: 2108/531, loss: 0.058841705322265625 2023-01-23 01:20:30.333764: step: 2112/531, loss: 0.08721671253442764 2023-01-23 01:20:31.446905: step: 2116/531, loss: 0.014968396164476871 2023-01-23 01:20:32.555066: step: 2120/531, loss: 0.06654996424913406 2023-01-23 01:20:33.684520: step: 2124/531, loss: 0.062440112233161926 ================================================== Loss: 0.110 -------------------- Dev: {'event': {'p': 0.5704500978473581, 'r': 0.7762982689747004, 'f1': 0.6576424139875917}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Test: {'event': {'p': 0.61090573012939, 'r': 0.7883124627310674, 'f1': 0.6883624056235356}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Chinese: {'event': {'p': 0.5632183908045977, 'r': 0.9074074074074074, 'f1': 0.6950354609929078}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Korean: {'event': {'p': 0.6481481481481481, 'r': 0.5555555555555556, 'f1': 0.5982905982905983}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Russian: {'event': {'p': 0.4782608695652174, 'r': 0.6111111111111112, 'f1': 0.5365853658536586}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.5725264169068204, 'r': 0.7936085219707057, 'f1': 0.6651785714285715}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Eng Test for Korean: {'event': {'p': 0.6113918236104732, 'r': 0.7936791890280263, 'f1': 0.6907109496626881}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Korean: {'event': {'p': 0.65625, 'r': 0.6666666666666666, 'f1': 0.6614173228346457}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 11 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:21:13.761498: step: 4/531, loss: 0.18987540900707245 2023-01-23 01:21:14.865998: step: 8/531, loss: 0.0072501180693507195 2023-01-23 01:21:16.054423: step: 12/531, loss: 0.037050630897283554 2023-01-23 01:21:17.175166: step: 16/531, loss: 0.03488335758447647 2023-01-23 01:21:18.301642: step: 20/531, loss: 0.057396698743104935 2023-01-23 01:21:19.448921: step: 24/531, loss: 0.10559973865747452 2023-01-23 01:21:20.612797: step: 28/531, loss: 0.3329788148403168 2023-01-23 01:21:21.749211: step: 32/531, loss: 0.026273632422089577 2023-01-23 01:21:22.886475: step: 36/531, loss: 0.0021824599243700504 2023-01-23 01:21:24.024697: step: 40/531, loss: 0.07027044892311096 2023-01-23 01:21:25.152138: step: 44/531, loss: 0.04060091823339462 2023-01-23 01:21:26.271109: step: 48/531, loss: 0.004965400323271751 2023-01-23 01:21:27.395554: step: 52/531, loss: 0.21512404084205627 2023-01-23 01:21:28.541537: step: 56/531, loss: 0.18710888922214508 2023-01-23 01:21:29.656971: step: 60/531, loss: 0.005432033445686102 2023-01-23 01:21:30.763215: step: 64/531, loss: 0.02842388115823269 2023-01-23 01:21:31.908914: step: 68/531, loss: 0.0749642625451088 2023-01-23 01:21:33.024608: step: 72/531, loss: 0.018126148730516434 2023-01-23 01:21:34.150511: step: 76/531, loss: 0.003795528318732977 2023-01-23 01:21:35.271388: step: 80/531, loss: 0.06516571342945099 2023-01-23 01:21:36.388925: step: 84/531, loss: 0.02129889652132988 2023-01-23 01:21:37.506332: step: 88/531, loss: 0.03553161770105362 2023-01-23 01:21:38.633850: step: 92/531, loss: 1.1856123208999634 2023-01-23 01:21:39.767392: step: 96/531, loss: 0.34031039476394653 2023-01-23 01:21:40.904101: step: 100/531, loss: 0.014528656378388405 2023-01-23 01:21:42.002960: step: 104/531, loss: 0.05841522291302681 2023-01-23 01:21:43.147867: step: 108/531, loss: 0.061768725514411926 2023-01-23 01:21:44.269393: step: 112/531, loss: 0.003407597541809082 2023-01-23 01:21:45.342126: step: 116/531, loss: 0.0662296861410141 2023-01-23 01:21:46.495579: step: 120/531, loss: 0.014042950235307217 2023-01-23 01:21:47.660235: step: 124/531, loss: 0.020859479904174805 2023-01-23 01:21:48.798390: step: 128/531, loss: 0.01929612271487713 2023-01-23 01:21:49.921725: step: 132/531, loss: 0.05305957794189453 2023-01-23 01:21:51.066686: step: 136/531, loss: 0.04234256595373154 2023-01-23 01:21:52.199845: step: 140/531, loss: 0.019442368298768997 2023-01-23 01:21:53.310501: step: 144/531, loss: 0.016852760687470436 2023-01-23 01:21:54.438652: step: 148/531, loss: 0.01007623691111803 2023-01-23 01:21:55.556071: step: 152/531, loss: 0.05290966108441353 2023-01-23 01:21:56.699578: step: 156/531, loss: 0.3353423476219177 2023-01-23 01:21:57.817899: step: 160/531, loss: 0.021361876279115677 2023-01-23 01:21:59.005454: step: 164/531, loss: 0.354099839925766 2023-01-23 01:22:00.131229: step: 168/531, loss: 0.023737479001283646 2023-01-23 01:22:01.258657: step: 172/531, loss: 0.03588595613837242 2023-01-23 01:22:02.400410: step: 176/531, loss: 0.12529344856739044 2023-01-23 01:22:03.574150: step: 180/531, loss: 0.02104330062866211 2023-01-23 01:22:04.724452: step: 184/531, loss: 0.011593818664550781 2023-01-23 01:22:05.832028: step: 188/531, loss: 0.07578025013208389 2023-01-23 01:22:06.982634: step: 192/531, loss: 0.03191714361310005 2023-01-23 01:22:08.074701: step: 196/531, loss: 0.004897499457001686 2023-01-23 01:22:09.199343: step: 200/531, loss: 0.2933124601840973 2023-01-23 01:22:10.321837: step: 204/531, loss: 0.018488075584173203 2023-01-23 01:22:11.421073: step: 208/531, loss: 0.059687234461307526 2023-01-23 01:22:12.533119: step: 212/531, loss: 0.06043891981244087 2023-01-23 01:22:13.640836: step: 216/531, loss: 0.06790924072265625 2023-01-23 01:22:14.765524: step: 220/531, loss: 0.030646134167909622 2023-01-23 01:22:15.896827: step: 224/531, loss: 0.14025917649269104 2023-01-23 01:22:17.007983: step: 228/531, loss: 0.07133427262306213 2023-01-23 01:22:18.141462: step: 232/531, loss: 0.08211421966552734 2023-01-23 01:22:19.256024: step: 236/531, loss: 0.006190681830048561 2023-01-23 01:22:20.362322: step: 240/531, loss: 0.019986726343631744 2023-01-23 01:22:21.496920: step: 244/531, loss: 0.0880856066942215 2023-01-23 01:22:22.593450: step: 248/531, loss: 0.07127171009778976 2023-01-23 01:22:23.725018: step: 252/531, loss: 0.014315510168671608 2023-01-23 01:22:24.862475: step: 256/531, loss: 0.035547591745853424 2023-01-23 01:22:26.002967: step: 260/531, loss: 0.3115212619304657 2023-01-23 01:22:27.165804: step: 264/531, loss: 0.08876543492078781 2023-01-23 01:22:28.257031: step: 268/531, loss: 0.03832416608929634 2023-01-23 01:22:29.363038: step: 272/531, loss: 0.032218076288700104 2023-01-23 01:22:30.500879: step: 276/531, loss: 0.08227644115686417 2023-01-23 01:22:31.633189: step: 280/531, loss: 0.06085105240345001 2023-01-23 01:22:32.767325: step: 284/531, loss: 0.01662912406027317 2023-01-23 01:22:33.865847: step: 288/531, loss: 0.007895088754594326 2023-01-23 01:22:34.984110: step: 292/531, loss: 0.08230190724134445 2023-01-23 01:22:36.116300: step: 296/531, loss: 0.04370632395148277 2023-01-23 01:22:37.268248: step: 300/531, loss: 0.025006866082549095 2023-01-23 01:22:38.376680: step: 304/531, loss: 0.056152328848838806 2023-01-23 01:22:39.505614: step: 308/531, loss: 0.04372129589319229 2023-01-23 01:22:40.657347: step: 312/531, loss: 0.05147209390997887 2023-01-23 01:22:41.770437: step: 316/531, loss: 0.055887412279844284 2023-01-23 01:22:42.911135: step: 320/531, loss: 0.015970803797245026 2023-01-23 01:22:44.038869: step: 324/531, loss: 0.06616868823766708 2023-01-23 01:22:45.176667: step: 328/531, loss: 0.0749916136264801 2023-01-23 01:22:46.299725: step: 332/531, loss: 0.012963676825165749 2023-01-23 01:22:47.443033: step: 336/531, loss: 0.041699692606925964 2023-01-23 01:22:48.603102: step: 340/531, loss: 0.09000120311975479 2023-01-23 01:22:49.705140: step: 344/531, loss: 0.035185907036066055 2023-01-23 01:22:50.856175: step: 348/531, loss: 0.06844715774059296 2023-01-23 01:22:51.995212: step: 352/531, loss: 0.006012916564941406 2023-01-23 01:22:53.127878: step: 356/531, loss: 0.031853388994932175 2023-01-23 01:22:54.285943: step: 360/531, loss: 0.00910100992769003 2023-01-23 01:22:55.402509: step: 364/531, loss: 0.018318749964237213 2023-01-23 01:22:56.538389: step: 368/531, loss: 0.026638412848114967 2023-01-23 01:22:57.670915: step: 372/531, loss: 0.013840246014297009 2023-01-23 01:22:58.803989: step: 376/531, loss: 0.05256080627441406 2023-01-23 01:22:59.915002: step: 380/531, loss: 0.055220797657966614 2023-01-23 01:23:01.034548: step: 384/531, loss: 0.11647415161132812 2023-01-23 01:23:02.178897: step: 388/531, loss: 0.08593187481164932 2023-01-23 01:23:03.311049: step: 392/531, loss: 0.10279569774866104 2023-01-23 01:23:04.420151: step: 396/531, loss: 0.0340307243168354 2023-01-23 01:23:05.506611: step: 400/531, loss: 0.008854531683027744 2023-01-23 01:23:06.612833: step: 404/531, loss: 0.012184333987534046 2023-01-23 01:23:07.754847: step: 408/531, loss: 0.17254963517189026 2023-01-23 01:23:08.873753: step: 412/531, loss: 0.10960359871387482 2023-01-23 01:23:09.998642: step: 416/531, loss: 0.02779102325439453 2023-01-23 01:23:11.124336: step: 420/531, loss: 0.04721851274371147 2023-01-23 01:23:12.263040: step: 424/531, loss: 0.5016942024230957 2023-01-23 01:23:13.372420: step: 428/531, loss: 0.07115888595581055 2023-01-23 01:23:14.494434: step: 432/531, loss: 0.012452316470444202 2023-01-23 01:23:15.632776: step: 436/531, loss: 0.07896728068590164 2023-01-23 01:23:16.765656: step: 440/531, loss: 0.03048224374651909 2023-01-23 01:23:17.918284: step: 444/531, loss: 0.004890632815659046 2023-01-23 01:23:19.076489: step: 448/531, loss: 0.14467020332813263 2023-01-23 01:23:20.184914: step: 452/531, loss: 0.012338734231889248 2023-01-23 01:23:21.311317: step: 456/531, loss: 0.03182778134942055 2023-01-23 01:23:22.437991: step: 460/531, loss: 0.025916673243045807 2023-01-23 01:23:23.568461: step: 464/531, loss: 0.054062843322753906 2023-01-23 01:23:24.672375: step: 468/531, loss: 0.0019515752792358398 2023-01-23 01:23:25.803473: step: 472/531, loss: 0.41069117188453674 2023-01-23 01:23:26.932362: step: 476/531, loss: 0.06978817284107208 2023-01-23 01:23:28.056921: step: 480/531, loss: 0.012439345940947533 2023-01-23 01:23:29.178979: step: 484/531, loss: 0.013334274291992188 2023-01-23 01:23:30.276809: step: 488/531, loss: 0.02295989915728569 2023-01-23 01:23:31.434679: step: 492/531, loss: 0.02122201956808567 2023-01-23 01:23:32.571096: step: 496/531, loss: 0.144618421792984 2023-01-23 01:23:33.675062: step: 500/531, loss: 0.306057333946228 2023-01-23 01:23:34.789470: step: 504/531, loss: 0.07174110412597656 2023-01-23 01:23:35.893452: step: 508/531, loss: 0.11224012076854706 2023-01-23 01:23:37.034654: step: 512/531, loss: 0.03417778015136719 2023-01-23 01:23:38.179560: step: 516/531, loss: 0.10084018856287003 2023-01-23 01:23:39.297006: step: 520/531, loss: 0.146678626537323 2023-01-23 01:23:40.471323: step: 524/531, loss: 0.20326891541481018 2023-01-23 01:23:41.611252: step: 528/531, loss: 0.3622824251651764 2023-01-23 01:23:42.791809: step: 532/531, loss: 0.22680988907814026 2023-01-23 01:23:43.945853: step: 536/531, loss: 0.37415409088134766 2023-01-23 01:23:45.079117: step: 540/531, loss: 0.04333953559398651 2023-01-23 01:23:46.204526: step: 544/531, loss: 0.07723169773817062 2023-01-23 01:23:47.315957: step: 548/531, loss: 0.014127874746918678 2023-01-23 01:23:48.438867: step: 552/531, loss: 0.03958110883831978 2023-01-23 01:23:49.572120: step: 556/531, loss: 0.021869469434022903 2023-01-23 01:23:50.721966: step: 560/531, loss: 0.10576210170984268 2023-01-23 01:23:51.857721: step: 564/531, loss: 0.03191089630126953 2023-01-23 01:23:52.944414: step: 568/531, loss: 0.008348274976015091 2023-01-23 01:23:54.049669: step: 572/531, loss: 0.04977264627814293 2023-01-23 01:23:55.172548: step: 576/531, loss: 0.008417129516601562 2023-01-23 01:23:56.302170: step: 580/531, loss: 0.06541161239147186 2023-01-23 01:23:57.413182: step: 584/531, loss: 0.01195461768656969 2023-01-23 01:23:58.569191: step: 588/531, loss: 0.040986157953739166 2023-01-23 01:23:59.688114: step: 592/531, loss: 0.23552751541137695 2023-01-23 01:24:00.813656: step: 596/531, loss: 0.003774166340008378 2023-01-23 01:24:01.932909: step: 600/531, loss: 0.06213798373937607 2023-01-23 01:24:03.084295: step: 604/531, loss: 0.00360107421875 2023-01-23 01:24:04.219720: step: 608/531, loss: 0.017049027606844902 2023-01-23 01:24:05.346436: step: 612/531, loss: 0.005968618206679821 2023-01-23 01:24:06.517939: step: 616/531, loss: 0.22247114777565002 2023-01-23 01:24:07.635606: step: 620/531, loss: 0.02117347903549671 2023-01-23 01:24:08.767313: step: 624/531, loss: 0.016951942816376686 2023-01-23 01:24:09.872989: step: 628/531, loss: 0.0916353240609169 2023-01-23 01:24:11.022698: step: 632/531, loss: 0.014162063598632812 2023-01-23 01:24:12.150171: step: 636/531, loss: 0.1066884994506836 2023-01-23 01:24:13.268281: step: 640/531, loss: 0.0066679478622972965 2023-01-23 01:24:14.404329: step: 644/531, loss: 0.07322931289672852 2023-01-23 01:24:15.532407: step: 648/531, loss: 0.04711761325597763 2023-01-23 01:24:16.643496: step: 652/531, loss: 0.04643545299768448 2023-01-23 01:24:17.757266: step: 656/531, loss: 0.2179102897644043 2023-01-23 01:24:18.865244: step: 660/531, loss: 0.0009292125469073653 2023-01-23 01:24:19.996224: step: 664/531, loss: 0.7998761534690857 2023-01-23 01:24:21.073693: step: 668/531, loss: 0.02630983106791973 2023-01-23 01:24:22.195986: step: 672/531, loss: 0.014265251345932484 2023-01-23 01:24:23.289655: step: 676/531, loss: 0.00098590855486691 2023-01-23 01:24:24.427856: step: 680/531, loss: 0.009912585839629173 2023-01-23 01:24:25.518929: step: 684/531, loss: 0.1357942521572113 2023-01-23 01:24:26.610112: step: 688/531, loss: 0.014799404889345169 2023-01-23 01:24:27.730137: step: 692/531, loss: 0.029797174036502838 2023-01-23 01:24:28.908530: step: 696/531, loss: 0.012627029791474342 2023-01-23 01:24:30.054690: step: 700/531, loss: 0.01823272742331028 2023-01-23 01:24:31.154372: step: 704/531, loss: 0.06258168071508408 2023-01-23 01:24:32.297985: step: 708/531, loss: 0.03286170959472656 2023-01-23 01:24:33.421450: step: 712/531, loss: 0.02969665452837944 2023-01-23 01:24:34.540096: step: 716/531, loss: 0.006225496530532837 2023-01-23 01:24:35.661872: step: 720/531, loss: 0.06629371643066406 2023-01-23 01:24:36.794962: step: 724/531, loss: 0.08167973160743713 2023-01-23 01:24:37.934126: step: 728/531, loss: 0.05467739328742027 2023-01-23 01:24:39.093797: step: 732/531, loss: 0.016378212720155716 2023-01-23 01:24:40.217681: step: 736/531, loss: 0.051668357104063034 2023-01-23 01:24:41.370770: step: 740/531, loss: 0.04822579771280289 2023-01-23 01:24:42.535701: step: 744/531, loss: 0.06492596119642258 2023-01-23 01:24:43.643988: step: 748/531, loss: 0.05175819620490074 2023-01-23 01:24:44.778783: step: 752/531, loss: 0.15580235421657562 2023-01-23 01:24:45.923593: step: 756/531, loss: 0.07834777981042862 2023-01-23 01:24:47.043380: step: 760/531, loss: 0.05625486373901367 2023-01-23 01:24:48.166567: step: 764/531, loss: 0.09737630188465118 2023-01-23 01:24:49.307187: step: 768/531, loss: 0.08191872388124466 2023-01-23 01:24:50.463666: step: 772/531, loss: 0.025229312479496002 2023-01-23 01:24:51.585699: step: 776/531, loss: 0.1384994387626648 2023-01-23 01:24:52.689775: step: 780/531, loss: 0.010183000937104225 2023-01-23 01:24:53.792114: step: 784/531, loss: 0.04601306840777397 2023-01-23 01:24:54.906547: step: 788/531, loss: 0.051373135298490524 2023-01-23 01:24:56.029173: step: 792/531, loss: 0.05576353520154953 2023-01-23 01:24:57.151981: step: 796/531, loss: 0.09234924614429474 2023-01-23 01:24:58.292295: step: 800/531, loss: 0.04915919154882431 2023-01-23 01:24:59.399872: step: 804/531, loss: 0.04209165647625923 2023-01-23 01:25:00.523871: step: 808/531, loss: 0.05142030492424965 2023-01-23 01:25:01.639776: step: 812/531, loss: 0.005220318213105202 2023-01-23 01:25:02.744404: step: 816/531, loss: 0.06891937553882599 2023-01-23 01:25:03.877398: step: 820/531, loss: 0.018738461658358574 2023-01-23 01:25:05.002366: step: 824/531, loss: 0.23927488923072815 2023-01-23 01:25:06.135965: step: 828/531, loss: 0.017548561096191406 2023-01-23 01:25:07.267635: step: 832/531, loss: 0.10842597484588623 2023-01-23 01:25:08.401215: step: 836/531, loss: 0.07489724457263947 2023-01-23 01:25:09.508763: step: 840/531, loss: 0.10272105038166046 2023-01-23 01:25:10.640929: step: 844/531, loss: 0.04646625369787216 2023-01-23 01:25:11.772856: step: 848/531, loss: 0.051653482019901276 2023-01-23 01:25:12.947330: step: 852/531, loss: 0.054238513112068176 2023-01-23 01:25:14.092929: step: 856/531, loss: 0.015908241271972656 2023-01-23 01:25:15.201290: step: 860/531, loss: 0.019634723663330078 2023-01-23 01:25:16.340702: step: 864/531, loss: 0.09593596309423447 2023-01-23 01:25:17.454524: step: 868/531, loss: 0.014927363023161888 2023-01-23 01:25:18.561556: step: 872/531, loss: 0.03744788467884064 2023-01-23 01:25:19.707352: step: 876/531, loss: 0.023227693513035774 2023-01-23 01:25:20.814615: step: 880/531, loss: 0.026528455317020416 2023-01-23 01:25:21.922233: step: 884/531, loss: 0.010198403149843216 2023-01-23 01:25:23.048294: step: 888/531, loss: 0.005169820971786976 2023-01-23 01:25:24.188160: step: 892/531, loss: 0.07492761313915253 2023-01-23 01:25:25.315690: step: 896/531, loss: 0.06531677395105362 2023-01-23 01:25:26.439589: step: 900/531, loss: 0.005061245057731867 2023-01-23 01:25:27.571624: step: 904/531, loss: 1.0460602045059204 2023-01-23 01:25:28.678289: step: 908/531, loss: 0.13290157914161682 2023-01-23 01:25:29.812890: step: 912/531, loss: 0.08747921139001846 2023-01-23 01:25:30.942214: step: 916/531, loss: 0.05565004423260689 2023-01-23 01:25:32.075126: step: 920/531, loss: 0.08048954606056213 2023-01-23 01:25:33.211910: step: 924/531, loss: 0.005328941158950329 2023-01-23 01:25:34.295456: step: 928/531, loss: 0.038314905017614365 2023-01-23 01:25:35.426188: step: 932/531, loss: 0.11348043382167816 2023-01-23 01:25:36.547290: step: 936/531, loss: 0.08749179542064667 2023-01-23 01:25:37.659137: step: 940/531, loss: 0.012257575988769531 2023-01-23 01:25:38.770752: step: 944/531, loss: 0.10363617539405823 2023-01-23 01:25:39.861283: step: 948/531, loss: 0.06945953518152237 2023-01-23 01:25:40.964251: step: 952/531, loss: 0.016492843627929688 2023-01-23 01:25:42.115419: step: 956/531, loss: 0.038178253918886185 2023-01-23 01:25:43.241542: step: 960/531, loss: 0.11736125499010086 2023-01-23 01:25:44.396515: step: 964/531, loss: 0.059011079370975494 2023-01-23 01:25:45.543840: step: 968/531, loss: 0.03714399412274361 2023-01-23 01:25:46.661267: step: 972/531, loss: 0.06651488691568375 2023-01-23 01:25:47.828858: step: 976/531, loss: 0.39760273694992065 2023-01-23 01:25:48.953211: step: 980/531, loss: 0.004326629918068647 2023-01-23 01:25:50.101788: step: 984/531, loss: 0.0034225464332848787 2023-01-23 01:25:51.216548: step: 988/531, loss: 0.03228771686553955 2023-01-23 01:25:52.327325: step: 992/531, loss: 0.14333724975585938 2023-01-23 01:25:53.424663: step: 996/531, loss: 0.047245219349861145 2023-01-23 01:25:54.534754: step: 1000/531, loss: 0.14321298897266388 2023-01-23 01:25:55.666061: step: 1004/531, loss: 0.05715999752283096 2023-01-23 01:25:56.783628: step: 1008/531, loss: 0.018375109881162643 2023-01-23 01:25:57.904098: step: 1012/531, loss: 0.0494932197034359 2023-01-23 01:25:59.050290: step: 1016/531, loss: 0.04739503562450409 2023-01-23 01:26:00.163723: step: 1020/531, loss: 0.02921323850750923 2023-01-23 01:26:01.288450: step: 1024/531, loss: 0.0077949524857103825 2023-01-23 01:26:02.451679: step: 1028/531, loss: 0.027232743799686432 2023-01-23 01:26:03.582225: step: 1032/531, loss: 0.07723388820886612 2023-01-23 01:26:04.725627: step: 1036/531, loss: 0.030594348907470703 2023-01-23 01:26:05.871057: step: 1040/531, loss: 0.021930648013949394 2023-01-23 01:26:07.008499: step: 1044/531, loss: 0.06510657072067261 2023-01-23 01:26:08.097601: step: 1048/531, loss: 0.02773761749267578 2023-01-23 01:26:09.220190: step: 1052/531, loss: 0.2726472020149231 2023-01-23 01:26:10.360455: step: 1056/531, loss: 0.0855376273393631 2023-01-23 01:26:11.476288: step: 1060/531, loss: 0.0018999099265784025 2023-01-23 01:26:12.656791: step: 1064/531, loss: 0.12846003472805023 2023-01-23 01:26:13.788858: step: 1068/531, loss: 0.08939714729785919 2023-01-23 01:26:14.921455: step: 1072/531, loss: 0.15981721878051758 2023-01-23 01:26:16.036603: step: 1076/531, loss: 0.038900043815374374 2023-01-23 01:26:17.156488: step: 1080/531, loss: 0.06309547275304794 2023-01-23 01:26:18.283111: step: 1084/531, loss: 0.12618786096572876 2023-01-23 01:26:19.412689: step: 1088/531, loss: 0.09540615975856781 2023-01-23 01:26:20.553721: step: 1092/531, loss: 0.010279751382768154 2023-01-23 01:26:21.646959: step: 1096/531, loss: 0.03257796913385391 2023-01-23 01:26:22.771615: step: 1100/531, loss: 0.14710812270641327 2023-01-23 01:26:23.877678: step: 1104/531, loss: 0.008303165435791016 2023-01-23 01:26:25.022259: step: 1108/531, loss: 0.04552774503827095 2023-01-23 01:26:26.134164: step: 1112/531, loss: 0.04540209844708443 2023-01-23 01:26:27.277023: step: 1116/531, loss: 0.008212566375732422 2023-01-23 01:26:28.393717: step: 1120/531, loss: 0.024722862988710403 2023-01-23 01:26:29.494770: step: 1124/531, loss: 0.002464342163875699 2023-01-23 01:26:30.625250: step: 1128/531, loss: 0.030570555478334427 2023-01-23 01:26:31.737519: step: 1132/531, loss: 0.052817728370428085 2023-01-23 01:26:32.856678: step: 1136/531, loss: 0.1002207100391388 2023-01-23 01:26:33.998518: step: 1140/531, loss: 0.06465911865234375 2023-01-23 01:26:35.112826: step: 1144/531, loss: 0.01697215996682644 2023-01-23 01:26:36.235030: step: 1148/531, loss: 0.2452809363603592 2023-01-23 01:26:37.364506: step: 1152/531, loss: 0.055530741810798645 2023-01-23 01:26:38.483137: step: 1156/531, loss: 0.06503229588270187 2023-01-23 01:26:39.608700: step: 1160/531, loss: 0.013924885541200638 2023-01-23 01:26:40.735215: step: 1164/531, loss: 0.029801130294799805 2023-01-23 01:26:41.880630: step: 1168/531, loss: 0.04488945007324219 2023-01-23 01:26:42.993064: step: 1172/531, loss: 0.051447439938783646 2023-01-23 01:26:44.126313: step: 1176/531, loss: 0.04332561790943146 2023-01-23 01:26:45.251726: step: 1180/531, loss: 0.027602005749940872 2023-01-23 01:26:46.340253: step: 1184/531, loss: 0.011275816708803177 2023-01-23 01:26:47.454976: step: 1188/531, loss: 0.09218786656856537 2023-01-23 01:26:48.592608: step: 1192/531, loss: 0.10438642650842667 2023-01-23 01:26:49.750859: step: 1196/531, loss: 0.038543201982975006 2023-01-23 01:26:50.879041: step: 1200/531, loss: 0.10220833122730255 2023-01-23 01:26:52.015786: step: 1204/531, loss: 0.40021011233329773 2023-01-23 01:26:53.128227: step: 1208/531, loss: 0.17189693450927734 2023-01-23 01:26:54.269628: step: 1212/531, loss: 0.019986821338534355 2023-01-23 01:26:55.407057: step: 1216/531, loss: 0.04291076958179474 2023-01-23 01:26:56.528874: step: 1220/531, loss: 0.0387515053153038 2023-01-23 01:26:57.666267: step: 1224/531, loss: 0.041299011558294296 2023-01-23 01:26:58.830059: step: 1228/531, loss: 0.1474718153476715 2023-01-23 01:26:59.959047: step: 1232/531, loss: 0.06479165703058243 2023-01-23 01:27:01.058015: step: 1236/531, loss: 0.06092948839068413 2023-01-23 01:27:02.205116: step: 1240/531, loss: 0.040706731379032135 2023-01-23 01:27:03.329346: step: 1244/531, loss: 0.0832282081246376 2023-01-23 01:27:04.469381: step: 1248/531, loss: 0.0672634094953537 2023-01-23 01:27:05.573161: step: 1252/531, loss: 0.8018707036972046 2023-01-23 01:27:06.714310: step: 1256/531, loss: 0.020440055057406425 2023-01-23 01:27:07.841253: step: 1260/531, loss: 0.09119053184986115 2023-01-23 01:27:08.967035: step: 1264/531, loss: 0.05557747185230255 2023-01-23 01:27:10.103205: step: 1268/531, loss: 0.14118710160255432 2023-01-23 01:27:11.250062: step: 1272/531, loss: 0.04338102415204048 2023-01-23 01:27:12.366488: step: 1276/531, loss: 0.5367922782897949 2023-01-23 01:27:13.501440: step: 1280/531, loss: 0.04592332988977432 2023-01-23 01:27:14.630809: step: 1284/531, loss: 0.07780647277832031 2023-01-23 01:27:15.773530: step: 1288/531, loss: 0.27759668231010437 2023-01-23 01:27:16.911847: step: 1292/531, loss: 0.06564340740442276 2023-01-23 01:27:18.065240: step: 1296/531, loss: 0.15360213816165924 2023-01-23 01:27:19.172218: step: 1300/531, loss: 0.15772390365600586 2023-01-23 01:27:20.289106: step: 1304/531, loss: 0.03248763084411621 2023-01-23 01:27:21.412969: step: 1308/531, loss: 0.12499995529651642 2023-01-23 01:27:22.543219: step: 1312/531, loss: 0.09133310616016388 2023-01-23 01:27:23.743978: step: 1316/531, loss: 0.10047177970409393 2023-01-23 01:27:24.893340: step: 1320/531, loss: 0.09492664039134979 2023-01-23 01:27:26.041263: step: 1324/531, loss: 0.12200088798999786 2023-01-23 01:27:27.154014: step: 1328/531, loss: 0.0032106759026646614 2023-01-23 01:27:28.287977: step: 1332/531, loss: 0.017783308401703835 2023-01-23 01:27:29.409365: step: 1336/531, loss: 0.0718679428100586 2023-01-23 01:27:30.543712: step: 1340/531, loss: 0.04496727138757706 2023-01-23 01:27:31.659005: step: 1344/531, loss: 0.006965351291000843 2023-01-23 01:27:32.775542: step: 1348/531, loss: 0.06089191883802414 2023-01-23 01:27:33.918817: step: 1352/531, loss: 0.013176441192626953 2023-01-23 01:27:35.019592: step: 1356/531, loss: 0.08237600326538086 2023-01-23 01:27:36.127112: step: 1360/531, loss: 0.04487152397632599 2023-01-23 01:27:37.261898: step: 1364/531, loss: 0.012930680066347122 2023-01-23 01:27:38.407524: step: 1368/531, loss: 0.05785961449146271 2023-01-23 01:27:39.519135: step: 1372/531, loss: 0.005673789884895086 2023-01-23 01:27:40.639630: step: 1376/531, loss: 0.18685337901115417 2023-01-23 01:27:41.764244: step: 1380/531, loss: 0.10376176983118057 2023-01-23 01:27:42.909797: step: 1384/531, loss: 0.13303223252296448 2023-01-23 01:27:44.016135: step: 1388/531, loss: 0.06505189090967178 2023-01-23 01:27:45.124796: step: 1392/531, loss: 0.050676967948675156 2023-01-23 01:27:46.243665: step: 1396/531, loss: 0.00166740408167243 2023-01-23 01:27:47.379015: step: 1400/531, loss: 0.01211013738065958 2023-01-23 01:27:48.504946: step: 1404/531, loss: 0.12830395996570587 2023-01-23 01:27:49.655463: step: 1408/531, loss: 0.02961292304098606 2023-01-23 01:27:50.792056: step: 1412/531, loss: 0.034870196133852005 2023-01-23 01:27:51.922090: step: 1416/531, loss: 0.04339813441038132 2023-01-23 01:27:53.047774: step: 1420/531, loss: 0.11966609954833984 2023-01-23 01:27:54.202568: step: 1424/531, loss: 0.02256040647625923 2023-01-23 01:27:55.351342: step: 1428/531, loss: 0.025408554822206497 2023-01-23 01:27:56.474314: step: 1432/531, loss: 0.05446481704711914 2023-01-23 01:27:57.588485: step: 1436/531, loss: 0.012572193518280983 2023-01-23 01:27:58.728078: step: 1440/531, loss: 0.07757556438446045 2023-01-23 01:27:59.835263: step: 1444/531, loss: 0.036899056285619736 2023-01-23 01:28:00.962572: step: 1448/531, loss: 0.06191766634583473 2023-01-23 01:28:02.093329: step: 1452/531, loss: 0.07007431983947754 2023-01-23 01:28:03.230672: step: 1456/531, loss: 0.04421382024884224 2023-01-23 01:28:04.376692: step: 1460/531, loss: 0.0008575439569540322 2023-01-23 01:28:05.479949: step: 1464/531, loss: 0.020115423947572708 2023-01-23 01:28:06.607398: step: 1468/531, loss: 0.03657855838537216 2023-01-23 01:28:07.727776: step: 1472/531, loss: 0.06703939288854599 2023-01-23 01:28:08.851977: step: 1476/531, loss: 0.1048712208867073 2023-01-23 01:28:09.976285: step: 1480/531, loss: 0.015604782849550247 2023-01-23 01:28:11.118909: step: 1484/531, loss: 0.3073750138282776 2023-01-23 01:28:12.225744: step: 1488/531, loss: 0.09835891425609589 2023-01-23 01:28:13.362923: step: 1492/531, loss: 0.03867781162261963 2023-01-23 01:28:14.487390: step: 1496/531, loss: 0.0779334083199501 2023-01-23 01:28:15.626453: step: 1500/531, loss: 0.06903638690710068 2023-01-23 01:28:16.764982: step: 1504/531, loss: 0.21315795183181763 2023-01-23 01:28:17.888074: step: 1508/531, loss: 0.322004497051239 2023-01-23 01:28:19.020375: step: 1512/531, loss: 0.030344534665346146 2023-01-23 01:28:20.152622: step: 1516/531, loss: 0.10674019157886505 2023-01-23 01:28:21.266317: step: 1520/531, loss: 0.048955343663692474 2023-01-23 01:28:22.397538: step: 1524/531, loss: 0.1165475845336914 2023-01-23 01:28:23.553134: step: 1528/531, loss: 0.09774628281593323 2023-01-23 01:28:24.726963: step: 1532/531, loss: 0.1109004095196724 2023-01-23 01:28:25.851234: step: 1536/531, loss: 0.04538846015930176 2023-01-23 01:28:26.964233: step: 1540/531, loss: 0.04183550179004669 2023-01-23 01:28:28.076066: step: 1544/531, loss: 0.043260835111141205 2023-01-23 01:28:29.185839: step: 1548/531, loss: 0.22896814346313477 2023-01-23 01:28:30.282628: step: 1552/531, loss: 0.018180465325713158 2023-01-23 01:28:31.403217: step: 1556/531, loss: 0.035036277025938034 2023-01-23 01:28:32.525145: step: 1560/531, loss: 0.23148202896118164 2023-01-23 01:28:33.641827: step: 1564/531, loss: 0.08412857353687286 2023-01-23 01:28:34.783337: step: 1568/531, loss: 0.07042789459228516 2023-01-23 01:28:35.877517: step: 1572/531, loss: 0.05395574867725372 2023-01-23 01:28:37.003226: step: 1576/531, loss: 0.0663209930062294 2023-01-23 01:28:38.142643: step: 1580/531, loss: 0.05465278401970863 2023-01-23 01:28:39.252452: step: 1584/531, loss: 0.08454103767871857 2023-01-23 01:28:40.377414: step: 1588/531, loss: 0.057108692824840546 2023-01-23 01:28:41.498792: step: 1592/531, loss: 0.67942214012146 2023-01-23 01:28:42.659758: step: 1596/531, loss: 0.04212953522801399 2023-01-23 01:28:43.775641: step: 1600/531, loss: 0.06312417984008789 2023-01-23 01:28:44.877116: step: 1604/531, loss: 0.01411585882306099 2023-01-23 01:28:46.000460: step: 1608/531, loss: 0.22869928181171417 2023-01-23 01:28:47.136894: step: 1612/531, loss: 0.0613219290971756 2023-01-23 01:28:48.312987: step: 1616/531, loss: 0.4913889765739441 2023-01-23 01:28:49.425580: step: 1620/531, loss: 0.04574775695800781 2023-01-23 01:28:50.537042: step: 1624/531, loss: 0.03464813157916069 2023-01-23 01:28:51.626866: step: 1628/531, loss: 0.04906511306762695 2023-01-23 01:28:52.736907: step: 1632/531, loss: 0.03040132485330105 2023-01-23 01:28:53.863304: step: 1636/531, loss: 0.7075724601745605 2023-01-23 01:28:54.979456: step: 1640/531, loss: 0.8525973558425903 2023-01-23 01:28:56.092389: step: 1644/531, loss: 0.0013964890968054533 2023-01-23 01:28:57.230701: step: 1648/531, loss: 0.0379701629281044 2023-01-23 01:28:58.334115: step: 1652/531, loss: 0.001712799072265625 2023-01-23 01:28:59.472124: step: 1656/531, loss: 0.2171657681465149 2023-01-23 01:29:00.592210: step: 1660/531, loss: 0.019266320392489433 2023-01-23 01:29:01.669146: step: 1664/531, loss: 0.015967750921845436 2023-01-23 01:29:02.793320: step: 1668/531, loss: 0.0714021772146225 2023-01-23 01:29:03.884038: step: 1672/531, loss: 0.057566121220588684 2023-01-23 01:29:05.056474: step: 1676/531, loss: 0.0203904639929533 2023-01-23 01:29:06.199007: step: 1680/531, loss: 0.07539396733045578 2023-01-23 01:29:07.284487: step: 1684/531, loss: 0.09189968556165695 2023-01-23 01:29:08.388445: step: 1688/531, loss: 0.02228088304400444 2023-01-23 01:29:09.527774: step: 1692/531, loss: 0.11487503349781036 2023-01-23 01:29:10.676407: step: 1696/531, loss: 0.1178162544965744 2023-01-23 01:29:11.793731: step: 1700/531, loss: 0.004256772808730602 2023-01-23 01:29:12.929999: step: 1704/531, loss: 0.09162741154432297 2023-01-23 01:29:14.041079: step: 1708/531, loss: 0.13242283463478088 2023-01-23 01:29:15.173863: step: 1712/531, loss: 0.009106731973588467 2023-01-23 01:29:16.279162: step: 1716/531, loss: 0.028816986829042435 2023-01-23 01:29:17.363407: step: 1720/531, loss: 0.03114795684814453 2023-01-23 01:29:18.472049: step: 1724/531, loss: 0.18771325051784515 2023-01-23 01:29:19.574096: step: 1728/531, loss: 0.025037577375769615 2023-01-23 01:29:20.696363: step: 1732/531, loss: 0.022452164441347122 2023-01-23 01:29:21.836986: step: 1736/531, loss: 0.05461602285504341 2023-01-23 01:29:23.016399: step: 1740/531, loss: 0.1423497200012207 2023-01-23 01:29:24.107780: step: 1744/531, loss: 0.04620747268199921 2023-01-23 01:29:25.209546: step: 1748/531, loss: 0.0406682975590229 2023-01-23 01:29:26.340547: step: 1752/531, loss: 0.1025405153632164 2023-01-23 01:29:27.472659: step: 1756/531, loss: 0.15270757675170898 2023-01-23 01:29:28.620515: step: 1760/531, loss: 0.008900643326342106 2023-01-23 01:29:29.754179: step: 1764/531, loss: 0.011030149646103382 2023-01-23 01:29:30.882558: step: 1768/531, loss: 0.0023666382767260075 2023-01-23 01:29:32.030561: step: 1772/531, loss: 0.0548916831612587 2023-01-23 01:29:33.142796: step: 1776/531, loss: 0.0020505907014012337 2023-01-23 01:29:34.263031: step: 1780/531, loss: 0.03554067760705948 2023-01-23 01:29:35.407665: step: 1784/531, loss: 0.030874157324433327 2023-01-23 01:29:36.550316: step: 1788/531, loss: 0.17636089026927948 2023-01-23 01:29:37.695735: step: 1792/531, loss: 0.09419545531272888 2023-01-23 01:29:38.798116: step: 1796/531, loss: 0.03293962776660919 2023-01-23 01:29:39.923551: step: 1800/531, loss: 0.008083535358309746 2023-01-23 01:29:41.048572: step: 1804/531, loss: 0.018291760236024857 2023-01-23 01:29:42.204722: step: 1808/531, loss: 0.3487284779548645 2023-01-23 01:29:43.332778: step: 1812/531, loss: 0.03713226318359375 2023-01-23 01:29:44.460721: step: 1816/531, loss: 0.02489914931356907 2023-01-23 01:29:45.566825: step: 1820/531, loss: 0.06141247600317001 2023-01-23 01:29:46.697428: step: 1824/531, loss: 0.3006841838359833 2023-01-23 01:29:47.815453: step: 1828/531, loss: 0.019359780475497246 2023-01-23 01:29:48.929799: step: 1832/531, loss: 0.054311562329530716 2023-01-23 01:29:50.067305: step: 1836/531, loss: 0.07356911152601242 2023-01-23 01:29:51.205374: step: 1840/531, loss: 0.05465659871697426 2023-01-23 01:29:52.334930: step: 1844/531, loss: 0.8070365190505981 2023-01-23 01:29:53.423720: step: 1848/531, loss: 0.016205500811338425 2023-01-23 01:29:54.534720: step: 1852/531, loss: 0.007967568002641201 2023-01-23 01:29:55.671552: step: 1856/531, loss: 0.11349640041589737 2023-01-23 01:29:56.818756: step: 1860/531, loss: 0.13214674592018127 2023-01-23 01:29:57.927098: step: 1864/531, loss: 0.07776127010583878 2023-01-23 01:29:59.081249: step: 1868/531, loss: 0.08564339578151703 2023-01-23 01:30:00.196141: step: 1872/531, loss: 0.09141826629638672 2023-01-23 01:30:01.338991: step: 1876/531, loss: 0.5359444618225098 2023-01-23 01:30:02.461305: step: 1880/531, loss: 0.0693565383553505 2023-01-23 01:30:03.600377: step: 1884/531, loss: 0.026195909827947617 2023-01-23 01:30:04.740338: step: 1888/531, loss: 0.10916309058666229 2023-01-23 01:30:05.846622: step: 1892/531, loss: 0.024379348382353783 2023-01-23 01:30:06.946672: step: 1896/531, loss: 0.060793161392211914 2023-01-23 01:30:08.064037: step: 1900/531, loss: 0.005732154939323664 2023-01-23 01:30:09.189255: step: 1904/531, loss: 0.021378135308623314 2023-01-23 01:30:10.311844: step: 1908/531, loss: 0.1498439759016037 2023-01-23 01:30:11.454079: step: 1912/531, loss: 0.031178856268525124 2023-01-23 01:30:12.556905: step: 1916/531, loss: 0.006459998432546854 2023-01-23 01:30:13.677753: step: 1920/531, loss: 0.007043266203254461 2023-01-23 01:30:14.783927: step: 1924/531, loss: 0.031800128519535065 2023-01-23 01:30:15.894160: step: 1928/531, loss: 0.0641942024230957 2023-01-23 01:30:17.002546: step: 1932/531, loss: 0.028751373291015625 2023-01-23 01:30:18.144039: step: 1936/531, loss: 0.06740102916955948 2023-01-23 01:30:19.279511: step: 1940/531, loss: 0.05365820229053497 2023-01-23 01:30:20.404825: step: 1944/531, loss: 0.16541728377342224 2023-01-23 01:30:21.546753: step: 1948/531, loss: 0.059989165514707565 2023-01-23 01:30:22.675361: step: 1952/531, loss: 0.03951587527990341 2023-01-23 01:30:23.786459: step: 1956/531, loss: 0.125335693359375 2023-01-23 01:30:24.899569: step: 1960/531, loss: 0.129795640707016 2023-01-23 01:30:26.017337: step: 1964/531, loss: 0.12310321629047394 2023-01-23 01:30:27.123869: step: 1968/531, loss: 0.18646980822086334 2023-01-23 01:30:28.232970: step: 1972/531, loss: 0.1499319076538086 2023-01-23 01:30:29.361647: step: 1976/531, loss: 0.06747885048389435 2023-01-23 01:30:30.498561: step: 1980/531, loss: 0.019020844250917435 2023-01-23 01:30:31.596194: step: 1984/531, loss: 0.04625578224658966 2023-01-23 01:30:32.731194: step: 1988/531, loss: 0.02260427549481392 2023-01-23 01:30:33.847740: step: 1992/531, loss: 0.02519693411886692 2023-01-23 01:30:34.973199: step: 1996/531, loss: 0.02496337890625 2023-01-23 01:30:36.098653: step: 2000/531, loss: 0.0812978744506836 2023-01-23 01:30:37.240714: step: 2004/531, loss: 0.13371573388576508 2023-01-23 01:30:38.348224: step: 2008/531, loss: 0.08028726279735565 2023-01-23 01:30:39.460738: step: 2012/531, loss: 0.08460722118616104 2023-01-23 01:30:40.585243: step: 2016/531, loss: 0.3900224566459656 2023-01-23 01:30:41.744251: step: 2020/531, loss: 0.08664903789758682 2023-01-23 01:30:42.896550: step: 2024/531, loss: 0.13433456420898438 2023-01-23 01:30:44.027470: step: 2028/531, loss: 0.1564178466796875 2023-01-23 01:30:45.171026: step: 2032/531, loss: 0.006483840756118298 2023-01-23 01:30:46.306442: step: 2036/531, loss: 0.0345611572265625 2023-01-23 01:30:47.401024: step: 2040/531, loss: 0.14012479782104492 2023-01-23 01:30:48.512543: step: 2044/531, loss: 0.09803247451782227 2023-01-23 01:30:49.653561: step: 2048/531, loss: 0.007104110904037952 2023-01-23 01:30:50.769600: step: 2052/531, loss: 0.03317451477050781 2023-01-23 01:30:51.897776: step: 2056/531, loss: 0.12858638167381287 2023-01-23 01:30:53.029195: step: 2060/531, loss: 0.036590054631233215 2023-01-23 01:30:54.130965: step: 2064/531, loss: 0.0404357947409153 2023-01-23 01:30:55.241978: step: 2068/531, loss: 0.0540614128112793 2023-01-23 01:30:56.390041: step: 2072/531, loss: 0.10163593292236328 2023-01-23 01:30:57.523138: step: 2076/531, loss: 0.047170642763376236 2023-01-23 01:30:58.676145: step: 2080/531, loss: 0.025232411921024323 2023-01-23 01:30:59.798277: step: 2084/531, loss: 0.08812332153320312 2023-01-23 01:31:00.929808: step: 2088/531, loss: 0.08783034980297089 2023-01-23 01:31:02.066951: step: 2092/531, loss: 0.041292428970336914 2023-01-23 01:31:03.167202: step: 2096/531, loss: 0.021515464410185814 2023-01-23 01:31:04.298362: step: 2100/531, loss: 0.06693977117538452 2023-01-23 01:31:05.435032: step: 2104/531, loss: 0.0738416239619255 2023-01-23 01:31:06.545153: step: 2108/531, loss: 0.05874716117978096 2023-01-23 01:31:07.668231: step: 2112/531, loss: 0.6315667629241943 2023-01-23 01:31:08.760621: step: 2116/531, loss: 0.08243980258703232 2023-01-23 01:31:09.850975: step: 2120/531, loss: 0.039584919810295105 2023-01-23 01:31:10.934983: step: 2124/531, loss: 0.05166645348072052 ================================================== Loss: 0.086 -------------------- Dev: {'event': {'p': 0.5961923847695391, 'r': 0.7922769640479361, 'f1': 0.6803887935963409}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Test: {'event': {'p': 0.6274416388756551, 'r': 0.7853309481216458, 'f1': 0.697563559322034}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Chinese: {'event': {'p': 0.5925925925925926, 'r': 0.8888888888888888, 'f1': 0.711111111111111}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Korean: {'event': {'p': 0.6981132075471698, 'r': 0.5873015873015873, 'f1': 0.6379310344827586}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Russian: {'event': {'p': 0.4883720930232558, 'r': 0.5833333333333334, 'f1': 0.5316455696202531}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.5725264169068204, 'r': 0.7936085219707057, 'f1': 0.6651785714285715}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Eng Test for Korean: {'event': {'p': 0.6113918236104732, 'r': 0.7936791890280263, 'f1': 0.6907109496626881}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Korean: {'event': {'p': 0.65625, 'r': 0.6666666666666666, 'f1': 0.6614173228346457}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 12 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:31:51.011689: step: 4/531, loss: 0.038973234593868256 2023-01-23 01:31:52.131763: step: 8/531, loss: 0.34092310070991516 2023-01-23 01:31:53.278273: step: 12/531, loss: 0.042228128761053085 2023-01-23 01:31:54.396767: step: 16/531, loss: 0.0032852173317223787 2023-01-23 01:31:55.518957: step: 20/531, loss: 0.10696382075548172 2023-01-23 01:31:56.637597: step: 24/531, loss: 0.005381679628044367 2023-01-23 01:31:57.753068: step: 28/531, loss: 0.02842102013528347 2023-01-23 01:31:58.899661: step: 32/531, loss: 0.35137346386909485 2023-01-23 01:32:00.031850: step: 36/531, loss: 0.02191338688135147 2023-01-23 01:32:01.186638: step: 40/531, loss: 0.07915983349084854 2023-01-23 01:32:02.331146: step: 44/531, loss: 0.036618996411561966 2023-01-23 01:32:03.486282: step: 48/531, loss: 0.007364988327026367 2023-01-23 01:32:04.608416: step: 52/531, loss: 0.03930368646979332 2023-01-23 01:32:05.743714: step: 56/531, loss: 0.023439789190888405 2023-01-23 01:32:06.861550: step: 60/531, loss: 0.02361888997256756 2023-01-23 01:32:08.012913: step: 64/531, loss: 0.023807907477021217 2023-01-23 01:32:09.132960: step: 68/531, loss: 0.1282922625541687 2023-01-23 01:32:10.279955: step: 72/531, loss: 0.04363222420215607 2023-01-23 01:32:11.417886: step: 76/531, loss: 0.0015531539684161544 2023-01-23 01:32:12.587558: step: 80/531, loss: 0.12497377395629883 2023-01-23 01:32:13.715689: step: 84/531, loss: 0.021805381402373314 2023-01-23 01:32:14.844016: step: 88/531, loss: 0.011691666208207607 2023-01-23 01:32:16.004956: step: 92/531, loss: 0.045899197459220886 2023-01-23 01:32:17.132484: step: 96/531, loss: 0.10177517682313919 2023-01-23 01:32:18.262837: step: 100/531, loss: 0.28401979804039 2023-01-23 01:32:19.375460: step: 104/531, loss: 0.017455102875828743 2023-01-23 01:32:20.513745: step: 108/531, loss: 0.04333076626062393 2023-01-23 01:32:21.636716: step: 112/531, loss: 0.04375896602869034 2023-01-23 01:32:22.734896: step: 116/531, loss: 0.026859380304813385 2023-01-23 01:32:23.877677: step: 120/531, loss: 0.04175710678100586 2023-01-23 01:32:25.016373: step: 124/531, loss: 0.04286069795489311 2023-01-23 01:32:26.147566: step: 128/531, loss: 0.013673496432602406 2023-01-23 01:32:27.284765: step: 132/531, loss: 0.020584868267178535 2023-01-23 01:32:28.405997: step: 136/531, loss: 0.02770562283694744 2023-01-23 01:32:29.552692: step: 140/531, loss: 0.16088980436325073 2023-01-23 01:32:30.674392: step: 144/531, loss: 0.010279846377670765 2023-01-23 01:32:31.763130: step: 148/531, loss: 0.03518953546881676 2023-01-23 01:32:32.869897: step: 152/531, loss: 0.03272116929292679 2023-01-23 01:32:34.008731: step: 156/531, loss: 0.07336197048425674 2023-01-23 01:32:35.142963: step: 160/531, loss: 0.054956912994384766 2023-01-23 01:32:36.278147: step: 164/531, loss: 0.017528247088193893 2023-01-23 01:32:37.360279: step: 168/531, loss: 0.03565650060772896 2023-01-23 01:32:38.499753: step: 172/531, loss: 0.06515967845916748 2023-01-23 01:32:39.644173: step: 176/531, loss: 0.02084369584918022 2023-01-23 01:32:40.775366: step: 180/531, loss: 0.020826244726777077 2023-01-23 01:32:41.934795: step: 184/531, loss: 0.026675987988710403 2023-01-23 01:32:43.050825: step: 188/531, loss: 0.0576656349003315 2023-01-23 01:32:44.191395: step: 192/531, loss: 0.09280738979578018 2023-01-23 01:32:45.308806: step: 196/531, loss: 0.21827135980129242 2023-01-23 01:32:46.408323: step: 200/531, loss: 0.05327282100915909 2023-01-23 01:32:47.523892: step: 204/531, loss: 0.033853866159915924 2023-01-23 01:32:48.646678: step: 208/531, loss: 0.08336200565099716 2023-01-23 01:32:49.783453: step: 212/531, loss: 0.012221718207001686 2023-01-23 01:32:50.932684: step: 216/531, loss: 0.012851571664214134 2023-01-23 01:32:52.045558: step: 220/531, loss: 0.008442306891083717 2023-01-23 01:32:53.179199: step: 224/531, loss: 0.025832366198301315 2023-01-23 01:32:54.279442: step: 228/531, loss: 0.020372772589325905 2023-01-23 01:32:55.381634: step: 232/531, loss: 0.034896086901426315 2023-01-23 01:32:56.501920: step: 236/531, loss: 0.0697927474975586 2023-01-23 01:32:57.603551: step: 240/531, loss: 0.038404084742069244 2023-01-23 01:32:58.720420: step: 244/531, loss: 0.028582381084561348 2023-01-23 01:32:59.832654: step: 248/531, loss: 0.09015293419361115 2023-01-23 01:33:00.928997: step: 252/531, loss: 0.006644725799560547 2023-01-23 01:33:02.066870: step: 256/531, loss: 0.04751196131110191 2023-01-23 01:33:03.184828: step: 260/531, loss: 0.30338409543037415 2023-01-23 01:33:04.347978: step: 264/531, loss: 0.018543099984526634 2023-01-23 01:33:05.469976: step: 268/531, loss: 0.0506771095097065 2023-01-23 01:33:06.585743: step: 272/531, loss: 0.005135345738381147 2023-01-23 01:33:07.699862: step: 276/531, loss: 0.01716327667236328 2023-01-23 01:33:08.844055: step: 280/531, loss: 0.26174649596214294 2023-01-23 01:33:09.987997: step: 284/531, loss: 0.036356449127197266 2023-01-23 01:33:11.099969: step: 288/531, loss: 0.006159747019410133 2023-01-23 01:33:12.242467: step: 292/531, loss: 0.046012308448553085 2023-01-23 01:33:13.374172: step: 296/531, loss: 0.09360122680664062 2023-01-23 01:33:14.483172: step: 300/531, loss: 0.006849479861557484 2023-01-23 01:33:15.601841: step: 304/531, loss: 0.14085274934768677 2023-01-23 01:33:16.725596: step: 308/531, loss: 0.03922691196203232 2023-01-23 01:33:17.862233: step: 312/531, loss: 0.02872176095843315 2023-01-23 01:33:19.018160: step: 316/531, loss: 0.032641030848026276 2023-01-23 01:33:20.162864: step: 320/531, loss: 0.052202798426151276 2023-01-23 01:33:21.298683: step: 324/531, loss: 0.034711744636297226 2023-01-23 01:33:22.440767: step: 328/531, loss: 0.1793808937072754 2023-01-23 01:33:23.578161: step: 332/531, loss: 0.017008112743496895 2023-01-23 01:33:24.711339: step: 336/531, loss: 0.052149295806884766 2023-01-23 01:33:25.845764: step: 340/531, loss: 0.05364866554737091 2023-01-23 01:33:26.992117: step: 344/531, loss: 0.09223794937133789 2023-01-23 01:33:28.121204: step: 348/531, loss: 0.019942283630371094 2023-01-23 01:33:29.267097: step: 352/531, loss: 0.04162969812750816 2023-01-23 01:33:30.393807: step: 356/531, loss: 0.041677091270685196 2023-01-23 01:33:31.519338: step: 360/531, loss: 0.06603717803955078 2023-01-23 01:33:32.640275: step: 364/531, loss: 0.008761358447372913 2023-01-23 01:33:33.768953: step: 368/531, loss: 0.11486272513866425 2023-01-23 01:33:34.876349: step: 372/531, loss: 0.10126963257789612 2023-01-23 01:33:36.012210: step: 376/531, loss: 0.01708240434527397 2023-01-23 01:33:37.155070: step: 380/531, loss: 0.004142189398407936 2023-01-23 01:33:38.265170: step: 384/531, loss: 0.00884018000215292 2023-01-23 01:33:39.391820: step: 388/531, loss: 0.01080484502017498 2023-01-23 01:33:40.528016: step: 392/531, loss: 0.019925978034734726 2023-01-23 01:33:41.641626: step: 396/531, loss: 0.011363506317138672 2023-01-23 01:33:42.761216: step: 400/531, loss: 0.1028587818145752 2023-01-23 01:33:43.879421: step: 404/531, loss: 0.04232080280780792 2023-01-23 01:33:45.022394: step: 408/531, loss: 0.2529030740261078 2023-01-23 01:33:46.155677: step: 412/531, loss: 0.015860557556152344 2023-01-23 01:33:47.264044: step: 416/531, loss: 0.03498277813196182 2023-01-23 01:33:48.374940: step: 420/531, loss: 0.14679089188575745 2023-01-23 01:33:49.500522: step: 424/531, loss: 0.02433185651898384 2023-01-23 01:33:50.637602: step: 428/531, loss: 0.025880241766572 2023-01-23 01:33:51.731954: step: 432/531, loss: 0.048050880432128906 2023-01-23 01:33:52.868932: step: 436/531, loss: 0.021410560235381126 2023-01-23 01:33:53.992603: step: 440/531, loss: 0.06916351616382599 2023-01-23 01:33:55.158998: step: 444/531, loss: 0.06000576168298721 2023-01-23 01:33:56.280922: step: 448/531, loss: 0.013830471783876419 2023-01-23 01:33:57.406831: step: 452/531, loss: 0.05849146842956543 2023-01-23 01:33:58.516264: step: 456/531, loss: 0.030888844281435013 2023-01-23 01:33:59.608030: step: 460/531, loss: 0.04168892279267311 2023-01-23 01:34:00.710937: step: 464/531, loss: 0.053305864334106445 2023-01-23 01:34:01.853700: step: 468/531, loss: 0.0145600326359272 2023-01-23 01:34:02.979348: step: 472/531, loss: 0.014923286624252796 2023-01-23 01:34:04.091716: step: 476/531, loss: 0.02691326104104519 2023-01-23 01:34:05.218800: step: 480/531, loss: 0.04722227901220322 2023-01-23 01:34:06.345066: step: 484/531, loss: 0.01708083227276802 2023-01-23 01:34:07.449773: step: 488/531, loss: 0.0630342960357666 2023-01-23 01:34:08.577341: step: 492/531, loss: 0.0018398285610601306 2023-01-23 01:34:09.688839: step: 496/531, loss: 0.0629514679312706 2023-01-23 01:34:10.828965: step: 500/531, loss: 0.009606361389160156 2023-01-23 01:34:11.945434: step: 504/531, loss: 0.00251941685564816 2023-01-23 01:34:13.066442: step: 508/531, loss: 0.05768918991088867 2023-01-23 01:34:14.202402: step: 512/531, loss: 0.005676459986716509 2023-01-23 01:34:15.341134: step: 516/531, loss: 0.033266447484493256 2023-01-23 01:34:16.492447: step: 520/531, loss: 0.05728187412023544 2023-01-23 01:34:17.636511: step: 524/531, loss: 0.23796996474266052 2023-01-23 01:34:18.760922: step: 528/531, loss: 0.04635239019989967 2023-01-23 01:34:19.890096: step: 532/531, loss: 0.1938735991716385 2023-01-23 01:34:21.054938: step: 536/531, loss: 0.048374176025390625 2023-01-23 01:34:22.194380: step: 540/531, loss: 0.04418602213263512 2023-01-23 01:34:23.296355: step: 544/531, loss: 0.023776818066835403 2023-01-23 01:34:24.427585: step: 548/531, loss: 0.02112445794045925 2023-01-23 01:34:25.533894: step: 552/531, loss: 0.012280656024813652 2023-01-23 01:34:26.649262: step: 556/531, loss: 0.056977175176143646 2023-01-23 01:34:27.751456: step: 560/531, loss: 0.10055427998304367 2023-01-23 01:34:28.858452: step: 564/531, loss: 0.012489033862948418 2023-01-23 01:34:29.970054: step: 568/531, loss: 0.06431732326745987 2023-01-23 01:34:31.088277: step: 572/531, loss: 0.01454234216362238 2023-01-23 01:34:32.217262: step: 576/531, loss: 0.010251638479530811 2023-01-23 01:34:33.340275: step: 580/531, loss: 0.013840293511748314 2023-01-23 01:34:34.476299: step: 584/531, loss: 0.036703046411275864 2023-01-23 01:34:35.577290: step: 588/531, loss: 0.07474103569984436 2023-01-23 01:34:36.736809: step: 592/531, loss: 0.06297874450683594 2023-01-23 01:34:37.851097: step: 596/531, loss: 0.1341439187526703 2023-01-23 01:34:38.958660: step: 600/531, loss: 0.011738300323486328 2023-01-23 01:34:40.070398: step: 604/531, loss: 0.06567726284265518 2023-01-23 01:34:41.182948: step: 608/531, loss: 0.01404562033712864 2023-01-23 01:34:42.360692: step: 612/531, loss: 0.11313267052173615 2023-01-23 01:34:43.481569: step: 616/531, loss: 0.012928009033203125 2023-01-23 01:34:44.593894: step: 620/531, loss: 0.07304663956165314 2023-01-23 01:34:45.694419: step: 624/531, loss: 0.02281656302511692 2023-01-23 01:34:46.836548: step: 628/531, loss: 0.024804305285215378 2023-01-23 01:34:47.994261: step: 632/531, loss: 0.07919197529554367 2023-01-23 01:34:49.129129: step: 636/531, loss: 0.09788642078638077 2023-01-23 01:34:50.245234: step: 640/531, loss: 0.17398062348365784 2023-01-23 01:34:51.373756: step: 644/531, loss: 0.03532905876636505 2023-01-23 01:34:52.494095: step: 648/531, loss: 0.04142698645591736 2023-01-23 01:34:53.616170: step: 652/531, loss: 0.022336864843964577 2023-01-23 01:34:54.751560: step: 656/531, loss: 0.1160479336977005 2023-01-23 01:34:55.902345: step: 660/531, loss: 0.01974334754049778 2023-01-23 01:34:57.023264: step: 664/531, loss: 0.08427486568689346 2023-01-23 01:34:58.135244: step: 668/531, loss: 0.056188393384218216 2023-01-23 01:34:59.270044: step: 672/531, loss: 0.03814134746789932 2023-01-23 01:35:00.401253: step: 676/531, loss: 0.009484196081757545 2023-01-23 01:35:01.533371: step: 680/531, loss: 0.11058798432350159 2023-01-23 01:35:02.681952: step: 684/531, loss: 0.002372360322624445 2023-01-23 01:35:03.806924: step: 688/531, loss: 0.004443073645234108 2023-01-23 01:35:04.944576: step: 692/531, loss: 0.018896352499723434 2023-01-23 01:35:06.067492: step: 696/531, loss: 0.1435987502336502 2023-01-23 01:35:07.168474: step: 700/531, loss: 0.00687332171946764 2023-01-23 01:35:08.284433: step: 704/531, loss: 0.04493732377886772 2023-01-23 01:35:09.433944: step: 708/531, loss: 0.030828284099698067 2023-01-23 01:35:10.557092: step: 712/531, loss: 0.10376262664794922 2023-01-23 01:35:11.701788: step: 716/531, loss: 0.027099132537841797 2023-01-23 01:35:12.811183: step: 720/531, loss: 0.0434780977666378 2023-01-23 01:35:13.927506: step: 724/531, loss: 0.06284217536449432 2023-01-23 01:35:15.069335: step: 728/531, loss: 0.14878997206687927 2023-01-23 01:35:16.186977: step: 732/531, loss: 0.03637829050421715 2023-01-23 01:35:17.302599: step: 736/531, loss: 0.025745106860995293 2023-01-23 01:35:18.430873: step: 740/531, loss: 0.0034799575805664062 2023-01-23 01:35:19.568128: step: 744/531, loss: 0.11501456052064896 2023-01-23 01:35:20.677129: step: 748/531, loss: 0.02026224136352539 2023-01-23 01:35:21.786576: step: 752/531, loss: 0.008418465033173561 2023-01-23 01:35:22.887757: step: 756/531, loss: 0.022331953048706055 2023-01-23 01:35:24.037264: step: 760/531, loss: 0.0366579033434391 2023-01-23 01:35:25.186195: step: 764/531, loss: 0.005731010343879461 2023-01-23 01:35:26.303253: step: 768/531, loss: 0.05093841627240181 2023-01-23 01:35:27.426596: step: 772/531, loss: 0.019498253241181374 2023-01-23 01:35:28.586738: step: 776/531, loss: 0.049474529922008514 2023-01-23 01:35:29.723823: step: 780/531, loss: 0.006289434619247913 2023-01-23 01:35:30.848871: step: 784/531, loss: 0.044103339314460754 2023-01-23 01:35:31.960139: step: 788/531, loss: 0.059922412037849426 2023-01-23 01:35:33.099845: step: 792/531, loss: 0.014417696744203568 2023-01-23 01:35:34.227466: step: 796/531, loss: 0.0026498795486986637 2023-01-23 01:35:35.357657: step: 800/531, loss: 0.03474941477179527 2023-01-23 01:35:36.488160: step: 804/531, loss: 0.01415939349681139 2023-01-23 01:35:37.624882: step: 808/531, loss: 0.051536086946725845 2023-01-23 01:35:38.754591: step: 812/531, loss: 0.4574885368347168 2023-01-23 01:35:39.881186: step: 816/531, loss: 0.040415383875370026 2023-01-23 01:35:40.998890: step: 820/531, loss: 0.012056541629135609 2023-01-23 01:35:42.149679: step: 824/531, loss: 0.030512237921357155 2023-01-23 01:35:43.267213: step: 828/531, loss: 0.015198040753602982 2023-01-23 01:35:44.374839: step: 832/531, loss: 0.023713206872344017 2023-01-23 01:35:45.535374: step: 836/531, loss: 0.04139909893274307 2023-01-23 01:35:46.647298: step: 840/531, loss: 0.021001243963837624 2023-01-23 01:35:47.765905: step: 844/531, loss: 0.0702524185180664 2023-01-23 01:35:48.897567: step: 848/531, loss: 0.052890945225954056 2023-01-23 01:35:50.057853: step: 852/531, loss: 0.011954927816987038 2023-01-23 01:35:51.167403: step: 856/531, loss: 0.028821755200624466 2023-01-23 01:35:52.308147: step: 860/531, loss: 0.0012111187679693103 2023-01-23 01:35:53.461402: step: 864/531, loss: 0.007202529814094305 2023-01-23 01:35:54.553371: step: 868/531, loss: 0.0023280144669115543 2023-01-23 01:35:55.690556: step: 872/531, loss: 0.4859399199485779 2023-01-23 01:35:56.843456: step: 876/531, loss: 0.08663120865821838 2023-01-23 01:35:57.977769: step: 880/531, loss: 0.019716953858733177 2023-01-23 01:35:59.103524: step: 884/531, loss: 0.06647635996341705 2023-01-23 01:36:00.209198: step: 888/531, loss: 0.021984290331602097 2023-01-23 01:36:01.371876: step: 892/531, loss: 0.017107294872403145 2023-01-23 01:36:02.481421: step: 896/531, loss: 0.0345836877822876 2023-01-23 01:36:03.628248: step: 900/531, loss: 0.05378246307373047 2023-01-23 01:36:04.754254: step: 904/531, loss: 0.02114563062787056 2023-01-23 01:36:05.876133: step: 908/531, loss: 0.026821112260222435 2023-01-23 01:36:06.982322: step: 912/531, loss: 0.0363890640437603 2023-01-23 01:36:08.085063: step: 916/531, loss: 0.03604002296924591 2023-01-23 01:36:09.219630: step: 920/531, loss: 0.026711082085967064 2023-01-23 01:36:10.346293: step: 924/531, loss: 0.08712110668420792 2023-01-23 01:36:11.530151: step: 928/531, loss: 0.46248742938041687 2023-01-23 01:36:12.662579: step: 932/531, loss: 0.030457021668553352 2023-01-23 01:36:13.796610: step: 936/531, loss: 0.0970296859741211 2023-01-23 01:36:14.912527: step: 940/531, loss: 0.027438737452030182 2023-01-23 01:36:15.999993: step: 944/531, loss: 0.029915904626250267 2023-01-23 01:36:17.115849: step: 948/531, loss: 0.027980666607618332 2023-01-23 01:36:18.250454: step: 952/531, loss: 0.10565929859876633 2023-01-23 01:36:19.366401: step: 956/531, loss: 0.06206989288330078 2023-01-23 01:36:20.463882: step: 960/531, loss: 0.08147916942834854 2023-01-23 01:36:21.605475: step: 964/531, loss: 0.014317680150270462 2023-01-23 01:36:22.720939: step: 968/531, loss: 0.04774452745914459 2023-01-23 01:36:23.872358: step: 972/531, loss: 0.008439827710390091 2023-01-23 01:36:24.971468: step: 976/531, loss: 0.02240018919110298 2023-01-23 01:36:26.106485: step: 980/531, loss: 0.09134330600500107 2023-01-23 01:36:27.260634: step: 984/531, loss: 0.006825447082519531 2023-01-23 01:36:28.381529: step: 988/531, loss: 0.050775717943906784 2023-01-23 01:36:29.516467: step: 992/531, loss: 0.01884746551513672 2023-01-23 01:36:30.640599: step: 996/531, loss: 0.10304941982030869 2023-01-23 01:36:31.787826: step: 1000/531, loss: 0.06675796210765839 2023-01-23 01:36:32.929018: step: 1004/531, loss: 0.01001830119639635 2023-01-23 01:36:34.056613: step: 1008/531, loss: 0.004202842712402344 2023-01-23 01:36:35.159332: step: 1012/531, loss: 0.04262109100818634 2023-01-23 01:36:36.283597: step: 1016/531, loss: 0.0853450819849968 2023-01-23 01:36:37.392661: step: 1020/531, loss: 0.10220298916101456 2023-01-23 01:36:38.535014: step: 1024/531, loss: 0.03408947214484215 2023-01-23 01:36:39.647117: step: 1028/531, loss: 0.10641252994537354 2023-01-23 01:36:40.812483: step: 1032/531, loss: 0.027832651510834694 2023-01-23 01:36:41.959203: step: 1036/531, loss: 0.698275625705719 2023-01-23 01:36:43.103789: step: 1040/531, loss: 0.09946541488170624 2023-01-23 01:36:44.203287: step: 1044/531, loss: 0.03708686679601669 2023-01-23 01:36:45.312034: step: 1048/531, loss: 0.022433269768953323 2023-01-23 01:36:46.448316: step: 1052/531, loss: 0.025570297613739967 2023-01-23 01:36:47.589584: step: 1056/531, loss: 0.10999850928783417 2023-01-23 01:36:48.709267: step: 1060/531, loss: 0.0662631019949913 2023-01-23 01:36:49.824607: step: 1064/531, loss: 0.01800365373492241 2023-01-23 01:36:50.977211: step: 1068/531, loss: 0.002541160676628351 2023-01-23 01:36:52.079247: step: 1072/531, loss: 0.012896394357085228 2023-01-23 01:36:53.220482: step: 1076/531, loss: 0.03825845941901207 2023-01-23 01:36:54.368936: step: 1080/531, loss: 0.05021210014820099 2023-01-23 01:36:55.477274: step: 1084/531, loss: 0.05231847986578941 2023-01-23 01:36:56.602273: step: 1088/531, loss: 0.16162744164466858 2023-01-23 01:36:57.716514: step: 1092/531, loss: 0.11827889084815979 2023-01-23 01:36:58.893228: step: 1096/531, loss: 0.019343852996826172 2023-01-23 01:37:00.026636: step: 1100/531, loss: 0.03673610836267471 2023-01-23 01:37:01.158509: step: 1104/531, loss: 0.0640331283211708 2023-01-23 01:37:02.300347: step: 1108/531, loss: 0.07004113495349884 2023-01-23 01:37:03.418764: step: 1112/531, loss: 0.02098817750811577 2023-01-23 01:37:04.526985: step: 1116/531, loss: 0.007976197637617588 2023-01-23 01:37:05.663277: step: 1120/531, loss: 0.01597442664206028 2023-01-23 01:37:06.806916: step: 1124/531, loss: 0.016730977222323418 2023-01-23 01:37:07.922892: step: 1128/531, loss: 0.04646758735179901 2023-01-23 01:37:09.041843: step: 1132/531, loss: 0.031763769686222076 2023-01-23 01:37:10.143674: step: 1136/531, loss: 0.03558769449591637 2023-01-23 01:37:11.253451: step: 1140/531, loss: 0.04510064423084259 2023-01-23 01:37:12.372080: step: 1144/531, loss: 0.047937989234924316 2023-01-23 01:37:13.505274: step: 1148/531, loss: 0.05689249187707901 2023-01-23 01:37:14.623267: step: 1152/531, loss: 0.2007051408290863 2023-01-23 01:37:15.745065: step: 1156/531, loss: 0.014181804843246937 2023-01-23 01:37:16.863357: step: 1160/531, loss: 0.0476049929857254 2023-01-23 01:37:18.010534: step: 1164/531, loss: 0.15232238173484802 2023-01-23 01:37:19.153286: step: 1168/531, loss: 0.04912414774298668 2023-01-23 01:37:20.271399: step: 1172/531, loss: 0.022928999736905098 2023-01-23 01:37:21.405454: step: 1176/531, loss: 0.08101377636194229 2023-01-23 01:37:22.521671: step: 1180/531, loss: 0.008741283789277077 2023-01-23 01:37:23.662108: step: 1184/531, loss: 0.10629777610301971 2023-01-23 01:37:24.797672: step: 1188/531, loss: 0.1857776939868927 2023-01-23 01:37:25.939703: step: 1192/531, loss: 0.02162330225110054 2023-01-23 01:37:27.050038: step: 1196/531, loss: 0.02423687092959881 2023-01-23 01:37:28.165064: step: 1200/531, loss: 0.0004979983204975724 2023-01-23 01:37:29.283409: step: 1204/531, loss: 0.00899505615234375 2023-01-23 01:37:30.405032: step: 1208/531, loss: 0.0004848003445658833 2023-01-23 01:37:31.600882: step: 1212/531, loss: 0.04509296268224716 2023-01-23 01:37:32.739091: step: 1216/531, loss: 0.06766834855079651 2023-01-23 01:37:33.879473: step: 1220/531, loss: 0.02234821207821369 2023-01-23 01:37:35.027889: step: 1224/531, loss: 0.20259609818458557 2023-01-23 01:37:36.144810: step: 1228/531, loss: 0.1478653848171234 2023-01-23 01:37:37.259392: step: 1232/531, loss: 0.01733102649450302 2023-01-23 01:37:38.361765: step: 1236/531, loss: 0.07483520358800888 2023-01-23 01:37:39.497974: step: 1240/531, loss: 0.028918646275997162 2023-01-23 01:37:40.641624: step: 1244/531, loss: 0.09021492302417755 2023-01-23 01:37:41.788069: step: 1248/531, loss: 0.04510708153247833 2023-01-23 01:37:42.910574: step: 1252/531, loss: 0.03388190269470215 2023-01-23 01:37:44.050031: step: 1256/531, loss: 0.07007598876953125 2023-01-23 01:37:45.175996: step: 1260/531, loss: 0.003340053604915738 2023-01-23 01:37:46.282798: step: 1264/531, loss: 0.012237166985869408 2023-01-23 01:37:47.397659: step: 1268/531, loss: 0.008912467397749424 2023-01-23 01:37:48.491830: step: 1272/531, loss: 0.00470013590529561 2023-01-23 01:37:49.610491: step: 1276/531, loss: 0.07364121079444885 2023-01-23 01:37:50.756147: step: 1280/531, loss: 0.0245390422642231 2023-01-23 01:37:51.874233: step: 1284/531, loss: 0.05048947408795357 2023-01-23 01:37:53.000939: step: 1288/531, loss: 0.009117030538618565 2023-01-23 01:37:54.116233: step: 1292/531, loss: 0.009619617834687233 2023-01-23 01:37:55.243726: step: 1296/531, loss: 0.014437627978622913 2023-01-23 01:37:56.376165: step: 1300/531, loss: 0.14535284042358398 2023-01-23 01:37:57.516588: step: 1304/531, loss: 0.012126964516937733 2023-01-23 01:37:58.652432: step: 1308/531, loss: 0.017809296026825905 2023-01-23 01:37:59.813758: step: 1312/531, loss: 0.046889498829841614 2023-01-23 01:38:00.910992: step: 1316/531, loss: 0.09406042098999023 2023-01-23 01:38:02.028471: step: 1320/531, loss: 0.9882091879844666 2023-01-23 01:38:03.150599: step: 1324/531, loss: 0.020993998274207115 2023-01-23 01:38:04.262039: step: 1328/531, loss: 0.06654224544763565 2023-01-23 01:38:05.422557: step: 1332/531, loss: 1.1802095174789429 2023-01-23 01:38:06.530759: step: 1336/531, loss: 0.06622724235057831 2023-01-23 01:38:07.646838: step: 1340/531, loss: 0.09898719936609268 2023-01-23 01:38:08.776400: step: 1344/531, loss: 0.07313279807567596 2023-01-23 01:38:09.879114: step: 1348/531, loss: 0.029189683496952057 2023-01-23 01:38:11.011577: step: 1352/531, loss: 0.023584628477692604 2023-01-23 01:38:12.170237: step: 1356/531, loss: 0.07253327965736389 2023-01-23 01:38:13.304659: step: 1360/531, loss: 0.016899872571229935 2023-01-23 01:38:14.445620: step: 1364/531, loss: 0.023358821868896484 2023-01-23 01:38:15.576587: step: 1368/531, loss: 0.02863626554608345 2023-01-23 01:38:16.678110: step: 1372/531, loss: 0.03712787479162216 2023-01-23 01:38:17.787465: step: 1376/531, loss: 0.01287236250936985 2023-01-23 01:38:18.968332: step: 1380/531, loss: 0.044175148010253906 2023-01-23 01:38:20.108285: step: 1384/531, loss: 0.009384251199662685 2023-01-23 01:38:21.215536: step: 1388/531, loss: 0.015148353762924671 2023-01-23 01:38:22.345343: step: 1392/531, loss: 0.18818068504333496 2023-01-23 01:38:23.459522: step: 1396/531, loss: 0.06886816024780273 2023-01-23 01:38:24.595696: step: 1400/531, loss: 0.04330310598015785 2023-01-23 01:38:25.721244: step: 1404/531, loss: 0.14418324828147888 2023-01-23 01:38:26.787884: step: 1408/531, loss: 0.040449973195791245 2023-01-23 01:38:27.910585: step: 1412/531, loss: 0.15003032982349396 2023-01-23 01:38:29.018128: step: 1416/531, loss: 0.04059848561882973 2023-01-23 01:38:30.141071: step: 1420/531, loss: 0.02403726615011692 2023-01-23 01:38:31.279636: step: 1424/531, loss: 0.014409065246582031 2023-01-23 01:38:32.397907: step: 1428/531, loss: 0.021810341626405716 2023-01-23 01:38:33.507202: step: 1432/531, loss: 0.03038501739501953 2023-01-23 01:38:34.653466: step: 1436/531, loss: 0.03841428458690643 2023-01-23 01:38:35.776198: step: 1440/531, loss: 0.05535269156098366 2023-01-23 01:38:36.889482: step: 1444/531, loss: 0.027306556701660156 2023-01-23 01:38:38.005686: step: 1448/531, loss: 0.018312575295567513 2023-01-23 01:38:39.121026: step: 1452/531, loss: 0.15522870421409607 2023-01-23 01:38:40.247974: step: 1456/531, loss: 0.12756776809692383 2023-01-23 01:38:41.351552: step: 1460/531, loss: 0.00010938644845737144 2023-01-23 01:38:42.483007: step: 1464/531, loss: 0.06287985295057297 2023-01-23 01:38:43.597649: step: 1468/531, loss: 0.000268960022367537 2023-01-23 01:38:44.706093: step: 1472/531, loss: 0.04957442358136177 2023-01-23 01:38:45.822868: step: 1476/531, loss: 0.019331075251102448 2023-01-23 01:38:46.947715: step: 1480/531, loss: 0.08308392018079758 2023-01-23 01:38:48.080372: step: 1484/531, loss: 0.0050862194038927555 2023-01-23 01:38:49.207353: step: 1488/531, loss: 0.02396850660443306 2023-01-23 01:38:50.325552: step: 1492/531, loss: 0.017570162191987038 2023-01-23 01:38:51.428589: step: 1496/531, loss: 0.2160778045654297 2023-01-23 01:38:52.540085: step: 1500/531, loss: 0.01109619066119194 2023-01-23 01:38:53.661194: step: 1504/531, loss: 0.04328777641057968 2023-01-23 01:38:54.805158: step: 1508/531, loss: 0.008468151092529297 2023-01-23 01:38:55.934104: step: 1512/531, loss: 0.05934581905603409 2023-01-23 01:38:57.048702: step: 1516/531, loss: 0.045438673347234726 2023-01-23 01:38:58.165680: step: 1520/531, loss: 0.02972431294620037 2023-01-23 01:38:59.264202: step: 1524/531, loss: 0.018481923267245293 2023-01-23 01:39:00.378843: step: 1528/531, loss: 0.008174611255526543 2023-01-23 01:39:01.476196: step: 1532/531, loss: 0.015534305945038795 2023-01-23 01:39:02.573950: step: 1536/531, loss: 0.056647732853889465 2023-01-23 01:39:03.719569: step: 1540/531, loss: 0.012342738918960094 2023-01-23 01:39:04.835886: step: 1544/531, loss: 0.0005943298456259072 2023-01-23 01:39:05.993371: step: 1548/531, loss: 0.03918495029211044 2023-01-23 01:39:07.120067: step: 1552/531, loss: 0.0056892395950853825 2023-01-23 01:39:08.240114: step: 1556/531, loss: 0.06790996342897415 2023-01-23 01:39:09.346840: step: 1560/531, loss: 0.0029452084563672543 2023-01-23 01:39:10.509455: step: 1564/531, loss: 0.19618378579616547 2023-01-23 01:39:11.612123: step: 1568/531, loss: 0.010821056552231312 2023-01-23 01:39:12.729294: step: 1572/531, loss: 0.007067108526825905 2023-01-23 01:39:13.853396: step: 1576/531, loss: 0.07634472846984863 2023-01-23 01:39:14.987784: step: 1580/531, loss: 0.020088767632842064 2023-01-23 01:39:16.135822: step: 1584/531, loss: 0.032546043395996094 2023-01-23 01:39:17.259817: step: 1588/531, loss: 0.006006479263305664 2023-01-23 01:39:18.360021: step: 1592/531, loss: 0.001477956771850586 2023-01-23 01:39:19.502077: step: 1596/531, loss: 0.09744921326637268 2023-01-23 01:39:20.620841: step: 1600/531, loss: 0.07438497245311737 2023-01-23 01:39:21.717751: step: 1604/531, loss: 0.07650060951709747 2023-01-23 01:39:22.820052: step: 1608/531, loss: 0.10595346242189407 2023-01-23 01:39:23.953251: step: 1612/531, loss: 0.06526460498571396 2023-01-23 01:39:25.062815: step: 1616/531, loss: 0.09192466735839844 2023-01-23 01:39:26.196443: step: 1620/531, loss: 0.04222307354211807 2023-01-23 01:39:27.324541: step: 1624/531, loss: 0.010996311902999878 2023-01-23 01:39:28.435976: step: 1628/531, loss: 0.009233546443283558 2023-01-23 01:39:29.558083: step: 1632/531, loss: 0.04507913440465927 2023-01-23 01:39:30.681964: step: 1636/531, loss: 0.11485419422388077 2023-01-23 01:39:31.798964: step: 1640/531, loss: 0.0775420218706131 2023-01-23 01:39:32.936777: step: 1644/531, loss: 0.10292492061853409 2023-01-23 01:39:34.051410: step: 1648/531, loss: 0.006024551577866077 2023-01-23 01:39:35.178531: step: 1652/531, loss: 0.02397298812866211 2023-01-23 01:39:36.295078: step: 1656/531, loss: 0.1017235815525055 2023-01-23 01:39:37.420197: step: 1660/531, loss: 0.06056041643023491 2023-01-23 01:39:38.583424: step: 1664/531, loss: 0.13555994629859924 2023-01-23 01:39:39.711592: step: 1668/531, loss: 0.06228942796587944 2023-01-23 01:39:40.811243: step: 1672/531, loss: 0.07841825485229492 2023-01-23 01:39:41.944924: step: 1676/531, loss: 0.05848198011517525 2023-01-23 01:39:43.070447: step: 1680/531, loss: 0.0036527158226817846 2023-01-23 01:39:44.205939: step: 1684/531, loss: 0.0028963088989257812 2023-01-23 01:39:45.319203: step: 1688/531, loss: 0.08239364624023438 2023-01-23 01:39:46.444851: step: 1692/531, loss: 0.012074685655534267 2023-01-23 01:39:47.570492: step: 1696/531, loss: 0.04532909393310547 2023-01-23 01:39:48.703944: step: 1700/531, loss: 0.0305391326546669 2023-01-23 01:39:49.804094: step: 1704/531, loss: 0.04217987135052681 2023-01-23 01:39:50.915662: step: 1708/531, loss: 0.07543668895959854 2023-01-23 01:39:52.057003: step: 1712/531, loss: 0.06074810028076172 2023-01-23 01:39:53.161087: step: 1716/531, loss: 0.021434593945741653 2023-01-23 01:39:54.299003: step: 1720/531, loss: 0.05495748668909073 2023-01-23 01:39:55.458409: step: 1724/531, loss: 0.05599823221564293 2023-01-23 01:39:56.583518: step: 1728/531, loss: 0.011588573455810547 2023-01-23 01:39:57.689488: step: 1732/531, loss: 0.013997411355376244 2023-01-23 01:39:58.815881: step: 1736/531, loss: 0.030081558972597122 2023-01-23 01:39:59.944251: step: 1740/531, loss: 0.08217716217041016 2023-01-23 01:40:01.082351: step: 1744/531, loss: 0.10544148087501526 2023-01-23 01:40:02.210002: step: 1748/531, loss: 0.015371656976640224 2023-01-23 01:40:03.334327: step: 1752/531, loss: 0.37945443391799927 2023-01-23 01:40:04.439811: step: 1756/531, loss: 0.07821617275476456 2023-01-23 01:40:05.547438: step: 1760/531, loss: 0.08327965438365936 2023-01-23 01:40:06.712293: step: 1764/531, loss: 0.018086720257997513 2023-01-23 01:40:07.823808: step: 1768/531, loss: 0.05119304731488228 2023-01-23 01:40:08.982838: step: 1772/531, loss: 0.010021782480180264 2023-01-23 01:40:10.130832: step: 1776/531, loss: 0.052277565002441406 2023-01-23 01:40:11.275426: step: 1780/531, loss: 0.12825989723205566 2023-01-23 01:40:12.424653: step: 1784/531, loss: 0.0018489838112145662 2023-01-23 01:40:13.551486: step: 1788/531, loss: 0.03022794798016548 2023-01-23 01:40:14.671244: step: 1792/531, loss: 0.042185403406620026 2023-01-23 01:40:15.783201: step: 1796/531, loss: 0.03956933319568634 2023-01-23 01:40:16.878889: step: 1800/531, loss: 0.03247842937707901 2023-01-23 01:40:18.012720: step: 1804/531, loss: 0.0048469542525708675 2023-01-23 01:40:19.123960: step: 1808/531, loss: 0.013456583023071289 2023-01-23 01:40:20.260409: step: 1812/531, loss: 0.005601215176284313 2023-01-23 01:40:21.390363: step: 1816/531, loss: 0.025394631549715996 2023-01-23 01:40:22.508030: step: 1820/531, loss: 0.02357044257223606 2023-01-23 01:40:23.613921: step: 1824/531, loss: 0.01884479634463787 2023-01-23 01:40:24.718184: step: 1828/531, loss: 0.001811218331567943 2023-01-23 01:40:25.847005: step: 1832/531, loss: 0.024689625948667526 2023-01-23 01:40:26.950338: step: 1836/531, loss: 0.05257987976074219 2023-01-23 01:40:28.085369: step: 1840/531, loss: 0.01757230795919895 2023-01-23 01:40:29.236593: step: 1844/531, loss: 0.006537914741784334 2023-01-23 01:40:30.363752: step: 1848/531, loss: 0.11457739770412445 2023-01-23 01:40:31.535288: step: 1852/531, loss: 0.05506391450762749 2023-01-23 01:40:32.664872: step: 1856/531, loss: 0.012675618752837181 2023-01-23 01:40:33.797181: step: 1860/531, loss: 0.032007407397031784 2023-01-23 01:40:34.946110: step: 1864/531, loss: 0.029535962268710136 2023-01-23 01:40:36.054371: step: 1868/531, loss: 0.01293954811990261 2023-01-23 01:40:37.188917: step: 1872/531, loss: 0.0346558578312397 2023-01-23 01:40:38.321504: step: 1876/531, loss: 0.060494616627693176 2023-01-23 01:40:39.444417: step: 1880/531, loss: 0.05873880535364151 2023-01-23 01:40:40.591365: step: 1884/531, loss: 0.13491840660572052 2023-01-23 01:40:41.701311: step: 1888/531, loss: 0.45614093542099 2023-01-23 01:40:42.810799: step: 1892/531, loss: 0.08778896182775497 2023-01-23 01:40:43.938246: step: 1896/531, loss: 0.08901634812355042 2023-01-23 01:40:45.075546: step: 1900/531, loss: 0.02306346967816353 2023-01-23 01:40:46.222313: step: 1904/531, loss: 0.004037666600197554 2023-01-23 01:40:47.355390: step: 1908/531, loss: 0.12361335754394531 2023-01-23 01:40:48.499379: step: 1912/531, loss: 0.02625408209860325 2023-01-23 01:40:49.667181: step: 1916/531, loss: 0.11926212161779404 2023-01-23 01:40:50.793735: step: 1920/531, loss: 0.06683167815208435 2023-01-23 01:40:51.936976: step: 1924/531, loss: 0.03842058405280113 2023-01-23 01:40:53.052184: step: 1928/531, loss: 0.0680788978934288 2023-01-23 01:40:54.173816: step: 1932/531, loss: 0.01659870147705078 2023-01-23 01:40:55.307087: step: 1936/531, loss: 0.008098411373794079 2023-01-23 01:40:56.452270: step: 1940/531, loss: 0.013364476151764393 2023-01-23 01:40:57.590330: step: 1944/531, loss: 0.24418029189109802 2023-01-23 01:40:58.725565: step: 1948/531, loss: 0.008128595538437366 2023-01-23 01:40:59.857656: step: 1952/531, loss: 0.10405979305505753 2023-01-23 01:41:00.965228: step: 1956/531, loss: 0.018499184399843216 2023-01-23 01:41:02.093442: step: 1960/531, loss: 0.00597461499273777 2023-01-23 01:41:03.211478: step: 1964/531, loss: 0.035898495465517044 2023-01-23 01:41:04.335051: step: 1968/531, loss: 0.03136706352233887 2023-01-23 01:41:05.441626: step: 1972/531, loss: 0.05230360105633736 2023-01-23 01:41:06.559393: step: 1976/531, loss: 0.013293028809130192 2023-01-23 01:41:07.670022: step: 1980/531, loss: 0.011597824282944202 2023-01-23 01:41:08.796065: step: 1984/531, loss: 0.0887262374162674 2023-01-23 01:41:09.927447: step: 1988/531, loss: 0.06823473423719406 2023-01-23 01:41:11.060634: step: 1992/531, loss: 0.047307778149843216 2023-01-23 01:41:12.196595: step: 1996/531, loss: 0.04340305179357529 2023-01-23 01:41:13.330618: step: 2000/531, loss: 0.0034839629661291838 2023-01-23 01:41:14.467685: step: 2004/531, loss: 0.05612316355109215 2023-01-23 01:41:15.587104: step: 2008/531, loss: 0.054436493664979935 2023-01-23 01:41:16.740321: step: 2012/531, loss: 0.05939092859625816 2023-01-23 01:41:17.862141: step: 2016/531, loss: 0.01971726305782795 2023-01-23 01:41:18.965637: step: 2020/531, loss: 0.024842167273163795 2023-01-23 01:41:20.085973: step: 2024/531, loss: 0.02219834364950657 2023-01-23 01:41:21.230509: step: 2028/531, loss: 0.04516439512372017 2023-01-23 01:41:22.352697: step: 2032/531, loss: 0.018895531073212624 2023-01-23 01:41:23.490171: step: 2036/531, loss: 0.05838470533490181 2023-01-23 01:41:24.623281: step: 2040/531, loss: 0.04346800222992897 2023-01-23 01:41:25.740593: step: 2044/531, loss: 0.05949802324175835 2023-01-23 01:41:26.873303: step: 2048/531, loss: 0.06024212762713432 2023-01-23 01:41:28.006537: step: 2052/531, loss: 0.15944595634937286 2023-01-23 01:41:29.137686: step: 2056/531, loss: 0.07171754539012909 2023-01-23 01:41:30.272573: step: 2060/531, loss: 0.04210929945111275 2023-01-23 01:41:31.391143: step: 2064/531, loss: 0.049760058522224426 2023-01-23 01:41:32.554683: step: 2068/531, loss: 0.07243957370519638 2023-01-23 01:41:33.688288: step: 2072/531, loss: 0.04726238176226616 2023-01-23 01:41:34.806604: step: 2076/531, loss: 0.002039241837337613 2023-01-23 01:41:35.906168: step: 2080/531, loss: 0.0020648480858653784 2023-01-23 01:41:37.029062: step: 2084/531, loss: 0.10527076572179794 2023-01-23 01:41:38.155722: step: 2088/531, loss: 0.02209148369729519 2023-01-23 01:41:39.265310: step: 2092/531, loss: 0.023993873968720436 2023-01-23 01:41:40.413319: step: 2096/531, loss: 0.02828502655029297 2023-01-23 01:41:41.538419: step: 2100/531, loss: 0.026215743273496628 2023-01-23 01:41:42.684465: step: 2104/531, loss: 0.008970832452178001 2023-01-23 01:41:43.796871: step: 2108/531, loss: 0.1338087022304535 2023-01-23 01:41:44.915027: step: 2112/531, loss: 0.037584494799375534 2023-01-23 01:41:46.029674: step: 2116/531, loss: 0.01471557654440403 2023-01-23 01:41:47.208878: step: 2120/531, loss: 0.006036948878318071 2023-01-23 01:41:48.338837: step: 2124/531, loss: 0.1139741912484169 ================================================== Loss: 0.059 -------------------- Dev: {'event': {'p': 0.5975733063700708, 'r': 0.7869507323568575, 'f1': 0.6793103448275862}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Test: {'event': {'p': 0.6186242395882078, 'r': 0.7883124627310674, 'f1': 0.6932354483481908}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Chinese: {'event': {'p': 0.5903614457831325, 'r': 0.9074074074074074, 'f1': 0.7153284671532847}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Korean: {'event': {'p': 0.7169811320754716, 'r': 0.6031746031746031, 'f1': 0.6551724137931034}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Russian: {'event': {'p': 0.4666666666666667, 'r': 0.5833333333333334, 'f1': 0.5185185185185186}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} New best korean model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.5975733063700708, 'r': 0.7869507323568575, 'f1': 0.6793103448275862}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Eng Test for Korean: {'event': {'p': 0.6186242395882078, 'r': 0.7883124627310674, 'f1': 0.6932354483481908}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Korean: {'event': {'p': 0.7169811320754716, 'r': 0.6031746031746031, 'f1': 0.6551724137931034}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 13 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:42:34.497068: step: 4/531, loss: 0.03342418745160103 2023-01-23 01:42:35.603032: step: 8/531, loss: 0.07771588116884232 2023-01-23 01:42:36.729248: step: 12/531, loss: 0.0912843719124794 2023-01-23 01:42:37.867974: step: 16/531, loss: 0.054195791482925415 2023-01-23 01:42:38.972826: step: 20/531, loss: 0.06615810096263885 2023-01-23 01:42:40.076964: step: 24/531, loss: 0.018315888941287994 2023-01-23 01:42:41.214124: step: 28/531, loss: 0.006016540806740522 2023-01-23 01:42:42.421121: step: 32/531, loss: 0.059362128376960754 2023-01-23 01:42:43.556260: step: 36/531, loss: 0.01581754721701145 2023-01-23 01:42:44.684934: step: 40/531, loss: 0.10967950522899628 2023-01-23 01:42:45.840122: step: 44/531, loss: 0.04458465427160263 2023-01-23 01:42:46.944928: step: 48/531, loss: 0.05173778533935547 2023-01-23 01:42:48.075831: step: 52/531, loss: 0.014670658856630325 2023-01-23 01:42:49.204077: step: 56/531, loss: 0.007902145385742188 2023-01-23 01:42:50.339182: step: 60/531, loss: 0.03858385235071182 2023-01-23 01:42:51.500447: step: 64/531, loss: 0.1147555336356163 2023-01-23 01:42:52.645887: step: 68/531, loss: 0.00781016331166029 2023-01-23 01:42:53.766478: step: 72/531, loss: 0.029988668859004974 2023-01-23 01:42:54.861299: step: 76/531, loss: 0.17744828760623932 2023-01-23 01:42:55.987127: step: 80/531, loss: 0.010762691497802734 2023-01-23 01:42:57.100245: step: 84/531, loss: 0.002446174854412675 2023-01-23 01:42:58.236477: step: 88/531, loss: 0.29659736156463623 2023-01-23 01:42:59.329358: step: 92/531, loss: 0.054776858538389206 2023-01-23 01:43:00.451168: step: 96/531, loss: 0.03169412538409233 2023-01-23 01:43:01.526101: step: 100/531, loss: 0.011609840206801891 2023-01-23 01:43:02.637494: step: 104/531, loss: 0.04917926713824272 2023-01-23 01:43:03.759006: step: 108/531, loss: 0.026915669441223145 2023-01-23 01:43:04.951216: step: 112/531, loss: 0.030089378356933594 2023-01-23 01:43:06.069994: step: 116/531, loss: 0.036695003509521484 2023-01-23 01:43:07.213272: step: 120/531, loss: 0.6868606805801392 2023-01-23 01:43:08.353900: step: 124/531, loss: 0.05361232906579971 2023-01-23 01:43:09.469131: step: 128/531, loss: 0.04308462515473366 2023-01-23 01:43:10.610424: step: 132/531, loss: 0.05794540420174599 2023-01-23 01:43:11.737226: step: 136/531, loss: 0.009990166872739792 2023-01-23 01:43:12.875576: step: 140/531, loss: 0.01703043095767498 2023-01-23 01:43:14.017345: step: 144/531, loss: 0.023775434121489525 2023-01-23 01:43:15.136976: step: 148/531, loss: 0.0005966186872683465 2023-01-23 01:43:16.252936: step: 152/531, loss: 0.008697127923369408 2023-01-23 01:43:17.367353: step: 156/531, loss: 0.05570597946643829 2023-01-23 01:43:18.500879: step: 160/531, loss: 0.03552570194005966 2023-01-23 01:43:19.637919: step: 164/531, loss: 0.030355453491210938 2023-01-23 01:43:20.732916: step: 168/531, loss: 0.19229717552661896 2023-01-23 01:43:21.844375: step: 172/531, loss: 0.0591767318546772 2023-01-23 01:43:22.972190: step: 176/531, loss: 0.020240116864442825 2023-01-23 01:43:24.118702: step: 180/531, loss: 0.04676008224487305 2023-01-23 01:43:25.255145: step: 184/531, loss: 0.0069724079221487045 2023-01-23 01:43:26.376598: step: 188/531, loss: 0.030846787616610527 2023-01-23 01:43:27.515099: step: 192/531, loss: 0.008553886786103249 2023-01-23 01:43:28.696662: step: 196/531, loss: 0.07150917500257492 2023-01-23 01:43:29.791686: step: 200/531, loss: 0.0016655921936035156 2023-01-23 01:43:30.885442: step: 204/531, loss: 0.03125810623168945 2023-01-23 01:43:31.995169: step: 208/531, loss: 0.17051377892494202 2023-01-23 01:43:33.122380: step: 212/531, loss: 0.044175341725349426 2023-01-23 01:43:34.268772: step: 216/531, loss: 0.0743008628487587 2023-01-23 01:43:35.393412: step: 220/531, loss: 0.04328365623950958 2023-01-23 01:43:36.514303: step: 224/531, loss: 0.013271236792206764 2023-01-23 01:43:37.661701: step: 228/531, loss: 0.010369528084993362 2023-01-23 01:43:38.766322: step: 232/531, loss: 0.0013322352897375822 2023-01-23 01:43:39.907371: step: 236/531, loss: 0.06214666739106178 2023-01-23 01:43:41.038048: step: 240/531, loss: 0.23943157494068146 2023-01-23 01:43:42.166110: step: 244/531, loss: 0.002432918641716242 2023-01-23 01:43:43.276012: step: 248/531, loss: 0.022056009620428085 2023-01-23 01:43:44.382170: step: 252/531, loss: 0.04144277423620224 2023-01-23 01:43:45.496078: step: 256/531, loss: 0.039461899548769 2023-01-23 01:43:46.620317: step: 260/531, loss: 0.020702458918094635 2023-01-23 01:43:47.767018: step: 264/531, loss: 0.0020713568665087223 2023-01-23 01:43:48.960814: step: 268/531, loss: 0.024099208414554596 2023-01-23 01:43:50.078289: step: 272/531, loss: 0.014520073309540749 2023-01-23 01:43:51.169165: step: 276/531, loss: 0.046061232686042786 2023-01-23 01:43:52.263035: step: 280/531, loss: 0.011994361877441406 2023-01-23 01:43:53.398896: step: 284/531, loss: 0.003659582231193781 2023-01-23 01:43:54.522491: step: 288/531, loss: 0.05892801284790039 2023-01-23 01:43:55.658646: step: 292/531, loss: 0.05326499789953232 2023-01-23 01:43:56.800801: step: 296/531, loss: 0.021243762224912643 2023-01-23 01:43:57.948032: step: 300/531, loss: 0.00707664480432868 2023-01-23 01:43:59.086872: step: 304/531, loss: 0.04495277628302574 2023-01-23 01:44:00.216090: step: 308/531, loss: 0.11237473785877228 2023-01-23 01:44:01.328298: step: 312/531, loss: 0.005398750305175781 2023-01-23 01:44:02.459467: step: 316/531, loss: 0.051763586699962616 2023-01-23 01:44:03.572976: step: 320/531, loss: 0.017431069165468216 2023-01-23 01:44:04.730665: step: 324/531, loss: 0.01579895056784153 2023-01-23 01:44:05.832985: step: 328/531, loss: 0.04519863426685333 2023-01-23 01:44:06.959207: step: 332/531, loss: 0.079498291015625 2023-01-23 01:44:08.051932: step: 336/531, loss: 0.009938192553818226 2023-01-23 01:44:09.185466: step: 340/531, loss: 0.01903686486184597 2023-01-23 01:44:10.296652: step: 344/531, loss: 0.018907546997070312 2023-01-23 01:44:11.458683: step: 348/531, loss: 0.17576994001865387 2023-01-23 01:44:12.578396: step: 352/531, loss: 0.4193234443664551 2023-01-23 01:44:13.683622: step: 356/531, loss: 0.00028274653595872223 2023-01-23 01:44:14.806121: step: 360/531, loss: 0.0034258842933923006 2023-01-23 01:44:15.904232: step: 364/531, loss: 0.09827709197998047 2023-01-23 01:44:17.053805: step: 368/531, loss: 0.021271612495183945 2023-01-23 01:44:18.161393: step: 372/531, loss: 0.08587093651294708 2023-01-23 01:44:19.262301: step: 376/531, loss: 0.00026340485783293843 2023-01-23 01:44:20.370153: step: 380/531, loss: 0.027202893048524857 2023-01-23 01:44:21.438453: step: 384/531, loss: 0.0027251243591308594 2023-01-23 01:44:22.608663: step: 388/531, loss: 0.09790420532226562 2023-01-23 01:44:23.748409: step: 392/531, loss: 0.04433570057153702 2023-01-23 01:44:24.857311: step: 396/531, loss: 0.04228248819708824 2023-01-23 01:44:25.967823: step: 400/531, loss: 0.0013032912975177169 2023-01-23 01:44:27.083086: step: 404/531, loss: 0.07800416648387909 2023-01-23 01:44:28.202528: step: 408/531, loss: 0.010413790121674538 2023-01-23 01:44:29.310710: step: 412/531, loss: 0.08815765380859375 2023-01-23 01:44:30.428864: step: 416/531, loss: 0.01145267579704523 2023-01-23 01:44:31.576770: step: 420/531, loss: 0.030092386528849602 2023-01-23 01:44:32.731430: step: 424/531, loss: 0.030657770112156868 2023-01-23 01:44:33.863470: step: 428/531, loss: 0.07740054279565811 2023-01-23 01:44:34.984352: step: 432/531, loss: 0.013010883703827858 2023-01-23 01:44:36.126767: step: 436/531, loss: 0.036742210388183594 2023-01-23 01:44:37.246098: step: 440/531, loss: 0.027120592072606087 2023-01-23 01:44:38.370643: step: 444/531, loss: 0.030752800405025482 2023-01-23 01:44:39.499005: step: 448/531, loss: 0.012925815768539906 2023-01-23 01:44:40.618800: step: 452/531, loss: 0.07271194458007812 2023-01-23 01:44:41.757327: step: 456/531, loss: 0.00043334963265806437 2023-01-23 01:44:42.918273: step: 460/531, loss: 0.0144494054839015 2023-01-23 01:44:44.089890: step: 464/531, loss: 0.01098709087818861 2023-01-23 01:44:45.261604: step: 468/531, loss: 0.009502887725830078 2023-01-23 01:44:46.388126: step: 472/531, loss: 0.00017833709716796875 2023-01-23 01:44:47.492298: step: 476/531, loss: 0.03253984451293945 2023-01-23 01:44:48.642108: step: 480/531, loss: 0.021430017426609993 2023-01-23 01:44:49.775403: step: 484/531, loss: 0.0014138699043542147 2023-01-23 01:44:50.893459: step: 488/531, loss: 0.05034312978386879 2023-01-23 01:44:52.028902: step: 492/531, loss: 0.009504175744950771 2023-01-23 01:44:53.143124: step: 496/531, loss: 0.03173184394836426 2023-01-23 01:44:54.281510: step: 500/531, loss: 0.007869720458984375 2023-01-23 01:44:55.384070: step: 504/531, loss: 0.0459136962890625 2023-01-23 01:44:56.537957: step: 508/531, loss: 0.002742767333984375 2023-01-23 01:44:57.669390: step: 512/531, loss: 0.7011368274688721 2023-01-23 01:44:58.797079: step: 516/531, loss: 0.15821295976638794 2023-01-23 01:44:59.924901: step: 520/531, loss: 0.02230234071612358 2023-01-23 01:45:01.056912: step: 524/531, loss: 0.006270122714340687 2023-01-23 01:45:02.203524: step: 528/531, loss: 0.09530754387378693 2023-01-23 01:45:03.334621: step: 532/531, loss: 0.1895495355129242 2023-01-23 01:45:04.463863: step: 536/531, loss: 0.002535009291023016 2023-01-23 01:45:05.618227: step: 540/531, loss: 0.13452157378196716 2023-01-23 01:45:06.746285: step: 544/531, loss: 0.030552292242646217 2023-01-23 01:45:07.853283: step: 548/531, loss: 0.01806468889117241 2023-01-23 01:45:08.955483: step: 552/531, loss: 0.04686622694134712 2023-01-23 01:45:10.062334: step: 556/531, loss: 0.01401600893586874 2023-01-23 01:45:11.188803: step: 560/531, loss: 0.010833073407411575 2023-01-23 01:45:12.369546: step: 564/531, loss: 0.022986507043242455 2023-01-23 01:45:13.501494: step: 568/531, loss: 0.16372212767601013 2023-01-23 01:45:14.628354: step: 572/531, loss: 0.037123680114746094 2023-01-23 01:45:15.772307: step: 576/531, loss: 0.056557562202215195 2023-01-23 01:45:16.905001: step: 580/531, loss: 0.02711467817425728 2023-01-23 01:45:18.031572: step: 584/531, loss: 0.002179050352424383 2023-01-23 01:45:19.177313: step: 588/531, loss: 0.01753520965576172 2023-01-23 01:45:20.276360: step: 592/531, loss: 0.013611030764877796 2023-01-23 01:45:21.391774: step: 596/531, loss: 0.005151939578354359 2023-01-23 01:45:22.519928: step: 600/531, loss: 0.026070812717080116 2023-01-23 01:45:23.638319: step: 604/531, loss: 0.0047456263564527035 2023-01-23 01:45:24.823192: step: 608/531, loss: 0.09499035030603409 2023-01-23 01:45:25.929106: step: 612/531, loss: 0.039414893835783005 2023-01-23 01:45:27.031244: step: 616/531, loss: 0.08239135891199112 2023-01-23 01:45:28.147011: step: 620/531, loss: 0.02663116529583931 2023-01-23 01:45:29.264163: step: 624/531, loss: 0.06980671733617783 2023-01-23 01:45:30.386678: step: 628/531, loss: 0.15307196974754333 2023-01-23 01:45:31.530617: step: 632/531, loss: 0.08188334107398987 2023-01-23 01:45:32.638439: step: 636/531, loss: 0.05046458542346954 2023-01-23 01:45:33.780648: step: 640/531, loss: 0.007607633247971535 2023-01-23 01:45:34.895698: step: 644/531, loss: 0.005102729890495539 2023-01-23 01:45:36.037832: step: 648/531, loss: 0.024686718359589577 2023-01-23 01:45:37.143711: step: 652/531, loss: 0.023961687460541725 2023-01-23 01:45:38.249140: step: 656/531, loss: 0.06716085970401764 2023-01-23 01:45:39.363106: step: 660/531, loss: 0.0819934830069542 2023-01-23 01:45:40.491306: step: 664/531, loss: 0.10636787116527557 2023-01-23 01:45:41.616923: step: 668/531, loss: 0.03016195259988308 2023-01-23 01:45:42.727869: step: 672/531, loss: 0.02282133139669895 2023-01-23 01:45:43.873991: step: 676/531, loss: 0.01348643284291029 2023-01-23 01:45:45.045327: step: 680/531, loss: 0.08069805800914764 2023-01-23 01:45:46.149826: step: 684/531, loss: 0.01345887128263712 2023-01-23 01:45:47.298656: step: 688/531, loss: 0.021828461438417435 2023-01-23 01:45:48.413852: step: 692/531, loss: 0.008779001422226429 2023-01-23 01:45:49.554306: step: 696/531, loss: 0.03604163974523544 2023-01-23 01:45:50.663691: step: 700/531, loss: 0.0004502296505961567 2023-01-23 01:45:51.761453: step: 704/531, loss: 0.026473237201571465 2023-01-23 01:45:52.879826: step: 708/531, loss: 0.14528588950634003 2023-01-23 01:45:53.981650: step: 712/531, loss: 0.005115794949233532 2023-01-23 01:45:55.117921: step: 716/531, loss: 0.04945025593042374 2023-01-23 01:45:56.240875: step: 720/531, loss: 0.00409355154260993 2023-01-23 01:45:57.351575: step: 724/531, loss: 0.113672636449337 2023-01-23 01:45:58.494845: step: 728/531, loss: 0.020709609612822533 2023-01-23 01:45:59.653542: step: 732/531, loss: 0.0019387244246900082 2023-01-23 01:46:00.788782: step: 736/531, loss: 0.10363052040338516 2023-01-23 01:46:01.929667: step: 740/531, loss: 0.0933326706290245 2023-01-23 01:46:03.084254: step: 744/531, loss: 0.05928096920251846 2023-01-23 01:46:04.221546: step: 748/531, loss: 0.050866417586803436 2023-01-23 01:46:05.327239: step: 752/531, loss: 0.04574146121740341 2023-01-23 01:46:06.465663: step: 756/531, loss: 0.002296352293342352 2023-01-23 01:46:07.580604: step: 760/531, loss: 0.010215855203568935 2023-01-23 01:46:08.708278: step: 764/531, loss: 0.0519988052546978 2023-01-23 01:46:09.841650: step: 768/531, loss: 0.06496325135231018 2023-01-23 01:46:10.938286: step: 772/531, loss: 0.0005624771001748741 2023-01-23 01:46:12.109107: step: 776/531, loss: 6.814003427280113e-05 2023-01-23 01:46:13.277540: step: 780/531, loss: 1.06647527217865 2023-01-23 01:46:14.404021: step: 784/531, loss: 0.11282125115394592 2023-01-23 01:46:15.555638: step: 788/531, loss: 0.04966096952557564 2023-01-23 01:46:16.668227: step: 792/531, loss: 0.053189992904663086 2023-01-23 01:46:17.810946: step: 796/531, loss: 0.08382201194763184 2023-01-23 01:46:18.934480: step: 800/531, loss: 0.017506027594208717 2023-01-23 01:46:20.069301: step: 804/531, loss: 0.030462075024843216 2023-01-23 01:46:21.170195: step: 808/531, loss: 0.14633405208587646 2023-01-23 01:46:22.299183: step: 812/531, loss: 0.006342947483062744 2023-01-23 01:46:23.414195: step: 816/531, loss: 0.022717952728271484 2023-01-23 01:46:24.507696: step: 820/531, loss: 0.06884551048278809 2023-01-23 01:46:25.630648: step: 824/531, loss: 0.0024641992058604956 2023-01-23 01:46:26.755286: step: 828/531, loss: 0.02037644386291504 2023-01-23 01:46:27.849704: step: 832/531, loss: 0.0428725965321064 2023-01-23 01:46:28.975936: step: 836/531, loss: 0.0067348480224609375 2023-01-23 01:46:30.071178: step: 840/531, loss: 0.02822294272482395 2023-01-23 01:46:31.228927: step: 844/531, loss: 0.03486766666173935 2023-01-23 01:46:32.367205: step: 848/531, loss: 0.04856691509485245 2023-01-23 01:46:33.488557: step: 852/531, loss: 0.02101593092083931 2023-01-23 01:46:34.607746: step: 856/531, loss: 0.00337810511700809 2023-01-23 01:46:35.736218: step: 860/531, loss: 0.01625833660364151 2023-01-23 01:46:36.853506: step: 864/531, loss: 0.05743622034788132 2023-01-23 01:46:37.975191: step: 868/531, loss: 0.020027637481689453 2023-01-23 01:46:39.118679: step: 872/531, loss: 0.01802702061831951 2023-01-23 01:46:40.225399: step: 876/531, loss: 0.004216098692268133 2023-01-23 01:46:41.339349: step: 880/531, loss: 0.0009114265558309853 2023-01-23 01:46:42.454873: step: 884/531, loss: 0.0012843608856201172 2023-01-23 01:46:43.601578: step: 888/531, loss: 0.0017143726581707597 2023-01-23 01:46:44.744780: step: 892/531, loss: 0.0466671958565712 2023-01-23 01:46:45.861354: step: 896/531, loss: 0.0147438058629632 2023-01-23 01:46:46.965911: step: 900/531, loss: 0.030875110998749733 2023-01-23 01:46:48.092831: step: 904/531, loss: 0.0057347300462424755 2023-01-23 01:46:49.205206: step: 908/531, loss: 0.0002476215304341167 2023-01-23 01:46:50.324698: step: 912/531, loss: 0.1649744063615799 2023-01-23 01:46:51.457281: step: 916/531, loss: 0.007166576571762562 2023-01-23 01:46:52.598448: step: 920/531, loss: 0.043772414326667786 2023-01-23 01:46:53.703918: step: 924/531, loss: 0.045702554285526276 2023-01-23 01:46:54.808897: step: 928/531, loss: 0.01107478141784668 2023-01-23 01:46:55.911377: step: 932/531, loss: 0.03981318324804306 2023-01-23 01:46:57.016122: step: 936/531, loss: 0.0013188362354412675 2023-01-23 01:46:58.127398: step: 940/531, loss: 0.017920970916748047 2023-01-23 01:46:59.226899: step: 944/531, loss: 0.006014728918671608 2023-01-23 01:47:00.342386: step: 948/531, loss: 0.02703695371747017 2023-01-23 01:47:01.470484: step: 952/531, loss: 0.03177695348858833 2023-01-23 01:47:02.577970: step: 956/531, loss: 0.002156543778255582 2023-01-23 01:47:03.685619: step: 960/531, loss: 0.03146219253540039 2023-01-23 01:47:04.809705: step: 964/531, loss: 0.13395065069198608 2023-01-23 01:47:05.949289: step: 968/531, loss: 0.028921127319335938 2023-01-23 01:47:07.088972: step: 972/531, loss: 0.011514521203935146 2023-01-23 01:47:08.224204: step: 976/531, loss: 0.029919244349002838 2023-01-23 01:47:09.336679: step: 980/531, loss: 0.012060356326401234 2023-01-23 01:47:10.503648: step: 984/531, loss: 0.0004733085515908897 2023-01-23 01:47:11.646711: step: 988/531, loss: 0.06068840250372887 2023-01-23 01:47:12.787770: step: 992/531, loss: 0.03981971740722656 2023-01-23 01:47:13.902933: step: 996/531, loss: 0.02660827711224556 2023-01-23 01:47:15.004468: step: 1000/531, loss: 0.04644432291388512 2023-01-23 01:47:16.121477: step: 1004/531, loss: 0.037888504564762115 2023-01-23 01:47:17.276536: step: 1008/531, loss: 0.017255783081054688 2023-01-23 01:47:18.421528: step: 1012/531, loss: 0.027460671961307526 2023-01-23 01:47:19.550493: step: 1016/531, loss: 0.02686481550335884 2023-01-23 01:47:20.678452: step: 1020/531, loss: 0.014597893692553043 2023-01-23 01:47:21.803401: step: 1024/531, loss: 0.1491573452949524 2023-01-23 01:47:22.916299: step: 1028/531, loss: 0.03290205076336861 2023-01-23 01:47:24.022207: step: 1032/531, loss: 0.06254692375659943 2023-01-23 01:47:25.149988: step: 1036/531, loss: 0.12329020351171494 2023-01-23 01:47:26.282856: step: 1040/531, loss: 0.019033242017030716 2023-01-23 01:47:27.401255: step: 1044/531, loss: 0.03615732118487358 2023-01-23 01:47:28.527367: step: 1048/531, loss: 0.35037410259246826 2023-01-23 01:47:29.644928: step: 1052/531, loss: 0.02108621597290039 2023-01-23 01:47:30.770004: step: 1056/531, loss: 0.013318061828613281 2023-01-23 01:47:31.908675: step: 1060/531, loss: 0.029770758002996445 2023-01-23 01:47:33.034783: step: 1064/531, loss: 0.9443221092224121 2023-01-23 01:47:34.164970: step: 1068/531, loss: 0.03894639015197754 2023-01-23 01:47:35.315224: step: 1072/531, loss: 0.018570804968476295 2023-01-23 01:47:36.466957: step: 1076/531, loss: 0.27470168471336365 2023-01-23 01:47:37.565118: step: 1080/531, loss: 0.035878945142030716 2023-01-23 01:47:38.675300: step: 1084/531, loss: 0.04985857009887695 2023-01-23 01:47:39.789066: step: 1088/531, loss: 0.04064450040459633 2023-01-23 01:47:40.916959: step: 1092/531, loss: 0.0366833433508873 2023-01-23 01:47:42.032326: step: 1096/531, loss: 0.018360327929258347 2023-01-23 01:47:43.147768: step: 1100/531, loss: 0.0041765691712498665 2023-01-23 01:47:44.295784: step: 1104/531, loss: 0.17107433080673218 2023-01-23 01:47:45.428078: step: 1108/531, loss: 0.0321531780064106 2023-01-23 01:47:46.567532: step: 1112/531, loss: 0.1062128096818924 2023-01-23 01:47:47.727337: step: 1116/531, loss: 0.08963050693273544 2023-01-23 01:47:48.859543: step: 1120/531, loss: 0.004250717349350452 2023-01-23 01:47:49.977299: step: 1124/531, loss: 0.004715165589004755 2023-01-23 01:47:51.089142: step: 1128/531, loss: 0.03357705846428871 2023-01-23 01:47:52.224755: step: 1132/531, loss: 0.062082864344120026 2023-01-23 01:47:53.333788: step: 1136/531, loss: 0.009208345785737038 2023-01-23 01:47:54.483219: step: 1140/531, loss: 0.0702420249581337 2023-01-23 01:47:55.590015: step: 1144/531, loss: 0.003314399626106024 2023-01-23 01:47:56.700670: step: 1148/531, loss: 0.06663379818201065 2023-01-23 01:47:57.823900: step: 1152/531, loss: 0.016517065465450287 2023-01-23 01:47:58.943090: step: 1156/531, loss: 0.03861271217465401 2023-01-23 01:48:00.051434: step: 1160/531, loss: 0.006167793646454811 2023-01-23 01:48:01.191050: step: 1164/531, loss: 0.040250446647405624 2023-01-23 01:48:02.302872: step: 1168/531, loss: 0.033430956304073334 2023-01-23 01:48:03.417979: step: 1172/531, loss: 0.05916253849864006 2023-01-23 01:48:04.515928: step: 1176/531, loss: 0.004074668977409601 2023-01-23 01:48:05.663898: step: 1180/531, loss: 0.04831714555621147 2023-01-23 01:48:06.780094: step: 1184/531, loss: 0.07649173587560654 2023-01-23 01:48:07.925907: step: 1188/531, loss: 0.03734779357910156 2023-01-23 01:48:09.035451: step: 1192/531, loss: 0.061502739787101746 2023-01-23 01:48:10.160844: step: 1196/531, loss: 0.02992735058069229 2023-01-23 01:48:11.291385: step: 1200/531, loss: 0.04019127041101456 2023-01-23 01:48:12.410750: step: 1204/531, loss: 0.030754853039979935 2023-01-23 01:48:13.550073: step: 1208/531, loss: 0.009923934936523438 2023-01-23 01:48:14.725553: step: 1212/531, loss: 0.002894782926887274 2023-01-23 01:48:15.836974: step: 1216/531, loss: 0.055169105529785156 2023-01-23 01:48:16.963570: step: 1220/531, loss: 0.011361360549926758 2023-01-23 01:48:18.126528: step: 1224/531, loss: 0.018093539401888847 2023-01-23 01:48:19.282045: step: 1228/531, loss: 0.19842030107975006 2023-01-23 01:48:20.405353: step: 1232/531, loss: 0.009335804730653763 2023-01-23 01:48:21.547810: step: 1236/531, loss: 0.009306621737778187 2023-01-23 01:48:22.680441: step: 1240/531, loss: 0.0045677185989916325 2023-01-23 01:48:23.812879: step: 1244/531, loss: 0.10325918346643448 2023-01-23 01:48:24.936468: step: 1248/531, loss: 0.03712606430053711 2023-01-23 01:48:26.097479: step: 1252/531, loss: 0.0374598503112793 2023-01-23 01:48:27.250141: step: 1256/531, loss: 0.040802858769893646 2023-01-23 01:48:28.349932: step: 1260/531, loss: 0.017055416479706764 2023-01-23 01:48:29.476000: step: 1264/531, loss: 0.0450318343937397 2023-01-23 01:48:30.628352: step: 1268/531, loss: 0.0011103630531579256 2023-01-23 01:48:31.741858: step: 1272/531, loss: 0.017812538892030716 2023-01-23 01:48:32.862125: step: 1276/531, loss: 0.06374111771583557 2023-01-23 01:48:33.985781: step: 1280/531, loss: 0.06870412826538086 2023-01-23 01:48:35.089206: step: 1284/531, loss: 0.0009293556213378906 2023-01-23 01:48:36.193416: step: 1288/531, loss: 0.036452438682317734 2023-01-23 01:48:37.332682: step: 1292/531, loss: 0.16871647536754608 2023-01-23 01:48:38.458033: step: 1296/531, loss: 0.003957176115363836 2023-01-23 01:48:39.577752: step: 1300/531, loss: 0.027661800384521484 2023-01-23 01:48:40.731013: step: 1304/531, loss: 0.02792814001441002 2023-01-23 01:48:41.874978: step: 1308/531, loss: 0.03123478963971138 2023-01-23 01:48:42.982225: step: 1312/531, loss: 0.017508696764707565 2023-01-23 01:48:44.114805: step: 1316/531, loss: 0.04125919193029404 2023-01-23 01:48:45.194057: step: 1320/531, loss: 0.001905298326164484 2023-01-23 01:48:46.328353: step: 1324/531, loss: 0.06160926818847656 2023-01-23 01:48:47.426933: step: 1328/531, loss: 0.043677713721990585 2023-01-23 01:48:48.537216: step: 1332/531, loss: 0.06846804916858673 2023-01-23 01:48:49.667286: step: 1336/531, loss: 0.03277721628546715 2023-01-23 01:48:50.806821: step: 1340/531, loss: 0.022041525691747665 2023-01-23 01:48:51.927888: step: 1344/531, loss: 0.1207401230931282 2023-01-23 01:48:53.055679: step: 1348/531, loss: 0.026521779596805573 2023-01-23 01:48:54.203479: step: 1352/531, loss: 0.0005992889637127519 2023-01-23 01:48:55.343505: step: 1356/531, loss: 0.03614196926355362 2023-01-23 01:48:56.457418: step: 1360/531, loss: 0.00382575998082757 2023-01-23 01:48:57.570677: step: 1364/531, loss: 0.04001769796013832 2023-01-23 01:48:58.697190: step: 1368/531, loss: 0.002760982606559992 2023-01-23 01:48:59.878185: step: 1372/531, loss: 0.07854413986206055 2023-01-23 01:49:01.020614: step: 1376/531, loss: 0.045685771852731705 2023-01-23 01:49:02.136825: step: 1380/531, loss: 0.014691734686493874 2023-01-23 01:49:03.260143: step: 1384/531, loss: 0.05945310741662979 2023-01-23 01:49:04.358941: step: 1388/531, loss: 0.07075748592615128 2023-01-23 01:49:05.472849: step: 1392/531, loss: 0.07616720348596573 2023-01-23 01:49:06.595513: step: 1396/531, loss: 0.02531905099749565 2023-01-23 01:49:07.712152: step: 1400/531, loss: 0.48334693908691406 2023-01-23 01:49:08.839658: step: 1404/531, loss: 0.012325716204941273 2023-01-23 01:49:09.956210: step: 1408/531, loss: 0.008347606286406517 2023-01-23 01:49:11.088868: step: 1412/531, loss: 0.004252273123711348 2023-01-23 01:49:12.220624: step: 1416/531, loss: 0.0286438949406147 2023-01-23 01:49:13.334628: step: 1420/531, loss: 0.042565345764160156 2023-01-23 01:49:14.428519: step: 1424/531, loss: 0.006934451870620251 2023-01-23 01:49:15.571129: step: 1428/531, loss: 0.06696367263793945 2023-01-23 01:49:16.703653: step: 1432/531, loss: 0.01389846857637167 2023-01-23 01:49:17.844112: step: 1436/531, loss: 0.012557793408632278 2023-01-23 01:49:18.968981: step: 1440/531, loss: 0.006352805998176336 2023-01-23 01:49:20.094273: step: 1444/531, loss: 0.06379108130931854 2023-01-23 01:49:21.231513: step: 1448/531, loss: 0.051972150802612305 2023-01-23 01:49:22.334478: step: 1452/531, loss: 0.05340252071619034 2023-01-23 01:49:23.514131: step: 1456/531, loss: 0.07439880073070526 2023-01-23 01:49:24.651922: step: 1460/531, loss: 0.039669036865234375 2023-01-23 01:49:25.790120: step: 1464/531, loss: 0.04984131082892418 2023-01-23 01:49:26.900178: step: 1468/531, loss: 0.0027611255645751953 2023-01-23 01:49:28.057657: step: 1472/531, loss: 0.02588520012795925 2023-01-23 01:49:29.169682: step: 1476/531, loss: 0.0415501594543457 2023-01-23 01:49:30.271349: step: 1480/531, loss: 0.012391758151352406 2023-01-23 01:49:31.400762: step: 1484/531, loss: 0.07968264073133469 2023-01-23 01:49:32.511236: step: 1488/531, loss: 0.15641066431999207 2023-01-23 01:49:33.628539: step: 1492/531, loss: 0.09167805314064026 2023-01-23 01:49:34.750044: step: 1496/531, loss: 0.00865640677511692 2023-01-23 01:49:35.861227: step: 1500/531, loss: 0.0176880843937397 2023-01-23 01:49:36.978887: step: 1504/531, loss: 0.13483381271362305 2023-01-23 01:49:38.106823: step: 1508/531, loss: 0.00993204116821289 2023-01-23 01:49:39.238520: step: 1512/531, loss: 0.05060825124382973 2023-01-23 01:49:40.347146: step: 1516/531, loss: 0.03865347057580948 2023-01-23 01:49:41.439238: step: 1520/531, loss: 1.930574655532837 2023-01-23 01:49:42.559019: step: 1524/531, loss: 0.012262584641575813 2023-01-23 01:49:43.666282: step: 1528/531, loss: 0.02584686316549778 2023-01-23 01:49:44.803227: step: 1532/531, loss: 0.15780936181545258 2023-01-23 01:49:45.944545: step: 1536/531, loss: 0.0006384849548339844 2023-01-23 01:49:47.082109: step: 1540/531, loss: 0.12546196579933167 2023-01-23 01:49:48.182845: step: 1544/531, loss: 0.026625968515872955 2023-01-23 01:49:49.309825: step: 1548/531, loss: 0.00419273367151618 2023-01-23 01:49:50.451599: step: 1552/531, loss: 0.004591727163642645 2023-01-23 01:49:51.580547: step: 1556/531, loss: 0.03328075259923935 2023-01-23 01:49:52.715158: step: 1560/531, loss: 0.008942950516939163 2023-01-23 01:49:53.821495: step: 1564/531, loss: 0.09567117691040039 2023-01-23 01:49:54.942857: step: 1568/531, loss: 0.07516632974147797 2023-01-23 01:49:56.041657: step: 1572/531, loss: 0.008822822943329811 2023-01-23 01:49:57.177477: step: 1576/531, loss: 0.011792660690844059 2023-01-23 01:49:58.300230: step: 1580/531, loss: 0.04632125049829483 2023-01-23 01:49:59.448156: step: 1584/531, loss: 0.03399048373103142 2023-01-23 01:50:00.611561: step: 1588/531, loss: 0.05758104473352432 2023-01-23 01:50:01.733345: step: 1592/531, loss: 0.4461662173271179 2023-01-23 01:50:02.835435: step: 1596/531, loss: 0.1877305954694748 2023-01-23 01:50:03.970994: step: 1600/531, loss: 0.012031174264848232 2023-01-23 01:50:05.084125: step: 1604/531, loss: 0.05492992699146271 2023-01-23 01:50:06.211803: step: 1608/531, loss: 0.0587894432246685 2023-01-23 01:50:07.324710: step: 1612/531, loss: 0.02501382678747177 2023-01-23 01:50:08.416239: step: 1616/531, loss: 0.003394985105842352 2023-01-23 01:50:09.551166: step: 1620/531, loss: 0.10439453274011612 2023-01-23 01:50:10.699231: step: 1624/531, loss: 0.00922689400613308 2023-01-23 01:50:11.831067: step: 1628/531, loss: 0.007351208012551069 2023-01-23 01:50:12.962247: step: 1632/531, loss: 0.0024701834190636873 2023-01-23 01:50:14.089989: step: 1636/531, loss: 0.09640665352344513 2023-01-23 01:50:15.257600: step: 1640/531, loss: 0.056862831115722656 2023-01-23 01:50:16.361213: step: 1644/531, loss: 0.15328189730644226 2023-01-23 01:50:17.497003: step: 1648/531, loss: 0.052858710289001465 2023-01-23 01:50:18.632428: step: 1652/531, loss: 0.04492168501019478 2023-01-23 01:50:19.751634: step: 1656/531, loss: 0.00041294097900390625 2023-01-23 01:50:20.872056: step: 1660/531, loss: 0.009245586581528187 2023-01-23 01:50:21.983403: step: 1664/531, loss: 0.08038024604320526 2023-01-23 01:50:23.098028: step: 1668/531, loss: 0.014008236117661 2023-01-23 01:50:24.228011: step: 1672/531, loss: 0.06971540302038193 2023-01-23 01:50:25.344926: step: 1676/531, loss: 0.00341033935546875 2023-01-23 01:50:26.490223: step: 1680/531, loss: 0.000506496406160295 2023-01-23 01:50:27.631709: step: 1684/531, loss: 0.030138205736875534 2023-01-23 01:50:28.765025: step: 1688/531, loss: 0.1150185614824295 2023-01-23 01:50:29.872424: step: 1692/531, loss: 0.055745795369148254 2023-01-23 01:50:30.987226: step: 1696/531, loss: 0.004923009779304266 2023-01-23 01:50:32.118388: step: 1700/531, loss: 0.00832443218678236 2023-01-23 01:50:33.253559: step: 1704/531, loss: 0.06748123466968536 2023-01-23 01:50:34.395509: step: 1708/531, loss: 0.02942085452377796 2023-01-23 01:50:35.538700: step: 1712/531, loss: 0.04230327904224396 2023-01-23 01:50:36.664853: step: 1716/531, loss: 0.007904243655502796 2023-01-23 01:50:37.803444: step: 1720/531, loss: 0.008150482550263405 2023-01-23 01:50:38.933640: step: 1724/531, loss: 0.017395783215761185 2023-01-23 01:50:40.057677: step: 1728/531, loss: 0.13328370451927185 2023-01-23 01:50:41.215014: step: 1732/531, loss: 0.009827995672821999 2023-01-23 01:50:42.348432: step: 1736/531, loss: 0.002374362898990512 2023-01-23 01:50:43.486412: step: 1740/531, loss: 0.1539279967546463 2023-01-23 01:50:44.613565: step: 1744/531, loss: 0.015444987453520298 2023-01-23 01:50:45.737027: step: 1748/531, loss: 0.0011411190498620272 2023-01-23 01:50:46.879180: step: 1752/531, loss: 0.022669566795229912 2023-01-23 01:50:48.002195: step: 1756/531, loss: 0.036907292902469635 2023-01-23 01:50:49.124072: step: 1760/531, loss: 0.001556396484375 2023-01-23 01:50:50.247654: step: 1764/531, loss: 0.15257439017295837 2023-01-23 01:50:51.383547: step: 1768/531, loss: 0.05498996004462242 2023-01-23 01:50:52.522488: step: 1772/531, loss: 0.04025459289550781 2023-01-23 01:50:53.660396: step: 1776/531, loss: 0.06224804371595383 2023-01-23 01:50:54.809572: step: 1780/531, loss: 0.04794476181268692 2023-01-23 01:50:55.917536: step: 1784/531, loss: 0.07785062491893768 2023-01-23 01:50:57.067329: step: 1788/531, loss: 0.046160224825143814 2023-01-23 01:50:58.189513: step: 1792/531, loss: 0.02850637398660183 2023-01-23 01:50:59.317206: step: 1796/531, loss: 0.06720910221338272 2023-01-23 01:51:00.458813: step: 1800/531, loss: 0.035118963569402695 2023-01-23 01:51:01.585083: step: 1804/531, loss: 0.03721027448773384 2023-01-23 01:51:02.725055: step: 1808/531, loss: 0.06834888458251953 2023-01-23 01:51:03.849734: step: 1812/531, loss: 0.06516013294458389 2023-01-23 01:51:04.975745: step: 1816/531, loss: 0.4123293161392212 2023-01-23 01:51:06.112110: step: 1820/531, loss: 0.05392913892865181 2023-01-23 01:51:07.254869: step: 1824/531, loss: 0.0010543824173510075 2023-01-23 01:51:08.381736: step: 1828/531, loss: 0.03467082977294922 2023-01-23 01:51:09.520209: step: 1832/531, loss: 0.007027435582131147 2023-01-23 01:51:10.654159: step: 1836/531, loss: 1.952021837234497 2023-01-23 01:51:11.779883: step: 1840/531, loss: 0.003164959140121937 2023-01-23 01:51:12.929960: step: 1844/531, loss: 0.05413856729865074 2023-01-23 01:51:14.054067: step: 1848/531, loss: 0.014834022149443626 2023-01-23 01:51:15.174226: step: 1852/531, loss: 0.027669716626405716 2023-01-23 01:51:16.290950: step: 1856/531, loss: 0.01954364776611328 2023-01-23 01:51:17.426916: step: 1860/531, loss: 0.12610477209091187 2023-01-23 01:51:18.497499: step: 1864/531, loss: 0.05196359008550644 2023-01-23 01:51:19.639352: step: 1868/531, loss: 0.03356170654296875 2023-01-23 01:51:20.776308: step: 1872/531, loss: 0.010723471641540527 2023-01-23 01:51:21.916421: step: 1876/531, loss: 0.03133249282836914 2023-01-23 01:51:23.038666: step: 1880/531, loss: 0.11030636727809906 2023-01-23 01:51:24.137775: step: 1884/531, loss: 0.3563876152038574 2023-01-23 01:51:25.243444: step: 1888/531, loss: 0.040427327156066895 2023-01-23 01:51:26.368439: step: 1892/531, loss: 0.11936015635728836 2023-01-23 01:51:27.500523: step: 1896/531, loss: 0.20236405730247498 2023-01-23 01:51:28.626267: step: 1900/531, loss: 0.009434044361114502 2023-01-23 01:51:29.762574: step: 1904/531, loss: 0.051358990371227264 2023-01-23 01:51:30.908635: step: 1908/531, loss: 0.08827072381973267 2023-01-23 01:51:32.025792: step: 1912/531, loss: 0.008693313226103783 2023-01-23 01:51:33.148841: step: 1916/531, loss: 0.0628931075334549 2023-01-23 01:51:34.259310: step: 1920/531, loss: 0.03799457475543022 2023-01-23 01:51:35.394238: step: 1924/531, loss: 0.1904619187116623 2023-01-23 01:51:36.543331: step: 1928/531, loss: 0.0058269500732421875 2023-01-23 01:51:37.682971: step: 1932/531, loss: 0.020396806299686432 2023-01-23 01:51:38.803788: step: 1936/531, loss: 0.033467911183834076 2023-01-23 01:51:39.951002: step: 1940/531, loss: 0.08446350693702698 2023-01-23 01:51:41.056314: step: 1944/531, loss: 0.014724064618349075 2023-01-23 01:51:42.195873: step: 1948/531, loss: 0.020745133981108665 2023-01-23 01:51:43.330813: step: 1952/531, loss: 0.08192062377929688 2023-01-23 01:51:44.431072: step: 1956/531, loss: 0.03562808036804199 2023-01-23 01:51:45.526926: step: 1960/531, loss: 0.0007377624278888106 2023-01-23 01:51:46.662283: step: 1964/531, loss: 0.12118415534496307 2023-01-23 01:51:47.763001: step: 1968/531, loss: 0.03675975650548935 2023-01-23 01:51:48.887015: step: 1972/531, loss: 0.13955183327198029 2023-01-23 01:51:50.009049: step: 1976/531, loss: 0.007500886917114258 2023-01-23 01:51:51.117021: step: 1980/531, loss: 0.09517045319080353 2023-01-23 01:51:52.240464: step: 1984/531, loss: 0.019038772210478783 2023-01-23 01:51:53.373034: step: 1988/531, loss: 0.05047626420855522 2023-01-23 01:51:54.485438: step: 1992/531, loss: 0.0011599541176110506 2023-01-23 01:51:55.590137: step: 1996/531, loss: 0.001729345298372209 2023-01-23 01:51:56.706080: step: 2000/531, loss: 0.03404216840863228 2023-01-23 01:51:57.810211: step: 2004/531, loss: 0.009194565005600452 2023-01-23 01:51:58.934787: step: 2008/531, loss: 0.002778434893116355 2023-01-23 01:52:00.092220: step: 2012/531, loss: 0.02047252655029297 2023-01-23 01:52:01.216222: step: 2016/531, loss: 0.11225318908691406 2023-01-23 01:52:02.327345: step: 2020/531, loss: 0.0116150863468647 2023-01-23 01:52:03.445036: step: 2024/531, loss: 0.022969722747802734 2023-01-23 01:52:04.587098: step: 2028/531, loss: 0.06064276769757271 2023-01-23 01:52:05.690063: step: 2032/531, loss: 0.05816650390625 2023-01-23 01:52:06.801461: step: 2036/531, loss: 0.025210261344909668 2023-01-23 01:52:07.917643: step: 2040/531, loss: 0.05956621468067169 2023-01-23 01:52:09.059121: step: 2044/531, loss: 0.0003121137560810894 2023-01-23 01:52:10.230215: step: 2048/531, loss: 0.03138890117406845 2023-01-23 01:52:11.330452: step: 2052/531, loss: 0.0028836012352257967 2023-01-23 01:52:12.467541: step: 2056/531, loss: 0.05264396592974663 2023-01-23 01:52:13.578537: step: 2060/531, loss: 0.08131809532642365 2023-01-23 01:52:14.707130: step: 2064/531, loss: 0.050402261316776276 2023-01-23 01:52:15.830115: step: 2068/531, loss: 0.04946918413043022 2023-01-23 01:52:16.950423: step: 2072/531, loss: 0.08591398596763611 2023-01-23 01:52:18.079796: step: 2076/531, loss: 0.025300025939941406 2023-01-23 01:52:19.184848: step: 2080/531, loss: 0.001347446464933455 2023-01-23 01:52:20.301428: step: 2084/531, loss: 0.007817077450454235 2023-01-23 01:52:21.409892: step: 2088/531, loss: 0.04361867904663086 2023-01-23 01:52:22.527376: step: 2092/531, loss: 0.007647156715393066 2023-01-23 01:52:23.642861: step: 2096/531, loss: 0.0014867782592773438 2023-01-23 01:52:24.760298: step: 2100/531, loss: 0.05043201521039009 2023-01-23 01:52:25.859580: step: 2104/531, loss: 0.014550304971635342 2023-01-23 01:52:26.975886: step: 2108/531, loss: 0.00855245627462864 2023-01-23 01:52:28.113721: step: 2112/531, loss: 0.11540967226028442 2023-01-23 01:52:29.229893: step: 2116/531, loss: 0.02228555828332901 2023-01-23 01:52:30.351163: step: 2120/531, loss: 0.001979923341423273 2023-01-23 01:52:31.443518: step: 2124/531, loss: 0.00046296120854094625 ================================================== Loss: 0.060 -------------------- Dev: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Test: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Chinese: {'event': {'p': 0.55, 'r': 0.8148148148148148, 'f1': 0.6567164179104479}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Russian: {'event': {'p': 0.4878048780487805, 'r': 0.5555555555555556, 'f1': 0.5194805194805195}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} New best korean model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 14 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:53:16.280312: step: 4/531, loss: 0.1862301230430603 2023-01-23 01:53:17.384470: step: 8/531, loss: 0.03784370422363281 2023-01-23 01:53:18.478326: step: 12/531, loss: 0.0001312255917582661 2023-01-23 01:53:19.591614: step: 16/531, loss: 0.005646228790283203 2023-01-23 01:53:20.706557: step: 20/531, loss: 0.0002513647195883095 2023-01-23 01:53:21.833127: step: 24/531, loss: 0.05716114118695259 2023-01-23 01:53:22.945226: step: 28/531, loss: 0.008765793405473232 2023-01-23 01:53:24.066120: step: 32/531, loss: 0.08409976959228516 2023-01-23 01:53:25.185961: step: 36/531, loss: 0.0046863555908203125 2023-01-23 01:53:26.325037: step: 40/531, loss: 0.038842394948005676 2023-01-23 01:53:27.445344: step: 44/531, loss: 0.03919363021850586 2023-01-23 01:53:28.542649: step: 48/531, loss: 0.02682790718972683 2023-01-23 01:53:29.677917: step: 52/531, loss: 0.010151100344955921 2023-01-23 01:53:30.792344: step: 56/531, loss: 0.007682991214096546 2023-01-23 01:53:31.919122: step: 60/531, loss: 0.0046291351318359375 2023-01-23 01:53:33.104044: step: 64/531, loss: 0.035105325281620026 2023-01-23 01:53:34.224856: step: 68/531, loss: 0.13061365485191345 2023-01-23 01:53:35.358102: step: 72/531, loss: 0.11213311553001404 2023-01-23 01:53:36.501520: step: 76/531, loss: 0.045264437794685364 2023-01-23 01:53:37.628323: step: 80/531, loss: 0.014772068709135056 2023-01-23 01:53:38.807855: step: 84/531, loss: 0.013162041082978249 2023-01-23 01:53:39.956433: step: 88/531, loss: 0.011248065158724785 2023-01-23 01:53:41.099343: step: 92/531, loss: 0.02269775979220867 2023-01-23 01:53:42.216258: step: 96/531, loss: 0.0015083790058270097 2023-01-23 01:53:43.346416: step: 100/531, loss: 0.0008245468488894403 2023-01-23 01:53:44.446476: step: 104/531, loss: 0.007290299516171217 2023-01-23 01:53:45.591549: step: 108/531, loss: 0.05163154751062393 2023-01-23 01:53:46.701897: step: 112/531, loss: 0.04657135158777237 2023-01-23 01:53:47.825811: step: 116/531, loss: 0.08790865540504456 2023-01-23 01:53:48.960856: step: 120/531, loss: 0.08179759979248047 2023-01-23 01:53:50.075223: step: 124/531, loss: 0.10913095623254776 2023-01-23 01:53:51.213224: step: 128/531, loss: 0.02264099009335041 2023-01-23 01:53:52.334326: step: 132/531, loss: 0.007409191224724054 2023-01-23 01:53:53.478450: step: 136/531, loss: 0.13832911849021912 2023-01-23 01:53:54.628376: step: 140/531, loss: 0.04629650339484215 2023-01-23 01:53:55.770767: step: 144/531, loss: 0.004909324459731579 2023-01-23 01:53:56.899015: step: 148/531, loss: 0.017298413440585136 2023-01-23 01:53:58.016332: step: 152/531, loss: 0.0005558490520343184 2023-01-23 01:53:59.146411: step: 156/531, loss: 0.5295258164405823 2023-01-23 01:54:00.263885: step: 160/531, loss: 0.12251396477222443 2023-01-23 01:54:01.364603: step: 164/531, loss: 0.07428783923387527 2023-01-23 01:54:02.479774: step: 168/531, loss: 0.03385968133807182 2023-01-23 01:54:03.611193: step: 172/531, loss: 0.03745222091674805 2023-01-23 01:54:04.713159: step: 176/531, loss: 0.017795635387301445 2023-01-23 01:54:05.846566: step: 180/531, loss: 0.0130653390660882 2023-01-23 01:54:06.957893: step: 184/531, loss: 0.023881245404481888 2023-01-23 01:54:08.095586: step: 188/531, loss: 0.03159074857831001 2023-01-23 01:54:09.236181: step: 192/531, loss: 0.040824417024850845 2023-01-23 01:54:10.406414: step: 196/531, loss: 0.019517231732606888 2023-01-23 01:54:11.519851: step: 200/531, loss: 0.03165016323328018 2023-01-23 01:54:12.664112: step: 204/531, loss: 0.040506936609745026 2023-01-23 01:54:13.774874: step: 208/531, loss: 0.0012264729011803865 2023-01-23 01:54:14.881399: step: 212/531, loss: 0.012244414538145065 2023-01-23 01:54:15.983011: step: 216/531, loss: 0.0150572769343853 2023-01-23 01:54:17.102889: step: 220/531, loss: 0.035454630851745605 2023-01-23 01:54:18.227713: step: 224/531, loss: 0.014980555512011051 2023-01-23 01:54:19.364246: step: 228/531, loss: 0.02538886107504368 2023-01-23 01:54:20.495571: step: 232/531, loss: 0.049399856477975845 2023-01-23 01:54:21.622585: step: 236/531, loss: 0.05609875172376633 2023-01-23 01:54:22.742547: step: 240/531, loss: 0.022008182480931282 2023-01-23 01:54:23.889905: step: 244/531, loss: 0.026206398382782936 2023-01-23 01:54:25.018283: step: 248/531, loss: 0.016715049743652344 2023-01-23 01:54:26.142316: step: 252/531, loss: 0.06509499251842499 2023-01-23 01:54:27.292456: step: 256/531, loss: 0.031041527166962624 2023-01-23 01:54:28.406060: step: 260/531, loss: 0.0007351874955929816 2023-01-23 01:54:29.522631: step: 264/531, loss: 0.0032892227172851562 2023-01-23 01:54:30.632230: step: 268/531, loss: 0.01634979248046875 2023-01-23 01:54:31.748282: step: 272/531, loss: 0.008093548007309437 2023-01-23 01:54:32.901144: step: 276/531, loss: 0.02385406568646431 2023-01-23 01:54:34.020817: step: 280/531, loss: 0.012347603216767311 2023-01-23 01:54:35.143678: step: 284/531, loss: 0.009841347113251686 2023-01-23 01:54:36.260084: step: 288/531, loss: 0.0010462283389642835 2023-01-23 01:54:37.397242: step: 292/531, loss: 0.024961184710264206 2023-01-23 01:54:38.511700: step: 296/531, loss: 0.0011052133049815893 2023-01-23 01:54:39.669663: step: 300/531, loss: 0.002892398973926902 2023-01-23 01:54:40.794873: step: 304/531, loss: 0.012855720706284046 2023-01-23 01:54:41.940513: step: 308/531, loss: 0.011732339859008789 2023-01-23 01:54:43.059954: step: 312/531, loss: 0.006576919462531805 2023-01-23 01:54:44.165705: step: 316/531, loss: 0.0011031150352209806 2023-01-23 01:54:45.298647: step: 320/531, loss: 0.004845285322517157 2023-01-23 01:54:46.429749: step: 324/531, loss: 0.07610197365283966 2023-01-23 01:54:47.553022: step: 328/531, loss: 0.017949486151337624 2023-01-23 01:54:48.647226: step: 332/531, loss: 0.009623909369111061 2023-01-23 01:54:49.748861: step: 336/531, loss: 0.01307301502674818 2023-01-23 01:54:50.866669: step: 340/531, loss: 0.19303159415721893 2023-01-23 01:54:51.984011: step: 344/531, loss: 0.05890091508626938 2023-01-23 01:54:53.104242: step: 348/531, loss: 0.025438690558075905 2023-01-23 01:54:54.241016: step: 352/531, loss: 0.04901723936200142 2023-01-23 01:54:55.352647: step: 356/531, loss: 0.00047936441842466593 2023-01-23 01:54:56.514656: step: 360/531, loss: 0.21760545670986176 2023-01-23 01:54:57.677853: step: 364/531, loss: 0.0007781505701132119 2023-01-23 01:54:58.796482: step: 368/531, loss: 0.013126516714692116 2023-01-23 01:54:59.935635: step: 372/531, loss: 0.5478253960609436 2023-01-23 01:55:01.059922: step: 376/531, loss: 0.009425354190170765 2023-01-23 01:55:02.213849: step: 380/531, loss: 0.05175914987921715 2023-01-23 01:55:03.347951: step: 384/531, loss: 0.020055677741765976 2023-01-23 01:55:04.457468: step: 388/531, loss: 0.0038026811089366674 2023-01-23 01:55:05.596512: step: 392/531, loss: 0.031622789800167084 2023-01-23 01:55:06.698467: step: 396/531, loss: 0.014116574078798294 2023-01-23 01:55:07.801721: step: 400/531, loss: 0.010621833615005016 2023-01-23 01:55:08.916239: step: 404/531, loss: 0.007936477661132812 2023-01-23 01:55:10.051776: step: 408/531, loss: 0.012959766201674938 2023-01-23 01:55:11.144950: step: 412/531, loss: 0.02434062957763672 2023-01-23 01:55:12.273836: step: 416/531, loss: 0.034087181091308594 2023-01-23 01:55:13.391882: step: 420/531, loss: 0.029158784076571465 2023-01-23 01:55:14.532579: step: 424/531, loss: 0.07322239875793457 2023-01-23 01:55:15.663385: step: 428/531, loss: 0.012482023797929287 2023-01-23 01:55:16.836800: step: 432/531, loss: 0.036682795733213425 2023-01-23 01:55:17.953361: step: 436/531, loss: 0.013294410891830921 2023-01-23 01:55:19.091784: step: 440/531, loss: 0.012344742193818092 2023-01-23 01:55:20.204687: step: 444/531, loss: 0.4608679711818695 2023-01-23 01:55:21.329835: step: 448/531, loss: 0.0045067789033055305 2023-01-23 01:55:22.466209: step: 452/531, loss: 0.004168844316154718 2023-01-23 01:55:23.593892: step: 456/531, loss: 0.12389784306287766 2023-01-23 01:55:24.729388: step: 460/531, loss: 0.1371532380580902 2023-01-23 01:55:25.830212: step: 464/531, loss: 0.0007326602935791016 2023-01-23 01:55:26.944354: step: 468/531, loss: 0.007001447957009077 2023-01-23 01:55:28.075829: step: 472/531, loss: 0.013240909203886986 2023-01-23 01:55:29.192826: step: 476/531, loss: 0.013994836248457432 2023-01-23 01:55:30.289454: step: 480/531, loss: 0.020643234252929688 2023-01-23 01:55:31.408401: step: 484/531, loss: 0.018093157559633255 2023-01-23 01:55:32.526935: step: 488/531, loss: 0.0035318376030772924 2023-01-23 01:55:33.652226: step: 492/531, loss: 0.07602062076330185 2023-01-23 01:55:34.776413: step: 496/531, loss: 0.09833059459924698 2023-01-23 01:55:35.953355: step: 500/531, loss: 0.07449188828468323 2023-01-23 01:55:37.095243: step: 504/531, loss: 0.31471386551856995 2023-01-23 01:55:38.208806: step: 508/531, loss: 0.004318046849220991 2023-01-23 01:55:39.345809: step: 512/531, loss: 0.0010810852982103825 2023-01-23 01:55:40.449130: step: 516/531, loss: 0.049601174890995026 2023-01-23 01:55:41.553910: step: 520/531, loss: 0.006776904687285423 2023-01-23 01:55:42.696335: step: 524/531, loss: 0.00989379920065403 2023-01-23 01:55:43.806111: step: 528/531, loss: 0.0021483423188328743 2023-01-23 01:55:44.956347: step: 532/531, loss: 0.03578987345099449 2023-01-23 01:55:46.092571: step: 536/531, loss: 0.01175546646118164 2023-01-23 01:55:47.203769: step: 540/531, loss: 0.02257823944091797 2023-01-23 01:55:48.314696: step: 544/531, loss: 0.004276084713637829 2023-01-23 01:55:49.449543: step: 548/531, loss: 0.004549980163574219 2023-01-23 01:55:50.583700: step: 552/531, loss: 0.0710611343383789 2023-01-23 01:55:51.714022: step: 556/531, loss: 0.026451872661709785 2023-01-23 01:55:52.862080: step: 560/531, loss: 0.009717846289277077 2023-01-23 01:55:53.984972: step: 564/531, loss: 0.057230375707149506 2023-01-23 01:55:55.097406: step: 568/531, loss: 0.046820640563964844 2023-01-23 01:55:56.272010: step: 572/531, loss: 0.03182820975780487 2023-01-23 01:55:57.387666: step: 576/531, loss: 0.009929275140166283 2023-01-23 01:55:58.488159: step: 580/531, loss: 0.037971243262290955 2023-01-23 01:55:59.618668: step: 584/531, loss: 0.024565409868955612 2023-01-23 01:56:00.747846: step: 588/531, loss: 0.016748523339629173 2023-01-23 01:56:01.896050: step: 592/531, loss: 0.1666925996541977 2023-01-23 01:56:03.033525: step: 596/531, loss: 0.004126167856156826 2023-01-23 01:56:04.148051: step: 600/531, loss: 0.07454414665699005 2023-01-23 01:56:05.264513: step: 604/531, loss: 0.01575784757733345 2023-01-23 01:56:06.421974: step: 608/531, loss: 0.022087670862674713 2023-01-23 01:56:07.569838: step: 612/531, loss: 0.0070129395462572575 2023-01-23 01:56:08.686590: step: 616/531, loss: 0.0024917633272707462 2023-01-23 01:56:09.823373: step: 620/531, loss: 0.005737114232033491 2023-01-23 01:56:10.942806: step: 624/531, loss: 0.013321781530976295 2023-01-23 01:56:12.077725: step: 628/531, loss: 0.01276307087391615 2023-01-23 01:56:13.189006: step: 632/531, loss: 0.02257242240011692 2023-01-23 01:56:14.332869: step: 636/531, loss: 0.016452312469482422 2023-01-23 01:56:15.481128: step: 640/531, loss: 0.08585662394762039 2023-01-23 01:56:16.586981: step: 644/531, loss: 0.01965313032269478 2023-01-23 01:56:17.716346: step: 648/531, loss: 0.05277479439973831 2023-01-23 01:56:18.852337: step: 652/531, loss: 0.025231044739484787 2023-01-23 01:56:19.952429: step: 656/531, loss: 0.0006463051540777087 2023-01-23 01:56:21.076017: step: 660/531, loss: 0.027299975976347923 2023-01-23 01:56:22.220707: step: 664/531, loss: 0.003398561617359519 2023-01-23 01:56:23.337529: step: 668/531, loss: 0.01680312119424343 2023-01-23 01:56:24.464239: step: 672/531, loss: 0.008103752508759499 2023-01-23 01:56:25.573846: step: 676/531, loss: 0.0008049011812545359 2023-01-23 01:56:26.706635: step: 680/531, loss: 0.11100788414478302 2023-01-23 01:56:27.827756: step: 684/531, loss: 0.0529652014374733 2023-01-23 01:56:28.941558: step: 688/531, loss: 0.013436508364975452 2023-01-23 01:56:30.117913: step: 692/531, loss: 0.002786874771118164 2023-01-23 01:56:31.257945: step: 696/531, loss: 0.06669292598962784 2023-01-23 01:56:32.414533: step: 700/531, loss: 0.022096633911132812 2023-01-23 01:56:33.548172: step: 704/531, loss: 0.030561160296201706 2023-01-23 01:56:34.660352: step: 708/531, loss: 0.032714083790779114 2023-01-23 01:56:35.781167: step: 712/531, loss: 0.0023665428161621094 2023-01-23 01:56:36.921117: step: 716/531, loss: 0.024378299713134766 2023-01-23 01:56:38.035007: step: 720/531, loss: 0.04078855365514755 2023-01-23 01:56:39.155477: step: 724/531, loss: 0.055265042930841446 2023-01-23 01:56:40.291665: step: 728/531, loss: 0.003731346223503351 2023-01-23 01:56:41.404500: step: 732/531, loss: 0.0012718201614916325 2023-01-23 01:56:42.542580: step: 736/531, loss: 0.0411074161529541 2023-01-23 01:56:43.658079: step: 740/531, loss: 0.0008794784662313759 2023-01-23 01:56:44.769304: step: 744/531, loss: 0.011661100201308727 2023-01-23 01:56:45.898240: step: 748/531, loss: 0.05096759647130966 2023-01-23 01:56:47.041027: step: 752/531, loss: 0.028563691303133965 2023-01-23 01:56:48.171492: step: 756/531, loss: 0.0282013900578022 2023-01-23 01:56:49.294772: step: 760/531, loss: 0.05781393125653267 2023-01-23 01:56:50.410740: step: 764/531, loss: 0.03658151626586914 2023-01-23 01:56:51.573091: step: 768/531, loss: 0.02291424386203289 2023-01-23 01:56:52.681468: step: 772/531, loss: 0.06625284999608994 2023-01-23 01:56:53.838583: step: 776/531, loss: 0.05680666118860245 2023-01-23 01:56:54.974850: step: 780/531, loss: 0.0570257231593132 2023-01-23 01:56:56.140663: step: 784/531, loss: 0.13939595222473145 2023-01-23 01:56:57.263472: step: 788/531, loss: 0.0068877218291163445 2023-01-23 01:56:58.363582: step: 792/531, loss: 0.00025081634521484375 2023-01-23 01:56:59.521291: step: 796/531, loss: 0.01507587544620037 2023-01-23 01:57:00.670551: step: 800/531, loss: 0.0033044815063476562 2023-01-23 01:57:01.785400: step: 804/531, loss: 0.05417518690228462 2023-01-23 01:57:02.904100: step: 808/531, loss: 0.0012336730724200606 2023-01-23 01:57:04.021688: step: 812/531, loss: 0.008313750848174095 2023-01-23 01:57:05.143631: step: 816/531, loss: 0.0005862236139364541 2023-01-23 01:57:06.258912: step: 820/531, loss: 0.026700783520936966 2023-01-23 01:57:07.409905: step: 824/531, loss: 0.01470184326171875 2023-01-23 01:57:08.516395: step: 828/531, loss: 0.00031981465872377157 2023-01-23 01:57:09.617264: step: 832/531, loss: 0.005941391456872225 2023-01-23 01:57:10.741730: step: 836/531, loss: 0.019776344299316406 2023-01-23 01:57:11.888832: step: 840/531, loss: 0.018439341336488724 2023-01-23 01:57:12.999480: step: 844/531, loss: 0.050380755215883255 2023-01-23 01:57:14.118159: step: 848/531, loss: 0.06971149891614914 2023-01-23 01:57:15.214135: step: 852/531, loss: 0.0756916031241417 2023-01-23 01:57:16.321440: step: 856/531, loss: 0.00580630311742425 2023-01-23 01:57:17.447619: step: 860/531, loss: 0.02053380012512207 2023-01-23 01:57:18.562637: step: 864/531, loss: 0.3435155749320984 2023-01-23 01:57:19.698634: step: 868/531, loss: 0.04386739432811737 2023-01-23 01:57:20.834140: step: 872/531, loss: 0.0003262519894633442 2023-01-23 01:57:21.934645: step: 876/531, loss: 0.013667965307831764 2023-01-23 01:57:23.044320: step: 880/531, loss: 0.018120886757969856 2023-01-23 01:57:24.151909: step: 884/531, loss: 0.0036937713157385588 2023-01-23 01:57:25.277282: step: 888/531, loss: 0.03840336948633194 2023-01-23 01:57:26.376595: step: 892/531, loss: 0.07822151482105255 2023-01-23 01:57:27.483289: step: 896/531, loss: 0.00067310337908566 2023-01-23 01:57:28.600713: step: 900/531, loss: 0.012234210968017578 2023-01-23 01:57:29.710914: step: 904/531, loss: 0.05988330766558647 2023-01-23 01:57:30.880460: step: 908/531, loss: 0.03524265065789223 2023-01-23 01:57:31.986989: step: 912/531, loss: 0.033766794949769974 2023-01-23 01:57:33.131851: step: 916/531, loss: 0.03330698236823082 2023-01-23 01:57:34.258807: step: 920/531, loss: 0.010120200924575329 2023-01-23 01:57:35.377167: step: 924/531, loss: 0.0026053430046886206 2023-01-23 01:57:36.501724: step: 928/531, loss: 0.025379037484526634 2023-01-23 01:57:37.624915: step: 932/531, loss: 0.05553865432739258 2023-01-23 01:57:38.750476: step: 936/531, loss: 0.0015384674770757556 2023-01-23 01:57:39.863816: step: 940/531, loss: 0.23223046958446503 2023-01-23 01:57:40.967906: step: 944/531, loss: 0.08330072462558746 2023-01-23 01:57:42.117797: step: 948/531, loss: 0.054022595286369324 2023-01-23 01:57:43.287722: step: 952/531, loss: 0.0498262420296669 2023-01-23 01:57:44.417166: step: 956/531, loss: 0.017794419080018997 2023-01-23 01:57:45.544069: step: 960/531, loss: 0.033425092697143555 2023-01-23 01:57:46.640443: step: 964/531, loss: 0.04660363495349884 2023-01-23 01:57:47.782544: step: 968/531, loss: 0.05200345814228058 2023-01-23 01:57:48.922797: step: 972/531, loss: 0.022538568824529648 2023-01-23 01:57:50.039137: step: 976/531, loss: 0.09205341339111328 2023-01-23 01:57:51.149567: step: 980/531, loss: 0.06967110931873322 2023-01-23 01:57:52.273178: step: 984/531, loss: 0.10652492195367813 2023-01-23 01:57:53.383623: step: 988/531, loss: 0.027251340448856354 2023-01-23 01:57:54.511419: step: 992/531, loss: 0.04955878108739853 2023-01-23 01:57:55.645982: step: 996/531, loss: 0.0011944533325731754 2023-01-23 01:57:56.764599: step: 1000/531, loss: 0.031995583325624466 2023-01-23 01:57:57.880377: step: 1004/531, loss: 0.008238459005951881 2023-01-23 01:57:59.014449: step: 1008/531, loss: 0.07810582965612411 2023-01-23 01:58:00.132342: step: 1012/531, loss: 0.003288650419563055 2023-01-23 01:58:01.246108: step: 1016/531, loss: 0.013560676015913486 2023-01-23 01:58:02.367345: step: 1020/531, loss: 0.012579631991684437 2023-01-23 01:58:03.508022: step: 1024/531, loss: 0.040154457092285156 2023-01-23 01:58:04.641734: step: 1028/531, loss: 0.025856781750917435 2023-01-23 01:58:05.776299: step: 1032/531, loss: 0.03823661804199219 2023-01-23 01:58:06.919827: step: 1036/531, loss: 0.08782166987657547 2023-01-23 01:58:08.059145: step: 1040/531, loss: 0.05056295543909073 2023-01-23 01:58:09.204558: step: 1044/531, loss: 0.01663675345480442 2023-01-23 01:58:10.323910: step: 1048/531, loss: 0.00223712925799191 2023-01-23 01:58:11.468895: step: 1052/531, loss: 0.06099538877606392 2023-01-23 01:58:12.603622: step: 1056/531, loss: 0.00437583914026618 2023-01-23 01:58:13.739962: step: 1060/531, loss: 0.018632125109434128 2023-01-23 01:58:14.861402: step: 1064/531, loss: 0.014079093933105469 2023-01-23 01:58:15.973460: step: 1068/531, loss: 0.15993347764015198 2023-01-23 01:58:17.099967: step: 1072/531, loss: 0.047476619482040405 2023-01-23 01:58:18.204896: step: 1076/531, loss: 0.06234131008386612 2023-01-23 01:58:19.340053: step: 1080/531, loss: 0.002766543533653021 2023-01-23 01:58:20.498316: step: 1084/531, loss: 0.10883007943630219 2023-01-23 01:58:21.629699: step: 1088/531, loss: 0.020653629675507545 2023-01-23 01:58:22.774276: step: 1092/531, loss: 0.009971046820282936 2023-01-23 01:58:23.898747: step: 1096/531, loss: 0.028186893090605736 2023-01-23 01:58:25.031371: step: 1100/531, loss: 0.000462436699308455 2023-01-23 01:58:26.152548: step: 1104/531, loss: 0.0054476261138916016 2023-01-23 01:58:27.284160: step: 1108/531, loss: 0.02154712565243244 2023-01-23 01:58:28.422220: step: 1112/531, loss: 0.017615413293242455 2023-01-23 01:58:29.539011: step: 1116/531, loss: 0.008717519231140614 2023-01-23 01:58:30.669682: step: 1120/531, loss: 0.4149778187274933 2023-01-23 01:58:31.787437: step: 1124/531, loss: 0.02145233191549778 2023-01-23 01:58:32.929062: step: 1128/531, loss: 0.025443363934755325 2023-01-23 01:58:34.033026: step: 1132/531, loss: 0.04935042932629585 2023-01-23 01:58:35.147287: step: 1136/531, loss: 0.011218786239624023 2023-01-23 01:58:36.247565: step: 1140/531, loss: 0.024904191493988037 2023-01-23 01:58:37.364242: step: 1144/531, loss: 0.010661697015166283 2023-01-23 01:58:38.493455: step: 1148/531, loss: 0.022760486230254173 2023-01-23 01:58:39.633891: step: 1152/531, loss: 0.06754837185144424 2023-01-23 01:58:40.772824: step: 1156/531, loss: 1.0814257860183716 2023-01-23 01:58:41.915719: step: 1160/531, loss: 0.039374351501464844 2023-01-23 01:58:43.042786: step: 1164/531, loss: 0.014348221942782402 2023-01-23 01:58:44.173838: step: 1168/531, loss: 0.025826169177889824 2023-01-23 01:58:45.296153: step: 1172/531, loss: 0.09615497291088104 2023-01-23 01:58:46.414463: step: 1176/531, loss: 0.024796580895781517 2023-01-23 01:58:47.522973: step: 1180/531, loss: 0.040757086127996445 2023-01-23 01:58:48.638169: step: 1184/531, loss: 0.03466777876019478 2023-01-23 01:58:49.762369: step: 1188/531, loss: 0.013293511234223843 2023-01-23 01:58:50.893118: step: 1192/531, loss: 0.0038774493150413036 2023-01-23 01:58:52.024864: step: 1196/531, loss: 0.02116389386355877 2023-01-23 01:58:53.152639: step: 1200/531, loss: 0.011808204464614391 2023-01-23 01:58:54.282000: step: 1204/531, loss: 0.010515451431274414 2023-01-23 01:58:55.409158: step: 1208/531, loss: 0.04086942970752716 2023-01-23 01:58:56.522310: step: 1212/531, loss: 0.022040080279111862 2023-01-23 01:58:57.640039: step: 1216/531, loss: 0.0037306786980479956 2023-01-23 01:58:58.751411: step: 1220/531, loss: 0.029467202723026276 2023-01-23 01:58:59.866221: step: 1224/531, loss: 0.04621296003460884 2023-01-23 01:59:01.009879: step: 1228/531, loss: 0.0851193368434906 2023-01-23 01:59:02.145127: step: 1232/531, loss: 0.11668644845485687 2023-01-23 01:59:03.256009: step: 1236/531, loss: 0.07100821286439896 2023-01-23 01:59:04.375301: step: 1240/531, loss: 0.011982440948486328 2023-01-23 01:59:05.516028: step: 1244/531, loss: 0.05227496474981308 2023-01-23 01:59:06.650444: step: 1248/531, loss: 0.02551898919045925 2023-01-23 01:59:07.769071: step: 1252/531, loss: 0.0015578271122649312 2023-01-23 01:59:08.893234: step: 1256/531, loss: 0.04874400794506073 2023-01-23 01:59:10.030337: step: 1260/531, loss: 0.07148857414722443 2023-01-23 01:59:11.163003: step: 1264/531, loss: 0.018061388283967972 2023-01-23 01:59:12.274950: step: 1268/531, loss: 0.0036325454711914062 2023-01-23 01:59:13.365532: step: 1272/531, loss: 0.014343929477036 2023-01-23 01:59:14.497542: step: 1276/531, loss: 0.0010961532825604081 2023-01-23 01:59:15.632583: step: 1280/531, loss: 0.00013599396334029734 2023-01-23 01:59:16.757392: step: 1284/531, loss: 0.03537940979003906 2023-01-23 01:59:17.902749: step: 1288/531, loss: 0.00506248464807868 2023-01-23 01:59:19.013532: step: 1292/531, loss: 1.215659499168396 2023-01-23 01:59:20.121841: step: 1296/531, loss: 0.000366640102583915 2023-01-23 01:59:21.249046: step: 1300/531, loss: 0.011195016093552113 2023-01-23 01:59:22.356405: step: 1304/531, loss: 0.012623120099306107 2023-01-23 01:59:23.474870: step: 1308/531, loss: 0.0011882781982421875 2023-01-23 01:59:24.600236: step: 1312/531, loss: 0.0003638267517089844 2023-01-23 01:59:25.726352: step: 1316/531, loss: 0.01762266270816326 2023-01-23 01:59:26.853687: step: 1320/531, loss: 0.051662541925907135 2023-01-23 01:59:27.981224: step: 1324/531, loss: 0.058111391961574554 2023-01-23 01:59:29.143435: step: 1328/531, loss: 0.02050752565264702 2023-01-23 01:59:30.283263: step: 1332/531, loss: 0.1445474624633789 2023-01-23 01:59:31.395780: step: 1336/531, loss: 0.03603782504796982 2023-01-23 01:59:32.555609: step: 1340/531, loss: 0.030504370108246803 2023-01-23 01:59:33.670322: step: 1344/531, loss: 0.003217410994693637 2023-01-23 01:59:34.796950: step: 1348/531, loss: 0.004818725399672985 2023-01-23 01:59:35.903059: step: 1352/531, loss: 0.02006988599896431 2023-01-23 01:59:37.023312: step: 1356/531, loss: 0.007939147762954235 2023-01-23 01:59:38.116989: step: 1360/531, loss: 0.06282472610473633 2023-01-23 01:59:39.262958: step: 1364/531, loss: 0.029506146907806396 2023-01-23 01:59:40.361429: step: 1368/531, loss: 0.024111174046993256 2023-01-23 01:59:41.483682: step: 1372/531, loss: 0.020360087975859642 2023-01-23 01:59:42.625451: step: 1376/531, loss: 0.050803374499082565 2023-01-23 01:59:43.736958: step: 1380/531, loss: 0.03791790083050728 2023-01-23 01:59:44.886803: step: 1384/531, loss: 0.0006681442027911544 2023-01-23 01:59:46.006776: step: 1388/531, loss: 0.050988007336854935 2023-01-23 01:59:47.096519: step: 1392/531, loss: 0.055880263447761536 2023-01-23 01:59:48.237347: step: 1396/531, loss: 0.022161483764648438 2023-01-23 01:59:49.365811: step: 1400/531, loss: 0.23915615677833557 2023-01-23 01:59:50.481989: step: 1404/531, loss: 0.135502427816391 2023-01-23 01:59:51.584463: step: 1408/531, loss: 0.03633251413702965 2023-01-23 01:59:52.700547: step: 1412/531, loss: 0.025826549157500267 2023-01-23 01:59:53.809898: step: 1416/531, loss: 0.050603579729795456 2023-01-23 01:59:54.913464: step: 1420/531, loss: 0.025902509689331055 2023-01-23 01:59:56.045316: step: 1424/531, loss: 0.030132580548524857 2023-01-23 01:59:57.158152: step: 1428/531, loss: 0.027805522084236145 2023-01-23 01:59:58.287937: step: 1432/531, loss: 0.07184963673353195 2023-01-23 01:59:59.427660: step: 1436/531, loss: 0.29466742277145386 2023-01-23 02:00:00.612360: step: 1440/531, loss: 0.022292232140898705 2023-01-23 02:00:01.737893: step: 1444/531, loss: 0.03944540023803711 2023-01-23 02:00:02.855129: step: 1448/531, loss: 0.036181118339300156 2023-01-23 02:00:03.996139: step: 1452/531, loss: 0.01693439483642578 2023-01-23 02:00:05.138219: step: 1456/531, loss: 0.04897880554199219 2023-01-23 02:00:06.267142: step: 1460/531, loss: 0.007752609439194202 2023-01-23 02:00:07.385243: step: 1464/531, loss: 0.12552089989185333 2023-01-23 02:00:08.531179: step: 1468/531, loss: 0.017290210351347923 2023-01-23 02:00:09.623005: step: 1472/531, loss: 0.016783738508820534 2023-01-23 02:00:10.764164: step: 1476/531, loss: 0.17274942994117737 2023-01-23 02:00:11.888549: step: 1480/531, loss: 0.016007710248231888 2023-01-23 02:00:12.999896: step: 1484/531, loss: 0.005820083431899548 2023-01-23 02:00:14.148174: step: 1488/531, loss: 0.05739889293909073 2023-01-23 02:00:15.268441: step: 1492/531, loss: 0.09078197926282883 2023-01-23 02:00:16.371596: step: 1496/531, loss: 0.024973679333925247 2023-01-23 02:00:17.493016: step: 1500/531, loss: 0.1017889529466629 2023-01-23 02:00:18.610941: step: 1504/531, loss: 0.057268332690000534 2023-01-23 02:00:19.761802: step: 1508/531, loss: 0.002047634217888117 2023-01-23 02:00:20.862503: step: 1512/531, loss: 0.03226194530725479 2023-01-23 02:00:21.984199: step: 1516/531, loss: 0.022810840979218483 2023-01-23 02:00:23.117138: step: 1520/531, loss: 0.03953952714800835 2023-01-23 02:00:24.237179: step: 1524/531, loss: 0.015357017517089844 2023-01-23 02:00:25.387419: step: 1528/531, loss: 0.002298593521118164 2023-01-23 02:00:26.493932: step: 1532/531, loss: 0.0018476486438885331 2023-01-23 02:00:27.624890: step: 1536/531, loss: 0.18824882805347443 2023-01-23 02:00:28.750639: step: 1540/531, loss: 0.0353180393576622 2023-01-23 02:00:29.839862: step: 1544/531, loss: 0.0010485172970220447 2023-01-23 02:00:30.959892: step: 1548/531, loss: 0.019481182098388672 2023-01-23 02:00:32.098652: step: 1552/531, loss: 0.00020837783813476562 2023-01-23 02:00:33.221519: step: 1556/531, loss: 0.11943188309669495 2023-01-23 02:00:34.354222: step: 1560/531, loss: 0.020120905712246895 2023-01-23 02:00:35.503884: step: 1564/531, loss: 0.03205251693725586 2023-01-23 02:00:36.607592: step: 1568/531, loss: 0.01980610005557537 2023-01-23 02:00:37.730027: step: 1572/531, loss: 0.044774819165468216 2023-01-23 02:00:38.842113: step: 1576/531, loss: 0.030015183612704277 2023-01-23 02:00:39.976852: step: 1580/531, loss: 0.0004452705616131425 2023-01-23 02:00:41.097659: step: 1584/531, loss: 0.024567699059844017 2023-01-23 02:00:42.230303: step: 1588/531, loss: 0.0020863532554358244 2023-01-23 02:00:43.377435: step: 1592/531, loss: 0.13089504837989807 2023-01-23 02:00:44.505409: step: 1596/531, loss: 0.04525146260857582 2023-01-23 02:00:45.628307: step: 1600/531, loss: 0.0004418373282533139 2023-01-23 02:00:46.739163: step: 1604/531, loss: 0.0019834041595458984 2023-01-23 02:00:47.895527: step: 1608/531, loss: 0.02595672756433487 2023-01-23 02:00:49.007460: step: 1612/531, loss: 0.001940059708431363 2023-01-23 02:00:50.142119: step: 1616/531, loss: 0.004043579567223787 2023-01-23 02:00:51.266091: step: 1620/531, loss: 0.0028438568115234375 2023-01-23 02:00:52.394467: step: 1624/531, loss: 0.04108934476971626 2023-01-23 02:00:53.528314: step: 1628/531, loss: 0.04288950189948082 2023-01-23 02:00:54.646643: step: 1632/531, loss: 0.015517997555434704 2023-01-23 02:00:55.796440: step: 1636/531, loss: 0.062074847519397736 2023-01-23 02:00:56.918322: step: 1640/531, loss: 0.07899054884910583 2023-01-23 02:00:58.031635: step: 1644/531, loss: 0.00017976760864257812 2023-01-23 02:00:59.146049: step: 1648/531, loss: 0.012569713406264782 2023-01-23 02:01:00.266663: step: 1652/531, loss: 0.06321258842945099 2023-01-23 02:01:01.381761: step: 1656/531, loss: 0.005524730775505304 2023-01-23 02:01:02.512819: step: 1660/531, loss: 0.018469810485839844 2023-01-23 02:01:03.662238: step: 1664/531, loss: 0.1979011595249176 2023-01-23 02:01:04.786603: step: 1668/531, loss: 0.007015610113739967 2023-01-23 02:01:05.884605: step: 1672/531, loss: 0.055603645741939545 2023-01-23 02:01:07.042402: step: 1676/531, loss: 0.04525108262896538 2023-01-23 02:01:08.180925: step: 1680/531, loss: 0.13455677032470703 2023-01-23 02:01:09.299201: step: 1684/531, loss: 0.004053878597915173 2023-01-23 02:01:10.431587: step: 1688/531, loss: 0.10332445800304413 2023-01-23 02:01:11.557711: step: 1692/531, loss: 0.01812267303466797 2023-01-23 02:01:12.695339: step: 1696/531, loss: 0.007991981692612171 2023-01-23 02:01:13.808568: step: 1700/531, loss: 0.032444193959236145 2023-01-23 02:01:14.946583: step: 1704/531, loss: 0.03719387203454971 2023-01-23 02:01:16.065413: step: 1708/531, loss: 0.045127201825380325 2023-01-23 02:01:17.179702: step: 1712/531, loss: 0.0045719146728515625 2023-01-23 02:01:18.310093: step: 1716/531, loss: 0.009588432498276234 2023-01-23 02:01:19.449167: step: 1720/531, loss: 0.09202108532190323 2023-01-23 02:01:20.578137: step: 1724/531, loss: 0.07666854560375214 2023-01-23 02:01:21.716484: step: 1728/531, loss: 0.003655386157333851 2023-01-23 02:01:22.873887: step: 1732/531, loss: 0.003893280168995261 2023-01-23 02:01:23.982145: step: 1736/531, loss: 0.02083878591656685 2023-01-23 02:01:25.129833: step: 1740/531, loss: 0.006782913580536842 2023-01-23 02:01:26.251415: step: 1744/531, loss: 0.011380195617675781 2023-01-23 02:01:27.403242: step: 1748/531, loss: 0.00877923984080553 2023-01-23 02:01:28.494585: step: 1752/531, loss: 0.0015205859672278166 2023-01-23 02:01:29.618091: step: 1756/531, loss: 0.039209939539432526 2023-01-23 02:01:30.744456: step: 1760/531, loss: 0.13390226662158966 2023-01-23 02:01:31.884291: step: 1764/531, loss: 0.040396787226200104 2023-01-23 02:01:32.992964: step: 1768/531, loss: 0.018426276743412018 2023-01-23 02:01:34.135464: step: 1772/531, loss: 0.03485436737537384 2023-01-23 02:01:35.275726: step: 1776/531, loss: 0.0892486572265625 2023-01-23 02:01:36.436119: step: 1780/531, loss: 0.3212425112724304 2023-01-23 02:01:37.590577: step: 1784/531, loss: 0.2764738202095032 2023-01-23 02:01:38.694700: step: 1788/531, loss: 0.014712715521454811 2023-01-23 02:01:39.807671: step: 1792/531, loss: 0.009042930789291859 2023-01-23 02:01:40.944209: step: 1796/531, loss: 0.05442119017243385 2023-01-23 02:01:42.098042: step: 1800/531, loss: 0.04811840131878853 2023-01-23 02:01:43.251691: step: 1804/531, loss: 0.020531199872493744 2023-01-23 02:01:44.372319: step: 1808/531, loss: 0.02749319188296795 2023-01-23 02:01:45.529392: step: 1812/531, loss: 0.005932045169174671 2023-01-23 02:01:46.653308: step: 1816/531, loss: 0.02140631712973118 2023-01-23 02:01:47.792845: step: 1820/531, loss: 0.045037463307380676 2023-01-23 02:01:48.898849: step: 1824/531, loss: 0.001821756362915039 2023-01-23 02:01:50.028833: step: 1828/531, loss: 0.026884650811553 2023-01-23 02:01:51.163652: step: 1832/531, loss: 0.1425512135028839 2023-01-23 02:01:52.320128: step: 1836/531, loss: 0.023569153621792793 2023-01-23 02:01:53.414817: step: 1840/531, loss: 0.021918583661317825 2023-01-23 02:01:54.541228: step: 1844/531, loss: 0.08577214181423187 2023-01-23 02:01:55.656404: step: 1848/531, loss: 0.025901317596435547 2023-01-23 02:01:56.802230: step: 1852/531, loss: 0.07033157348632812 2023-01-23 02:01:57.940390: step: 1856/531, loss: 0.011713599786162376 2023-01-23 02:01:59.085135: step: 1860/531, loss: 0.02896655909717083 2023-01-23 02:02:00.210287: step: 1864/531, loss: 0.08366508781909943 2023-01-23 02:02:01.336679: step: 1868/531, loss: 0.006102364975959063 2023-01-23 02:02:02.474152: step: 1872/531, loss: 0.007165670394897461 2023-01-23 02:02:03.591469: step: 1876/531, loss: 0.01891203038394451 2023-01-23 02:02:04.682482: step: 1880/531, loss: 0.008925354108214378 2023-01-23 02:02:05.808210: step: 1884/531, loss: 0.028061555698513985 2023-01-23 02:02:06.918793: step: 1888/531, loss: 0.020200157538056374 2023-01-23 02:02:08.026046: step: 1892/531, loss: 0.11156196147203445 2023-01-23 02:02:09.160734: step: 1896/531, loss: 0.018563270568847656 2023-01-23 02:02:10.298504: step: 1900/531, loss: 0.006429863162338734 2023-01-23 02:02:11.446772: step: 1904/531, loss: 0.02931070327758789 2023-01-23 02:02:12.564913: step: 1908/531, loss: 0.5480440258979797 2023-01-23 02:02:13.729501: step: 1912/531, loss: 0.009675979614257812 2023-01-23 02:02:14.862559: step: 1916/531, loss: 0.06420154869556427 2023-01-23 02:02:16.021379: step: 1920/531, loss: 0.18291626870632172 2023-01-23 02:02:17.148359: step: 1924/531, loss: 0.14034004509449005 2023-01-23 02:02:18.292242: step: 1928/531, loss: 0.008494758978486061 2023-01-23 02:02:19.412891: step: 1932/531, loss: 0.05042114108800888 2023-01-23 02:02:20.539320: step: 1936/531, loss: 0.006862545385956764 2023-01-23 02:02:21.708613: step: 1940/531, loss: 0.4576370120048523 2023-01-23 02:02:22.815104: step: 1944/531, loss: 0.026137925684452057 2023-01-23 02:02:23.937108: step: 1948/531, loss: 0.0004225253942422569 2023-01-23 02:02:25.084132: step: 1952/531, loss: 0.3686588406562805 2023-01-23 02:02:26.231555: step: 1956/531, loss: 0.0024030685890465975 2023-01-23 02:02:27.357267: step: 1960/531, loss: 0.0737682357430458 2023-01-23 02:02:28.495887: step: 1964/531, loss: 0.0015135766007006168 2023-01-23 02:02:29.615624: step: 1968/531, loss: 0.15094709396362305 2023-01-23 02:02:30.711775: step: 1972/531, loss: 0.014699364081025124 2023-01-23 02:02:31.847784: step: 1976/531, loss: 0.025695228949189186 2023-01-23 02:02:33.001398: step: 1980/531, loss: 0.13725097477436066 2023-01-23 02:02:34.130648: step: 1984/531, loss: 0.0015345573192462325 2023-01-23 02:02:35.240805: step: 1988/531, loss: 0.03997211903333664 2023-01-23 02:02:36.403432: step: 1992/531, loss: 0.04168253019452095 2023-01-23 02:02:37.499883: step: 1996/531, loss: 0.0008494377252645791 2023-01-23 02:02:38.623955: step: 2000/531, loss: 0.040341950953006744 2023-01-23 02:02:39.745383: step: 2004/531, loss: 0.035220399498939514 2023-01-23 02:02:40.867047: step: 2008/531, loss: 0.14874200522899628 2023-01-23 02:02:41.996182: step: 2012/531, loss: 0.03396720811724663 2023-01-23 02:02:43.153782: step: 2016/531, loss: 0.05444660037755966 2023-01-23 02:02:44.304281: step: 2020/531, loss: 0.016492079943418503 2023-01-23 02:02:45.415619: step: 2024/531, loss: 0.016028976067900658 2023-01-23 02:02:46.555761: step: 2028/531, loss: 0.005664634983986616 2023-01-23 02:02:47.676784: step: 2032/531, loss: 0.004986620042473078 2023-01-23 02:02:48.796890: step: 2036/531, loss: 0.016667796298861504 2023-01-23 02:02:49.936933: step: 2040/531, loss: 0.007364082615822554 2023-01-23 02:02:51.056107: step: 2044/531, loss: 0.017289161682128906 2023-01-23 02:02:52.150140: step: 2048/531, loss: 0.011373138055205345 2023-01-23 02:02:53.278779: step: 2052/531, loss: 0.03544750064611435 2023-01-23 02:02:54.378831: step: 2056/531, loss: 0.055375199764966965 2023-01-23 02:02:55.536711: step: 2060/531, loss: 0.005490303039550781 2023-01-23 02:02:56.666132: step: 2064/531, loss: 0.009616660885512829 2023-01-23 02:02:57.802432: step: 2068/531, loss: 0.007409477140754461 2023-01-23 02:02:58.940585: step: 2072/531, loss: 0.9842849969863892 2023-01-23 02:03:00.077555: step: 2076/531, loss: 0.042870618402957916 2023-01-23 02:03:01.191331: step: 2080/531, loss: 0.023309756070375443 2023-01-23 02:03:02.319684: step: 2084/531, loss: 0.06135749816894531 2023-01-23 02:03:03.417437: step: 2088/531, loss: 0.016766738146543503 2023-01-23 02:03:04.539997: step: 2092/531, loss: 0.007159137632697821 2023-01-23 02:03:05.663231: step: 2096/531, loss: 0.02228107489645481 2023-01-23 02:03:06.798344: step: 2100/531, loss: 0.0033518315758556128 2023-01-23 02:03:07.968650: step: 2104/531, loss: 0.06552505493164062 2023-01-23 02:03:09.138004: step: 2108/531, loss: 0.05066528171300888 2023-01-23 02:03:10.268416: step: 2112/531, loss: 0.024041462689638138 2023-01-23 02:03:11.369160: step: 2116/531, loss: 0.009274578653275967 2023-01-23 02:03:12.494235: step: 2120/531, loss: 0.8296244144439697 2023-01-23 02:03:13.656436: step: 2124/531, loss: 0.07043123245239258 ================================================== Loss: 0.051 -------------------- Dev: {'event': {'p': 0.5931520644511581, 'r': 0.7842876165113183, 'f1': 0.6754587155963302}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Test: {'event': {'p': 0.6328200192492781, 'r': 0.7841383422778772, 'f1': 0.7003994673768309}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Chinese: {'event': {'p': 0.5411764705882353, 'r': 0.8518518518518519, 'f1': 0.6618705035971222}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Korean: {'event': {'p': 0.6507936507936508, 'r': 0.6507936507936508, 'f1': 0.6507936507936508}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Russian: {'event': {'p': 0.42592592592592593, 'r': 0.6388888888888888, 'f1': 0.5111111111111111}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 15 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:03:53.810779: step: 4/531, loss: 0.07297387719154358 2023-01-23 02:03:54.950929: step: 8/531, loss: 0.013412857428193092 2023-01-23 02:03:56.074850: step: 12/531, loss: 0.015209197998046875 2023-01-23 02:03:57.185564: step: 16/531, loss: 0.03069281578063965 2023-01-23 02:03:58.300489: step: 20/531, loss: 0.01795821264386177 2023-01-23 02:03:59.435460: step: 24/531, loss: 0.002276897430419922 2023-01-23 02:04:00.542781: step: 28/531, loss: 0.018523408100008965 2023-01-23 02:04:01.659029: step: 32/531, loss: 0.0015638351906090975 2023-01-23 02:04:02.775605: step: 36/531, loss: 0.0687246322631836 2023-01-23 02:04:03.919243: step: 40/531, loss: 0.0008004188421182334 2023-01-23 02:04:05.037895: step: 44/531, loss: 0.0111115463078022 2023-01-23 02:04:06.194462: step: 48/531, loss: 0.03748369589447975 2023-01-23 02:04:07.362138: step: 52/531, loss: 0.006137276068329811 2023-01-23 02:04:08.492046: step: 56/531, loss: 0.037613775581121445 2023-01-23 02:04:09.638529: step: 60/531, loss: 0.006903409957885742 2023-01-23 02:04:10.748356: step: 64/531, loss: 0.004289579577744007 2023-01-23 02:04:11.888753: step: 68/531, loss: 0.03767652437090874 2023-01-23 02:04:13.020673: step: 72/531, loss: 0.031062128022313118 2023-01-23 02:04:14.145254: step: 76/531, loss: 0.1479346752166748 2023-01-23 02:04:15.251318: step: 80/531, loss: 0.042601823806762695 2023-01-23 02:04:16.374923: step: 84/531, loss: 0.020467758178710938 2023-01-23 02:04:17.484885: step: 88/531, loss: 0.008285988122224808 2023-01-23 02:04:18.634333: step: 92/531, loss: 0.04952993988990784 2023-01-23 02:04:19.751282: step: 96/531, loss: 0.006884193979203701 2023-01-23 02:04:20.857085: step: 100/531, loss: 0.0018438339466229081 2023-01-23 02:04:22.014961: step: 104/531, loss: 0.030244065448641777 2023-01-23 02:04:23.130237: step: 108/531, loss: 0.003540134523063898 2023-01-23 02:04:24.240135: step: 112/531, loss: 0.042096786201000214 2023-01-23 02:04:25.344178: step: 116/531, loss: 0.05252685397863388 2023-01-23 02:04:26.469632: step: 120/531, loss: 0.012580298818647861 2023-01-23 02:04:27.625872: step: 124/531, loss: 0.0137855289503932 2023-01-23 02:04:28.745775: step: 128/531, loss: 0.013552093878388405 2023-01-23 02:04:29.852572: step: 132/531, loss: 0.0006563186761923134 2023-01-23 02:04:30.963731: step: 136/531, loss: 0.015245438553392887 2023-01-23 02:04:32.085229: step: 140/531, loss: 0.09668950736522675 2023-01-23 02:04:33.201331: step: 144/531, loss: 0.11279220134019852 2023-01-23 02:04:34.338083: step: 148/531, loss: 0.009648466482758522 2023-01-23 02:04:35.488851: step: 152/531, loss: 0.048250678926706314 2023-01-23 02:04:36.589205: step: 156/531, loss: 0.010862017050385475 2023-01-23 02:04:37.728186: step: 160/531, loss: 0.0002521038113627583 2023-01-23 02:04:38.838944: step: 164/531, loss: 0.06656661629676819 2023-01-23 02:04:39.976862: step: 168/531, loss: 0.0015022277366369963 2023-01-23 02:04:41.075010: step: 172/531, loss: 0.012348175048828125 2023-01-23 02:04:42.210411: step: 176/531, loss: 0.007479858584702015 2023-01-23 02:04:43.327983: step: 180/531, loss: 0.12902334332466125 2023-01-23 02:04:44.441024: step: 184/531, loss: 0.005042076576501131 2023-01-23 02:04:45.578829: step: 188/531, loss: 0.0935448706150055 2023-01-23 02:04:46.693693: step: 192/531, loss: 0.033365536481142044 2023-01-23 02:04:47.818165: step: 196/531, loss: 0.02251262776553631 2023-01-23 02:04:48.934042: step: 200/531, loss: 0.021362116560339928 2023-01-23 02:04:50.047024: step: 204/531, loss: 0.0004647255118470639 2023-01-23 02:04:51.176844: step: 208/531, loss: 0.02569706365466118 2023-01-23 02:04:52.295116: step: 212/531, loss: 0.018970299512147903 2023-01-23 02:04:53.412251: step: 216/531, loss: 0.12854118645191193 2023-01-23 02:04:54.542753: step: 220/531, loss: 8.144378807628527e-05 2023-01-23 02:04:55.691820: step: 224/531, loss: 0.011375046335160732 2023-01-23 02:04:56.810195: step: 228/531, loss: 0.009011732414364815 2023-01-23 02:04:57.938664: step: 232/531, loss: 0.0008374214521609247 2023-01-23 02:04:59.079657: step: 236/531, loss: 0.01612253114581108 2023-01-23 02:05:00.207273: step: 240/531, loss: 0.008736610412597656 2023-01-23 02:05:01.315625: step: 244/531, loss: 0.06907949596643448 2023-01-23 02:05:02.449825: step: 248/531, loss: 0.037722207605838776 2023-01-23 02:05:03.556892: step: 252/531, loss: 0.10357246547937393 2023-01-23 02:05:04.694312: step: 256/531, loss: 0.14661617577075958 2023-01-23 02:05:05.798268: step: 260/531, loss: 0.011484527960419655 2023-01-23 02:05:06.943478: step: 264/531, loss: 0.0010483742225915194 2023-01-23 02:05:08.076905: step: 268/531, loss: 0.028850747272372246 2023-01-23 02:05:09.202111: step: 272/531, loss: 0.04091396555304527 2023-01-23 02:05:10.344144: step: 276/531, loss: 1.617627739906311 2023-01-23 02:05:11.453733: step: 280/531, loss: 0.005697679705917835 2023-01-23 02:05:12.591311: step: 284/531, loss: 0.026979923248291016 2023-01-23 02:05:13.753942: step: 288/531, loss: 0.625706136226654 2023-01-23 02:05:14.888431: step: 292/531, loss: 0.00203361501917243 2023-01-23 02:05:16.031661: step: 296/531, loss: 0.001901817275211215 2023-01-23 02:05:17.158501: step: 300/531, loss: 0.026924418285489082 2023-01-23 02:05:18.312533: step: 304/531, loss: 0.014453030191361904 2023-01-23 02:05:19.430995: step: 308/531, loss: 0.010524844750761986 2023-01-23 02:05:20.562743: step: 312/531, loss: 0.008693265728652477 2023-01-23 02:05:21.676003: step: 316/531, loss: 0.013535117730498314 2023-01-23 02:05:22.809763: step: 320/531, loss: 0.023793887346982956 2023-01-23 02:05:23.961942: step: 324/531, loss: 0.02077198028564453 2023-01-23 02:05:25.097602: step: 328/531, loss: 0.3561738133430481 2023-01-23 02:05:26.216257: step: 332/531, loss: 0.00772705115377903 2023-01-23 02:05:27.309164: step: 336/531, loss: 0.02726011350750923 2023-01-23 02:05:28.479924: step: 340/531, loss: 0.016624832525849342 2023-01-23 02:05:29.606627: step: 344/531, loss: 0.0034530640114098787 2023-01-23 02:05:30.718668: step: 348/531, loss: 0.005270957946777344 2023-01-23 02:05:31.865577: step: 352/531, loss: 0.021298598498106003 2023-01-23 02:05:32.983417: step: 356/531, loss: 0.17823810875415802 2023-01-23 02:05:34.132356: step: 360/531, loss: 0.028551101684570312 2023-01-23 02:05:35.280247: step: 364/531, loss: 0.037265680730342865 2023-01-23 02:05:36.406126: step: 368/531, loss: 0.015255356207489967 2023-01-23 02:05:37.546312: step: 372/531, loss: 6.006948947906494 2023-01-23 02:05:38.649653: step: 376/531, loss: 0.007457924075424671 2023-01-23 02:05:39.764870: step: 380/531, loss: 0.01645503006875515 2023-01-23 02:05:40.905300: step: 384/531, loss: 0.04083900526165962 2023-01-23 02:05:42.040412: step: 388/531, loss: 0.02556142956018448 2023-01-23 02:05:43.172078: step: 392/531, loss: 0.02190418355166912 2023-01-23 02:05:44.333114: step: 396/531, loss: 0.039310503751039505 2023-01-23 02:05:45.459330: step: 400/531, loss: 0.0006350993644446135 2023-01-23 02:05:46.584452: step: 404/531, loss: 0.008841132745146751 2023-01-23 02:05:47.733829: step: 408/531, loss: 0.01730956882238388 2023-01-23 02:05:48.860950: step: 412/531, loss: 0.019231414422392845 2023-01-23 02:05:50.003580: step: 416/531, loss: 0.05506310611963272 2023-01-23 02:05:51.125269: step: 420/531, loss: 0.021189212799072266 2023-01-23 02:05:52.207364: step: 424/531, loss: 0.036109257489442825 2023-01-23 02:05:53.383955: step: 428/531, loss: 0.07757490128278732 2023-01-23 02:05:54.540383: step: 432/531, loss: 0.09568710625171661 2023-01-23 02:05:55.648266: step: 436/531, loss: 0.12306594848632812 2023-01-23 02:05:56.768290: step: 440/531, loss: 0.022646808996796608 2023-01-23 02:05:57.911731: step: 444/531, loss: 0.01720447465777397 2023-01-23 02:05:59.039074: step: 448/531, loss: 0.02071247063577175 2023-01-23 02:06:00.178565: step: 452/531, loss: 0.001737880753353238 2023-01-23 02:06:01.299822: step: 456/531, loss: 0.004914379213005304 2023-01-23 02:06:02.428599: step: 460/531, loss: 0.00042848585871979594 2023-01-23 02:06:03.557218: step: 464/531, loss: 0.0002808570861816406 2023-01-23 02:06:04.686832: step: 468/531, loss: 0.005483436863869429 2023-01-23 02:06:05.808124: step: 472/531, loss: 0.005069732666015625 2023-01-23 02:06:06.944326: step: 476/531, loss: 0.03879880905151367 2023-01-23 02:06:08.065822: step: 480/531, loss: 0.0582880973815918 2023-01-23 02:06:09.203962: step: 484/531, loss: 0.0715295821428299 2023-01-23 02:06:10.338164: step: 488/531, loss: 0.028585147112607956 2023-01-23 02:06:11.469768: step: 492/531, loss: 0.10833396762609482 2023-01-23 02:06:12.627957: step: 496/531, loss: 0.007443904876708984 2023-01-23 02:06:13.778128: step: 500/531, loss: 0.859030544757843 2023-01-23 02:06:14.891209: step: 504/531, loss: 0.0022531270515173674 2023-01-23 02:06:16.015605: step: 508/531, loss: 0.022901631891727448 2023-01-23 02:06:17.148507: step: 512/531, loss: 0.053965188562870026 2023-01-23 02:06:18.273303: step: 516/531, loss: 0.01820126734673977 2023-01-23 02:06:19.417596: step: 520/531, loss: 0.00041179655818268657 2023-01-23 02:06:20.564036: step: 524/531, loss: 0.029315471649169922 2023-01-23 02:06:21.693514: step: 528/531, loss: 0.023952674120664597 2023-01-23 02:06:22.818374: step: 532/531, loss: 0.012011121958494186 2023-01-23 02:06:23.935524: step: 536/531, loss: 0.05697031319141388 2023-01-23 02:06:25.097157: step: 540/531, loss: 0.029539775103330612 2023-01-23 02:06:26.217257: step: 544/531, loss: 0.08146047592163086 2023-01-23 02:06:27.352955: step: 548/531, loss: 0.05482606589794159 2023-01-23 02:06:28.488680: step: 552/531, loss: 0.04075346142053604 2023-01-23 02:06:29.604555: step: 556/531, loss: 0.07460375130176544 2023-01-23 02:06:30.743582: step: 560/531, loss: 0.036959078162908554 2023-01-23 02:06:31.849765: step: 564/531, loss: 0.006295204162597656 2023-01-23 02:06:32.958874: step: 568/531, loss: 0.033301543444395065 2023-01-23 02:06:34.092914: step: 572/531, loss: 0.006530857179313898 2023-01-23 02:06:35.213881: step: 576/531, loss: 0.0009083747863769531 2023-01-23 02:06:36.333044: step: 580/531, loss: 0.09212875366210938 2023-01-23 02:06:37.472018: step: 584/531, loss: 0.01833324506878853 2023-01-23 02:06:38.589693: step: 588/531, loss: 0.007298278622329235 2023-01-23 02:06:39.697108: step: 592/531, loss: 0.08208150416612625 2023-01-23 02:06:40.801507: step: 596/531, loss: 0.02361927181482315 2023-01-23 02:06:41.932752: step: 600/531, loss: 0.023557664826512337 2023-01-23 02:06:43.037596: step: 604/531, loss: 0.01991300657391548 2023-01-23 02:06:44.182135: step: 608/531, loss: 0.04050483554601669 2023-01-23 02:06:45.316912: step: 612/531, loss: 0.13273735344409943 2023-01-23 02:06:46.417713: step: 616/531, loss: 0.00942087173461914 2023-01-23 02:06:47.525672: step: 620/531, loss: 0.03134417533874512 2023-01-23 02:06:48.678283: step: 624/531, loss: 0.006497574038803577 2023-01-23 02:06:49.796354: step: 628/531, loss: 0.0054931640625 2023-01-23 02:06:50.891743: step: 632/531, loss: 0.017701338976621628 2023-01-23 02:06:52.021363: step: 636/531, loss: 0.1221260130405426 2023-01-23 02:06:53.132145: step: 640/531, loss: 0.15525609254837036 2023-01-23 02:06:54.245627: step: 644/531, loss: 0.02481985092163086 2023-01-23 02:06:55.354451: step: 648/531, loss: 0.0012698173522949219 2023-01-23 02:06:56.470453: step: 652/531, loss: 0.012583018280565739 2023-01-23 02:06:57.612920: step: 656/531, loss: 0.008814429864287376 2023-01-23 02:06:58.711760: step: 660/531, loss: 0.03627414628863335 2023-01-23 02:06:59.854716: step: 664/531, loss: 0.006097603123635054 2023-01-23 02:07:00.954043: step: 668/531, loss: 0.057888224720954895 2023-01-23 02:07:02.071132: step: 672/531, loss: 0.3089749217033386 2023-01-23 02:07:03.190995: step: 676/531, loss: 0.03994159772992134 2023-01-23 02:07:04.288858: step: 680/531, loss: 0.05654086917638779 2023-01-23 02:07:05.410092: step: 684/531, loss: 0.00441974401473999 2023-01-23 02:07:06.525410: step: 688/531, loss: 0.07937698811292648 2023-01-23 02:07:07.670110: step: 692/531, loss: 0.004258906934410334 2023-01-23 02:07:08.777144: step: 696/531, loss: 0.13562817871570587 2023-01-23 02:07:09.929762: step: 700/531, loss: 0.012343215756118298 2023-01-23 02:07:11.074011: step: 704/531, loss: 0.04665699228644371 2023-01-23 02:07:12.232652: step: 708/531, loss: 0.0029415132012218237 2023-01-23 02:07:13.364371: step: 712/531, loss: 0.011162757873535156 2023-01-23 02:07:14.490718: step: 716/531, loss: 0.006087112706154585 2023-01-23 02:07:15.605242: step: 720/531, loss: 0.011725426651537418 2023-01-23 02:07:16.734930: step: 724/531, loss: 0.0020187378395348787 2023-01-23 02:07:17.888824: step: 728/531, loss: 0.007320880889892578 2023-01-23 02:07:19.013523: step: 732/531, loss: 0.004034710116684437 2023-01-23 02:07:20.142298: step: 736/531, loss: 0.009257888421416283 2023-01-23 02:07:21.258226: step: 740/531, loss: 0.014367627911269665 2023-01-23 02:07:22.387350: step: 744/531, loss: 0.040366362780332565 2023-01-23 02:07:23.548096: step: 748/531, loss: 0.05868320167064667 2023-01-23 02:07:24.679584: step: 752/531, loss: 0.05543842166662216 2023-01-23 02:07:25.820977: step: 756/531, loss: 0.031809426844120026 2023-01-23 02:07:26.958766: step: 760/531, loss: 0.04211597517132759 2023-01-23 02:07:28.067612: step: 764/531, loss: 0.023733949288725853 2023-01-23 02:07:29.214133: step: 768/531, loss: 0.0020542144775390625 2023-01-23 02:07:30.324770: step: 772/531, loss: 0.005161953158676624 2023-01-23 02:07:31.423089: step: 776/531, loss: 0.003291606903076172 2023-01-23 02:07:32.572652: step: 780/531, loss: 0.054085634648799896 2023-01-23 02:07:33.683828: step: 784/531, loss: 0.0017705918289721012 2023-01-23 02:07:34.821962: step: 788/531, loss: 0.0028642655815929174 2023-01-23 02:07:35.976765: step: 792/531, loss: 0.017211247235536575 2023-01-23 02:07:37.100010: step: 796/531, loss: 0.042252540588378906 2023-01-23 02:07:38.193792: step: 800/531, loss: 0.03646240383386612 2023-01-23 02:07:39.309109: step: 804/531, loss: 0.014688301831483841 2023-01-23 02:07:40.431112: step: 808/531, loss: 0.0011251450050622225 2023-01-23 02:07:41.568770: step: 812/531, loss: 0.018541526049375534 2023-01-23 02:07:42.727809: step: 816/531, loss: 0.004125928971916437 2023-01-23 02:07:43.837865: step: 820/531, loss: 0.03895749896764755 2023-01-23 02:07:44.931186: step: 824/531, loss: 0.011591959744691849 2023-01-23 02:07:46.065249: step: 828/531, loss: 0.00367565150372684 2023-01-23 02:07:47.191820: step: 832/531, loss: 0.006286144256591797 2023-01-23 02:07:48.307169: step: 836/531, loss: 0.005358791910111904 2023-01-23 02:07:49.433901: step: 840/531, loss: 0.05284185707569122 2023-01-23 02:07:50.528245: step: 844/531, loss: 0.05454883351922035 2023-01-23 02:07:51.653099: step: 848/531, loss: 0.007160949986428022 2023-01-23 02:07:52.766482: step: 852/531, loss: 2.193450927734375e-05 2023-01-23 02:07:53.886543: step: 856/531, loss: 0.005218696314841509 2023-01-23 02:07:55.014307: step: 860/531, loss: 0.06339102238416672 2023-01-23 02:07:56.139006: step: 864/531, loss: 0.004090785980224609 2023-01-23 02:07:57.250423: step: 868/531, loss: 0.01420822087675333 2023-01-23 02:07:58.414608: step: 872/531, loss: 0.055559732019901276 2023-01-23 02:07:59.517855: step: 876/531, loss: 0.011470126919448376 2023-01-23 02:08:00.686532: step: 880/531, loss: 0.0335145965218544 2023-01-23 02:08:01.802441: step: 884/531, loss: 0.03201622888445854 2023-01-23 02:08:02.923104: step: 888/531, loss: 0.036260221153497696 2023-01-23 02:08:04.046797: step: 892/531, loss: 0.04789695888757706 2023-01-23 02:08:05.151314: step: 896/531, loss: 0.21066462993621826 2023-01-23 02:08:06.270220: step: 900/531, loss: 0.25753164291381836 2023-01-23 02:08:07.366492: step: 904/531, loss: 0.08119526505470276 2023-01-23 02:08:08.514868: step: 908/531, loss: 0.0073544979095458984 2023-01-23 02:08:09.641563: step: 912/531, loss: 0.015293694101274014 2023-01-23 02:08:10.767672: step: 916/531, loss: 0.027293777093291283 2023-01-23 02:08:11.915974: step: 920/531, loss: 0.045829202979803085 2023-01-23 02:08:13.104648: step: 924/531, loss: 0.005808639340102673 2023-01-23 02:08:14.213464: step: 928/531, loss: 0.023906899616122246 2023-01-23 02:08:15.306732: step: 932/531, loss: 0.013967323116958141 2023-01-23 02:08:16.418269: step: 936/531, loss: 0.004004573915153742 2023-01-23 02:08:17.525229: step: 940/531, loss: 0.005336951930075884 2023-01-23 02:08:18.646033: step: 944/531, loss: 0.04231281206011772 2023-01-23 02:08:19.744422: step: 948/531, loss: 0.014080810360610485 2023-01-23 02:08:20.852547: step: 952/531, loss: 0.0003616333124227822 2023-01-23 02:08:21.953975: step: 956/531, loss: 0.01487874984741211 2023-01-23 02:08:23.087104: step: 960/531, loss: 0.1063026487827301 2023-01-23 02:08:24.189833: step: 964/531, loss: 0.1509542465209961 2023-01-23 02:08:25.311085: step: 968/531, loss: 0.02918267250061035 2023-01-23 02:08:26.443856: step: 972/531, loss: 0.004044532775878906 2023-01-23 02:08:27.551313: step: 976/531, loss: 0.006843375973403454 2023-01-23 02:08:28.670159: step: 980/531, loss: 0.0710567981004715 2023-01-23 02:08:29.794495: step: 984/531, loss: 0.8567569851875305 2023-01-23 02:08:30.922442: step: 988/531, loss: 0.04856710508465767 2023-01-23 02:08:32.026681: step: 992/531, loss: 0.054155826568603516 2023-01-23 02:08:33.143426: step: 996/531, loss: 0.004180908203125 2023-01-23 02:08:34.247822: step: 1000/531, loss: 0.012790489941835403 2023-01-23 02:08:35.380047: step: 1004/531, loss: 0.0006172180292196572 2023-01-23 02:08:36.506721: step: 1008/531, loss: 0.0117926811799407 2023-01-23 02:08:37.636866: step: 1012/531, loss: 0.28214895725250244 2023-01-23 02:08:38.809906: step: 1016/531, loss: 0.13246899843215942 2023-01-23 02:08:39.939035: step: 1020/531, loss: 0.10410070419311523 2023-01-23 02:08:41.033070: step: 1024/531, loss: 0.014422893524169922 2023-01-23 02:08:42.188930: step: 1028/531, loss: 0.0038860561326146126 2023-01-23 02:08:43.289190: step: 1032/531, loss: 0.015799570828676224 2023-01-23 02:08:44.389637: step: 1036/531, loss: 0.062032513320446014 2023-01-23 02:08:45.531835: step: 1040/531, loss: 0.04151182249188423 2023-01-23 02:08:46.657753: step: 1044/531, loss: 0.011372757144272327 2023-01-23 02:08:47.791950: step: 1048/531, loss: 0.0026180266868323088 2023-01-23 02:08:48.918990: step: 1052/531, loss: 0.06060199812054634 2023-01-23 02:08:50.026359: step: 1056/531, loss: 0.07490377873182297 2023-01-23 02:08:51.158035: step: 1060/531, loss: 0.08485298603773117 2023-01-23 02:08:52.283050: step: 1064/531, loss: 0.020595360547304153 2023-01-23 02:08:53.420053: step: 1068/531, loss: 0.010837269015610218 2023-01-23 02:08:54.554233: step: 1072/531, loss: 3.924369957530871e-05 2023-01-23 02:08:55.684292: step: 1076/531, loss: 0.004365348722785711 2023-01-23 02:08:56.796046: step: 1080/531, loss: 0.0004356384451966733 2023-01-23 02:08:57.943197: step: 1084/531, loss: 0.01933317258954048 2023-01-23 02:08:59.079875: step: 1088/531, loss: 0.020740319043397903 2023-01-23 02:09:00.210779: step: 1092/531, loss: 0.057190895080566406 2023-01-23 02:09:01.341584: step: 1096/531, loss: 0.007929611019790173 2023-01-23 02:09:02.470626: step: 1100/531, loss: 0.023499680683016777 2023-01-23 02:09:03.610814: step: 1104/531, loss: 0.023211481049656868 2023-01-23 02:09:04.739961: step: 1108/531, loss: 0.012027645483613014 2023-01-23 02:09:05.862983: step: 1112/531, loss: 0.01068739965558052 2023-01-23 02:09:06.965666: step: 1116/531, loss: 0.000408172607421875 2023-01-23 02:09:08.089616: step: 1120/531, loss: 0.004358386620879173 2023-01-23 02:09:09.219393: step: 1124/531, loss: 0.4202941954135895 2023-01-23 02:09:10.352972: step: 1128/531, loss: 0.03831319883465767 2023-01-23 02:09:11.547275: step: 1132/531, loss: 0.019341373816132545 2023-01-23 02:09:12.681208: step: 1136/531, loss: 0.013365697115659714 2023-01-23 02:09:13.801010: step: 1140/531, loss: 0.014270162209868431 2023-01-23 02:09:14.935304: step: 1144/531, loss: 0.1033555120229721 2023-01-23 02:09:16.048383: step: 1148/531, loss: 0.00143346784170717 2023-01-23 02:09:17.168792: step: 1152/531, loss: 0.004264545626938343 2023-01-23 02:09:18.293778: step: 1156/531, loss: 0.09733843803405762 2023-01-23 02:09:19.426901: step: 1160/531, loss: 0.0415164940059185 2023-01-23 02:09:20.543883: step: 1164/531, loss: 0.017017554491758347 2023-01-23 02:09:21.685074: step: 1168/531, loss: 0.03323431313037872 2023-01-23 02:09:22.864447: step: 1172/531, loss: 0.00034008026705123484 2023-01-23 02:09:23.987187: step: 1176/531, loss: 0.007091331761330366 2023-01-23 02:09:25.128449: step: 1180/531, loss: 0.10011768341064453 2023-01-23 02:09:26.266432: step: 1184/531, loss: 0.005597543902695179 2023-01-23 02:09:27.411319: step: 1188/531, loss: 0.04480300098657608 2023-01-23 02:09:28.545261: step: 1192/531, loss: 0.018289949744939804 2023-01-23 02:09:29.693987: step: 1196/531, loss: 0.007702135946601629 2023-01-23 02:09:30.810885: step: 1200/531, loss: 0.011410808190703392 2023-01-23 02:09:31.929172: step: 1204/531, loss: 0.5520318746566772 2023-01-23 02:09:33.108127: step: 1208/531, loss: 0.01717986911535263 2023-01-23 02:09:34.253744: step: 1212/531, loss: 0.26149678230285645 2023-01-23 02:09:35.372919: step: 1216/531, loss: 0.02206115797162056 2023-01-23 02:09:36.524112: step: 1220/531, loss: 0.001935768174007535 2023-01-23 02:09:37.653010: step: 1224/531, loss: 0.030426407232880592 2023-01-23 02:09:38.757031: step: 1228/531, loss: 0.014526509679853916 2023-01-23 02:09:39.877287: step: 1232/531, loss: 0.008053588680922985 2023-01-23 02:09:40.990328: step: 1236/531, loss: 0.03564424812793732 2023-01-23 02:09:42.130322: step: 1240/531, loss: 0.0037119868211448193 2023-01-23 02:09:43.237029: step: 1244/531, loss: 0.28150081634521484 2023-01-23 02:09:44.386470: step: 1248/531, loss: 0.014241814613342285 2023-01-23 02:09:45.562052: step: 1252/531, loss: 0.010227775201201439 2023-01-23 02:09:46.675229: step: 1256/531, loss: 0.021290970966219902 2023-01-23 02:09:47.810601: step: 1260/531, loss: 0.018506528809666634 2023-01-23 02:09:48.910948: step: 1264/531, loss: 0.012568999081850052 2023-01-23 02:09:50.027009: step: 1268/531, loss: 0.02321491204202175 2023-01-23 02:09:51.151967: step: 1272/531, loss: 0.0386958122253418 2023-01-23 02:09:52.271070: step: 1276/531, loss: 0.025794124230742455 2023-01-23 02:09:53.393640: step: 1280/531, loss: 0.03951244428753853 2023-01-23 02:09:54.532415: step: 1284/531, loss: 0.018988801166415215 2023-01-23 02:09:55.638573: step: 1288/531, loss: 0.04791221395134926 2023-01-23 02:09:56.775511: step: 1292/531, loss: 0.004714107606559992 2023-01-23 02:09:57.904598: step: 1296/531, loss: 0.15160693228244781 2023-01-23 02:09:59.028354: step: 1300/531, loss: 0.0006214141612872481 2023-01-23 02:10:00.121791: step: 1304/531, loss: 0.028431225568056107 2023-01-23 02:10:01.267491: step: 1308/531, loss: 0.12417946010828018 2023-01-23 02:10:02.401084: step: 1312/531, loss: 0.023177146911621094 2023-01-23 02:10:03.521397: step: 1316/531, loss: 0.044016458094120026 2023-01-23 02:10:04.640503: step: 1320/531, loss: 0.02003050036728382 2023-01-23 02:10:05.783214: step: 1324/531, loss: 0.00012286155833862722 2023-01-23 02:10:06.937078: step: 1328/531, loss: 0.01183023490011692 2023-01-23 02:10:08.037205: step: 1332/531, loss: 0.0051094056107103825 2023-01-23 02:10:09.157124: step: 1336/531, loss: 0.0355289950966835 2023-01-23 02:10:10.267197: step: 1340/531, loss: 0.004100990481674671 2023-01-23 02:10:11.405537: step: 1344/531, loss: 0.009284497238695621 2023-01-23 02:10:12.528330: step: 1348/531, loss: 0.014715003781020641 2023-01-23 02:10:13.642482: step: 1352/531, loss: 0.0006219864008016884 2023-01-23 02:10:14.799545: step: 1356/531, loss: 0.0850229263305664 2023-01-23 02:10:15.933040: step: 1360/531, loss: 0.003867262741550803 2023-01-23 02:10:17.101226: step: 1364/531, loss: 0.7309063076972961 2023-01-23 02:10:18.188247: step: 1368/531, loss: 0.005594921298325062 2023-01-23 02:10:19.317768: step: 1372/531, loss: 0.06299133598804474 2023-01-23 02:10:20.411479: step: 1376/531, loss: 0.020908737555146217 2023-01-23 02:10:21.514925: step: 1380/531, loss: 0.010369682684540749 2023-01-23 02:10:22.642999: step: 1384/531, loss: 0.0023213387466967106 2023-01-23 02:10:23.784274: step: 1388/531, loss: 0.015612220391631126 2023-01-23 02:10:24.935328: step: 1392/531, loss: 0.0018318176735192537 2023-01-23 02:10:26.027847: step: 1396/531, loss: 0.002593779470771551 2023-01-23 02:10:27.205500: step: 1400/531, loss: 0.05279693752527237 2023-01-23 02:10:28.373164: step: 1404/531, loss: 0.019677162170410156 2023-01-23 02:10:29.496452: step: 1408/531, loss: 0.0005475044017657638 2023-01-23 02:10:30.609222: step: 1412/531, loss: 0.050246335566043854 2023-01-23 02:10:31.728125: step: 1416/531, loss: 0.027476787567138672 2023-01-23 02:10:32.827718: step: 1420/531, loss: 0.005717468447983265 2023-01-23 02:10:33.934868: step: 1424/531, loss: 0.031079886481165886 2023-01-23 02:10:35.046534: step: 1428/531, loss: 0.0009953498374670744 2023-01-23 02:10:36.162456: step: 1432/531, loss: 0.011181640438735485 2023-01-23 02:10:37.296678: step: 1436/531, loss: 0.01950206607580185 2023-01-23 02:10:38.420915: step: 1440/531, loss: 0.03135652467608452 2023-01-23 02:10:39.612562: step: 1444/531, loss: 0.029941465705633163 2023-01-23 02:10:40.728468: step: 1448/531, loss: 0.048130229115486145 2023-01-23 02:10:41.884333: step: 1452/531, loss: 0.06347064673900604 2023-01-23 02:10:42.999687: step: 1456/531, loss: 0.010523248463869095 2023-01-23 02:10:44.117410: step: 1460/531, loss: 0.07009506225585938 2023-01-23 02:10:45.240965: step: 1464/531, loss: 0.0037800788413733244 2023-01-23 02:10:46.359083: step: 1468/531, loss: 0.01076512411236763 2023-01-23 02:10:47.499465: step: 1472/531, loss: 0.004036140628159046 2023-01-23 02:10:48.627703: step: 1476/531, loss: 0.01759796217083931 2023-01-23 02:10:49.782621: step: 1480/531, loss: 0.0016837120056152344 2023-01-23 02:10:50.902996: step: 1484/531, loss: 0.00872583407908678 2023-01-23 02:10:51.998673: step: 1488/531, loss: 0.004686451051384211 2023-01-23 02:10:53.133843: step: 1492/531, loss: 0.019215773791074753 2023-01-23 02:10:54.249509: step: 1496/531, loss: 0.0013466834789142013 2023-01-23 02:10:55.378073: step: 1500/531, loss: 0.03810291737318039 2023-01-23 02:10:56.507956: step: 1504/531, loss: 0.0024088858626782894 2023-01-23 02:10:57.620651: step: 1508/531, loss: 0.004228973761200905 2023-01-23 02:10:58.762958: step: 1512/531, loss: 0.011442375369369984 2023-01-23 02:10:59.890639: step: 1516/531, loss: 0.046485330909490585 2023-01-23 02:11:01.005124: step: 1520/531, loss: 0.06269045174121857 2023-01-23 02:11:02.123926: step: 1524/531, loss: 0.02621002309024334 2023-01-23 02:11:03.248783: step: 1528/531, loss: 0.0002887725713662803 2023-01-23 02:11:04.406115: step: 1532/531, loss: 0.01461338996887207 2023-01-23 02:11:05.558496: step: 1536/531, loss: 0.023173904046416283 2023-01-23 02:11:06.681703: step: 1540/531, loss: 0.004830074496567249 2023-01-23 02:11:07.828001: step: 1544/531, loss: 0.0016841889591887593 2023-01-23 02:11:08.947483: step: 1548/531, loss: 0.07658348232507706 2023-01-23 02:11:10.075098: step: 1552/531, loss: 0.005435085389763117 2023-01-23 02:11:11.197368: step: 1556/531, loss: 0.04328766092658043 2023-01-23 02:11:12.316296: step: 1560/531, loss: 0.02664356306195259 2023-01-23 02:11:13.426059: step: 1564/531, loss: 0.04607276991009712 2023-01-23 02:11:14.544663: step: 1568/531, loss: 0.02232837677001953 2023-01-23 02:11:15.696927: step: 1572/531, loss: 0.0014587403275072575 2023-01-23 02:11:16.809040: step: 1576/531, loss: 0.005397796630859375 2023-01-23 02:11:17.921370: step: 1580/531, loss: 0.008266639895737171 2023-01-23 02:11:19.031466: step: 1584/531, loss: 0.02608470991253853 2023-01-23 02:11:20.180604: step: 1588/531, loss: 0.005488777067512274 2023-01-23 02:11:21.336658: step: 1592/531, loss: 0.007983780466020107 2023-01-23 02:11:22.432352: step: 1596/531, loss: 0.04418673366308212 2023-01-23 02:11:23.550000: step: 1600/531, loss: 0.02939739264547825 2023-01-23 02:11:24.659976: step: 1604/531, loss: 0.05591907724738121 2023-01-23 02:11:25.808165: step: 1608/531, loss: 0.031000901013612747 2023-01-23 02:11:26.943233: step: 1612/531, loss: 0.036405373364686966 2023-01-23 02:11:28.054131: step: 1616/531, loss: 0.026595687493681908 2023-01-23 02:11:29.180693: step: 1620/531, loss: 0.00045881271944381297 2023-01-23 02:11:30.351845: step: 1624/531, loss: 0.02132396772503853 2023-01-23 02:11:31.463138: step: 1628/531, loss: 0.05131196975708008 2023-01-23 02:11:32.613104: step: 1632/531, loss: 0.0006000518915243447 2023-01-23 02:11:33.724201: step: 1636/531, loss: 0.02595844306051731 2023-01-23 02:11:34.839853: step: 1640/531, loss: 0.01328048761934042 2023-01-23 02:11:35.970457: step: 1644/531, loss: 0.01994762383401394 2023-01-23 02:11:37.130462: step: 1648/531, loss: 0.0467371940612793 2023-01-23 02:11:38.249459: step: 1652/531, loss: 0.012303399853408337 2023-01-23 02:11:39.359047: step: 1656/531, loss: 0.060896873474121094 2023-01-23 02:11:40.472335: step: 1660/531, loss: 0.012256860733032227 2023-01-23 02:11:41.615383: step: 1664/531, loss: 0.0036530494689941406 2023-01-23 02:11:42.743268: step: 1668/531, loss: 0.012294769287109375 2023-01-23 02:11:43.855646: step: 1672/531, loss: 0.008628464303910732 2023-01-23 02:11:44.968755: step: 1676/531, loss: 0.008865357376635075 2023-01-23 02:11:46.064652: step: 1680/531, loss: 0.02076139487326145 2023-01-23 02:11:47.180447: step: 1684/531, loss: 0.01755077950656414 2023-01-23 02:11:48.305591: step: 1688/531, loss: 0.016353609040379524 2023-01-23 02:11:49.449256: step: 1692/531, loss: 0.07838574051856995 2023-01-23 02:11:50.567393: step: 1696/531, loss: 0.014557838439941406 2023-01-23 02:11:51.675035: step: 1700/531, loss: 0.0006496429559774697 2023-01-23 02:11:52.794297: step: 1704/531, loss: 0.019725609570741653 2023-01-23 02:11:53.922859: step: 1708/531, loss: 0.04597339779138565 2023-01-23 02:11:55.058529: step: 1712/531, loss: 0.6879486441612244 2023-01-23 02:11:56.168287: step: 1716/531, loss: 0.10511636734008789 2023-01-23 02:11:57.295505: step: 1720/531, loss: 0.001472091767936945 2023-01-23 02:11:58.425946: step: 1724/531, loss: 0.06482386589050293 2023-01-23 02:11:59.572498: step: 1728/531, loss: 0.083740234375 2023-01-23 02:12:00.708284: step: 1732/531, loss: 0.00684013357385993 2023-01-23 02:12:01.840562: step: 1736/531, loss: 0.026021480560302734 2023-01-23 02:12:02.966663: step: 1740/531, loss: 0.0027683258522301912 2023-01-23 02:12:04.098536: step: 1744/531, loss: 0.007908154278993607 2023-01-23 02:12:05.211353: step: 1748/531, loss: 0.0014129638439044356 2023-01-23 02:12:06.343006: step: 1752/531, loss: 0.02902822569012642 2023-01-23 02:12:07.461128: step: 1756/531, loss: 0.01378488540649414 2023-01-23 02:12:08.592589: step: 1760/531, loss: 0.013411951251327991 2023-01-23 02:12:09.735822: step: 1764/531, loss: 0.000992977642454207 2023-01-23 02:12:10.860101: step: 1768/531, loss: 0.015405083075165749 2023-01-23 02:12:11.961442: step: 1772/531, loss: 0.05477790907025337 2023-01-23 02:12:13.111579: step: 1776/531, loss: 0.007916450500488281 2023-01-23 02:12:14.230512: step: 1780/531, loss: 0.01865072175860405 2023-01-23 02:12:15.376691: step: 1784/531, loss: 0.020322799682617188 2023-01-23 02:12:16.484713: step: 1788/531, loss: 0.014657402411103249 2023-01-23 02:12:17.593472: step: 1792/531, loss: 0.5551536679267883 2023-01-23 02:12:18.706289: step: 1796/531, loss: 0.03054780885577202 2023-01-23 02:12:19.814005: step: 1800/531, loss: 0.020275497809052467 2023-01-23 02:12:20.919925: step: 1804/531, loss: 0.04089293256402016 2023-01-23 02:12:22.026906: step: 1808/531, loss: 0.07146720588207245 2023-01-23 02:12:23.163459: step: 1812/531, loss: 0.030412817373871803 2023-01-23 02:12:24.282364: step: 1816/531, loss: 0.022384166717529297 2023-01-23 02:12:25.396800: step: 1820/531, loss: 0.016867447644472122 2023-01-23 02:12:26.539432: step: 1824/531, loss: 0.043793488293886185 2023-01-23 02:12:27.641005: step: 1828/531, loss: 0.07372169196605682 2023-01-23 02:12:28.755492: step: 1832/531, loss: 0.00610122736543417 2023-01-23 02:12:29.874406: step: 1836/531, loss: 0.09406938403844833 2023-01-23 02:12:31.014732: step: 1840/531, loss: 0.0034509659744799137 2023-01-23 02:12:32.138520: step: 1844/531, loss: 0.00033597947913222015 2023-01-23 02:12:33.276798: step: 1848/531, loss: 0.10502815246582031 2023-01-23 02:12:34.389399: step: 1852/531, loss: 0.00011377334885764867 2023-01-23 02:12:35.520523: step: 1856/531, loss: 0.021224401891231537 2023-01-23 02:12:36.647384: step: 1860/531, loss: 0.1867271214723587 2023-01-23 02:12:37.740391: step: 1864/531, loss: 0.09906559437513351 2023-01-23 02:12:38.843132: step: 1868/531, loss: 0.054347991943359375 2023-01-23 02:12:40.009226: step: 1872/531, loss: 0.04394808039069176 2023-01-23 02:12:41.161797: step: 1876/531, loss: 0.04727435111999512 2023-01-23 02:12:42.270358: step: 1880/531, loss: 0.02191181294620037 2023-01-23 02:12:43.394075: step: 1884/531, loss: 0.00016999244689941406 2023-01-23 02:12:44.516438: step: 1888/531, loss: 0.14360013604164124 2023-01-23 02:12:45.667666: step: 1892/531, loss: 0.09328394383192062 2023-01-23 02:12:46.794633: step: 1896/531, loss: 0.02176504209637642 2023-01-23 02:12:47.926407: step: 1900/531, loss: 0.0007488250848837197 2023-01-23 02:12:49.055622: step: 1904/531, loss: 0.00410571089014411 2023-01-23 02:12:50.170348: step: 1908/531, loss: 0.0034383772872388363 2023-01-23 02:12:51.289679: step: 1912/531, loss: 0.019974900409579277 2023-01-23 02:12:52.415401: step: 1916/531, loss: 0.05625209957361221 2023-01-23 02:12:53.543395: step: 1920/531, loss: 0.03726644814014435 2023-01-23 02:12:54.660845: step: 1924/531, loss: 0.001233005546964705 2023-01-23 02:12:55.802681: step: 1928/531, loss: 0.07823486626148224 2023-01-23 02:12:56.932532: step: 1932/531, loss: 0.002031993819400668 2023-01-23 02:12:58.023956: step: 1936/531, loss: 0.00043740271939896047 2023-01-23 02:12:59.168390: step: 1940/531, loss: 0.03054656833410263 2023-01-23 02:13:00.299811: step: 1944/531, loss: 0.0013572931056842208 2023-01-23 02:13:01.402378: step: 1948/531, loss: 0.05219784006476402 2023-01-23 02:13:02.540509: step: 1952/531, loss: 0.027754690498113632 2023-01-23 02:13:03.649621: step: 1956/531, loss: 0.05327634885907173 2023-01-23 02:13:04.756312: step: 1960/531, loss: 0.06169535964727402 2023-01-23 02:13:05.857095: step: 1964/531, loss: 0.0258149616420269 2023-01-23 02:13:06.982005: step: 1968/531, loss: 0.007311439607292414 2023-01-23 02:13:08.103245: step: 1972/531, loss: 0.02728271484375 2023-01-23 02:13:09.214873: step: 1976/531, loss: 0.000264072441495955 2023-01-23 02:13:10.367406: step: 1980/531, loss: 0.030834389850497246 2023-01-23 02:13:11.485552: step: 1984/531, loss: 0.0017687797080725431 2023-01-23 02:13:12.614826: step: 1988/531, loss: 0.05214662477374077 2023-01-23 02:13:13.747318: step: 1992/531, loss: 0.007312393747270107 2023-01-23 02:13:14.889290: step: 1996/531, loss: 0.036153316497802734 2023-01-23 02:13:16.002385: step: 2000/531, loss: 0.0008543014992028475 2023-01-23 02:13:17.100004: step: 2004/531, loss: 0.06206831708550453 2023-01-23 02:13:18.223913: step: 2008/531, loss: 0.0641569122672081 2023-01-23 02:13:19.349563: step: 2012/531, loss: 0.021302510052919388 2023-01-23 02:13:20.482271: step: 2016/531, loss: 0.002459716983139515 2023-01-23 02:13:21.627620: step: 2020/531, loss: 0.04087505117058754 2023-01-23 02:13:22.743933: step: 2024/531, loss: 0.00456657400354743 2023-01-23 02:13:23.882978: step: 2028/531, loss: 0.0673370361328125 2023-01-23 02:13:25.005582: step: 2032/531, loss: 0.015209055505692959 2023-01-23 02:13:26.121401: step: 2036/531, loss: 0.0769510269165039 2023-01-23 02:13:27.239928: step: 2040/531, loss: 0.04460039362311363 2023-01-23 02:13:28.332845: step: 2044/531, loss: 0.0016989231808111072 2023-01-23 02:13:29.429423: step: 2048/531, loss: 0.022928334772586823 2023-01-23 02:13:30.552533: step: 2052/531, loss: 0.03409919887781143 2023-01-23 02:13:31.685196: step: 2056/531, loss: 0.013751983642578125 2023-01-23 02:13:32.803974: step: 2060/531, loss: 0.042627573013305664 2023-01-23 02:13:33.981680: step: 2064/531, loss: 0.03130446746945381 2023-01-23 02:13:35.119206: step: 2068/531, loss: 0.037835218012332916 2023-01-23 02:13:36.219278: step: 2072/531, loss: 0.0014135361416265368 2023-01-23 02:13:37.332356: step: 2076/531, loss: 0.0061817169189453125 2023-01-23 02:13:38.456578: step: 2080/531, loss: 0.0003016948467120528 2023-01-23 02:13:39.563148: step: 2084/531, loss: 0.05926189571619034 2023-01-23 02:13:40.712785: step: 2088/531, loss: 0.07403528690338135 2023-01-23 02:13:41.794136: step: 2092/531, loss: 0.006610870826989412 2023-01-23 02:13:42.938456: step: 2096/531, loss: 0.00043539999751374125 2023-01-23 02:13:44.046639: step: 2100/531, loss: 0.010175991803407669 2023-01-23 02:13:45.171356: step: 2104/531, loss: 0.00267620082013309 2023-01-23 02:13:46.304781: step: 2108/531, loss: 0.039144039154052734 2023-01-23 02:13:47.451822: step: 2112/531, loss: 0.037192247807979584 2023-01-23 02:13:48.602025: step: 2116/531, loss: 0.007502746302634478 2023-01-23 02:13:49.744075: step: 2120/531, loss: 0.016350459307432175 2023-01-23 02:13:50.857946: step: 2124/531, loss: 0.10162334144115448 ================================================== Loss: 0.056 -------------------- Dev: {'event': {'p': 0.5949494949494949, 'r': 0.7842876165113183, 'f1': 0.6766226306720274}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Test: {'event': {'p': 0.6321564885496184, 'r': 0.7901013714967203, 'f1': 0.7023588656241717}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Chinese: {'event': {'p': 0.5853658536585366, 'r': 0.8888888888888888, 'f1': 0.7058823529411764}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Korean: {'event': {'p': 0.6071428571428571, 'r': 0.5396825396825397, 'f1': 0.5714285714285714}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Russian: {'event': {'p': 0.44680851063829785, 'r': 0.5833333333333334, 'f1': 0.5060240963855422}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 16 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:14:30.729054: step: 4/531, loss: 0.2521275579929352 2023-01-23 02:14:31.835274: step: 8/531, loss: 0.006723976228386164 2023-01-23 02:14:32.965731: step: 12/531, loss: 0.0032130242325365543 2023-01-23 02:14:34.090895: step: 16/531, loss: 0.03483247756958008 2023-01-23 02:14:35.196078: step: 20/531, loss: 0.004098653793334961 2023-01-23 02:14:36.300341: step: 24/531, loss: 0.00012521744065452367 2023-01-23 02:14:37.412102: step: 28/531, loss: 0.0009501457097940147 2023-01-23 02:14:38.542976: step: 32/531, loss: 0.01824159547686577 2023-01-23 02:14:39.690827: step: 36/531, loss: 0.00128688826225698 2023-01-23 02:14:40.817091: step: 40/531, loss: 0.04172954708337784 2023-01-23 02:14:41.940841: step: 44/531, loss: 0.0052734375931322575 2023-01-23 02:14:43.025786: step: 48/531, loss: 0.0008638381841592491 2023-01-23 02:14:44.177670: step: 52/531, loss: 0.02124939113855362 2023-01-23 02:14:45.308850: step: 56/531, loss: 0.010611867532134056 2023-01-23 02:14:46.440841: step: 60/531, loss: 0.046534158289432526 2023-01-23 02:14:47.553590: step: 64/531, loss: 0.008616066537797451 2023-01-23 02:14:48.697291: step: 68/531, loss: 0.022318650037050247 2023-01-23 02:14:49.831635: step: 72/531, loss: 0.015177154913544655 2023-01-23 02:14:50.958158: step: 76/531, loss: 0.009215832687914371 2023-01-23 02:14:52.071849: step: 80/531, loss: 0.0042175292037427425 2023-01-23 02:14:53.218454: step: 84/531, loss: 0.007029390893876553 2023-01-23 02:14:54.351653: step: 88/531, loss: 0.007113838102668524 2023-01-23 02:14:55.528475: step: 92/531, loss: 0.023394394665956497 2023-01-23 02:14:56.696701: step: 96/531, loss: 0.04689941555261612 2023-01-23 02:14:57.809166: step: 100/531, loss: 0.6909419894218445 2023-01-23 02:14:58.958610: step: 104/531, loss: 0.014201641082763672 2023-01-23 02:15:00.118055: step: 108/531, loss: 0.017704010009765625 2023-01-23 02:15:01.232762: step: 112/531, loss: 0.002296066377311945 2023-01-23 02:15:02.353843: step: 116/531, loss: 0.05105840042233467 2023-01-23 02:15:03.482947: step: 120/531, loss: 0.0019364356994628906 2023-01-23 02:15:04.585049: step: 124/531, loss: 0.11028274148702621 2023-01-23 02:15:05.716351: step: 128/531, loss: 0.00010409355309093371 2023-01-23 02:15:06.889616: step: 132/531, loss: 0.038350678980350494 2023-01-23 02:15:08.012293: step: 136/531, loss: 0.005510807037353516 2023-01-23 02:15:09.139033: step: 140/531, loss: 0.021910762414336205 2023-01-23 02:15:10.286186: step: 144/531, loss: 0.011336899362504482 2023-01-23 02:15:11.392649: step: 148/531, loss: 0.0018449783092364669 2023-01-23 02:15:12.521268: step: 152/531, loss: 0.009054851718246937 2023-01-23 02:15:13.660880: step: 156/531, loss: 0.022633124142885208 2023-01-23 02:15:14.771856: step: 160/531, loss: 0.023503970354795456 2023-01-23 02:15:15.887257: step: 164/531, loss: 0.01003198605030775 2023-01-23 02:15:16.985926: step: 168/531, loss: 0.073033906519413 2023-01-23 02:15:18.122390: step: 172/531, loss: 0.00035648344783112407 2023-01-23 02:15:19.260600: step: 176/531, loss: 0.014524651691317558 2023-01-23 02:15:20.367624: step: 180/531, loss: 9.17434663278982e-05 2023-01-23 02:15:21.490220: step: 184/531, loss: 0.11682777106761932 2023-01-23 02:15:22.599927: step: 188/531, loss: 0.03980856016278267 2023-01-23 02:15:23.713951: step: 192/531, loss: 0.034307099878787994 2023-01-23 02:15:24.811006: step: 196/531, loss: 0.018357181921601295 2023-01-23 02:15:25.934390: step: 200/531, loss: 0.009448242373764515 2023-01-23 02:15:27.096208: step: 204/531, loss: 0.005802631378173828 2023-01-23 02:15:28.204964: step: 208/531, loss: 0.01721201092004776 2023-01-23 02:15:29.353904: step: 212/531, loss: 0.05015993118286133 2023-01-23 02:15:30.474288: step: 216/531, loss: 0.0062583922408521175 2023-01-23 02:15:31.609398: step: 220/531, loss: 0.6739804744720459 2023-01-23 02:15:32.738324: step: 224/531, loss: 0.0036876678932458162 2023-01-23 02:15:33.855692: step: 228/531, loss: 0.02613544464111328 2023-01-23 02:15:34.972984: step: 232/531, loss: 0.020329046994447708 2023-01-23 02:15:36.109863: step: 236/531, loss: 0.011905861087143421 2023-01-23 02:15:37.196369: step: 240/531, loss: 0.03851490095257759 2023-01-23 02:15:38.311695: step: 244/531, loss: 0.026909923180937767 2023-01-23 02:15:39.445880: step: 248/531, loss: 0.05736055597662926 2023-01-23 02:15:40.563756: step: 252/531, loss: 0.008288002572953701 2023-01-23 02:15:41.669342: step: 256/531, loss: 0.049338627606630325 2023-01-23 02:15:42.800548: step: 260/531, loss: 0.037832166999578476 2023-01-23 02:15:43.933686: step: 264/531, loss: 0.004467487335205078 2023-01-23 02:15:45.065039: step: 268/531, loss: 0.025663472712039948 2023-01-23 02:15:46.200922: step: 272/531, loss: 0.00010519028000999242 2023-01-23 02:15:47.333382: step: 276/531, loss: 0.03365039825439453 2023-01-23 02:15:48.451496: step: 280/531, loss: 0.0009822845458984375 2023-01-23 02:15:49.573088: step: 284/531, loss: 0.031194305047392845 2023-01-23 02:15:50.674285: step: 288/531, loss: 0.0016650677425786853 2023-01-23 02:15:51.782395: step: 292/531, loss: 0.24374103546142578 2023-01-23 02:15:52.931354: step: 296/531, loss: 0.035718392580747604 2023-01-23 02:15:54.067558: step: 300/531, loss: 0.06796932220458984 2023-01-23 02:15:55.237758: step: 304/531, loss: 0.2714877724647522 2023-01-23 02:15:56.407811: step: 308/531, loss: 0.027812767773866653 2023-01-23 02:15:57.526549: step: 312/531, loss: 0.03317318111658096 2023-01-23 02:15:58.671119: step: 316/531, loss: 0.03608722612261772 2023-01-23 02:15:59.759046: step: 320/531, loss: 0.06954164803028107 2023-01-23 02:16:00.879777: step: 324/531, loss: 0.024041511118412018 2023-01-23 02:16:02.019209: step: 328/531, loss: 0.0024276257026940584 2023-01-23 02:16:03.138421: step: 332/531, loss: 0.0506032258272171 2023-01-23 02:16:04.240843: step: 336/531, loss: 0.2203085869550705 2023-01-23 02:16:05.354303: step: 340/531, loss: 0.13547289371490479 2023-01-23 02:16:06.482004: step: 344/531, loss: 0.013545037247240543 2023-01-23 02:16:07.617607: step: 348/531, loss: 0.0306059829890728 2023-01-23 02:16:08.743701: step: 352/531, loss: 0.013831520453095436 2023-01-23 02:16:09.867301: step: 356/531, loss: 0.005435275845229626 2023-01-23 02:16:10.977183: step: 360/531, loss: 0.02701110951602459 2023-01-23 02:16:12.108084: step: 364/531, loss: 0.03365010395646095 2023-01-23 02:16:13.195844: step: 368/531, loss: 0.0042716506868600845 2023-01-23 02:16:14.341775: step: 372/531, loss: 0.021305084228515625 2023-01-23 02:16:15.454559: step: 376/531, loss: 0.031024647876620293 2023-01-23 02:16:16.553377: step: 380/531, loss: 0.034691907465457916 2023-01-23 02:16:17.685686: step: 384/531, loss: 0.00045037269592285156 2023-01-23 02:16:18.786011: step: 388/531, loss: 0.0058265686966478825 2023-01-23 02:16:19.906461: step: 392/531, loss: 0.00031147003755904734 2023-01-23 02:16:21.023554: step: 396/531, loss: 0.059551481157541275 2023-01-23 02:16:22.168152: step: 400/531, loss: 0.0031669619493186474 2023-01-23 02:16:23.304963: step: 404/531, loss: 0.0009012222290039062 2023-01-23 02:16:24.429255: step: 408/531, loss: 0.0007403374183923006 2023-01-23 02:16:25.560175: step: 412/531, loss: 0.09400273114442825 2023-01-23 02:16:26.726166: step: 416/531, loss: 0.01275711040943861 2023-01-23 02:16:27.859632: step: 420/531, loss: 0.016127299517393112 2023-01-23 02:16:28.994372: step: 424/531, loss: 0.007638359442353249 2023-01-23 02:16:30.124626: step: 428/531, loss: 0.0017737388843670487 2023-01-23 02:16:31.247423: step: 432/531, loss: 0.06195177882909775 2023-01-23 02:16:32.347943: step: 436/531, loss: 0.013400459662079811 2023-01-23 02:16:33.492837: step: 440/531, loss: 0.04996900632977486 2023-01-23 02:16:34.609777: step: 444/531, loss: 0.010785484686493874 2023-01-23 02:16:35.737623: step: 448/531, loss: 0.04684562608599663 2023-01-23 02:16:36.864625: step: 452/531, loss: 0.13047657907009125 2023-01-23 02:16:38.015543: step: 456/531, loss: 0.019899750128388405 2023-01-23 02:16:39.123218: step: 460/531, loss: 0.06284981220960617 2023-01-23 02:16:40.262920: step: 464/531, loss: 0.09371356666088104 2023-01-23 02:16:41.375557: step: 468/531, loss: 0.016196012496948242 2023-01-23 02:16:42.511840: step: 472/531, loss: 0.0013395310379564762 2023-01-23 02:16:43.613971: step: 476/531, loss: 0.025235557928681374 2023-01-23 02:16:44.734171: step: 480/531, loss: 0.019955921918153763 2023-01-23 02:16:45.864781: step: 484/531, loss: 0.03935671225190163 2023-01-23 02:16:46.988388: step: 488/531, loss: 0.014825058169662952 2023-01-23 02:16:48.177103: step: 492/531, loss: 0.08416537940502167 2023-01-23 02:16:49.299069: step: 496/531, loss: 0.039305686950683594 2023-01-23 02:16:50.403036: step: 500/531, loss: 0.0184999480843544 2023-01-23 02:16:51.532672: step: 504/531, loss: 0.017728902399539948 2023-01-23 02:16:52.656260: step: 508/531, loss: 0.006518745329231024 2023-01-23 02:16:53.774815: step: 512/531, loss: 0.0006950378301553428 2023-01-23 02:16:54.898409: step: 516/531, loss: 0.1484702080488205 2023-01-23 02:16:56.009559: step: 520/531, loss: 0.039586544036865234 2023-01-23 02:16:57.143735: step: 524/531, loss: 0.0001199722319142893 2023-01-23 02:16:58.280523: step: 528/531, loss: 0.007123088929802179 2023-01-23 02:16:59.436044: step: 532/531, loss: 0.08496637642383575 2023-01-23 02:17:00.555352: step: 536/531, loss: 0.014231586828827858 2023-01-23 02:17:01.689937: step: 540/531, loss: 0.0029655457474291325 2023-01-23 02:17:02.795478: step: 544/531, loss: 0.0591578483581543 2023-01-23 02:17:03.921734: step: 548/531, loss: 0.011067485436797142 2023-01-23 02:17:05.049380: step: 552/531, loss: 0.13879604637622833 2023-01-23 02:17:06.170877: step: 556/531, loss: 0.0028553009033203125 2023-01-23 02:17:07.277334: step: 560/531, loss: 0.0029915333725512028 2023-01-23 02:17:08.387074: step: 564/531, loss: 0.00028715134249068797 2023-01-23 02:17:09.488855: step: 568/531, loss: 0.02383270487189293 2023-01-23 02:17:10.633470: step: 572/531, loss: 0.04071855545043945 2023-01-23 02:17:11.754215: step: 576/531, loss: 0.0007885218365117908 2023-01-23 02:17:12.900025: step: 580/531, loss: 0.01653594896197319 2023-01-23 02:17:14.054325: step: 584/531, loss: 0.031295206397771835 2023-01-23 02:17:15.192301: step: 588/531, loss: 0.0028391839005053043 2023-01-23 02:17:16.295739: step: 592/531, loss: 0.00976715050637722 2023-01-23 02:17:17.420321: step: 596/531, loss: 0.01821594312787056 2023-01-23 02:17:18.598623: step: 600/531, loss: 0.0069814687594771385 2023-01-23 02:17:19.719274: step: 604/531, loss: 0.008567428216338158 2023-01-23 02:17:20.854799: step: 608/531, loss: 0.01463625393807888 2023-01-23 02:17:21.971185: step: 612/531, loss: 0.02741723135113716 2023-01-23 02:17:23.079427: step: 616/531, loss: 0.042025376111269 2023-01-23 02:17:24.232516: step: 620/531, loss: 0.03121819533407688 2023-01-23 02:17:25.349659: step: 624/531, loss: 0.021138859912753105 2023-01-23 02:17:26.468303: step: 628/531, loss: 0.005286884494125843 2023-01-23 02:17:27.583051: step: 632/531, loss: 0.010327721014618874 2023-01-23 02:17:28.730715: step: 636/531, loss: 0.009678078815340996 2023-01-23 02:17:29.874332: step: 640/531, loss: 0.0026352882850915194 2023-01-23 02:17:31.001523: step: 644/531, loss: 0.0016733170486986637 2023-01-23 02:17:32.130216: step: 648/531, loss: 0.00276870746165514 2023-01-23 02:17:33.260360: step: 652/531, loss: 0.00866556167602539 2023-01-23 02:17:34.389136: step: 656/531, loss: 0.01553268451243639 2023-01-23 02:17:35.513658: step: 660/531, loss: 0.04496021196246147 2023-01-23 02:17:36.657381: step: 664/531, loss: 0.009823323227465153 2023-01-23 02:17:37.763817: step: 668/531, loss: 0.019964218139648438 2023-01-23 02:17:38.876808: step: 672/531, loss: 0.018725013360381126 2023-01-23 02:17:40.007998: step: 676/531, loss: 0.049706265330314636 2023-01-23 02:17:41.099467: step: 680/531, loss: 0.041196297854185104 2023-01-23 02:17:42.262752: step: 684/531, loss: 0.005528164096176624 2023-01-23 02:17:43.428656: step: 688/531, loss: 0.04586810991168022 2023-01-23 02:17:44.545882: step: 692/531, loss: 0.028386402875185013 2023-01-23 02:17:45.662912: step: 696/531, loss: 0.05595235899090767 2023-01-23 02:17:46.798530: step: 700/531, loss: 0.00014429092698264867 2023-01-23 02:17:47.955284: step: 704/531, loss: 0.028806114569306374 2023-01-23 02:17:49.054894: step: 708/531, loss: 0.07829227298498154 2023-01-23 02:17:50.167266: step: 712/531, loss: 0.03620128706097603 2023-01-23 02:17:51.271745: step: 716/531, loss: 0.0049717905931174755 2023-01-23 02:17:52.408113: step: 720/531, loss: 0.0002758026239462197 2023-01-23 02:17:53.527147: step: 724/531, loss: 0.11680107563734055 2023-01-23 02:17:54.662088: step: 728/531, loss: 0.004488754086196423 2023-01-23 02:17:55.793365: step: 732/531, loss: 0.06606468558311462 2023-01-23 02:17:56.890707: step: 736/531, loss: 0.08832956105470657 2023-01-23 02:17:58.036093: step: 740/531, loss: 0.000797271728515625 2023-01-23 02:17:59.173136: step: 744/531, loss: 0.2113761007785797 2023-01-23 02:18:00.294391: step: 748/531, loss: 0.0008353710290975869 2023-01-23 02:18:01.420878: step: 752/531, loss: 0.004295444581657648 2023-01-23 02:18:02.557959: step: 756/531, loss: 0.056002333760261536 2023-01-23 02:18:03.723539: step: 760/531, loss: 0.01753845252096653 2023-01-23 02:18:04.833524: step: 764/531, loss: 0.006696129217743874 2023-01-23 02:18:05.957903: step: 768/531, loss: 0.08233197778463364 2023-01-23 02:18:07.084261: step: 772/531, loss: 0.009122371673583984 2023-01-23 02:18:08.235596: step: 776/531, loss: 0.01749582216143608 2023-01-23 02:18:09.338046: step: 780/531, loss: 0.0007336616399697959 2023-01-23 02:18:10.469649: step: 784/531, loss: 0.030775070190429688 2023-01-23 02:18:11.565154: step: 788/531, loss: 0.004580402746796608 2023-01-23 02:18:12.685873: step: 792/531, loss: 0.012348365969955921 2023-01-23 02:18:13.817175: step: 796/531, loss: 0.08743209391832352 2023-01-23 02:18:14.946007: step: 800/531, loss: 0.03028078004717827 2023-01-23 02:18:16.055441: step: 804/531, loss: 0.00019941330538131297 2023-01-23 02:18:17.164097: step: 808/531, loss: 0.009217072278261185 2023-01-23 02:18:18.297382: step: 812/531, loss: 0.0025737525429576635 2023-01-23 02:18:19.415625: step: 816/531, loss: 0.017647838220000267 2023-01-23 02:18:20.552408: step: 820/531, loss: 0.026299476623535156 2023-01-23 02:18:21.671293: step: 824/531, loss: 0.08873309940099716 2023-01-23 02:18:22.827447: step: 828/531, loss: 0.01612529717385769 2023-01-23 02:18:23.960916: step: 832/531, loss: 0.005986213684082031 2023-01-23 02:18:25.091812: step: 836/531, loss: 0.011033820919692516 2023-01-23 02:18:26.212857: step: 840/531, loss: 0.18404893577098846 2023-01-23 02:18:27.331455: step: 844/531, loss: 0.010575294494628906 2023-01-23 02:18:28.456414: step: 848/531, loss: 0.018220329657197 2023-01-23 02:18:29.586214: step: 852/531, loss: 0.022934913635253906 2023-01-23 02:18:30.691276: step: 856/531, loss: 0.051935575902462006 2023-01-23 02:18:31.776467: step: 860/531, loss: 0.0011620521545410156 2023-01-23 02:18:32.893438: step: 864/531, loss: 0.062047481536865234 2023-01-23 02:18:34.021524: step: 868/531, loss: 0.0014314651489257812 2023-01-23 02:18:35.117191: step: 872/531, loss: 0.008043861947953701 2023-01-23 02:18:36.249491: step: 876/531, loss: 0.02506437338888645 2023-01-23 02:18:37.393983: step: 880/531, loss: 0.2291662096977234 2023-01-23 02:18:38.508734: step: 884/531, loss: 0.11822376400232315 2023-01-23 02:18:39.622459: step: 888/531, loss: 0.04041347652673721 2023-01-23 02:18:40.748955: step: 892/531, loss: 0.08089856803417206 2023-01-23 02:18:41.888262: step: 896/531, loss: 0.00497360248118639 2023-01-23 02:18:43.003946: step: 900/531, loss: 0.0037691593170166016 2023-01-23 02:18:44.111642: step: 904/531, loss: 0.03270168602466583 2023-01-23 02:18:45.205324: step: 908/531, loss: 0.0006089209928177297 2023-01-23 02:18:46.319310: step: 912/531, loss: 0.018484115600585938 2023-01-23 02:18:47.447652: step: 916/531, loss: 0.011260032653808594 2023-01-23 02:18:48.586634: step: 920/531, loss: 0.001990461489185691 2023-01-23 02:18:49.703647: step: 924/531, loss: 8.468628220725805e-05 2023-01-23 02:18:50.849536: step: 928/531, loss: 0.014310264959931374 2023-01-23 02:18:52.009415: step: 932/531, loss: 0.004746341612190008 2023-01-23 02:18:53.116005: step: 936/531, loss: 6.699562072753906e-05 2023-01-23 02:18:54.251943: step: 940/531, loss: 0.0012751579051837325 2023-01-23 02:18:55.380349: step: 944/531, loss: 0.008514786139130592 2023-01-23 02:18:56.535219: step: 948/531, loss: 0.01791400834918022 2023-01-23 02:18:57.651375: step: 952/531, loss: 0.023016929626464844 2023-01-23 02:18:58.746761: step: 956/531, loss: 0.021587753668427467 2023-01-23 02:18:59.873484: step: 960/531, loss: 0.024913977831602097 2023-01-23 02:19:00.998894: step: 964/531, loss: 0.011025715619325638 2023-01-23 02:19:02.121411: step: 968/531, loss: 0.030767440795898438 2023-01-23 02:19:03.243473: step: 972/531, loss: 0.05347614362835884 2023-01-23 02:19:04.397544: step: 976/531, loss: 0.0028078078757971525 2023-01-23 02:19:05.538237: step: 980/531, loss: 0.023520469665527344 2023-01-23 02:19:06.666518: step: 984/531, loss: 0.3827234208583832 2023-01-23 02:19:07.822001: step: 988/531, loss: 0.01490707416087389 2023-01-23 02:19:08.965765: step: 992/531, loss: 0.024048617109656334 2023-01-23 02:19:10.108674: step: 996/531, loss: 0.00401649484410882 2023-01-23 02:19:11.210232: step: 1000/531, loss: 0.04343271628022194 2023-01-23 02:19:12.362907: step: 1004/531, loss: 0.10270857810974121 2023-01-23 02:19:13.481922: step: 1008/531, loss: 0.033948518335819244 2023-01-23 02:19:14.610102: step: 1012/531, loss: 0.008781624026596546 2023-01-23 02:19:15.711451: step: 1016/531, loss: 0.026502227410674095 2023-01-23 02:19:16.851979: step: 1020/531, loss: 0.05910911783576012 2023-01-23 02:19:17.991283: step: 1024/531, loss: 0.0040795328095555305 2023-01-23 02:19:19.102435: step: 1028/531, loss: 0.00801396369934082 2023-01-23 02:19:20.202123: step: 1032/531, loss: 0.000980377197265625 2023-01-23 02:19:21.349090: step: 1036/531, loss: 0.003254509065300226 2023-01-23 02:19:22.459235: step: 1040/531, loss: 0.00266609201207757 2023-01-23 02:19:23.574453: step: 1044/531, loss: 0.0018165111541748047 2023-01-23 02:19:24.697306: step: 1048/531, loss: 0.017932796850800514 2023-01-23 02:19:25.830739: step: 1052/531, loss: 0.0004673957882914692 2023-01-23 02:19:26.959767: step: 1056/531, loss: 0.029655171558260918 2023-01-23 02:19:28.083705: step: 1060/531, loss: 0.00557022076100111 2023-01-23 02:19:29.200069: step: 1064/531, loss: 0.005026531405746937 2023-01-23 02:19:30.322552: step: 1068/531, loss: 0.021099090576171875 2023-01-23 02:19:31.448854: step: 1072/531, loss: 0.0560942180454731 2023-01-23 02:19:32.581332: step: 1076/531, loss: 0.8823589086532593 2023-01-23 02:19:33.688568: step: 1080/531, loss: 0.7515289187431335 2023-01-23 02:19:34.804510: step: 1084/531, loss: 0.018741607666015625 2023-01-23 02:19:35.917314: step: 1088/531, loss: 0.005289173219352961 2023-01-23 02:19:37.036026: step: 1092/531, loss: 0.023261072114109993 2023-01-23 02:19:38.186554: step: 1096/531, loss: 0.33323726058006287 2023-01-23 02:19:39.318130: step: 1100/531, loss: 0.004673004150390625 2023-01-23 02:19:40.483325: step: 1104/531, loss: 0.04440317302942276 2023-01-23 02:19:41.616669: step: 1108/531, loss: 0.002774047665297985 2023-01-23 02:19:42.740105: step: 1112/531, loss: 0.012278461828827858 2023-01-23 02:19:43.847567: step: 1116/531, loss: 0.0557108148932457 2023-01-23 02:19:45.015754: step: 1120/531, loss: 0.015136814676225185 2023-01-23 02:19:46.132029: step: 1124/531, loss: 0.006009864620864391 2023-01-23 02:19:47.262284: step: 1128/531, loss: 0.011852741241455078 2023-01-23 02:19:48.374956: step: 1132/531, loss: 0.004689216613769531 2023-01-23 02:19:49.503919: step: 1136/531, loss: 0.0004371643008198589 2023-01-23 02:19:50.622964: step: 1140/531, loss: 0.0035324099007993937 2023-01-23 02:19:51.736869: step: 1144/531, loss: 0.036432839930057526 2023-01-23 02:19:52.854610: step: 1148/531, loss: 0.010029220022261143 2023-01-23 02:19:53.945101: step: 1152/531, loss: 0.0002424240083200857 2023-01-23 02:19:55.065304: step: 1156/531, loss: 0.0031097412575036287 2023-01-23 02:19:56.186997: step: 1160/531, loss: 0.02171182632446289 2023-01-23 02:19:57.304067: step: 1164/531, loss: 0.0006812096107751131 2023-01-23 02:19:58.414929: step: 1168/531, loss: 0.002594852354377508 2023-01-23 02:19:59.569157: step: 1172/531, loss: 0.020543480291962624 2023-01-23 02:20:00.712688: step: 1176/531, loss: 0.12869758903980255 2023-01-23 02:20:01.839448: step: 1180/531, loss: 0.03174304962158203 2023-01-23 02:20:02.977561: step: 1184/531, loss: 0.027428055182099342 2023-01-23 02:20:04.111010: step: 1188/531, loss: 0.00010080337233375758 2023-01-23 02:20:05.264383: step: 1192/531, loss: 0.048986244946718216 2023-01-23 02:20:06.378367: step: 1196/531, loss: 0.00484886160120368 2023-01-23 02:20:07.504065: step: 1200/531, loss: 0.0312800407409668 2023-01-23 02:20:08.647105: step: 1204/531, loss: 0.01782398298382759 2023-01-23 02:20:09.751492: step: 1208/531, loss: 0.029322339221835136 2023-01-23 02:20:10.878121: step: 1212/531, loss: 0.02630338817834854 2023-01-23 02:20:12.029377: step: 1216/531, loss: 0.028730392456054688 2023-01-23 02:20:13.138059: step: 1220/531, loss: 0.004925251007080078 2023-01-23 02:20:14.224832: step: 1224/531, loss: 0.02300058677792549 2023-01-23 02:20:15.384308: step: 1228/531, loss: 0.013107872568070889 2023-01-23 02:20:16.516121: step: 1232/531, loss: 0.011837387457489967 2023-01-23 02:20:17.659807: step: 1236/531, loss: 0.15630368888378143 2023-01-23 02:20:18.767610: step: 1240/531, loss: 0.010937786661088467 2023-01-23 02:20:19.883878: step: 1244/531, loss: 0.0007453918224200606 2023-01-23 02:20:21.039872: step: 1248/531, loss: 0.01017913781106472 2023-01-23 02:20:22.153147: step: 1252/531, loss: 0.037652015686035156 2023-01-23 02:20:23.279343: step: 1256/531, loss: 0.002389621688053012 2023-01-23 02:20:24.384401: step: 1260/531, loss: 0.003894805908203125 2023-01-23 02:20:25.513590: step: 1264/531, loss: 0.027416039258241653 2023-01-23 02:20:26.661754: step: 1268/531, loss: 0.0053002359345555305 2023-01-23 02:20:27.759424: step: 1272/531, loss: 0.02048034779727459 2023-01-23 02:20:28.905901: step: 1276/531, loss: 0.0036555291153490543 2023-01-23 02:20:30.026169: step: 1280/531, loss: 0.04128255695104599 2023-01-23 02:20:31.155953: step: 1284/531, loss: 0.0275256410241127 2023-01-23 02:20:32.280617: step: 1288/531, loss: 0.07578639686107635 2023-01-23 02:20:33.432153: step: 1292/531, loss: 0.5570371747016907 2023-01-23 02:20:34.600098: step: 1296/531, loss: 0.023293782025575638 2023-01-23 02:20:35.737521: step: 1300/531, loss: 0.13164043426513672 2023-01-23 02:20:36.852518: step: 1304/531, loss: 0.01826038397848606 2023-01-23 02:20:37.971580: step: 1308/531, loss: 0.00021269322314765304 2023-01-23 02:20:39.050221: step: 1312/531, loss: 0.005787086673080921 2023-01-23 02:20:40.162004: step: 1316/531, loss: 0.018176458775997162 2023-01-23 02:20:41.269407: step: 1320/531, loss: 0.002549457596614957 2023-01-23 02:20:42.397788: step: 1324/531, loss: 0.011810492724180222 2023-01-23 02:20:43.542313: step: 1328/531, loss: 0.02890009991824627 2023-01-23 02:20:44.670665: step: 1332/531, loss: 0.00713381776586175 2023-01-23 02:20:45.807264: step: 1336/531, loss: 0.004637718200683594 2023-01-23 02:20:46.946185: step: 1340/531, loss: 0.00917725544422865 2023-01-23 02:20:48.070911: step: 1344/531, loss: 0.007003212347626686 2023-01-23 02:20:49.201281: step: 1348/531, loss: 0.06715298444032669 2023-01-23 02:20:50.333277: step: 1352/531, loss: 0.0100204823538661 2023-01-23 02:20:51.451183: step: 1356/531, loss: 0.018655776977539062 2023-01-23 02:20:52.567951: step: 1360/531, loss: 0.027485277503728867 2023-01-23 02:20:53.685246: step: 1364/531, loss: 0.022505952045321465 2023-01-23 02:20:54.824711: step: 1368/531, loss: 0.00748596154153347 2023-01-23 02:20:55.946694: step: 1372/531, loss: 0.010336565785109997 2023-01-23 02:20:57.064683: step: 1376/531, loss: 0.025951862335205078 2023-01-23 02:20:58.208366: step: 1380/531, loss: 0.01791992224752903 2023-01-23 02:20:59.353230: step: 1384/531, loss: 0.05021476745605469 2023-01-23 02:21:00.451999: step: 1388/531, loss: 0.05160551145672798 2023-01-23 02:21:01.595521: step: 1392/531, loss: 0.005590343847870827 2023-01-23 02:21:02.731232: step: 1396/531, loss: 0.009225940331816673 2023-01-23 02:21:03.860476: step: 1400/531, loss: 0.025502199307084084 2023-01-23 02:21:05.018282: step: 1404/531, loss: 0.03535986319184303 2023-01-23 02:21:06.126348: step: 1408/531, loss: 0.04016933590173721 2023-01-23 02:21:07.233932: step: 1412/531, loss: 0.00041294097900390625 2023-01-23 02:21:08.358305: step: 1416/531, loss: 0.013364791870117188 2023-01-23 02:21:09.491007: step: 1420/531, loss: 0.07080783694982529 2023-01-23 02:21:10.599020: step: 1424/531, loss: 0.056271836161613464 2023-01-23 02:21:11.744588: step: 1428/531, loss: 0.013304853811860085 2023-01-23 02:21:12.903110: step: 1432/531, loss: 0.007565975189208984 2023-01-23 02:21:14.016108: step: 1436/531, loss: 0.06375379860401154 2023-01-23 02:21:15.144112: step: 1440/531, loss: 0.0005605697515420616 2023-01-23 02:21:16.257422: step: 1444/531, loss: 0.03965587541460991 2023-01-23 02:21:17.385419: step: 1448/531, loss: 0.01696167141199112 2023-01-23 02:21:18.511125: step: 1452/531, loss: 0.049842797219753265 2023-01-23 02:21:19.645602: step: 1456/531, loss: 0.11034837365150452 2023-01-23 02:21:20.777674: step: 1460/531, loss: 0.08664512634277344 2023-01-23 02:21:21.914748: step: 1464/531, loss: 0.04803180694580078 2023-01-23 02:21:23.019330: step: 1468/531, loss: 0.0025374414399266243 2023-01-23 02:21:24.141425: step: 1472/531, loss: 0.04747343063354492 2023-01-23 02:21:25.298451: step: 1476/531, loss: 0.03770873695611954 2023-01-23 02:21:26.436061: step: 1480/531, loss: 0.011066246777772903 2023-01-23 02:21:27.550967: step: 1484/531, loss: 0.008639907464385033 2023-01-23 02:21:28.654066: step: 1488/531, loss: 0.027574921026825905 2023-01-23 02:21:29.778531: step: 1492/531, loss: 0.008526134304702282 2023-01-23 02:21:30.915117: step: 1496/531, loss: 0.006079673767089844 2023-01-23 02:21:32.065298: step: 1500/531, loss: 0.00024662018404342234 2023-01-23 02:21:33.178673: step: 1504/531, loss: 0.0664302408695221 2023-01-23 02:21:34.325922: step: 1508/531, loss: 0.008763599209487438 2023-01-23 02:21:35.458602: step: 1512/531, loss: 0.03532352298498154 2023-01-23 02:21:36.558850: step: 1516/531, loss: 0.0046096802689135075 2023-01-23 02:21:37.707655: step: 1520/531, loss: 0.03270306438207626 2023-01-23 02:21:38.853364: step: 1524/531, loss: 0.009535981342196465 2023-01-23 02:21:39.969448: step: 1528/531, loss: 0.0007839202880859375 2023-01-23 02:21:41.092849: step: 1532/531, loss: 0.04288983345031738 2023-01-23 02:21:42.222755: step: 1536/531, loss: 0.0056164744310081005 2023-01-23 02:21:43.341776: step: 1540/531, loss: 0.0013189315795898438 2023-01-23 02:21:44.495699: step: 1544/531, loss: 0.05837249755859375 2023-01-23 02:21:45.634032: step: 1548/531, loss: 0.03498687595129013 2023-01-23 02:21:46.732909: step: 1552/531, loss: 0.00036249158438295126 2023-01-23 02:21:47.884467: step: 1556/531, loss: 0.03256988525390625 2023-01-23 02:21:49.004038: step: 1560/531, loss: 0.03097829781472683 2023-01-23 02:21:50.167658: step: 1564/531, loss: 0.02753310278058052 2023-01-23 02:21:51.275051: step: 1568/531, loss: 0.7763586044311523 2023-01-23 02:21:52.409915: step: 1572/531, loss: 0.028718186542391777 2023-01-23 02:21:53.538303: step: 1576/531, loss: 0.039102934300899506 2023-01-23 02:21:54.646094: step: 1580/531, loss: 0.2947107255458832 2023-01-23 02:21:55.786898: step: 1584/531, loss: 0.0159041415899992 2023-01-23 02:21:56.911445: step: 1588/531, loss: 0.025876998901367188 2023-01-23 02:21:58.054634: step: 1592/531, loss: 0.000474822532851249 2023-01-23 02:21:59.172371: step: 1596/531, loss: 0.004958438687026501 2023-01-23 02:22:00.295027: step: 1600/531, loss: 0.01568899117410183 2023-01-23 02:22:01.451802: step: 1604/531, loss: 0.025624370202422142 2023-01-23 02:22:02.633446: step: 1608/531, loss: 0.05420932546257973 2023-01-23 02:22:03.768316: step: 1612/531, loss: 0.07152099907398224 2023-01-23 02:22:04.868782: step: 1616/531, loss: 0.0027930261567234993 2023-01-23 02:22:05.992123: step: 1620/531, loss: 0.019649459049105644 2023-01-23 02:22:07.114648: step: 1624/531, loss: 0.0008291244739666581 2023-01-23 02:22:08.249934: step: 1628/531, loss: 0.005993843078613281 2023-01-23 02:22:09.366452: step: 1632/531, loss: 0.015912247821688652 2023-01-23 02:22:10.509975: step: 1636/531, loss: 0.12007541954517365 2023-01-23 02:22:11.626895: step: 1640/531, loss: 0.0017176627879962325 2023-01-23 02:22:12.743025: step: 1644/531, loss: 0.0002313137229066342 2023-01-23 02:22:13.877419: step: 1648/531, loss: 0.006108665373176336 2023-01-23 02:22:14.984682: step: 1652/531, loss: 0.0014146803878247738 2023-01-23 02:22:16.085551: step: 1656/531, loss: 0.008822060190141201 2023-01-23 02:22:17.169987: step: 1660/531, loss: 0.03849220275878906 2023-01-23 02:22:18.310885: step: 1664/531, loss: 0.08142280578613281 2023-01-23 02:22:19.471642: step: 1668/531, loss: 4.253387305652723e-05 2023-01-23 02:22:20.603360: step: 1672/531, loss: 0.13841933012008667 2023-01-23 02:22:21.719551: step: 1676/531, loss: 0.023546766489744186 2023-01-23 02:22:22.844168: step: 1680/531, loss: 0.04385824128985405 2023-01-23 02:22:23.978183: step: 1684/531, loss: 0.005815600976347923 2023-01-23 02:22:25.082212: step: 1688/531, loss: 0.04885225370526314 2023-01-23 02:22:26.192048: step: 1692/531, loss: 0.07603836059570312 2023-01-23 02:22:27.310319: step: 1696/531, loss: 0.010076189413666725 2023-01-23 02:22:28.447110: step: 1700/531, loss: 0.018894482403993607 2023-01-23 02:22:29.580779: step: 1704/531, loss: 0.0474395751953125 2023-01-23 02:22:30.704047: step: 1708/531, loss: 0.00023088455782271922 2023-01-23 02:22:31.836861: step: 1712/531, loss: 0.0016429901588708162 2023-01-23 02:22:32.996168: step: 1716/531, loss: 0.1495567411184311 2023-01-23 02:22:34.122447: step: 1720/531, loss: 0.07339973747730255 2023-01-23 02:22:35.242154: step: 1724/531, loss: 0.01040344312787056 2023-01-23 02:22:36.383251: step: 1728/531, loss: 0.015002298168838024 2023-01-23 02:22:37.499957: step: 1732/531, loss: 0.06674917042255402 2023-01-23 02:22:38.644191: step: 1736/531, loss: 0.05027579143643379 2023-01-23 02:22:39.774769: step: 1740/531, loss: 0.01479187048971653 2023-01-23 02:22:40.893399: step: 1744/531, loss: 0.00819168146699667 2023-01-23 02:22:42.035005: step: 1748/531, loss: 0.2440943717956543 2023-01-23 02:22:43.153293: step: 1752/531, loss: 0.040297795087099075 2023-01-23 02:22:44.260828: step: 1756/531, loss: 0.07959786057472229 2023-01-23 02:22:45.358490: step: 1760/531, loss: 0.19656601548194885 2023-01-23 02:22:46.493909: step: 1764/531, loss: 0.039853859692811966 2023-01-23 02:22:47.620703: step: 1768/531, loss: 0.08470363914966583 2023-01-23 02:22:48.722231: step: 1772/531, loss: 0.01954016648232937 2023-01-23 02:22:49.848629: step: 1776/531, loss: 0.024799538776278496 2023-01-23 02:22:50.975124: step: 1780/531, loss: 0.13329735398292542 2023-01-23 02:22:52.087795: step: 1784/531, loss: 0.011051177978515625 2023-01-23 02:22:53.211384: step: 1788/531, loss: 0.027523614466190338 2023-01-23 02:22:54.350274: step: 1792/531, loss: 0.044104576110839844 2023-01-23 02:22:55.462059: step: 1796/531, loss: 0.005805015563964844 2023-01-23 02:22:56.576060: step: 1800/531, loss: 0.015549277886748314 2023-01-23 02:22:57.716216: step: 1804/531, loss: 0.07079067081212997 2023-01-23 02:22:58.838593: step: 1808/531, loss: 0.002153110457584262 2023-01-23 02:22:59.971974: step: 1812/531, loss: 0.012756729498505592 2023-01-23 02:23:01.106890: step: 1816/531, loss: 0.12018308788537979 2023-01-23 02:23:02.217035: step: 1820/531, loss: 0.024119997397065163 2023-01-23 02:23:03.365990: step: 1824/531, loss: 0.15997090935707092 2023-01-23 02:23:04.493816: step: 1828/531, loss: 0.0011074065696448088 2023-01-23 02:23:05.630521: step: 1832/531, loss: 0.00395278912037611 2023-01-23 02:23:06.756970: step: 1836/531, loss: 0.05136556550860405 2023-01-23 02:23:07.869524: step: 1840/531, loss: 0.07652731239795685 2023-01-23 02:23:08.987075: step: 1844/531, loss: 0.019054604694247246 2023-01-23 02:23:10.094762: step: 1848/531, loss: 0.004462623968720436 2023-01-23 02:23:11.215109: step: 1852/531, loss: 0.0002476692316122353 2023-01-23 02:23:12.344163: step: 1856/531, loss: 0.009767914190888405 2023-01-23 02:23:13.459141: step: 1860/531, loss: 0.011298369616270065 2023-01-23 02:23:14.588885: step: 1864/531, loss: 0.01149454154074192 2023-01-23 02:23:15.656210: step: 1868/531, loss: 0.00012493133544921875 2023-01-23 02:23:16.783608: step: 1872/531, loss: 0.014038467779755592 2023-01-23 02:23:17.916436: step: 1876/531, loss: 0.03041086345911026 2023-01-23 02:23:19.053631: step: 1880/531, loss: 0.013987446203827858 2023-01-23 02:23:20.210597: step: 1884/531, loss: 0.03564281389117241 2023-01-23 02:23:21.373600: step: 1888/531, loss: 0.006274223793298006 2023-01-23 02:23:22.501208: step: 1892/531, loss: 0.03384409099817276 2023-01-23 02:23:23.626042: step: 1896/531, loss: 0.022681105881929398 2023-01-23 02:23:24.737389: step: 1900/531, loss: 0.0470980666577816 2023-01-23 02:23:25.856914: step: 1904/531, loss: 0.001577663468196988 2023-01-23 02:23:26.972833: step: 1908/531, loss: 0.010176277719438076 2023-01-23 02:23:28.099460: step: 1912/531, loss: 0.07056045532226562 2023-01-23 02:23:29.250129: step: 1916/531, loss: 0.04085388407111168 2023-01-23 02:23:30.354993: step: 1920/531, loss: 0.0014092446072027087 2023-01-23 02:23:31.495768: step: 1924/531, loss: 0.039504241198301315 2023-01-23 02:23:32.602540: step: 1928/531, loss: 0.09229431301355362 2023-01-23 02:23:33.713653: step: 1932/531, loss: 0.060762979090213776 2023-01-23 02:23:34.842109: step: 1936/531, loss: 0.053397275507450104 2023-01-23 02:23:35.956482: step: 1940/531, loss: 0.015315055847167969 2023-01-23 02:23:37.058023: step: 1944/531, loss: 0.002593803219497204 2023-01-23 02:23:38.190937: step: 1948/531, loss: 0.006776046939194202 2023-01-23 02:23:39.302313: step: 1952/531, loss: 0.030817175284028053 2023-01-23 02:23:40.391470: step: 1956/531, loss: 0.027423668652772903 2023-01-23 02:23:41.503435: step: 1960/531, loss: 0.03988667204976082 2023-01-23 02:23:42.662578: step: 1964/531, loss: 0.03127577528357506 2023-01-23 02:23:43.763162: step: 1968/531, loss: 0.0006717682117596269 2023-01-23 02:23:44.884288: step: 1972/531, loss: 0.005677032750099897 2023-01-23 02:23:45.987434: step: 1976/531, loss: 0.00278739957138896 2023-01-23 02:23:47.094624: step: 1980/531, loss: 0.012585902586579323 2023-01-23 02:23:48.213494: step: 1984/531, loss: 0.0019260406261309981 2023-01-23 02:23:49.315318: step: 1988/531, loss: 0.000286102294921875 2023-01-23 02:23:50.432208: step: 1992/531, loss: 0.011226083151996136 2023-01-23 02:23:51.559180: step: 1996/531, loss: 0.011379433795809746 2023-01-23 02:23:52.677799: step: 2000/531, loss: 0.06207103654742241 2023-01-23 02:23:53.829234: step: 2004/531, loss: 0.0024404525756835938 2023-01-23 02:23:54.909023: step: 2008/531, loss: 0.0077700139954686165 2023-01-23 02:23:56.021258: step: 2012/531, loss: 0.02217264100909233 2023-01-23 02:23:57.164077: step: 2016/531, loss: 0.021509552374482155 2023-01-23 02:23:58.277221: step: 2020/531, loss: 0.03579392284154892 2023-01-23 02:23:59.373680: step: 2024/531, loss: 0.0034992219880223274 2023-01-23 02:24:00.471829: step: 2028/531, loss: 0.0017119408585131168 2023-01-23 02:24:01.580606: step: 2032/531, loss: 0.06444092094898224 2023-01-23 02:24:02.702075: step: 2036/531, loss: 0.07358045876026154 2023-01-23 02:24:03.817515: step: 2040/531, loss: 0.06879615783691406 2023-01-23 02:24:04.944462: step: 2044/531, loss: 0.08124520629644394 2023-01-23 02:24:06.070594: step: 2048/531, loss: 0.010954475030303001 2023-01-23 02:24:07.187051: step: 2052/531, loss: 0.055268190801143646 2023-01-23 02:24:08.294951: step: 2056/531, loss: 0.027300072833895683 2023-01-23 02:24:09.406210: step: 2060/531, loss: 0.0009501933818683028 2023-01-23 02:24:10.545862: step: 2064/531, loss: 0.05097408592700958 2023-01-23 02:24:11.663895: step: 2068/531, loss: 0.08390650898218155 2023-01-23 02:24:12.787126: step: 2072/531, loss: 0.024344541132450104 2023-01-23 02:24:13.913303: step: 2076/531, loss: 0.0316440612077713 2023-01-23 02:24:15.025557: step: 2080/531, loss: 0.017836641520261765 2023-01-23 02:24:16.146550: step: 2084/531, loss: 0.045054152607917786 2023-01-23 02:24:17.275205: step: 2088/531, loss: 0.014429998584091663 2023-01-23 02:24:18.388739: step: 2092/531, loss: 0.03557949140667915 2023-01-23 02:24:19.532953: step: 2096/531, loss: 0.003995704464614391 2023-01-23 02:24:20.639566: step: 2100/531, loss: 0.004146385006606579 2023-01-23 02:24:21.758221: step: 2104/531, loss: 0.03479357063770294 2023-01-23 02:24:22.957382: step: 2108/531, loss: 0.0670807883143425 2023-01-23 02:24:24.082753: step: 2112/531, loss: 0.228485107421875 2023-01-23 02:24:25.234612: step: 2116/531, loss: 0.06485709547996521 2023-01-23 02:24:26.343852: step: 2120/531, loss: 0.02121448516845703 2023-01-23 02:24:27.437904: step: 2124/531, loss: 0.026298046112060547 ================================================== Loss: 0.040 -------------------- Dev: {'event': {'p': 0.5829244357212954, 'r': 0.7909454061251664, 'f1': 0.671186440677966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Test: {'event': {'p': 0.6184456928838952, 'r': 0.7877161598091831, 'f1': 0.6928927353789667}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Chinese: {'event': {'p': 0.5568181818181818, 'r': 0.9074074074074074, 'f1': 0.6901408450704225}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Korean: {'event': {'p': 0.5873015873015873, 'r': 0.5873015873015873, 'f1': 0.5873015873015873}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Russian: {'event': {'p': 0.41304347826086957, 'r': 0.5277777777777778, 'f1': 0.4634146341463415}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 17 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:25:07.433649: step: 4/531, loss: 0.0031986236572265625 2023-01-23 02:25:08.591891: step: 8/531, loss: 0.5320137143135071 2023-01-23 02:25:09.740663: step: 12/531, loss: 0.5069739818572998 2023-01-23 02:25:10.841245: step: 16/531, loss: 0.009532738476991653 2023-01-23 02:25:11.979321: step: 20/531, loss: 0.0035482405219227076 2023-01-23 02:25:13.130094: step: 24/531, loss: 0.0088049890473485 2023-01-23 02:25:14.276476: step: 28/531, loss: 0.011547851376235485 2023-01-23 02:25:15.403142: step: 32/531, loss: 0.004515457432717085 2023-01-23 02:25:16.529670: step: 36/531, loss: 0.0002967834589071572 2023-01-23 02:25:17.645591: step: 40/531, loss: 0.002246254589408636 2023-01-23 02:25:18.763307: step: 44/531, loss: 0.003928470425307751 2023-01-23 02:25:19.899316: step: 48/531, loss: 0.019620036706328392 2023-01-23 02:25:21.007401: step: 52/531, loss: 0.08387431502342224 2023-01-23 02:25:22.161128: step: 56/531, loss: 0.02247028425335884 2023-01-23 02:25:23.317272: step: 60/531, loss: 0.0017900466918945312 2023-01-23 02:25:24.442393: step: 64/531, loss: 0.006113243289291859 2023-01-23 02:25:25.533943: step: 68/531, loss: 0.0006719947559759021 2023-01-23 02:25:26.647292: step: 72/531, loss: 0.0399663932621479 2023-01-23 02:25:27.783630: step: 76/531, loss: 0.0005016326904296875 2023-01-23 02:25:28.923487: step: 80/531, loss: 0.031781867146492004 2023-01-23 02:25:30.041776: step: 84/531, loss: 0.00866022054105997 2023-01-23 02:25:31.172010: step: 88/531, loss: 0.0023365020751953125 2023-01-23 02:25:32.325712: step: 92/531, loss: 0.0009809971088543534 2023-01-23 02:25:33.471342: step: 96/531, loss: 0.02555999718606472 2023-01-23 02:25:34.624851: step: 100/531, loss: 0.011400032788515091 2023-01-23 02:25:35.780046: step: 104/531, loss: 0.00790328998118639 2023-01-23 02:25:36.915182: step: 108/531, loss: 0.0219573974609375 2023-01-23 02:25:38.060127: step: 112/531, loss: 0.021747207269072533 2023-01-23 02:25:39.216521: step: 116/531, loss: 0.002005290938541293 2023-01-23 02:25:40.372003: step: 120/531, loss: 0.006027412600815296 2023-01-23 02:25:41.491995: step: 124/531, loss: 0.0036631585098803043 2023-01-23 02:25:42.639204: step: 128/531, loss: 0.05078888311982155 2023-01-23 02:25:43.785656: step: 132/531, loss: 0.0008088112226687372 2023-01-23 02:25:44.968588: step: 136/531, loss: 0.02565746381878853 2023-01-23 02:25:46.094995: step: 140/531, loss: 0.023259354755282402 2023-01-23 02:25:47.242664: step: 144/531, loss: 0.1761014759540558 2023-01-23 02:25:48.377545: step: 148/531, loss: 0.021817494183778763 2023-01-23 02:25:49.508471: step: 152/531, loss: 0.30317115783691406 2023-01-23 02:25:50.643562: step: 156/531, loss: 0.008369636721909046 2023-01-23 02:25:51.759101: step: 160/531, loss: 0.015198517590761185 2023-01-23 02:25:52.906860: step: 164/531, loss: 0.00023193359083961695 2023-01-23 02:25:54.024589: step: 168/531, loss: 0.0331784263253212 2023-01-23 02:25:55.164392: step: 172/531, loss: 0.01221532840281725 2023-01-23 02:25:56.283073: step: 176/531, loss: 0.030248405411839485 2023-01-23 02:25:57.418851: step: 180/531, loss: 0.01745452918112278 2023-01-23 02:25:58.511002: step: 184/531, loss: 0.08195304870605469 2023-01-23 02:25:59.661655: step: 188/531, loss: 0.0026899336371570826 2023-01-23 02:26:00.802312: step: 192/531, loss: 0.04017810896039009 2023-01-23 02:26:01.917816: step: 196/531, loss: 0.0007333278772421181 2023-01-23 02:26:03.049525: step: 200/531, loss: 0.05020543187856674 2023-01-23 02:26:04.165324: step: 204/531, loss: 0.0006281852838583291 2023-01-23 02:26:05.283595: step: 208/531, loss: 0.0010903358925133944 2023-01-23 02:26:06.447838: step: 212/531, loss: 0.023430442437529564 2023-01-23 02:26:07.574618: step: 216/531, loss: 0.00016498565673828125 2023-01-23 02:26:08.706838: step: 220/531, loss: 0.01691589318215847 2023-01-23 02:26:09.863154: step: 224/531, loss: 0.004601574502885342 2023-01-23 02:26:10.983875: step: 228/531, loss: 0.03575267642736435 2023-01-23 02:26:12.111461: step: 232/531, loss: 0.0010628222953528166 2023-01-23 02:26:13.246718: step: 236/531, loss: 0.03856668248772621 2023-01-23 02:26:14.351126: step: 240/531, loss: 0.013074302114546299 2023-01-23 02:26:15.519667: step: 244/531, loss: 0.015597343444824219 2023-01-23 02:26:16.655443: step: 248/531, loss: 0.006758022587746382 2023-01-23 02:26:17.761047: step: 252/531, loss: 0.002910518553107977 2023-01-23 02:26:18.923059: step: 256/531, loss: 0.060192495584487915 2023-01-23 02:26:20.041428: step: 260/531, loss: 0.002361297607421875 2023-01-23 02:26:21.161156: step: 264/531, loss: 0.040784645825624466 2023-01-23 02:26:22.272782: step: 268/531, loss: 0.08249111473560333 2023-01-23 02:26:23.402190: step: 272/531, loss: 0.0693538710474968 2023-01-23 02:26:24.516572: step: 276/531, loss: 0.017268657684326172 2023-01-23 02:26:25.617258: step: 280/531, loss: 0.04931144788861275 2023-01-23 02:26:26.753286: step: 284/531, loss: 0.00895309541374445 2023-01-23 02:26:27.828247: step: 288/531, loss: 0.03828335180878639 2023-01-23 02:26:28.952426: step: 292/531, loss: 0.012966537848114967 2023-01-23 02:26:30.052340: step: 296/531, loss: 0.11732187122106552 2023-01-23 02:26:31.150647: step: 300/531, loss: 0.0023147582542151213 2023-01-23 02:26:32.262097: step: 304/531, loss: 0.019328927621245384 2023-01-23 02:26:33.364491: step: 308/531, loss: 0.015468169003725052 2023-01-23 02:26:34.485599: step: 312/531, loss: 0.020824242383241653 2023-01-23 02:26:35.605955: step: 316/531, loss: 0.006169796455651522 2023-01-23 02:26:36.753002: step: 320/531, loss: 0.04382152855396271 2023-01-23 02:26:37.865569: step: 324/531, loss: 0.0020770072005689144 2023-01-23 02:26:38.940993: step: 328/531, loss: 0.002003932138904929 2023-01-23 02:26:40.064634: step: 332/531, loss: 0.003553199814632535 2023-01-23 02:26:41.166489: step: 336/531, loss: 0.007602119352668524 2023-01-23 02:26:42.269963: step: 340/531, loss: 0.0023725510109215975 2023-01-23 02:26:43.424360: step: 344/531, loss: 0.036425113677978516 2023-01-23 02:26:44.568820: step: 348/531, loss: 0.004952430725097656 2023-01-23 02:26:45.725917: step: 352/531, loss: 0.03351888433098793 2023-01-23 02:26:46.881272: step: 356/531, loss: 0.013051033020019531 2023-01-23 02:26:48.034605: step: 360/531, loss: 0.0611141212284565 2023-01-23 02:26:49.170452: step: 364/531, loss: 0.00011730194091796875 2023-01-23 02:26:50.291122: step: 368/531, loss: 0.025170328095555305 2023-01-23 02:26:51.409873: step: 372/531, loss: 0.007721138186752796 2023-01-23 02:26:52.522091: step: 376/531, loss: 0.3585384488105774 2023-01-23 02:26:53.624298: step: 380/531, loss: 0.004516696557402611 2023-01-23 02:26:54.724217: step: 384/531, loss: 6.804466102039441e-05 2023-01-23 02:26:55.872763: step: 388/531, loss: 0.013208294287323952 2023-01-23 02:26:57.001117: step: 392/531, loss: 0.06282816082239151 2023-01-23 02:26:58.139295: step: 396/531, loss: 0.032610226422548294 2023-01-23 02:26:59.251408: step: 400/531, loss: 0.020388901233673096 2023-01-23 02:27:00.395591: step: 404/531, loss: 7.62939453125e-05 2023-01-23 02:27:01.512920: step: 408/531, loss: 0.00119190220721066 2023-01-23 02:27:02.641750: step: 412/531, loss: 0.030810164287686348 2023-01-23 02:27:03.772867: step: 416/531, loss: 0.0690387710928917 2023-01-23 02:27:04.880766: step: 420/531, loss: 0.03976564481854439 2023-01-23 02:27:05.986444: step: 424/531, loss: 0.00027680397033691406 2023-01-23 02:27:07.106559: step: 428/531, loss: 0.0035211562644690275 2023-01-23 02:27:08.253187: step: 432/531, loss: 0.03640933334827423 2023-01-23 02:27:09.356768: step: 436/531, loss: 0.004938125144690275 2023-01-23 02:27:10.491038: step: 440/531, loss: 0.006573486141860485 2023-01-23 02:27:11.600529: step: 444/531, loss: 0.012926865369081497 2023-01-23 02:27:12.728509: step: 448/531, loss: 0.0031326294410973787 2023-01-23 02:27:13.838702: step: 452/531, loss: 0.0006568908574990928 2023-01-23 02:27:14.991524: step: 456/531, loss: 0.028955651447176933 2023-01-23 02:27:16.112937: step: 460/531, loss: 0.0002869248273782432 2023-01-23 02:27:17.258546: step: 464/531, loss: 0.021189594641327858 2023-01-23 02:27:18.384166: step: 468/531, loss: 0.0020441533997654915 2023-01-23 02:27:19.509980: step: 472/531, loss: 0.12576398253440857 2023-01-23 02:27:20.636456: step: 476/531, loss: 0.09705781936645508 2023-01-23 02:27:21.768200: step: 480/531, loss: 0.0003538132004905492 2023-01-23 02:27:22.902704: step: 484/531, loss: 0.0006099700694903731 2023-01-23 02:27:24.019708: step: 488/531, loss: 0.007028484717011452 2023-01-23 02:27:25.150604: step: 492/531, loss: 0.06639537960290909 2023-01-23 02:27:26.253576: step: 496/531, loss: 0.03950157016515732 2023-01-23 02:27:27.370379: step: 500/531, loss: 0.0003246307314839214 2023-01-23 02:27:28.499702: step: 504/531, loss: 0.00967254675924778 2023-01-23 02:27:29.601725: step: 508/531, loss: 0.03699235990643501 2023-01-23 02:27:30.749107: step: 512/531, loss: 0.003169250674545765 2023-01-23 02:27:31.897972: step: 516/531, loss: 0.008223151788115501 2023-01-23 02:27:33.074829: step: 520/531, loss: 0.0464656837284565 2023-01-23 02:27:34.182264: step: 524/531, loss: 0.021243000403046608 2023-01-23 02:27:35.306217: step: 528/531, loss: 0.0004507065168581903 2023-01-23 02:27:36.423980: step: 532/531, loss: 0.0018720626831054688 2023-01-23 02:27:37.574748: step: 536/531, loss: 0.004398250486701727 2023-01-23 02:27:38.681231: step: 540/531, loss: 0.03261289745569229 2023-01-23 02:27:39.812231: step: 544/531, loss: 0.14559994637966156 2023-01-23 02:27:40.948222: step: 548/531, loss: 0.20377102494239807 2023-01-23 02:27:42.069478: step: 552/531, loss: 0.022357940673828125 2023-01-23 02:27:43.190879: step: 556/531, loss: 0.013386642560362816 2023-01-23 02:27:44.282656: step: 560/531, loss: 0.013295555487275124 2023-01-23 02:27:45.407196: step: 564/531, loss: 0.013185120187699795 2023-01-23 02:27:46.573201: step: 568/531, loss: 0.030263328924775124 2023-01-23 02:27:47.714279: step: 572/531, loss: 0.00020799637422896922 2023-01-23 02:27:48.823426: step: 576/531, loss: 0.005970478057861328 2023-01-23 02:27:49.955459: step: 580/531, loss: 0.006392097566276789 2023-01-23 02:27:51.085702: step: 584/531, loss: 0.035562705248594284 2023-01-23 02:27:52.203389: step: 588/531, loss: 6.67572021484375e-06 2023-01-23 02:27:53.338133: step: 592/531, loss: 0.037072427570819855 2023-01-23 02:27:54.489271: step: 596/531, loss: 0.41086071729660034 2023-01-23 02:27:55.623572: step: 600/531, loss: 0.0015911102527752519 2023-01-23 02:27:56.736432: step: 604/531, loss: 0.00020041465177200735 2023-01-23 02:27:57.859307: step: 608/531, loss: 0.0020437240600585938 2023-01-23 02:27:58.950565: step: 612/531, loss: 0.0009173393482342362 2023-01-23 02:28:00.079549: step: 616/531, loss: 0.0029235840775072575 2023-01-23 02:28:01.205624: step: 620/531, loss: 0.09266014397144318 2023-01-23 02:28:02.338247: step: 624/531, loss: 0.011912918649613857 2023-01-23 02:28:03.446806: step: 628/531, loss: 0.0043045044876635075 2023-01-23 02:28:04.542496: step: 632/531, loss: 0.007423209957778454 2023-01-23 02:28:05.655658: step: 636/531, loss: 0.02342095598578453 2023-01-23 02:28:06.787110: step: 640/531, loss: 0.05944962427020073 2023-01-23 02:28:07.921992: step: 644/531, loss: 0.018491221591830254 2023-01-23 02:28:09.038323: step: 648/531, loss: 0.007088088896125555 2023-01-23 02:28:10.177584: step: 652/531, loss: 0.05974221229553223 2023-01-23 02:28:11.333843: step: 656/531, loss: 0.04714660719037056 2023-01-23 02:28:12.468569: step: 660/531, loss: 0.026032831519842148 2023-01-23 02:28:13.554468: step: 664/531, loss: 0.03264961019158363 2023-01-23 02:28:14.660778: step: 668/531, loss: 0.004107475280761719 2023-01-23 02:28:15.765846: step: 672/531, loss: 0.006307029630988836 2023-01-23 02:28:16.902579: step: 676/531, loss: 0.10223130881786346 2023-01-23 02:28:18.042839: step: 680/531, loss: 0.0007387161022052169 2023-01-23 02:28:19.190861: step: 684/531, loss: 0.05891399458050728 2023-01-23 02:28:20.321168: step: 688/531, loss: 0.008532142266631126 2023-01-23 02:28:21.472445: step: 692/531, loss: 0.03570614010095596 2023-01-23 02:28:22.615315: step: 696/531, loss: 0.001989173935726285 2023-01-23 02:28:23.745597: step: 700/531, loss: 0.05263977125287056 2023-01-23 02:28:24.834749: step: 704/531, loss: 0.00011072159395553172 2023-01-23 02:28:25.943667: step: 708/531, loss: 0.0004876971070189029 2023-01-23 02:28:27.051808: step: 712/531, loss: 0.19191202521324158 2023-01-23 02:28:28.200670: step: 716/531, loss: 2.4175644284696318e-05 2023-01-23 02:28:29.322570: step: 720/531, loss: 0.023565959185361862 2023-01-23 02:28:30.436783: step: 724/531, loss: 0.007044029422104359 2023-01-23 02:28:31.569118: step: 728/531, loss: 0.024537088349461555 2023-01-23 02:28:32.697498: step: 732/531, loss: 0.05185890197753906 2023-01-23 02:28:33.822103: step: 736/531, loss: 0.12506332993507385 2023-01-23 02:28:34.975060: step: 740/531, loss: 0.01971883699297905 2023-01-23 02:28:36.113147: step: 744/531, loss: 0.19170723855495453 2023-01-23 02:28:37.208305: step: 748/531, loss: 0.008958840742707253 2023-01-23 02:28:38.346173: step: 752/531, loss: 0.022042179480195045 2023-01-23 02:28:39.489193: step: 756/531, loss: 0.08817371726036072 2023-01-23 02:28:40.626791: step: 760/531, loss: 0.00028228759765625 2023-01-23 02:28:41.735310: step: 764/531, loss: 0.03675012290477753 2023-01-23 02:28:42.851584: step: 768/531, loss: 0.014083052054047585 2023-01-23 02:28:43.983326: step: 772/531, loss: 0.04045858606696129 2023-01-23 02:28:45.103811: step: 776/531, loss: 0.07263302803039551 2023-01-23 02:28:46.255272: step: 780/531, loss: 0.11687088012695312 2023-01-23 02:28:47.399192: step: 784/531, loss: 0.02087393030524254 2023-01-23 02:28:48.525470: step: 788/531, loss: 0.017788220196962357 2023-01-23 02:28:49.643542: step: 792/531, loss: 0.006742477882653475 2023-01-23 02:28:50.752998: step: 796/531, loss: 0.015903282910585403 2023-01-23 02:28:51.903253: step: 800/531, loss: 0.035421278327703476 2023-01-23 02:28:53.015670: step: 804/531, loss: 0.0316472165286541 2023-01-23 02:28:54.129944: step: 808/531, loss: 0.014603614807128906 2023-01-23 02:28:55.241886: step: 812/531, loss: 0.0020620347931981087 2023-01-23 02:28:56.376395: step: 816/531, loss: 0.004338884260505438 2023-01-23 02:28:57.486301: step: 820/531, loss: 0.004135322757065296 2023-01-23 02:28:58.592854: step: 824/531, loss: 0.009090399369597435 2023-01-23 02:28:59.720656: step: 828/531, loss: 0.006570053286850452 2023-01-23 02:29:00.830711: step: 832/531, loss: 0.10632772743701935 2023-01-23 02:29:01.965820: step: 836/531, loss: 0.0006173134315758944 2023-01-23 02:29:03.081703: step: 840/531, loss: 0.029095936566591263 2023-01-23 02:29:04.203501: step: 844/531, loss: 0.028853273019194603 2023-01-23 02:29:05.306894: step: 848/531, loss: 0.01092376746237278 2023-01-23 02:29:06.428594: step: 852/531, loss: 0.009807205758988857 2023-01-23 02:29:07.550358: step: 856/531, loss: 0.06452493369579315 2023-01-23 02:29:08.675822: step: 860/531, loss: 0.021578026935458183 2023-01-23 02:29:09.795802: step: 864/531, loss: 0.029333340004086494 2023-01-23 02:29:10.937618: step: 868/531, loss: 0.12489080429077148 2023-01-23 02:29:12.100531: step: 872/531, loss: 0.043154526501894 2023-01-23 02:29:13.214997: step: 876/531, loss: 0.00032444001408293843 2023-01-23 02:29:14.348237: step: 880/531, loss: 0.009951448999345303 2023-01-23 02:29:15.455568: step: 884/531, loss: 0.015851879492402077 2023-01-23 02:29:16.571077: step: 888/531, loss: 0.001773405005224049 2023-01-23 02:29:17.743130: step: 892/531, loss: 0.00020847321138717234 2023-01-23 02:29:18.842139: step: 896/531, loss: 0.0075699808076024055 2023-01-23 02:29:19.965635: step: 900/531, loss: 0.00603752164170146 2023-01-23 02:29:21.094604: step: 904/531, loss: 0.0033434866927564144 2023-01-23 02:29:22.203573: step: 908/531, loss: 0.047861289232969284 2023-01-23 02:29:23.280061: step: 912/531, loss: 4.19616708313697e-06 2023-01-23 02:29:24.379411: step: 916/531, loss: 0.04440012201666832 2023-01-23 02:29:25.483088: step: 920/531, loss: 0.006049918942153454 2023-01-23 02:29:26.624678: step: 924/531, loss: 0.0009041786543093622 2023-01-23 02:29:27.767152: step: 928/531, loss: 0.09411583095788956 2023-01-23 02:29:28.908214: step: 932/531, loss: 0.0007430076948367059 2023-01-23 02:29:30.018411: step: 936/531, loss: 0.010750055313110352 2023-01-23 02:29:31.142591: step: 940/531, loss: 0.0014025687705725431 2023-01-23 02:29:32.271637: step: 944/531, loss: 0.070703886449337 2023-01-23 02:29:33.415289: step: 948/531, loss: 0.05619654431939125 2023-01-23 02:29:34.539269: step: 952/531, loss: 0.00015192032151389867 2023-01-23 02:29:35.679173: step: 956/531, loss: 0.048120878636837006 2023-01-23 02:29:36.786770: step: 960/531, loss: 0.006114935968071222 2023-01-23 02:29:37.913754: step: 964/531, loss: 0.059396080672740936 2023-01-23 02:29:39.024105: step: 968/531, loss: 0.046480562537908554 2023-01-23 02:29:40.146926: step: 972/531, loss: 0.018199730664491653 2023-01-23 02:29:41.270495: step: 976/531, loss: 0.004629802890121937 2023-01-23 02:29:42.387355: step: 980/531, loss: 0.06770829856395721 2023-01-23 02:29:43.530070: step: 984/531, loss: 0.0492284782230854 2023-01-23 02:29:44.658919: step: 988/531, loss: 0.019356489181518555 2023-01-23 02:29:45.801907: step: 992/531, loss: 0.05121574550867081 2023-01-23 02:29:46.900454: step: 996/531, loss: 0.01897287368774414 2023-01-23 02:29:48.039308: step: 1000/531, loss: 0.026590729132294655 2023-01-23 02:29:49.175039: step: 1004/531, loss: 0.2064417004585266 2023-01-23 02:29:50.285211: step: 1008/531, loss: 0.11226468533277512 2023-01-23 02:29:51.414836: step: 1012/531, loss: 0.006961155217140913 2023-01-23 02:29:52.517164: step: 1016/531, loss: 0.0844184160232544 2023-01-23 02:29:53.622629: step: 1020/531, loss: 0.03108227252960205 2023-01-23 02:29:54.701537: step: 1024/531, loss: 0.008012198843061924 2023-01-23 02:29:55.818123: step: 1028/531, loss: 0.0230731014162302 2023-01-23 02:29:56.908534: step: 1032/531, loss: 0.0010379791492596269 2023-01-23 02:29:58.019629: step: 1036/531, loss: 0.00010013580322265625 2023-01-23 02:29:59.141907: step: 1040/531, loss: 0.02672729641199112 2023-01-23 02:30:00.262047: step: 1044/531, loss: 0.030762579292058945 2023-01-23 02:30:01.372545: step: 1048/531, loss: 0.00024585722712799907 2023-01-23 02:30:02.530339: step: 1052/531, loss: 0.07710965722799301 2023-01-23 02:30:03.680959: step: 1056/531, loss: 0.03268461301922798 2023-01-23 02:30:04.777355: step: 1060/531, loss: 0.007287788670510054 2023-01-23 02:30:05.894888: step: 1064/531, loss: 0.0023835182655602694 2023-01-23 02:30:07.024460: step: 1068/531, loss: 0.0029266357887536287 2023-01-23 02:30:08.137904: step: 1072/531, loss: 0.020317936316132545 2023-01-23 02:30:09.267789: step: 1076/531, loss: 0.009944153018295765 2023-01-23 02:30:10.406656: step: 1080/531, loss: 0.008577156811952591 2023-01-23 02:30:11.529477: step: 1084/531, loss: 0.0352330207824707 2023-01-23 02:30:12.642249: step: 1088/531, loss: 0.0334162712097168 2023-01-23 02:30:13.762944: step: 1092/531, loss: 0.03544321283698082 2023-01-23 02:30:14.895841: step: 1096/531, loss: 0.0872875228524208 2023-01-23 02:30:16.038804: step: 1100/531, loss: 0.010074043646454811 2023-01-23 02:30:17.177876: step: 1104/531, loss: 0.004648732952773571 2023-01-23 02:30:18.287207: step: 1108/531, loss: 0.0029380798805505037 2023-01-23 02:30:19.449112: step: 1112/531, loss: 0.08681049197912216 2023-01-23 02:30:20.591500: step: 1116/531, loss: 0.005612802691757679 2023-01-23 02:30:21.738234: step: 1120/531, loss: 0.02386474609375 2023-01-23 02:30:22.843235: step: 1124/531, loss: 0.002441501710563898 2023-01-23 02:30:23.997325: step: 1128/531, loss: 0.0048583983443677425 2023-01-23 02:30:25.127312: step: 1132/531, loss: 0.01808915287256241 2023-01-23 02:30:26.262675: step: 1136/531, loss: 0.06909604370594025 2023-01-23 02:30:27.377268: step: 1140/531, loss: 0.04931602627038956 2023-01-23 02:30:28.498088: step: 1144/531, loss: 0.01605062559247017 2023-01-23 02:30:29.636181: step: 1148/531, loss: 0.010746479034423828 2023-01-23 02:30:30.748120: step: 1152/531, loss: 0.03742561489343643 2023-01-23 02:30:31.845056: step: 1156/531, loss: 0.03844413906335831 2023-01-23 02:30:32.999272: step: 1160/531, loss: 0.02562236785888672 2023-01-23 02:30:34.150854: step: 1164/531, loss: 0.0006140232435427606 2023-01-23 02:30:35.290176: step: 1168/531, loss: 0.07575778663158417 2023-01-23 02:30:36.425033: step: 1172/531, loss: 0.008207941427826881 2023-01-23 02:30:37.568120: step: 1176/531, loss: 0.004946804139763117 2023-01-23 02:30:38.698154: step: 1180/531, loss: 0.006978416815400124 2023-01-23 02:30:39.806230: step: 1184/531, loss: 0.002919435501098633 2023-01-23 02:30:40.914345: step: 1188/531, loss: 0.10103556513786316 2023-01-23 02:30:42.052960: step: 1192/531, loss: 0.007282781880348921 2023-01-23 02:30:43.182592: step: 1196/531, loss: 0.02560882642865181 2023-01-23 02:30:44.310818: step: 1200/531, loss: 0.011707651428878307 2023-01-23 02:30:45.443671: step: 1204/531, loss: 0.0035593032371252775 2023-01-23 02:30:46.575101: step: 1208/531, loss: 0.010898208245635033 2023-01-23 02:30:47.701680: step: 1212/531, loss: 0.0002610206720419228 2023-01-23 02:30:48.818273: step: 1216/531, loss: 0.017278386279940605 2023-01-23 02:30:49.945405: step: 1220/531, loss: 0.00962533988058567 2023-01-23 02:30:51.062581: step: 1224/531, loss: 0.10481176525354385 2023-01-23 02:30:52.188600: step: 1228/531, loss: 0.016588782891631126 2023-01-23 02:30:53.325225: step: 1232/531, loss: 0.04270181804895401 2023-01-23 02:30:54.437048: step: 1236/531, loss: 0.014360617846250534 2023-01-23 02:30:55.551207: step: 1240/531, loss: 0.03247566521167755 2023-01-23 02:30:56.645541: step: 1244/531, loss: 0.006392478942871094 2023-01-23 02:30:57.787871: step: 1248/531, loss: 5.464554124046117e-05 2023-01-23 02:30:58.902218: step: 1252/531, loss: 0.004841423127800226 2023-01-23 02:31:00.008096: step: 1256/531, loss: 0.0003465652698650956 2023-01-23 02:31:01.115588: step: 1260/531, loss: 0.002984476275742054 2023-01-23 02:31:02.200112: step: 1264/531, loss: 0.07485266029834747 2023-01-23 02:31:03.344750: step: 1268/531, loss: 0.00509147671982646 2023-01-23 02:31:04.451989: step: 1272/531, loss: 0.02361741103231907 2023-01-23 02:31:05.570501: step: 1276/531, loss: 0.04300554841756821 2023-01-23 02:31:06.673487: step: 1280/531, loss: 0.033553026616573334 2023-01-23 02:31:07.781056: step: 1284/531, loss: 0.003080177353695035 2023-01-23 02:31:08.927470: step: 1288/531, loss: 0.052766039967536926 2023-01-23 02:31:10.086113: step: 1292/531, loss: 0.41905081272125244 2023-01-23 02:31:11.192620: step: 1296/531, loss: 0.051436807960271835 2023-01-23 02:31:12.334488: step: 1300/531, loss: 0.02467336505651474 2023-01-23 02:31:13.416349: step: 1304/531, loss: 0.004202556796371937 2023-01-23 02:31:14.520610: step: 1308/531, loss: 0.011218547821044922 2023-01-23 02:31:15.625354: step: 1312/531, loss: 1.220703143189894e-05 2023-01-23 02:31:16.727881: step: 1316/531, loss: 0.1347368359565735 2023-01-23 02:31:17.842085: step: 1320/531, loss: 0.003582596778869629 2023-01-23 02:31:18.964076: step: 1324/531, loss: 0.012189961038529873 2023-01-23 02:31:20.095554: step: 1328/531, loss: 0.022995997220277786 2023-01-23 02:31:21.228299: step: 1332/531, loss: 0.0008674621349200606 2023-01-23 02:31:22.380832: step: 1336/531, loss: 0.01600198820233345 2023-01-23 02:31:23.512912: step: 1340/531, loss: 0.03642052412033081 2023-01-23 02:31:24.657405: step: 1344/531, loss: 0.01587972790002823 2023-01-23 02:31:25.785304: step: 1348/531, loss: 0.021015549078583717 2023-01-23 02:31:26.888837: step: 1352/531, loss: 0.0028820992447435856 2023-01-23 02:31:28.038043: step: 1356/531, loss: 0.046047236770391464 2023-01-23 02:31:29.160323: step: 1360/531, loss: 0.05033550411462784 2023-01-23 02:31:30.315272: step: 1364/531, loss: 0.0029968260787427425 2023-01-23 02:31:31.424361: step: 1368/531, loss: 0.10321059077978134 2023-01-23 02:31:32.573873: step: 1372/531, loss: 0.017705153673887253 2023-01-23 02:31:33.669230: step: 1376/531, loss: 0.031148577108979225 2023-01-23 02:31:34.795285: step: 1380/531, loss: 0.003689289093017578 2023-01-23 02:31:35.903491: step: 1384/531, loss: 0.010345744900405407 2023-01-23 02:31:37.035227: step: 1388/531, loss: 0.018085099756717682 2023-01-23 02:31:38.145929: step: 1392/531, loss: 0.05871400982141495 2023-01-23 02:31:39.296839: step: 1396/531, loss: 0.022579384967684746 2023-01-23 02:31:40.425007: step: 1400/531, loss: 0.03146848827600479 2023-01-23 02:31:41.568507: step: 1404/531, loss: 0.003318405244499445 2023-01-23 02:31:42.701332: step: 1408/531, loss: 0.01587653160095215 2023-01-23 02:31:43.840114: step: 1412/531, loss: 0.021695328876376152 2023-01-23 02:31:45.015413: step: 1416/531, loss: 0.00048320292262360454 2023-01-23 02:31:46.165558: step: 1420/531, loss: 0.0008377075428143144 2023-01-23 02:31:47.280559: step: 1424/531, loss: 0.03805284574627876 2023-01-23 02:31:48.427265: step: 1428/531, loss: 0.012700892053544521 2023-01-23 02:31:49.541361: step: 1432/531, loss: 0.0009196281898766756 2023-01-23 02:31:50.641043: step: 1436/531, loss: 0.04095878824591637 2023-01-23 02:31:51.806138: step: 1440/531, loss: 0.016668032854795456 2023-01-23 02:31:52.915173: step: 1444/531, loss: 0.013554097153246403 2023-01-23 02:31:54.095935: step: 1448/531, loss: 0.031142044812440872 2023-01-23 02:31:55.212524: step: 1452/531, loss: 0.0043694498017430305 2023-01-23 02:31:56.343145: step: 1456/531, loss: 5.397796485340223e-05 2023-01-23 02:31:57.458847: step: 1460/531, loss: 0.005466270260512829 2023-01-23 02:31:58.610002: step: 1464/531, loss: 0.024178888648748398 2023-01-23 02:31:59.723334: step: 1468/531, loss: 0.04167165979743004 2023-01-23 02:32:00.831705: step: 1472/531, loss: 0.030733108520507812 2023-01-23 02:32:02.012026: step: 1476/531, loss: 0.03330421447753906 2023-01-23 02:32:03.123450: step: 1480/531, loss: 0.03384876251220703 2023-01-23 02:32:04.268555: step: 1484/531, loss: 0.043265726417303085 2023-01-23 02:32:05.425520: step: 1488/531, loss: 0.02325616031885147 2023-01-23 02:32:06.528069: step: 1492/531, loss: 0.01712975464761257 2023-01-23 02:32:07.670254: step: 1496/531, loss: 0.0035588263999670744 2023-01-23 02:32:08.819071: step: 1500/531, loss: 0.013007258996367455 2023-01-23 02:32:09.934724: step: 1504/531, loss: 0.03081197664141655 2023-01-23 02:32:11.094170: step: 1508/531, loss: 0.08359622955322266 2023-01-23 02:32:12.211503: step: 1512/531, loss: 0.11903619766235352 2023-01-23 02:32:13.324078: step: 1516/531, loss: 0.003787422087043524 2023-01-23 02:32:14.423720: step: 1520/531, loss: 0.017204951494932175 2023-01-23 02:32:15.569958: step: 1524/531, loss: 0.04301281273365021 2023-01-23 02:32:16.689615: step: 1528/531, loss: 0.020871736109256744 2023-01-23 02:32:17.812422: step: 1532/531, loss: 0.06296758353710175 2023-01-23 02:32:18.954478: step: 1536/531, loss: 0.022336198017001152 2023-01-23 02:32:20.088060: step: 1540/531, loss: 0.0026634240057319403 2023-01-23 02:32:21.181704: step: 1544/531, loss: 0.020304488018155098 2023-01-23 02:32:22.279643: step: 1548/531, loss: 0.26819151639938354 2023-01-23 02:32:23.393310: step: 1552/531, loss: 0.10329543054103851 2023-01-23 02:32:24.520932: step: 1556/531, loss: 0.06942959129810333 2023-01-23 02:32:25.667258: step: 1560/531, loss: 0.02178172953426838 2023-01-23 02:32:26.780914: step: 1564/531, loss: 0.006759166717529297 2023-01-23 02:32:27.911673: step: 1568/531, loss: 0.009154033847153187 2023-01-23 02:32:29.025198: step: 1572/531, loss: 0.00357303605414927 2023-01-23 02:32:30.169566: step: 1576/531, loss: 0.02376260794699192 2023-01-23 02:32:31.269621: step: 1580/531, loss: 0.003288459964096546 2023-01-23 02:32:32.405527: step: 1584/531, loss: 0.05271320790052414 2023-01-23 02:32:33.580439: step: 1588/531, loss: 0.00923767127096653 2023-01-23 02:32:34.695431: step: 1592/531, loss: 0.0039206743240356445 2023-01-23 02:32:35.790318: step: 1596/531, loss: 0.029753495007753372 2023-01-23 02:32:36.915639: step: 1600/531, loss: 0.002079105470329523 2023-01-23 02:32:38.022231: step: 1604/531, loss: 0.07929325103759766 2023-01-23 02:32:39.119006: step: 1608/531, loss: 0.1175076961517334 2023-01-23 02:32:40.231000: step: 1612/531, loss: 0.0063987732864916325 2023-01-23 02:32:41.346158: step: 1616/531, loss: 0.004934978671371937 2023-01-23 02:32:42.446679: step: 1620/531, loss: 0.0006974220159463584 2023-01-23 02:32:43.553045: step: 1624/531, loss: 0.007775688543915749 2023-01-23 02:32:44.669809: step: 1628/531, loss: 0.008225584402680397 2023-01-23 02:32:45.818111: step: 1632/531, loss: 0.0029627324547618628 2023-01-23 02:32:46.941330: step: 1636/531, loss: 0.007986927404999733 2023-01-23 02:32:48.054876: step: 1640/531, loss: 0.011193371377885342 2023-01-23 02:32:49.175716: step: 1644/531, loss: 0.004993439186364412 2023-01-23 02:32:50.277671: step: 1648/531, loss: 0.0030838013626635075 2023-01-23 02:32:51.395375: step: 1652/531, loss: 0.012137413024902344 2023-01-23 02:32:52.517314: step: 1656/531, loss: 0.04378509521484375 2023-01-23 02:32:53.649990: step: 1660/531, loss: 0.020855236798524857 2023-01-23 02:32:54.774731: step: 1664/531, loss: 0.01168045960366726 2023-01-23 02:32:55.874135: step: 1668/531, loss: 0.041135504841804504 2023-01-23 02:32:56.994101: step: 1672/531, loss: 0.022951697930693626 2023-01-23 02:32:58.125501: step: 1676/531, loss: 0.006878089625388384 2023-01-23 02:32:59.245884: step: 1680/531, loss: 0.04147787019610405 2023-01-23 02:33:00.374032: step: 1684/531, loss: 0.016452407464385033 2023-01-23 02:33:01.480942: step: 1688/531, loss: 0.07864789664745331 2023-01-23 02:33:02.610840: step: 1692/531, loss: 0.07175607979297638 2023-01-23 02:33:03.740240: step: 1696/531, loss: 0.011507321149110794 2023-01-23 02:33:04.855211: step: 1700/531, loss: 0.0196990966796875 2023-01-23 02:33:05.977959: step: 1704/531, loss: 0.016415024176239967 2023-01-23 02:33:07.136230: step: 1708/531, loss: 0.08067063987255096 2023-01-23 02:33:08.263094: step: 1712/531, loss: 0.00034780503483489156 2023-01-23 02:33:09.370245: step: 1716/531, loss: 0.0003574371512513608 2023-01-23 02:33:10.467869: step: 1720/531, loss: 0.052779581397771835 2023-01-23 02:33:11.606247: step: 1724/531, loss: 0.054026223719120026 2023-01-23 02:33:12.754767: step: 1728/531, loss: 1.1945576667785645 2023-01-23 02:33:13.882440: step: 1732/531, loss: 0.00395545968785882 2023-01-23 02:33:14.984661: step: 1736/531, loss: 0.0035877227783203125 2023-01-23 02:33:16.114344: step: 1740/531, loss: 0.0029854297172278166 2023-01-23 02:33:17.223648: step: 1744/531, loss: 0.015355301089584827 2023-01-23 02:33:18.342325: step: 1748/531, loss: 0.005847454071044922 2023-01-23 02:33:19.489491: step: 1752/531, loss: 0.06560830771923065 2023-01-23 02:33:20.641220: step: 1756/531, loss: 0.06070220470428467 2023-01-23 02:33:21.781778: step: 1760/531, loss: 0.02496366575360298 2023-01-23 02:33:22.918539: step: 1764/531, loss: 0.0004014253499917686 2023-01-23 02:33:24.047458: step: 1768/531, loss: 0.007133770268410444 2023-01-23 02:33:25.178503: step: 1772/531, loss: 0.006471824832260609 2023-01-23 02:33:26.306378: step: 1776/531, loss: 0.00371398963034153 2023-01-23 02:33:27.420745: step: 1780/531, loss: 0.03792600706219673 2023-01-23 02:33:28.532941: step: 1784/531, loss: 0.06512375175952911 2023-01-23 02:33:29.647501: step: 1788/531, loss: 0.28476059436798096 2023-01-23 02:33:30.770193: step: 1792/531, loss: 0.014986038208007812 2023-01-23 02:33:31.879396: step: 1796/531, loss: 9.813308861339465e-05 2023-01-23 02:33:32.983884: step: 1800/531, loss: 0.0012739181984215975 2023-01-23 02:33:34.090008: step: 1804/531, loss: 0.02565941959619522 2023-01-23 02:33:35.198529: step: 1808/531, loss: 0.00871200580149889 2023-01-23 02:33:36.337587: step: 1812/531, loss: 0.0061314585618674755 2023-01-23 02:33:37.429262: step: 1816/531, loss: 0.04541795328259468 2023-01-23 02:33:38.538258: step: 1820/531, loss: 0.002659416291862726 2023-01-23 02:33:39.671591: step: 1824/531, loss: 0.000358390825567767 2023-01-23 02:33:40.783578: step: 1828/531, loss: 0.4325523376464844 2023-01-23 02:33:41.938591: step: 1832/531, loss: 0.00283660925924778 2023-01-23 02:33:43.064116: step: 1836/531, loss: 0.0032539365347474813 2023-01-23 02:33:44.171240: step: 1840/531, loss: 0.020129824057221413 2023-01-23 02:33:45.284964: step: 1844/531, loss: 0.02207355387508869 2023-01-23 02:33:46.425708: step: 1848/531, loss: 0.02607870101928711 2023-01-23 02:33:47.568397: step: 1852/531, loss: 0.004940795712172985 2023-01-23 02:33:48.687702: step: 1856/531, loss: 0.003156471299007535 2023-01-23 02:33:49.782840: step: 1860/531, loss: 0.005729723256081343 2023-01-23 02:33:50.899691: step: 1864/531, loss: 0.0014324665535241365 2023-01-23 02:33:51.997228: step: 1868/531, loss: 0.08723773807287216 2023-01-23 02:33:53.102381: step: 1872/531, loss: 0.05804614722728729 2023-01-23 02:33:54.205890: step: 1876/531, loss: 0.0036375047639012337 2023-01-23 02:33:55.318185: step: 1880/531, loss: 0.020519733428955078 2023-01-23 02:33:56.417749: step: 1884/531, loss: 0.00825204886496067 2023-01-23 02:33:57.555542: step: 1888/531, loss: 0.014622688293457031 2023-01-23 02:33:58.697552: step: 1892/531, loss: 7.953643944347277e-05 2023-01-23 02:33:59.808082: step: 1896/531, loss: 0.03589363023638725 2023-01-23 02:34:00.928596: step: 1900/531, loss: 0.005812644958496094 2023-01-23 02:34:02.042414: step: 1904/531, loss: 0.0670391097664833 2023-01-23 02:34:03.165257: step: 1908/531, loss: 0.03451528772711754 2023-01-23 02:34:04.311084: step: 1912/531, loss: 0.0002455711364746094 2023-01-23 02:34:05.448244: step: 1916/531, loss: 0.014847183600068092 2023-01-23 02:34:06.618706: step: 1920/531, loss: 0.004889106843620539 2023-01-23 02:34:07.750184: step: 1924/531, loss: 0.0003566741943359375 2023-01-23 02:34:08.881317: step: 1928/531, loss: 0.00217609410174191 2023-01-23 02:34:10.012347: step: 1932/531, loss: 0.02753629721701145 2023-01-23 02:34:11.132799: step: 1936/531, loss: 0.002470302628353238 2023-01-23 02:34:12.252613: step: 1940/531, loss: 0.050551604479551315 2023-01-23 02:34:13.363600: step: 1944/531, loss: 0.1101495772600174 2023-01-23 02:34:14.466471: step: 1948/531, loss: 2.8467178708524443e-05 2023-01-23 02:34:15.606840: step: 1952/531, loss: 0.05049147456884384 2023-01-23 02:34:16.722739: step: 1956/531, loss: 0.047983553260564804 2023-01-23 02:34:17.880144: step: 1960/531, loss: 0.008704758249223232 2023-01-23 02:34:18.991135: step: 1964/531, loss: 0.039812374860048294 2023-01-23 02:34:20.093774: step: 1968/531, loss: 0.014674377627670765 2023-01-23 02:34:21.226457: step: 1972/531, loss: 0.03880615159869194 2023-01-23 02:34:22.356435: step: 1976/531, loss: 0.023801708593964577 2023-01-23 02:34:23.474249: step: 1980/531, loss: 0.06367473304271698 2023-01-23 02:34:24.600502: step: 1984/531, loss: 0.4308701157569885 2023-01-23 02:34:25.713106: step: 1988/531, loss: 0.03270473703742027 2023-01-23 02:34:26.841238: step: 1992/531, loss: 0.03408947214484215 2023-01-23 02:34:27.960862: step: 1996/531, loss: 0.00036849977914243937 2023-01-23 02:34:29.064532: step: 2000/531, loss: 0.05244779586791992 2023-01-23 02:34:30.168010: step: 2004/531, loss: 0.0030225752852857113 2023-01-23 02:34:31.251341: step: 2008/531, loss: 0.0031193732284009457 2023-01-23 02:34:32.397168: step: 2012/531, loss: 0.05383634567260742 2023-01-23 02:34:33.565899: step: 2016/531, loss: 0.0071891783736646175 2023-01-23 02:34:34.700151: step: 2020/531, loss: 0.004887747578322887 2023-01-23 02:34:35.827645: step: 2024/531, loss: 0.024155426770448685 2023-01-23 02:34:36.938916: step: 2028/531, loss: 0.0014318466419354081 2023-01-23 02:34:38.057448: step: 2032/531, loss: 0.004417228512465954 2023-01-23 02:34:39.197285: step: 2036/531, loss: 0.025801371783018112 2023-01-23 02:34:40.359848: step: 2040/531, loss: 0.22373075783252716 2023-01-23 02:34:41.504676: step: 2044/531, loss: 0.0161622054874897 2023-01-23 02:34:42.646236: step: 2048/531, loss: 0.0446162223815918 2023-01-23 02:34:43.761036: step: 2052/531, loss: 0.030378341674804688 2023-01-23 02:34:44.914794: step: 2056/531, loss: 0.005412006750702858 2023-01-23 02:34:46.058265: step: 2060/531, loss: 0.0014930724864825606 2023-01-23 02:34:47.190280: step: 2064/531, loss: 0.04890189319849014 2023-01-23 02:34:48.323465: step: 2068/531, loss: 0.3260103464126587 2023-01-23 02:34:49.438094: step: 2072/531, loss: 0.0030933856032788754 2023-01-23 02:34:50.544227: step: 2076/531, loss: 0.0007666110759600997 2023-01-23 02:34:51.668338: step: 2080/531, loss: 0.02080850675702095 2023-01-23 02:34:52.795300: step: 2084/531, loss: 0.006679678335785866 2023-01-23 02:34:53.994557: step: 2088/531, loss: 0.02182779461145401 2023-01-23 02:34:55.087518: step: 2092/531, loss: 0.0011119842529296875 2023-01-23 02:34:56.212817: step: 2096/531, loss: 0.0038054464384913445 2023-01-23 02:34:57.319581: step: 2100/531, loss: 0.02427225187420845 2023-01-23 02:34:58.463752: step: 2104/531, loss: 0.013285351917147636 2023-01-23 02:34:59.617281: step: 2108/531, loss: 0.05540408939123154 2023-01-23 02:35:00.739586: step: 2112/531, loss: 0.026139356195926666 2023-01-23 02:35:01.897123: step: 2116/531, loss: 0.013423491269350052 2023-01-23 02:35:03.011044: step: 2120/531, loss: 0.025715352967381477 2023-01-23 02:35:04.143825: step: 2124/531, loss: 0.017140675336122513 ================================================== Loss: 0.036 -------------------- Dev: {'event': {'p': 0.5725490196078431, 'r': 0.7776298268974701, 'f1': 0.6595143986448334}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Test: {'event': {'p': 0.6345966958211856, 'r': 0.7787716159809183, 'f1': 0.6993306559571619}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Chinese: {'event': {'p': 0.5925925925925926, 'r': 0.8888888888888888, 'f1': 0.711111111111111}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Korean: {'event': {'p': 0.6206896551724138, 'r': 0.5714285714285714, 'f1': 0.5950413223140496}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Russian: {'event': {'p': 0.34, 'r': 0.4722222222222222, 'f1': 0.39534883720930236}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 18 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:35:44.346484: step: 4/531, loss: 0.03148212656378746 2023-01-23 02:35:45.454268: step: 8/531, loss: 0.009138393215835094 2023-01-23 02:35:46.612520: step: 12/531, loss: 0.03735532611608505 2023-01-23 02:35:47.729372: step: 16/531, loss: 0.06599961221218109 2023-01-23 02:35:48.871149: step: 20/531, loss: 0.0032851221039891243 2023-01-23 02:35:49.984228: step: 24/531, loss: 0.0003922939649783075 2023-01-23 02:35:51.097418: step: 28/531, loss: 0.0018051147926598787 2023-01-23 02:35:52.261083: step: 32/531, loss: 0.006088924594223499 2023-01-23 02:35:53.419586: step: 36/531, loss: 0.0009399413829669356 2023-01-23 02:35:54.553415: step: 40/531, loss: 0.008615875616669655 2023-01-23 02:35:55.684681: step: 44/531, loss: 0.006732272915542126 2023-01-23 02:35:56.803612: step: 48/531, loss: 0.002960777375847101 2023-01-23 02:35:57.969273: step: 52/531, loss: 0.004584789276123047 2023-01-23 02:35:59.093907: step: 56/531, loss: 0.020881080999970436 2023-01-23 02:36:00.211088: step: 60/531, loss: 0.03961324691772461 2023-01-23 02:36:01.369554: step: 64/531, loss: 0.0013036250602453947 2023-01-23 02:36:02.497396: step: 68/531, loss: 0.0003477096324786544 2023-01-23 02:36:03.632645: step: 72/531, loss: 0.008290481753647327 2023-01-23 02:36:04.779772: step: 76/531, loss: 0.0030734301544725895 2023-01-23 02:36:05.900536: step: 80/531, loss: 0.06168098375201225 2023-01-23 02:36:07.026702: step: 84/531, loss: 0.0008183956379070878 2023-01-23 02:36:08.161141: step: 88/531, loss: 0.0022891045082360506 2023-01-23 02:36:09.269848: step: 92/531, loss: 0.0003192901785951108 2023-01-23 02:36:10.408619: step: 96/531, loss: 0.03565606847405434 2023-01-23 02:36:11.516526: step: 100/531, loss: 0.011751865968108177 2023-01-23 02:36:12.641965: step: 104/531, loss: 0.0006559849134646356 2023-01-23 02:36:13.767755: step: 108/531, loss: 0.002317237900570035 2023-01-23 02:36:14.911262: step: 112/531, loss: 0.003711700439453125 2023-01-23 02:36:16.040746: step: 116/531, loss: 0.02535572089254856 2023-01-23 02:36:17.145510: step: 120/531, loss: 0.0025195598136633635 2023-01-23 02:36:18.275794: step: 124/531, loss: 0.0004847526433877647 2023-01-23 02:36:19.391872: step: 128/531, loss: 0.0026532651390880346 2023-01-23 02:36:20.517619: step: 132/531, loss: 3.2901763916015625e-05 2023-01-23 02:36:21.644319: step: 136/531, loss: 0.00017480850510764867 2023-01-23 02:36:22.779473: step: 140/531, loss: 0.03460407257080078 2023-01-23 02:36:23.930643: step: 144/531, loss: 0.01572561264038086 2023-01-23 02:36:25.070005: step: 148/531, loss: 0.011056089773774147 2023-01-23 02:36:26.189140: step: 152/531, loss: 0.013468456454575062 2023-01-23 02:36:27.315346: step: 156/531, loss: 0.010213661007583141 2023-01-23 02:36:28.449263: step: 160/531, loss: 0.014640617184340954 2023-01-23 02:36:29.573732: step: 164/531, loss: 0.036945246160030365 2023-01-23 02:36:30.726733: step: 168/531, loss: 0.06317062675952911 2023-01-23 02:36:31.838287: step: 172/531, loss: 0.033351898193359375 2023-01-23 02:36:32.964620: step: 176/531, loss: 0.012452125549316406 2023-01-23 02:36:34.059394: step: 180/531, loss: 0.006300449836999178 2023-01-23 02:36:35.188393: step: 184/531, loss: 0.002720308257266879 2023-01-23 02:36:36.290266: step: 188/531, loss: 0.0008658409351482987 2023-01-23 02:36:37.413460: step: 192/531, loss: 0.0006795406225137413 2023-01-23 02:36:38.530758: step: 196/531, loss: 0.0012495041592046618 2023-01-23 02:36:39.642074: step: 200/531, loss: 0.01313471794128418 2023-01-23 02:36:40.761284: step: 204/531, loss: 0.004545689094811678 2023-01-23 02:36:41.889218: step: 208/531, loss: 0.008954811841249466 2023-01-23 02:36:43.029683: step: 212/531, loss: 0.003331947373226285 2023-01-23 02:36:44.144025: step: 216/531, loss: 1.277923547604587e-05 2023-01-23 02:36:45.273197: step: 220/531, loss: 0.0645398199558258 2023-01-23 02:36:46.437380: step: 224/531, loss: 0.004652786068618298 2023-01-23 02:36:47.582719: step: 228/531, loss: 0.00368671421892941 2023-01-23 02:36:48.726383: step: 232/531, loss: 0.016959620639681816 2023-01-23 02:36:49.843388: step: 236/531, loss: 0.02703724056482315 2023-01-23 02:36:50.957523: step: 240/531, loss: 0.07674713432788849 2023-01-23 02:36:52.075523: step: 244/531, loss: 0.005436897277832031 2023-01-23 02:36:53.197158: step: 248/531, loss: 0.043573666363954544 2023-01-23 02:36:54.366482: step: 252/531, loss: 0.004396534059196711 2023-01-23 02:36:55.478351: step: 256/531, loss: 0.00010556057532085106 2023-01-23 02:36:56.591903: step: 260/531, loss: 0.0010340213775634766 2023-01-23 02:36:57.707436: step: 264/531, loss: 0.0028854371048510075 2023-01-23 02:36:58.827860: step: 268/531, loss: 0.003803730010986328 2023-01-23 02:36:59.934043: step: 272/531, loss: 0.03769569471478462 2023-01-23 02:37:01.080521: step: 276/531, loss: 0.004761695861816406 2023-01-23 02:37:02.211902: step: 280/531, loss: 6.027221752447076e-05 2023-01-23 02:37:03.342678: step: 284/531, loss: 0.006964683532714844 2023-01-23 02:37:04.489487: step: 288/531, loss: 0.0022860527969896793 2023-01-23 02:37:05.608324: step: 292/531, loss: 0.0256697665899992 2023-01-23 02:37:06.734273: step: 296/531, loss: 0.0028236389625817537 2023-01-23 02:37:07.849296: step: 300/531, loss: 0.9506793022155762 2023-01-23 02:37:08.930517: step: 304/531, loss: 0.035123251378536224 2023-01-23 02:37:10.070906: step: 308/531, loss: 0.027022648602724075 2023-01-23 02:37:11.193570: step: 312/531, loss: 0.018496274948120117 2023-01-23 02:37:12.322573: step: 316/531, loss: 0.0009350032196380198 2023-01-23 02:37:13.441006: step: 320/531, loss: 0.07793302834033966 2023-01-23 02:37:14.602771: step: 324/531, loss: 0.1767623871564865 2023-01-23 02:37:15.730308: step: 328/531, loss: 0.17111587524414062 2023-01-23 02:37:16.865554: step: 332/531, loss: 0.0017011642921715975 2023-01-23 02:37:17.973568: step: 336/531, loss: 0.005476903636008501 2023-01-23 02:37:19.077019: step: 340/531, loss: 0.04162655025720596 2023-01-23 02:37:20.200782: step: 344/531, loss: 0.004769611172378063 2023-01-23 02:37:21.317448: step: 348/531, loss: 0.003454208606854081 2023-01-23 02:37:22.432109: step: 352/531, loss: 0.0018506050109863281 2023-01-23 02:37:23.559911: step: 356/531, loss: 0.0025665522553026676 2023-01-23 02:37:24.698545: step: 360/531, loss: 0.012250995263457298 2023-01-23 02:37:25.823779: step: 364/531, loss: 0.006645584013313055 2023-01-23 02:37:27.000223: step: 368/531, loss: 0.0030879019759595394 2023-01-23 02:37:28.103543: step: 372/531, loss: 0.025671081617474556 2023-01-23 02:37:29.218566: step: 376/531, loss: 0.0050674439407885075 2023-01-23 02:37:30.349538: step: 380/531, loss: 0.04109544679522514 2023-01-23 02:37:31.491455: step: 384/531, loss: 0.004543876741081476 2023-01-23 02:37:32.629953: step: 388/531, loss: 0.0060402872040867805 2023-01-23 02:37:33.781866: step: 392/531, loss: 0.2047102004289627 2023-01-23 02:37:34.916891: step: 396/531, loss: 0.00027523041353560984 2023-01-23 02:37:36.061904: step: 400/531, loss: 0.0024860382545739412 2023-01-23 02:37:37.195120: step: 404/531, loss: 0.05309438705444336 2023-01-23 02:37:38.347321: step: 408/531, loss: 0.0016244888538494706 2023-01-23 02:37:39.472743: step: 412/531, loss: 0.004697990138083696 2023-01-23 02:37:40.571560: step: 416/531, loss: 0.001513147377409041 2023-01-23 02:37:41.699490: step: 420/531, loss: 0.0012660622596740723 2023-01-23 02:37:42.811534: step: 424/531, loss: 0.0015071869129315019 2023-01-23 02:37:43.960784: step: 428/531, loss: 0.01059427298605442 2023-01-23 02:37:45.084885: step: 432/531, loss: 0.0016851425170898438 2023-01-23 02:37:46.229152: step: 436/531, loss: 0.10824041813611984 2023-01-23 02:37:47.334614: step: 440/531, loss: 0.06368827819824219 2023-01-23 02:37:48.451086: step: 444/531, loss: 0.0011883736588060856 2023-01-23 02:37:49.566473: step: 448/531, loss: 0.004915905185043812 2023-01-23 02:37:50.713710: step: 452/531, loss: 0.003677892731502652 2023-01-23 02:37:51.831672: step: 456/531, loss: 0.00048294069711118937 2023-01-23 02:37:52.957030: step: 460/531, loss: 0.010367012582719326 2023-01-23 02:37:54.099353: step: 464/531, loss: 0.00116901402361691 2023-01-23 02:37:55.218510: step: 468/531, loss: 0.008903217501938343 2023-01-23 02:37:56.354117: step: 472/531, loss: 0.012033653445541859 2023-01-23 02:37:57.461051: step: 476/531, loss: 0.020710373297333717 2023-01-23 02:37:58.570398: step: 480/531, loss: 0.03330807760357857 2023-01-23 02:37:59.722613: step: 484/531, loss: 0.0028486251831054688 2023-01-23 02:38:00.828161: step: 488/531, loss: 0.0011233806144446135 2023-01-23 02:38:01.936500: step: 492/531, loss: 0.4351252615451813 2023-01-23 02:38:03.062387: step: 496/531, loss: 0.021006012335419655 2023-01-23 02:38:04.179318: step: 500/531, loss: 0.005177307408303022 2023-01-23 02:38:05.312089: step: 504/531, loss: 0.0018013000953942537 2023-01-23 02:38:06.434326: step: 508/531, loss: 0.00504646310582757 2023-01-23 02:38:07.580057: step: 512/531, loss: 0.01098618470132351 2023-01-23 02:38:08.707623: step: 516/531, loss: 0.02823019027709961 2023-01-23 02:38:09.844067: step: 520/531, loss: 0.018952036276459694 2023-01-23 02:38:10.981401: step: 524/531, loss: 0.003096246626228094 2023-01-23 02:38:12.110636: step: 528/531, loss: 0.005417061038315296 2023-01-23 02:38:13.243852: step: 532/531, loss: 1.5354156857938506e-05 2023-01-23 02:38:14.371022: step: 536/531, loss: 2.7322768801241182e-05 2023-01-23 02:38:15.491775: step: 540/531, loss: 0.0006726026767864823 2023-01-23 02:38:16.602912: step: 544/531, loss: 0.00288047781214118 2023-01-23 02:38:17.747687: step: 548/531, loss: 0.0016357897548004985 2023-01-23 02:38:18.901065: step: 552/531, loss: 0.0012948991497978568 2023-01-23 02:38:20.030936: step: 556/531, loss: 0.08109064400196075 2023-01-23 02:38:21.153673: step: 560/531, loss: 0.00536532374098897 2023-01-23 02:38:22.262347: step: 564/531, loss: 0.0013417243026196957 2023-01-23 02:38:23.376419: step: 568/531, loss: 0.008205318823456764 2023-01-23 02:38:24.466616: step: 572/531, loss: 0.005808735266327858 2023-01-23 02:38:25.555780: step: 576/531, loss: 0.04868336021900177 2023-01-23 02:38:26.696344: step: 580/531, loss: 0.06672382354736328 2023-01-23 02:38:27.799176: step: 584/531, loss: 0.010288047604262829 2023-01-23 02:38:28.914004: step: 588/531, loss: 0.006402492988854647 2023-01-23 02:38:30.052766: step: 592/531, loss: 0.00665626535192132 2023-01-23 02:38:31.174516: step: 596/531, loss: 0.01090784091502428 2023-01-23 02:38:32.292000: step: 600/531, loss: 0.0033571242820471525 2023-01-23 02:38:33.410535: step: 604/531, loss: 0.02584400214254856 2023-01-23 02:38:34.537494: step: 608/531, loss: 0.034188270568847656 2023-01-23 02:38:35.637268: step: 612/531, loss: 0.0041165826842188835 2023-01-23 02:38:36.770751: step: 616/531, loss: 0.0005871772882528603 2023-01-23 02:38:37.877732: step: 620/531, loss: 0.002373981522396207 2023-01-23 02:38:38.981511: step: 624/531, loss: 0.0195940975099802 2023-01-23 02:38:40.092721: step: 628/531, loss: 0.0017193795647472143 2023-01-23 02:38:41.216920: step: 632/531, loss: 0.009539700113236904 2023-01-23 02:38:42.352657: step: 636/531, loss: 0.008821678347885609 2023-01-23 02:38:43.465440: step: 640/531, loss: 0.0013298034900799394 2023-01-23 02:38:44.583216: step: 644/531, loss: 0.050620272755622864 2023-01-23 02:38:45.716931: step: 648/531, loss: 9.50813337112777e-05 2023-01-23 02:38:46.836949: step: 652/531, loss: 0.02529296837747097 2023-01-23 02:38:47.944948: step: 656/531, loss: 0.15853634476661682 2023-01-23 02:38:49.076826: step: 660/531, loss: 0.005134010221809149 2023-01-23 02:38:50.228717: step: 664/531, loss: 0.04036960378289223 2023-01-23 02:38:51.364071: step: 668/531, loss: 0.0036497116088867188 2023-01-23 02:38:52.501167: step: 672/531, loss: 0.0021221160423010588 2023-01-23 02:38:53.647686: step: 676/531, loss: 0.04682498052716255 2023-01-23 02:38:54.750753: step: 680/531, loss: 0.0021339417435228825 2023-01-23 02:38:55.862179: step: 684/531, loss: 0.022316742688417435 2023-01-23 02:38:56.978153: step: 688/531, loss: 0.010634375736117363 2023-01-23 02:38:58.108618: step: 692/531, loss: 0.015162086114287376 2023-01-23 02:38:59.254884: step: 696/531, loss: 0.028644420206546783 2023-01-23 02:39:00.371402: step: 700/531, loss: 0.011274337768554688 2023-01-23 02:39:01.498849: step: 704/531, loss: 0.016017531976103783 2023-01-23 02:39:02.658398: step: 708/531, loss: 0.0025228499434888363 2023-01-23 02:39:03.760298: step: 712/531, loss: 0.002355384873226285 2023-01-23 02:39:04.919271: step: 716/531, loss: 0.00970306433737278 2023-01-23 02:39:06.029490: step: 720/531, loss: 0.061035919934511185 2023-01-23 02:39:07.160794: step: 724/531, loss: 0.00026006699772551656 2023-01-23 02:39:08.289005: step: 728/531, loss: 0.06541280448436737 2023-01-23 02:39:09.437072: step: 732/531, loss: 0.04330329969525337 2023-01-23 02:39:10.550870: step: 736/531, loss: 0.006733036134392023 2023-01-23 02:39:11.691639: step: 740/531, loss: 0.08207492530345917 2023-01-23 02:39:12.818902: step: 744/531, loss: 0.010949230752885342 2023-01-23 02:39:13.952236: step: 748/531, loss: 0.003070259001106024 2023-01-23 02:39:15.097032: step: 752/531, loss: 1.8405915398034267e-05 2023-01-23 02:39:16.228965: step: 756/531, loss: 0.0004062652587890625 2023-01-23 02:39:17.377278: step: 760/531, loss: 0.6218775510787964 2023-01-23 02:39:18.501412: step: 764/531, loss: 0.043080996721982956 2023-01-23 02:39:19.616694: step: 768/531, loss: 0.02203083038330078 2023-01-23 02:39:20.762079: step: 772/531, loss: 0.005775070283561945 2023-01-23 02:39:21.891610: step: 776/531, loss: 0.023534394800662994 2023-01-23 02:39:23.046992: step: 780/531, loss: 0.007564353756606579 2023-01-23 02:39:24.185693: step: 784/531, loss: 0.022302530705928802 2023-01-23 02:39:25.324768: step: 788/531, loss: 0.004060268402099609 2023-01-23 02:39:26.441125: step: 792/531, loss: 0.014555168338119984 2023-01-23 02:39:27.559395: step: 796/531, loss: 0.0020805359818041325 2023-01-23 02:39:28.661778: step: 800/531, loss: 0.008567524142563343 2023-01-23 02:39:29.757426: step: 804/531, loss: 0.021487999707460403 2023-01-23 02:39:30.856121: step: 808/531, loss: 0.020430423319339752 2023-01-23 02:39:31.982488: step: 812/531, loss: 0.00170307164080441 2023-01-23 02:39:33.142296: step: 816/531, loss: 0.10097865760326385 2023-01-23 02:39:34.265693: step: 820/531, loss: 0.00042905809823423624 2023-01-23 02:39:35.383661: step: 824/531, loss: 0.03677425533533096 2023-01-23 02:39:36.477434: step: 828/531, loss: 0.1829545944929123 2023-01-23 02:39:37.590905: step: 832/531, loss: 0.0034366610925644636 2023-01-23 02:39:38.702632: step: 836/531, loss: 1.640319896978326e-05 2023-01-23 02:39:39.830796: step: 840/531, loss: 0.028613854199647903 2023-01-23 02:39:40.925268: step: 844/531, loss: 0.052391670644283295 2023-01-23 02:39:42.051590: step: 848/531, loss: 0.11765146255493164 2023-01-23 02:39:43.180440: step: 852/531, loss: 0.013640928082168102 2023-01-23 02:39:44.305828: step: 856/531, loss: 0.0018596648005768657 2023-01-23 02:39:45.427351: step: 860/531, loss: 0.044037818908691406 2023-01-23 02:39:46.514474: step: 864/531, loss: 0.0026501226238906384 2023-01-23 02:39:47.632037: step: 868/531, loss: 0.00021567345538642257 2023-01-23 02:39:48.756156: step: 872/531, loss: 0.01994319073855877 2023-01-23 02:39:49.854235: step: 876/531, loss: 0.017752457410097122 2023-01-23 02:39:50.985134: step: 880/531, loss: 0.0008252143743447959 2023-01-23 02:39:52.113742: step: 884/531, loss: 0.005032062530517578 2023-01-23 02:39:53.234010: step: 888/531, loss: 0.03981513902544975 2023-01-23 02:39:54.358434: step: 892/531, loss: 6.4849853515625e-05 2023-01-23 02:39:55.475670: step: 896/531, loss: 0.03224677965044975 2023-01-23 02:39:56.621599: step: 900/531, loss: 0.002524471143260598 2023-01-23 02:39:57.739088: step: 904/531, loss: 0.0009510040399618447 2023-01-23 02:39:58.876564: step: 908/531, loss: 0.005900669377297163 2023-01-23 02:39:59.990118: step: 912/531, loss: 0.03943004459142685 2023-01-23 02:40:01.132793: step: 916/531, loss: 0.010253643617033958 2023-01-23 02:40:02.254434: step: 920/531, loss: 0.0015802383422851562 2023-01-23 02:40:03.360506: step: 924/531, loss: 0.0019981383811682463 2023-01-23 02:40:04.477484: step: 928/531, loss: 0.03139305114746094 2023-01-23 02:40:05.606253: step: 932/531, loss: 0.005165481939911842 2023-01-23 02:40:06.735502: step: 936/531, loss: 0.013060332275927067 2023-01-23 02:40:07.847165: step: 940/531, loss: 0.0742708221077919 2023-01-23 02:40:09.001088: step: 944/531, loss: 0.00038471221341751516 2023-01-23 02:40:10.141099: step: 948/531, loss: 0.003289794782176614 2023-01-23 02:40:11.282555: step: 952/531, loss: 0.002192688174545765 2023-01-23 02:40:12.397836: step: 956/531, loss: 0.0007761001470498741 2023-01-23 02:40:13.518912: step: 960/531, loss: 0.0030113221146166325 2023-01-23 02:40:14.654495: step: 964/531, loss: 0.012280750088393688 2023-01-23 02:40:15.794914: step: 968/531, loss: 0.0018809319008141756 2023-01-23 02:40:16.927654: step: 972/531, loss: 0.0034415246918797493 2023-01-23 02:40:18.047217: step: 976/531, loss: 0.0027545930352061987 2023-01-23 02:40:19.180509: step: 980/531, loss: 0.029851054772734642 2023-01-23 02:40:20.322800: step: 984/531, loss: 0.0008300781482830644 2023-01-23 02:40:21.441814: step: 988/531, loss: 0.00027751922607421875 2023-01-23 02:40:22.555516: step: 992/531, loss: 0.008925247937440872 2023-01-23 02:40:23.713926: step: 996/531, loss: 0.025669574737548828 2023-01-23 02:40:24.846878: step: 1000/531, loss: 0.0010390281677246094 2023-01-23 02:40:25.989946: step: 1004/531, loss: 0.0013409615494310856 2023-01-23 02:40:27.111649: step: 1008/531, loss: 0.0231767650693655 2023-01-23 02:40:28.234873: step: 1012/531, loss: 0.01842346228659153 2023-01-23 02:40:29.334197: step: 1016/531, loss: 0.0015836716629564762 2023-01-23 02:40:30.466648: step: 1020/531, loss: 0.018020058050751686 2023-01-23 02:40:31.582881: step: 1024/531, loss: 0.0014772891299799085 2023-01-23 02:40:32.695789: step: 1028/531, loss: 0.015034866519272327 2023-01-23 02:40:33.807305: step: 1032/531, loss: 0.004685521125793457 2023-01-23 02:40:34.943688: step: 1036/531, loss: 0.02277698554098606 2023-01-23 02:40:36.076545: step: 1040/531, loss: 0.01061859168112278 2023-01-23 02:40:37.202358: step: 1044/531, loss: 0.00027976036653853953 2023-01-23 02:40:38.303537: step: 1048/531, loss: 0.014395476318895817 2023-01-23 02:40:39.423189: step: 1052/531, loss: 0.009355640038847923 2023-01-23 02:40:40.495694: step: 1056/531, loss: 0.001991176512092352 2023-01-23 02:40:41.604569: step: 1060/531, loss: 0.0010662078857421875 2023-01-23 02:40:42.727847: step: 1064/531, loss: 0.003956603817641735 2023-01-23 02:40:43.847261: step: 1068/531, loss: 0.015176678076386452 2023-01-23 02:40:44.990955: step: 1072/531, loss: 0.009981728158891201 2023-01-23 02:40:46.126501: step: 1076/531, loss: 0.00026569367037154734 2023-01-23 02:40:47.270784: step: 1080/531, loss: 0.06001472473144531 2023-01-23 02:40:48.404302: step: 1084/531, loss: 0.020830536261200905 2023-01-23 02:40:49.564756: step: 1088/531, loss: 5.2261355449445546e-05 2023-01-23 02:40:50.664812: step: 1092/531, loss: 0.039948273450136185 2023-01-23 02:40:51.825108: step: 1096/531, loss: 0.009328365325927734 2023-01-23 02:40:52.972035: step: 1100/531, loss: 0.04995737224817276 2023-01-23 02:40:54.080629: step: 1104/531, loss: 0.039972592145204544 2023-01-23 02:40:55.229893: step: 1108/531, loss: 0.12053127586841583 2023-01-23 02:40:56.403480: step: 1112/531, loss: 0.1123877540230751 2023-01-23 02:40:57.532225: step: 1116/531, loss: 0.0007764816400595009 2023-01-23 02:40:58.638445: step: 1120/531, loss: 3.4904482163256034e-05 2023-01-23 02:40:59.763424: step: 1124/531, loss: 4.435181472217664e-05 2023-01-23 02:41:00.888808: step: 1128/531, loss: 0.016382312402129173 2023-01-23 02:41:02.086421: step: 1132/531, loss: 0.005985927768051624 2023-01-23 02:41:03.195602: step: 1136/531, loss: 0.01979808881878853 2023-01-23 02:41:04.336359: step: 1140/531, loss: 0.011768150143325329 2023-01-23 02:41:05.475320: step: 1144/531, loss: 0.028656959533691406 2023-01-23 02:41:06.610907: step: 1148/531, loss: 0.01071310043334961 2023-01-23 02:41:07.724286: step: 1152/531, loss: 0.004043102264404297 2023-01-23 02:41:08.849786: step: 1156/531, loss: 0.0013257981045171618 2023-01-23 02:41:09.990151: step: 1160/531, loss: 0.01778564415872097 2023-01-23 02:41:11.114486: step: 1164/531, loss: 0.022917555645108223 2023-01-23 02:41:12.238256: step: 1168/531, loss: 0.009804916568100452 2023-01-23 02:41:13.353060: step: 1172/531, loss: 0.0028961184434592724 2023-01-23 02:41:14.462136: step: 1176/531, loss: 0.034318890422582626 2023-01-23 02:41:15.564230: step: 1180/531, loss: 0.019968032836914062 2023-01-23 02:41:16.696413: step: 1184/531, loss: 0.013418246060609818 2023-01-23 02:41:17.836354: step: 1188/531, loss: 0.00021619796461891383 2023-01-23 02:41:18.979718: step: 1192/531, loss: 0.006854534149169922 2023-01-23 02:41:20.096939: step: 1196/531, loss: 0.0023751261178404093 2023-01-23 02:41:21.253177: step: 1200/531, loss: 0.005202579312026501 2023-01-23 02:41:22.363844: step: 1204/531, loss: 0.03944401815533638 2023-01-23 02:41:23.496183: step: 1208/531, loss: 0.043874166905879974 2023-01-23 02:41:24.636194: step: 1212/531, loss: 0.02390737645328045 2023-01-23 02:41:25.780188: step: 1216/531, loss: 0.015093469992280006 2023-01-23 02:41:26.880151: step: 1220/531, loss: 0.02066631428897381 2023-01-23 02:41:28.001294: step: 1224/531, loss: 0.01851978339254856 2023-01-23 02:41:29.111909: step: 1228/531, loss: 0.0015571594703942537 2023-01-23 02:41:30.211769: step: 1232/531, loss: 0.00017633438983466476 2023-01-23 02:41:31.360839: step: 1236/531, loss: 0.002330017276108265 2023-01-23 02:41:32.492036: step: 1240/531, loss: 0.02168307453393936 2023-01-23 02:41:33.608184: step: 1244/531, loss: 0.00017604828462935984 2023-01-23 02:41:34.711655: step: 1248/531, loss: 4.225969314575195e-05 2023-01-23 02:41:35.817138: step: 1252/531, loss: 0.007478904910385609 2023-01-23 02:41:36.953675: step: 1256/531, loss: 0.008706665597856045 2023-01-23 02:41:38.057885: step: 1260/531, loss: 0.0162827018648386 2023-01-23 02:41:39.149424: step: 1264/531, loss: 0.5142583847045898 2023-01-23 02:41:40.326852: step: 1268/531, loss: 0.011841107159852982 2023-01-23 02:41:41.474458: step: 1272/531, loss: 0.12716332077980042 2023-01-23 02:41:42.601806: step: 1276/531, loss: 0.0002827644348144531 2023-01-23 02:41:43.734746: step: 1280/531, loss: 0.04491768032312393 2023-01-23 02:41:44.825772: step: 1284/531, loss: 0.0021846771705895662 2023-01-23 02:41:45.971404: step: 1288/531, loss: 0.05561580881476402 2023-01-23 02:41:47.097095: step: 1292/531, loss: 0.02542896196246147 2023-01-23 02:41:48.215744: step: 1296/531, loss: 0.011576270684599876 2023-01-23 02:41:49.323961: step: 1300/531, loss: 0.0013802527682855725 2023-01-23 02:41:50.433549: step: 1304/531, loss: 0.0012086868518963456 2023-01-23 02:41:51.534945: step: 1308/531, loss: 0.0012704730033874512 2023-01-23 02:41:52.676209: step: 1312/531, loss: 0.005118275061249733 2023-01-23 02:41:53.798356: step: 1316/531, loss: 0.016048623248934746 2023-01-23 02:41:54.908343: step: 1320/531, loss: 0.039365578442811966 2023-01-23 02:41:56.035619: step: 1324/531, loss: 0.14676152169704437 2023-01-23 02:41:57.145091: step: 1328/531, loss: 0.0005740642664022744 2023-01-23 02:41:58.264322: step: 1332/531, loss: 0.011646652594208717 2023-01-23 02:41:59.378755: step: 1336/531, loss: 0.03494701534509659 2023-01-23 02:42:00.482860: step: 1340/531, loss: 0.017216969281435013 2023-01-23 02:42:01.599888: step: 1344/531, loss: 0.0002588629722595215 2023-01-23 02:42:02.744466: step: 1348/531, loss: 0.03356371074914932 2023-01-23 02:42:03.860546: step: 1352/531, loss: 0.0009752869373187423 2023-01-23 02:42:04.959418: step: 1356/531, loss: 0.004111766815185547 2023-01-23 02:42:06.073809: step: 1360/531, loss: 0.02898387983441353 2023-01-23 02:42:07.209368: step: 1364/531, loss: 0.019160747528076172 2023-01-23 02:42:08.317473: step: 1368/531, loss: 0.008901119232177734 2023-01-23 02:42:09.441943: step: 1372/531, loss: 0.03926844522356987 2023-01-23 02:42:10.557069: step: 1376/531, loss: 0.007487107068300247 2023-01-23 02:42:11.666045: step: 1380/531, loss: 0.013589096255600452 2023-01-23 02:42:12.808452: step: 1384/531, loss: 0.0008289336692541838 2023-01-23 02:42:13.926431: step: 1388/531, loss: 0.004898262210190296 2023-01-23 02:42:15.035393: step: 1392/531, loss: 0.030797291547060013 2023-01-23 02:42:16.164573: step: 1396/531, loss: 0.032578181475400925 2023-01-23 02:42:17.302222: step: 1400/531, loss: 0.0035278319846838713 2023-01-23 02:42:18.428586: step: 1404/531, loss: 0.07108192145824432 2023-01-23 02:42:19.545036: step: 1408/531, loss: 0.0009618759504519403 2023-01-23 02:42:20.630534: step: 1412/531, loss: 0.0014719485770910978 2023-01-23 02:42:21.747892: step: 1416/531, loss: 0.014411736279726028 2023-01-23 02:42:22.849947: step: 1420/531, loss: 0.07227764278650284 2023-01-23 02:42:23.968241: step: 1424/531, loss: 0.0005627631908282638 2023-01-23 02:42:25.098944: step: 1428/531, loss: 0.002186393830925226 2023-01-23 02:42:26.224794: step: 1432/531, loss: 0.004756069276481867 2023-01-23 02:42:27.329107: step: 1436/531, loss: 0.013280868530273438 2023-01-23 02:42:28.442108: step: 1440/531, loss: 0.07637672126293182 2023-01-23 02:42:29.551960: step: 1444/531, loss: 0.00012483597674872726 2023-01-23 02:42:30.662080: step: 1448/531, loss: 0.028372764587402344 2023-01-23 02:42:31.794127: step: 1452/531, loss: 0.13273964822292328 2023-01-23 02:42:32.931466: step: 1456/531, loss: 0.004987239837646484 2023-01-23 02:42:34.049103: step: 1460/531, loss: 0.010878301225602627 2023-01-23 02:42:35.181677: step: 1464/531, loss: 0.00035729407682083547 2023-01-23 02:42:36.320293: step: 1468/531, loss: 0.008220863528549671 2023-01-23 02:42:37.436338: step: 1472/531, loss: 0.08306674659252167 2023-01-23 02:42:38.577642: step: 1476/531, loss: 0.0025267601013183594 2023-01-23 02:42:39.704590: step: 1480/531, loss: 0.002458858536556363 2023-01-23 02:42:40.849991: step: 1484/531, loss: 0.014220905490219593 2023-01-23 02:42:41.958005: step: 1488/531, loss: 0.035814955830574036 2023-01-23 02:42:43.049689: step: 1492/531, loss: 0.0010467767715454102 2023-01-23 02:42:44.176636: step: 1496/531, loss: 0.026447296142578125 2023-01-23 02:42:45.305144: step: 1500/531, loss: 0.004253196530044079 2023-01-23 02:42:46.440519: step: 1504/531, loss: 0.010828590951859951 2023-01-23 02:42:47.574940: step: 1508/531, loss: 0.012438202276825905 2023-01-23 02:42:48.707569: step: 1512/531, loss: 0.12399540096521378 2023-01-23 02:42:49.823672: step: 1516/531, loss: 0.05304145812988281 2023-01-23 02:42:50.945210: step: 1520/531, loss: 0.9977067708969116 2023-01-23 02:42:52.051598: step: 1524/531, loss: 0.0004220008850097656 2023-01-23 02:42:53.169384: step: 1528/531, loss: 0.021581172943115234 2023-01-23 02:42:54.320035: step: 1532/531, loss: 0.013091754168272018 2023-01-23 02:42:55.452584: step: 1536/531, loss: 0.0030873059295117855 2023-01-23 02:42:56.562596: step: 1540/531, loss: 0.012424659915268421 2023-01-23 02:42:57.693713: step: 1544/531, loss: 0.004489040467888117 2023-01-23 02:42:58.841893: step: 1548/531, loss: 0.06501083076000214 2023-01-23 02:42:59.966993: step: 1552/531, loss: 0.013829231262207031 2023-01-23 02:43:01.097608: step: 1556/531, loss: 0.0030202865600585938 2023-01-23 02:43:02.240832: step: 1560/531, loss: 0.030702590942382812 2023-01-23 02:43:03.358912: step: 1564/531, loss: 0.007418537512421608 2023-01-23 02:43:04.494496: step: 1568/531, loss: 0.013056755065917969 2023-01-23 02:43:05.592875: step: 1572/531, loss: 0.02622990682721138 2023-01-23 02:43:06.699899: step: 1576/531, loss: 0.00029087066650390625 2023-01-23 02:43:07.830201: step: 1580/531, loss: 0.0006665230030193925 2023-01-23 02:43:08.959616: step: 1584/531, loss: 0.020923137664794922 2023-01-23 02:43:10.082648: step: 1588/531, loss: 0.002063846681267023 2023-01-23 02:43:11.208994: step: 1592/531, loss: 0.035413406789302826 2023-01-23 02:43:12.316201: step: 1596/531, loss: 0.003982734400779009 2023-01-23 02:43:13.433739: step: 1600/531, loss: 0.015372944064438343 2023-01-23 02:43:14.575841: step: 1604/531, loss: 0.0336545966565609 2023-01-23 02:43:15.704779: step: 1608/531, loss: 0.001318645547144115 2023-01-23 02:43:16.880479: step: 1612/531, loss: 0.0953337699174881 2023-01-23 02:43:17.998653: step: 1616/531, loss: 0.013346386142075062 2023-01-23 02:43:19.153417: step: 1620/531, loss: 0.0012282371753826737 2023-01-23 02:43:20.301093: step: 1624/531, loss: 0.00912400521337986 2023-01-23 02:43:21.447831: step: 1628/531, loss: 0.017554188147187233 2023-01-23 02:43:22.602453: step: 1632/531, loss: 0.3540692925453186 2023-01-23 02:43:23.738076: step: 1636/531, loss: 0.003894519992172718 2023-01-23 02:43:24.874783: step: 1640/531, loss: 0.0267702117562294 2023-01-23 02:43:25.976237: step: 1644/531, loss: 0.02014951780438423 2023-01-23 02:43:27.094702: step: 1648/531, loss: 0.016979217529296875 2023-01-23 02:43:28.203013: step: 1652/531, loss: 0.0168896671384573 2023-01-23 02:43:29.354830: step: 1656/531, loss: 0.048267364501953125 2023-01-23 02:43:30.457542: step: 1660/531, loss: 0.009469985961914062 2023-01-23 02:43:31.597373: step: 1664/531, loss: 0.005391311831772327 2023-01-23 02:43:32.696466: step: 1668/531, loss: 0.0007425307994708419 2023-01-23 02:43:33.801794: step: 1672/531, loss: 0.0420406349003315 2023-01-23 02:43:34.911328: step: 1676/531, loss: 0.0016560613876208663 2023-01-23 02:43:36.036122: step: 1680/531, loss: 0.005781936924904585 2023-01-23 02:43:37.174103: step: 1684/531, loss: 0.013836097903549671 2023-01-23 02:43:38.296142: step: 1688/531, loss: 0.04584769159555435 2023-01-23 02:43:39.418289: step: 1692/531, loss: 0.7160861492156982 2023-01-23 02:43:40.600498: step: 1696/531, loss: 0.007689857389777899 2023-01-23 02:43:41.693740: step: 1700/531, loss: 0.025319673120975494 2023-01-23 02:43:42.839136: step: 1704/531, loss: 0.0014995576348155737 2023-01-23 02:43:43.937325: step: 1708/531, loss: 0.0001924514799611643 2023-01-23 02:43:45.117056: step: 1712/531, loss: 0.011108208447694778 2023-01-23 02:43:46.244058: step: 1716/531, loss: 2.5463104975642636e-05 2023-01-23 02:43:47.368382: step: 1720/531, loss: 0.0024192810524255037 2023-01-23 02:43:48.475735: step: 1724/531, loss: 0.005981254391372204 2023-01-23 02:43:49.596478: step: 1728/531, loss: 0.006235494278371334 2023-01-23 02:43:50.717596: step: 1732/531, loss: 0.0011915683280676603 2023-01-23 02:43:51.806554: step: 1736/531, loss: 0.02448296546936035 2023-01-23 02:43:52.924497: step: 1740/531, loss: 0.00030746462289243937 2023-01-23 02:43:54.058308: step: 1744/531, loss: 0.002175998641178012 2023-01-23 02:43:55.176935: step: 1748/531, loss: 0.014426803216338158 2023-01-23 02:43:56.317679: step: 1752/531, loss: 0.0007915496826171875 2023-01-23 02:43:57.461992: step: 1756/531, loss: 0.01966848410665989 2023-01-23 02:43:58.585225: step: 1760/531, loss: 0.0036152838729321957 2023-01-23 02:43:59.676938: step: 1764/531, loss: 0.004521941766142845 2023-01-23 02:44:00.818012: step: 1768/531, loss: 0.0029969215393066406 2023-01-23 02:44:01.918586: step: 1772/531, loss: 0.1358700692653656 2023-01-23 02:44:03.029972: step: 1776/531, loss: 0.00028514862060546875 2023-01-23 02:44:04.162085: step: 1780/531, loss: 0.05023372173309326 2023-01-23 02:44:05.295691: step: 1784/531, loss: 0.00014681815810035914 2023-01-23 02:44:06.420649: step: 1788/531, loss: 0.1009206771850586 2023-01-23 02:44:07.542778: step: 1792/531, loss: 0.008335304446518421 2023-01-23 02:44:08.655656: step: 1796/531, loss: 0.007214164361357689 2023-01-23 02:44:09.783188: step: 1800/531, loss: 0.01632719114422798 2023-01-23 02:44:10.894148: step: 1804/531, loss: 0.011091900058090687 2023-01-23 02:44:12.023200: step: 1808/531, loss: 0.009494018740952015 2023-01-23 02:44:13.122733: step: 1812/531, loss: 4.9591064453125e-05 2023-01-23 02:44:14.224223: step: 1816/531, loss: 0.0048836711794137955 2023-01-23 02:44:15.360856: step: 1820/531, loss: 0.011188220232725143 2023-01-23 02:44:16.477959: step: 1824/531, loss: 0.03418397903442383 2023-01-23 02:44:17.590257: step: 1828/531, loss: 0.006816101260483265 2023-01-23 02:44:18.734647: step: 1832/531, loss: 0.014355897903442383 2023-01-23 02:44:19.859519: step: 1836/531, loss: 0.012384224683046341 2023-01-23 02:44:20.979250: step: 1840/531, loss: 0.015866853296756744 2023-01-23 02:44:22.097153: step: 1844/531, loss: 0.005111503414809704 2023-01-23 02:44:23.210645: step: 1848/531, loss: 0.0036849023308604956 2023-01-23 02:44:24.307201: step: 1852/531, loss: 0.00725555419921875 2023-01-23 02:44:25.419072: step: 1856/531, loss: 0.006637954618781805 2023-01-23 02:44:26.537344: step: 1860/531, loss: 0.027082158252596855 2023-01-23 02:44:27.684946: step: 1864/531, loss: 6.027221752447076e-05 2023-01-23 02:44:28.819241: step: 1868/531, loss: 0.004913997836410999 2023-01-23 02:44:29.940484: step: 1872/531, loss: 0.01860027387738228 2023-01-23 02:44:31.027736: step: 1876/531, loss: 0.0009579658508300781 2023-01-23 02:44:32.163141: step: 1880/531, loss: 0.013873673044145107 2023-01-23 02:44:33.292180: step: 1884/531, loss: 0.0077481744810938835 2023-01-23 02:44:34.413089: step: 1888/531, loss: 0.0016108512645587325 2023-01-23 02:44:35.532349: step: 1892/531, loss: 0.02321157418191433 2023-01-23 02:44:36.638519: step: 1896/531, loss: 0.00941934622824192 2023-01-23 02:44:37.775331: step: 1900/531, loss: 0.012918282300233841 2023-01-23 02:44:38.910073: step: 1904/531, loss: 0.09151711314916611 2023-01-23 02:44:40.055893: step: 1908/531, loss: 0.023932266980409622 2023-01-23 02:44:41.193154: step: 1912/531, loss: 0.0008336066966876388 2023-01-23 02:44:42.354439: step: 1916/531, loss: 0.0006187439430505037 2023-01-23 02:44:43.465773: step: 1920/531, loss: 0.03745322674512863 2023-01-23 02:44:44.577344: step: 1924/531, loss: 0.0685700923204422 2023-01-23 02:44:45.733124: step: 1928/531, loss: 0.03850212320685387 2023-01-23 02:44:46.889982: step: 1932/531, loss: 0.03474188223481178 2023-01-23 02:44:48.031356: step: 1936/531, loss: 0.07472896575927734 2023-01-23 02:44:49.152538: step: 1940/531, loss: 0.0002773284795694053 2023-01-23 02:44:50.272901: step: 1944/531, loss: 0.01052703894674778 2023-01-23 02:44:51.392809: step: 1948/531, loss: 0.002759838243946433 2023-01-23 02:44:52.528373: step: 1952/531, loss: 0.01571073569357395 2023-01-23 02:44:53.633902: step: 1956/531, loss: 0.019237900152802467 2023-01-23 02:44:54.804778: step: 1960/531, loss: 0.006918144412338734 2023-01-23 02:44:55.925644: step: 1964/531, loss: 0.0060765743255615234 2023-01-23 02:44:57.054660: step: 1968/531, loss: 0.203074648976326 2023-01-23 02:44:58.173758: step: 1972/531, loss: 0.02259836345911026 2023-01-23 02:44:59.293121: step: 1976/531, loss: 0.05362844839692116 2023-01-23 02:45:00.470179: step: 1980/531, loss: 0.04101886600255966 2023-01-23 02:45:01.606381: step: 1984/531, loss: 0.10275831818580627 2023-01-23 02:45:02.725670: step: 1988/531, loss: 0.005096626468002796 2023-01-23 02:45:03.869190: step: 1992/531, loss: 0.04721679538488388 2023-01-23 02:45:05.009963: step: 1996/531, loss: 0.00303573627024889 2023-01-23 02:45:06.151320: step: 2000/531, loss: 0.004309463780373335 2023-01-23 02:45:07.274581: step: 2004/531, loss: 0.0004177093505859375 2023-01-23 02:45:08.387283: step: 2008/531, loss: 0.04394808039069176 2023-01-23 02:45:09.525981: step: 2012/531, loss: 0.004927921574562788 2023-01-23 02:45:10.646910: step: 2016/531, loss: 0.010747337713837624 2023-01-23 02:45:11.757521: step: 2020/531, loss: 0.007941722869873047 2023-01-23 02:45:12.932046: step: 2024/531, loss: 0.0010666847229003906 2023-01-23 02:45:14.059370: step: 2028/531, loss: 0.01838255114853382 2023-01-23 02:45:15.177084: step: 2032/531, loss: 0.013684415258467197 2023-01-23 02:45:16.290767: step: 2036/531, loss: 0.041971590369939804 2023-01-23 02:45:17.408316: step: 2040/531, loss: 0.04240410402417183 2023-01-23 02:45:18.533105: step: 2044/531, loss: 0.0019712448120117188 2023-01-23 02:45:19.701788: step: 2048/531, loss: 0.028433896601200104 2023-01-23 02:45:20.836987: step: 2052/531, loss: 0.0034172057639807463 2023-01-23 02:45:21.947517: step: 2056/531, loss: 0.00012826919555664062 2023-01-23 02:45:23.093662: step: 2060/531, loss: 0.0026869773864746094 2023-01-23 02:45:24.213774: step: 2064/531, loss: 0.006666278932243586 2023-01-23 02:45:25.326078: step: 2068/531, loss: 0.03172168880701065 2023-01-23 02:45:26.452879: step: 2072/531, loss: 0.001980256987735629 2023-01-23 02:45:27.576389: step: 2076/531, loss: 0.04443387687206268 2023-01-23 02:45:28.697767: step: 2080/531, loss: 0.029921580106019974 2023-01-23 02:45:29.797328: step: 2084/531, loss: 0.004064416978508234 2023-01-23 02:45:30.919419: step: 2088/531, loss: 0.02584085613489151 2023-01-23 02:45:32.025148: step: 2092/531, loss: 0.05431981384754181 2023-01-23 02:45:33.185656: step: 2096/531, loss: 0.0022117614280432463 2023-01-23 02:45:34.298367: step: 2100/531, loss: 5.0139427912654355e-05 2023-01-23 02:45:35.393206: step: 2104/531, loss: 0.16524915397167206 2023-01-23 02:45:36.502821: step: 2108/531, loss: 0.0037687302101403475 2023-01-23 02:45:37.644894: step: 2112/531, loss: 0.00509986886754632 2023-01-23 02:45:38.749791: step: 2116/531, loss: 0.023537253960967064 2023-01-23 02:45:39.869715: step: 2120/531, loss: 6.151199340820312e-05 2023-01-23 02:45:41.023950: step: 2124/531, loss: 0.00567970285192132 ================================================== Loss: 0.028 -------------------- Dev: {'event': {'p': 0.6068111455108359, 'r': 0.7829560585885486, 'f1': 0.6837209302325582}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Test: {'event': {'p': 0.6335282651072125, 'r': 0.7751937984496124, 'f1': 0.6972378653794583}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Chinese: {'event': {'p': 0.5833333333333334, 'r': 0.9074074074074074, 'f1': 0.7101449275362318}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Korean: {'event': {'p': 0.6981132075471698, 'r': 0.5873015873015873, 'f1': 0.6379310344827586}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Russian: {'event': {'p': 0.4186046511627907, 'r': 0.5, 'f1': 0.45569620253164556}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 19 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:46:20.859701: step: 4/531, loss: 0.021774865686893463 2023-01-23 02:46:21.985605: step: 8/531, loss: 0.003297806018963456 2023-01-23 02:46:23.113297: step: 12/531, loss: 0.0008193016401492059 2023-01-23 02:46:24.229280: step: 16/531, loss: 0.004964637570083141 2023-01-23 02:46:25.418305: step: 20/531, loss: 0.026790238916873932 2023-01-23 02:46:26.562493: step: 24/531, loss: 0.004458617884665728 2023-01-23 02:46:27.701852: step: 28/531, loss: 0.0053986553102731705 2023-01-23 02:46:28.810040: step: 32/531, loss: 0.00088586809579283 2023-01-23 02:46:29.939349: step: 36/531, loss: 0.001613616943359375 2023-01-23 02:46:31.103586: step: 40/531, loss: 0.002263720380142331 2023-01-23 02:46:32.227596: step: 44/531, loss: 0.009144973941147327 2023-01-23 02:46:33.345197: step: 48/531, loss: 0.019995499402284622 2023-01-23 02:46:34.484350: step: 52/531, loss: 0.01346435584127903 2023-01-23 02:46:35.595113: step: 56/531, loss: 0.00715184211730957 2023-01-23 02:46:36.702535: step: 60/531, loss: 0.0001561522512929514 2023-01-23 02:46:37.815533: step: 64/531, loss: 0.00826277770102024 2023-01-23 02:46:38.950048: step: 68/531, loss: 0.00022821426682639867 2023-01-23 02:46:40.072465: step: 72/531, loss: 0.0005769729614257812 2023-01-23 02:46:41.195450: step: 76/531, loss: 0.0006278991932049394 2023-01-23 02:46:42.334533: step: 80/531, loss: 0.0016788482898846269 2023-01-23 02:46:43.476261: step: 84/531, loss: 0.02372589334845543 2023-01-23 02:46:44.570708: step: 88/531, loss: 0.04442644119262695 2023-01-23 02:46:45.680438: step: 92/531, loss: 0.03994712978601456 2023-01-23 02:46:46.804752: step: 96/531, loss: 8.316040475619957e-05 2023-01-23 02:46:47.941433: step: 100/531, loss: 0.001708173775114119 2023-01-23 02:46:49.061910: step: 104/531, loss: 0.009626675397157669 2023-01-23 02:46:50.187087: step: 108/531, loss: 0.023299027234315872 2023-01-23 02:46:51.351513: step: 112/531, loss: 0.0004995345952920616 2023-01-23 02:46:52.450224: step: 116/531, loss: 0.00029392243595793843 2023-01-23 02:46:53.569487: step: 120/531, loss: 0.08871670067310333 2023-01-23 02:46:54.702201: step: 124/531, loss: 0.02886180952191353 2023-01-23 02:46:55.840921: step: 128/531, loss: 0.005850434303283691 2023-01-23 02:46:56.971501: step: 132/531, loss: 0.0003031254163943231 2023-01-23 02:46:58.092456: step: 136/531, loss: 0.007513666059821844 2023-01-23 02:46:59.228142: step: 140/531, loss: 0.014364433474838734 2023-01-23 02:47:00.343953: step: 144/531, loss: 1.621246337890625e-05 2023-01-23 02:47:01.526785: step: 148/531, loss: 0.016949845477938652 2023-01-23 02:47:02.648335: step: 152/531, loss: 0.03637523949146271 2023-01-23 02:47:03.789336: step: 156/531, loss: 0.008419609628617764 2023-01-23 02:47:04.921234: step: 160/531, loss: 0.009605360217392445 2023-01-23 02:47:06.050969: step: 164/531, loss: 0.0005672454717569053 2023-01-23 02:47:07.170457: step: 168/531, loss: 9.479522850597277e-05 2023-01-23 02:47:08.309659: step: 172/531, loss: 0.004299068823456764 2023-01-23 02:47:09.448109: step: 176/531, loss: 0.010895348154008389 2023-01-23 02:47:10.602097: step: 180/531, loss: 0.022380853071808815 2023-01-23 02:47:11.714534: step: 184/531, loss: 0.03881235048174858 2023-01-23 02:47:12.810761: step: 188/531, loss: 0.0012378693791106343 2023-01-23 02:47:13.957676: step: 192/531, loss: 0.0003843307786155492 2023-01-23 02:47:15.079433: step: 196/531, loss: 4.014969090349041e-05 2023-01-23 02:47:16.186318: step: 200/531, loss: 1.7309188478975557e-05 2023-01-23 02:47:17.310140: step: 204/531, loss: 0.0024289132561534643 2023-01-23 02:47:18.481974: step: 208/531, loss: 0.023194218054413795 2023-01-23 02:47:19.582239: step: 212/531, loss: 0.0009517669677734375 2023-01-23 02:47:20.699998: step: 216/531, loss: 0.0016796112759038806 2023-01-23 02:47:21.813251: step: 220/531, loss: 0.00046072006807662547 2023-01-23 02:47:22.927711: step: 224/531, loss: 0.0037144662346690893 2023-01-23 02:47:24.066098: step: 228/531, loss: 0.011960029602050781 2023-01-23 02:47:25.207235: step: 232/531, loss: 0.04152221605181694 2023-01-23 02:47:26.325680: step: 236/531, loss: 0.007193088531494141 2023-01-23 02:47:27.440922: step: 240/531, loss: 0.029358768835663795 2023-01-23 02:47:28.538166: step: 244/531, loss: 0.00495758093893528 2023-01-23 02:47:29.633626: step: 248/531, loss: 0.019596051424741745 2023-01-23 02:47:30.750591: step: 252/531, loss: 0.026639176532626152 2023-01-23 02:47:31.872197: step: 256/531, loss: 0.026469087228178978 2023-01-23 02:47:32.972483: step: 260/531, loss: 0.0074745179153978825 2023-01-23 02:47:34.103350: step: 264/531, loss: 9.298324584960938e-05 2023-01-23 02:47:35.198165: step: 268/531, loss: 0.01766366884112358 2023-01-23 02:47:36.367614: step: 272/531, loss: 0.00015125275240279734 2023-01-23 02:47:37.493119: step: 276/531, loss: 7.233619544422254e-05 2023-01-23 02:47:38.656732: step: 280/531, loss: 0.0015003205044195056 2023-01-23 02:47:39.774498: step: 284/531, loss: 0.007439041044563055 2023-01-23 02:47:40.893570: step: 288/531, loss: 0.00200061802752316 2023-01-23 02:47:42.058759: step: 292/531, loss: 0.007681847084313631 2023-01-23 02:47:43.188428: step: 296/531, loss: 0.07629070430994034 2023-01-23 02:47:44.330662: step: 300/531, loss: 7.085800461936742e-05 2023-01-23 02:47:45.443701: step: 304/531, loss: 0.022782612591981888 2023-01-23 02:47:46.589264: step: 308/531, loss: 0.01322860736399889 2023-01-23 02:47:47.706022: step: 312/531, loss: 0.0021564005874097347 2023-01-23 02:47:48.807211: step: 316/531, loss: 0.00033664703369140625 2023-01-23 02:47:49.943664: step: 320/531, loss: 6.408691842807457e-05 2023-01-23 02:47:51.047434: step: 324/531, loss: 0.015746403485536575 2023-01-23 02:47:52.203143: step: 328/531, loss: 0.0006113052368164062 2023-01-23 02:47:53.317935: step: 332/531, loss: 0.0001567840517964214 2023-01-23 02:47:54.497370: step: 336/531, loss: 0.0001987457217182964 2023-01-23 02:47:55.656310: step: 340/531, loss: 0.002033424563705921 2023-01-23 02:47:56.811621: step: 344/531, loss: 0.006628227420151234 2023-01-23 02:47:57.931088: step: 348/531, loss: 0.00033245087251998484 2023-01-23 02:47:59.073935: step: 352/531, loss: 0.0070993900299072266 2023-01-23 02:48:00.170107: step: 356/531, loss: 0.0002586364862509072 2023-01-23 02:48:01.313213: step: 360/531, loss: 0.00681352661922574 2023-01-23 02:48:02.428172: step: 364/531, loss: 0.0006393432850018144 2023-01-23 02:48:03.536425: step: 368/531, loss: 0.00043745042057707906 2023-01-23 02:48:04.652227: step: 372/531, loss: 0.016713649034500122 2023-01-23 02:48:05.756130: step: 376/531, loss: 6.008148193359375e-05 2023-01-23 02:48:06.881955: step: 380/531, loss: 0.0001295089750783518 2023-01-23 02:48:08.034803: step: 384/531, loss: 4.09603126172442e-05 2023-01-23 02:48:09.150290: step: 388/531, loss: 0.02054738998413086 2023-01-23 02:48:10.246403: step: 392/531, loss: 0.012370300479233265 2023-01-23 02:48:11.372277: step: 396/531, loss: 0.0016174317570403218 2023-01-23 02:48:12.489878: step: 400/531, loss: 0.004982471466064453 2023-01-23 02:48:13.615564: step: 404/531, loss: 0.020618915557861328 2023-01-23 02:48:14.733388: step: 408/531, loss: 0.0016371726524084806 2023-01-23 02:48:15.870800: step: 412/531, loss: 0.009154129773378372 2023-01-23 02:48:17.005273: step: 416/531, loss: 0.0031499862670898438 2023-01-23 02:48:18.131177: step: 420/531, loss: 0.009401131421327591 2023-01-23 02:48:19.259493: step: 424/531, loss: 0.03657817840576172 2023-01-23 02:48:20.396709: step: 428/531, loss: 0.004002475645393133 2023-01-23 02:48:21.515724: step: 432/531, loss: 0.03427467495203018 2023-01-23 02:48:22.650406: step: 436/531, loss: 0.0012998580932617188 2023-01-23 02:48:23.770027: step: 440/531, loss: 0.008352660574018955 2023-01-23 02:48:24.888259: step: 444/531, loss: 0.010051536373794079 2023-01-23 02:48:26.023516: step: 448/531, loss: 0.023071765899658203 2023-01-23 02:48:27.139686: step: 452/531, loss: 0.0051774028688669205 2023-01-23 02:48:28.284714: step: 456/531, loss: 0.047353506088256836 2023-01-23 02:48:29.385008: step: 460/531, loss: 0.0015575409634038806 2023-01-23 02:48:30.510614: step: 464/531, loss: 0.028129005804657936 2023-01-23 02:48:31.635016: step: 468/531, loss: 0.10703869163990021 2023-01-23 02:48:32.764675: step: 472/531, loss: 2.150845766067505 2023-01-23 02:48:33.876776: step: 476/531, loss: 0.08871117234230042 2023-01-23 02:48:34.993132: step: 480/531, loss: 0.02733917161822319 2023-01-23 02:48:36.112660: step: 484/531, loss: 0.0016666412120684981 2023-01-23 02:48:37.221711: step: 488/531, loss: 0.038111019879579544 2023-01-23 02:48:38.342140: step: 492/531, loss: 0.0211913101375103 2023-01-23 02:48:39.483581: step: 496/531, loss: 0.040123939514160156 2023-01-23 02:48:40.575260: step: 500/531, loss: 0.001449155854061246 2023-01-23 02:48:41.730944: step: 504/531, loss: 0.010195351205766201 2023-01-23 02:48:42.885717: step: 508/531, loss: 0.014333915896713734 2023-01-23 02:48:44.029124: step: 512/531, loss: 0.009818458929657936 2023-01-23 02:48:45.137351: step: 516/531, loss: 0.0017593861557543278 2023-01-23 02:48:46.274580: step: 520/531, loss: 0.03319110721349716 2023-01-23 02:48:47.403838: step: 524/531, loss: 0.001635646796785295 2023-01-23 02:48:48.557743: step: 528/531, loss: 0.0005859375232830644 2023-01-23 02:48:49.681283: step: 532/531, loss: 0.004174709320068359 2023-01-23 02:48:50.774108: step: 536/531, loss: 1.4686585018353071e-05 2023-01-23 02:48:51.867053: step: 540/531, loss: 0.02717127837240696 2023-01-23 02:48:52.979966: step: 544/531, loss: 0.07589860260486603 2023-01-23 02:48:54.101107: step: 548/531, loss: 0.0011615753173828125 2023-01-23 02:48:55.252229: step: 552/531, loss: 6.50405854685232e-05 2023-01-23 02:48:56.389451: step: 556/531, loss: 0.0038873672019690275 2023-01-23 02:48:57.543625: step: 560/531, loss: 0.002979278564453125 2023-01-23 02:48:58.658370: step: 564/531, loss: 0.01357269287109375 2023-01-23 02:48:59.761036: step: 568/531, loss: 0.0006916999700479209 2023-01-23 02:49:00.888036: step: 572/531, loss: 0.037612345069646835 2023-01-23 02:49:02.024856: step: 576/531, loss: 0.061872102320194244 2023-01-23 02:49:03.144452: step: 580/531, loss: 0.0006643772358074784 2023-01-23 02:49:04.248232: step: 584/531, loss: 0.0032600881531834602 2023-01-23 02:49:05.343965: step: 588/531, loss: 0.018627069890499115 2023-01-23 02:49:06.520066: step: 592/531, loss: 0.006424427032470703 2023-01-23 02:49:07.654476: step: 596/531, loss: 0.028169060125947 2023-01-23 02:49:08.769515: step: 600/531, loss: 0.00293979630805552 2023-01-23 02:49:09.892150: step: 604/531, loss: 0.00856781005859375 2023-01-23 02:49:11.002912: step: 608/531, loss: 0.0031015395652502775 2023-01-23 02:49:12.121989: step: 612/531, loss: 0.11968812346458435 2023-01-23 02:49:13.261017: step: 616/531, loss: 0.002677345182746649 2023-01-23 02:49:14.394404: step: 620/531, loss: 0.010006952099502087 2023-01-23 02:49:15.531119: step: 624/531, loss: 7.791518874000758e-05 2023-01-23 02:49:16.638183: step: 628/531, loss: 0.09418831020593643 2023-01-23 02:49:17.768912: step: 632/531, loss: 0.0015654563903808594 2023-01-23 02:49:18.895971: step: 636/531, loss: 0.038721658289432526 2023-01-23 02:49:20.056035: step: 640/531, loss: 0.0008864403353072703 2023-01-23 02:49:21.140632: step: 644/531, loss: 0.009693908505141735 2023-01-23 02:49:22.253731: step: 648/531, loss: 0.004512977320700884 2023-01-23 02:49:23.374332: step: 652/531, loss: 0.08417234569787979 2023-01-23 02:49:24.485465: step: 656/531, loss: 0.05561542510986328 2023-01-23 02:49:25.627808: step: 660/531, loss: 0.03448457643389702 2023-01-23 02:49:26.746529: step: 664/531, loss: 0.006698226556181908 2023-01-23 02:49:27.869688: step: 668/531, loss: 0.0004410743713378906 2023-01-23 02:49:28.987025: step: 672/531, loss: 0.0008738517644815147 2023-01-23 02:49:30.135810: step: 676/531, loss: 0.012852382846176624 2023-01-23 02:49:31.255480: step: 680/531, loss: 0.00514068640768528 2023-01-23 02:49:32.392530: step: 684/531, loss: 0.013738918118178844 2023-01-23 02:49:33.508994: step: 688/531, loss: 0.003887748811393976 2023-01-23 02:49:34.615982: step: 692/531, loss: 0.004408836364746094 2023-01-23 02:49:35.726873: step: 696/531, loss: 0.0009400367271155119 2023-01-23 02:49:36.873401: step: 700/531, loss: 0.005610466003417969 2023-01-23 02:49:37.975542: step: 704/531, loss: 0.046169281005859375 2023-01-23 02:49:39.129195: step: 708/531, loss: 0.0016885282238945365 2023-01-23 02:49:40.241977: step: 712/531, loss: 0.07701750099658966 2023-01-23 02:49:41.348780: step: 716/531, loss: 0.00010318756540073082 2023-01-23 02:49:42.470894: step: 720/531, loss: 0.034327082335948944 2023-01-23 02:49:43.579125: step: 724/531, loss: 0.039343737065792084 2023-01-23 02:49:44.744478: step: 728/531, loss: 0.001779746962711215 2023-01-23 02:49:45.885384: step: 732/531, loss: 0.0003326416190247983 2023-01-23 02:49:47.008958: step: 736/531, loss: 0.01098089199513197 2023-01-23 02:49:48.117254: step: 740/531, loss: 0.00043025019112974405 2023-01-23 02:49:49.225687: step: 744/531, loss: 0.004878139588981867 2023-01-23 02:49:50.379253: step: 748/531, loss: 0.16960486769676208 2023-01-23 02:49:51.506182: step: 752/531, loss: 0.007018971722573042 2023-01-23 02:49:52.665275: step: 756/531, loss: 0.27476367354393005 2023-01-23 02:49:53.813738: step: 760/531, loss: 0.010851479135453701 2023-01-23 02:49:54.960058: step: 764/531, loss: 0.0008899689419195056 2023-01-23 02:49:56.090119: step: 768/531, loss: 0.07832906395196915 2023-01-23 02:49:57.211683: step: 772/531, loss: 0.006233406253159046 2023-01-23 02:49:58.326774: step: 776/531, loss: 0.02190704457461834 2023-01-23 02:49:59.417801: step: 780/531, loss: 0.009016704745590687 2023-01-23 02:50:00.532363: step: 784/531, loss: 0.004018593113869429 2023-01-23 02:50:01.660076: step: 788/531, loss: 0.12882958352565765 2023-01-23 02:50:02.771238: step: 792/531, loss: 0.03900623321533203 2023-01-23 02:50:03.918107: step: 796/531, loss: 0.003976059146225452 2023-01-23 02:50:05.016677: step: 800/531, loss: 0.003622150281444192 2023-01-23 02:50:06.112470: step: 804/531, loss: 0.000869369541760534 2023-01-23 02:50:07.244984: step: 808/531, loss: 0.0006271362653933465 2023-01-23 02:50:08.368024: step: 812/531, loss: 0.001170349190942943 2023-01-23 02:50:09.527811: step: 816/531, loss: 0.005314064212143421 2023-01-23 02:50:10.636639: step: 820/531, loss: 0.01682577282190323 2023-01-23 02:50:11.802145: step: 824/531, loss: 0.09665050357580185 2023-01-23 02:50:12.895999: step: 828/531, loss: 0.010643578134477139 2023-01-23 02:50:14.010291: step: 832/531, loss: 0.004461956210434437 2023-01-23 02:50:15.152408: step: 836/531, loss: 0.025493621826171875 2023-01-23 02:50:16.265134: step: 840/531, loss: 0.00021114348783157766 2023-01-23 02:50:17.369393: step: 844/531, loss: 0.0010432243579998612 2023-01-23 02:50:18.496005: step: 848/531, loss: 0.0006978035089559853 2023-01-23 02:50:19.590628: step: 852/531, loss: 0.0020401955116540194 2023-01-23 02:50:20.723091: step: 856/531, loss: 0.007204628083854914 2023-01-23 02:50:21.880786: step: 860/531, loss: 0.00022249220637604594 2023-01-23 02:50:22.991524: step: 864/531, loss: 0.0034241676330566406 2023-01-23 02:50:24.122927: step: 868/531, loss: 0.015542030334472656 2023-01-23 02:50:25.228785: step: 872/531, loss: 0.014749717898666859 2023-01-23 02:50:26.356610: step: 876/531, loss: 0.2659747004508972 2023-01-23 02:50:27.507468: step: 880/531, loss: 0.0003009796200785786 2023-01-23 02:50:28.662396: step: 884/531, loss: 0.0334935188293457 2023-01-23 02:50:29.794801: step: 888/531, loss: 0.00046634674072265625 2023-01-23 02:50:30.923939: step: 892/531, loss: 0.0377628318965435 2023-01-23 02:50:32.041928: step: 896/531, loss: 0.0030347825959324837 2023-01-23 02:50:33.165866: step: 900/531, loss: 0.0003682136593852192 2023-01-23 02:50:34.275440: step: 904/531, loss: 0.001639080117456615 2023-01-23 02:50:35.367664: step: 908/531, loss: 0.00545191764831543 2023-01-23 02:50:36.531189: step: 912/531, loss: 5.9413909184513614e-05 2023-01-23 02:50:37.649918: step: 916/531, loss: 0.011179161258041859 2023-01-23 02:50:38.752474: step: 920/531, loss: 0.02012462541460991 2023-01-23 02:50:39.881807: step: 924/531, loss: 0.0021423338912427425 2023-01-23 02:50:40.993466: step: 928/531, loss: 0.0006941795581951737 2023-01-23 02:50:42.121676: step: 932/531, loss: 0.016993045806884766 2023-01-23 02:50:43.255781: step: 936/531, loss: 0.0271956454962492 2023-01-23 02:50:44.353412: step: 940/531, loss: 0.009504509158432484 2023-01-23 02:50:45.442930: step: 944/531, loss: 7.572174217784777e-05 2023-01-23 02:50:46.597686: step: 948/531, loss: 0.003237152239307761 2023-01-23 02:50:47.716940: step: 952/531, loss: 0.001349163125269115 2023-01-23 02:50:48.843579: step: 956/531, loss: 0.0029471160378307104 2023-01-23 02:50:49.939851: step: 960/531, loss: 0.0008769988780841231 2023-01-23 02:50:51.072754: step: 964/531, loss: 0.0027378082741051912 2023-01-23 02:50:52.189428: step: 968/531, loss: 6.790161569369957e-05 2023-01-23 02:50:53.338931: step: 972/531, loss: 0.007440376095473766 2023-01-23 02:50:54.459858: step: 976/531, loss: 2.212524486822076e-05 2023-01-23 02:50:55.552582: step: 980/531, loss: 0.0023380278144031763 2023-01-23 02:50:56.661394: step: 984/531, loss: 0.01171875 2023-01-23 02:50:57.803253: step: 988/531, loss: 0.08126763999462128 2023-01-23 02:50:58.955683: step: 992/531, loss: 0.005520439241081476 2023-01-23 02:51:00.082508: step: 996/531, loss: 0.02872181124985218 2023-01-23 02:51:01.195367: step: 1000/531, loss: 0.00659027136862278 2023-01-23 02:51:02.314579: step: 1004/531, loss: 0.002119636395946145 2023-01-23 02:51:03.442231: step: 1008/531, loss: 0.023251915350556374 2023-01-23 02:51:04.545121: step: 1012/531, loss: 0.007507658097893 2023-01-23 02:51:05.669129: step: 1016/531, loss: 0.009218406863510609 2023-01-23 02:51:06.807667: step: 1020/531, loss: 7.848739915061742e-05 2023-01-23 02:51:07.932070: step: 1024/531, loss: 0.023998452350497246 2023-01-23 02:51:09.026694: step: 1028/531, loss: 0.0024919509887695312 2023-01-23 02:51:10.134274: step: 1032/531, loss: 0.3282504975795746 2023-01-23 02:51:11.252345: step: 1036/531, loss: 0.07005710899829865 2023-01-23 02:51:12.385933: step: 1040/531, loss: 0.002349138492718339 2023-01-23 02:51:13.511794: step: 1044/531, loss: 0.014036941342055798 2023-01-23 02:51:14.648163: step: 1048/531, loss: 0.9460031390190125 2023-01-23 02:51:15.768151: step: 1052/531, loss: 0.0005957603571005166 2023-01-23 02:51:16.936037: step: 1056/531, loss: 0.0005673408741131425 2023-01-23 02:51:18.074551: step: 1060/531, loss: 0.022977065294981003 2023-01-23 02:51:19.246970: step: 1064/531, loss: 0.0015851021744310856 2023-01-23 02:51:20.409311: step: 1068/531, loss: 0.0017631531227380037 2023-01-23 02:51:21.511112: step: 1072/531, loss: 0.010807324200868607 2023-01-23 02:51:22.622398: step: 1076/531, loss: 0.0019211769104003906 2023-01-23 02:51:23.764874: step: 1080/531, loss: 0.01241006888449192 2023-01-23 02:51:24.881640: step: 1084/531, loss: 0.001371192978695035 2023-01-23 02:51:26.041139: step: 1088/531, loss: 0.022568846121430397 2023-01-23 02:51:27.189355: step: 1092/531, loss: 0.0575714111328125 2023-01-23 02:51:28.274024: step: 1096/531, loss: 0.00023736954608466476 2023-01-23 02:51:29.409180: step: 1100/531, loss: 0.0002596855047158897 2023-01-23 02:51:30.517619: step: 1104/531, loss: 0.02332897298038006 2023-01-23 02:51:31.654308: step: 1108/531, loss: 0.0010480880737304688 2023-01-23 02:51:32.790211: step: 1112/531, loss: 0.025191783905029297 2023-01-23 02:51:33.910491: step: 1116/531, loss: 0.002181148622184992 2023-01-23 02:51:35.022964: step: 1120/531, loss: 0.06768856197595596 2023-01-23 02:51:36.136127: step: 1124/531, loss: 0.02094268798828125 2023-01-23 02:51:37.269751: step: 1128/531, loss: 0.0018142700428143144 2023-01-23 02:51:38.394067: step: 1132/531, loss: 0.0005220413440838456 2023-01-23 02:51:39.518908: step: 1136/531, loss: 0.004665279295295477 2023-01-23 02:51:40.650739: step: 1140/531, loss: 0.004373789299279451 2023-01-23 02:51:41.758705: step: 1144/531, loss: 0.0011593819363042712 2023-01-23 02:51:42.921400: step: 1148/531, loss: 0.01715850830078125 2023-01-23 02:51:44.048500: step: 1152/531, loss: 0.014469528570771217 2023-01-23 02:51:45.152052: step: 1156/531, loss: 0.06168556213378906 2023-01-23 02:51:46.279189: step: 1160/531, loss: 0.017923450097441673 2023-01-23 02:51:47.413178: step: 1164/531, loss: 7.877350435592234e-05 2023-01-23 02:51:48.514885: step: 1168/531, loss: 0.15074072778224945 2023-01-23 02:51:49.625519: step: 1172/531, loss: 0.003982043359428644 2023-01-23 02:51:50.744428: step: 1176/531, loss: 0.0001735687255859375 2023-01-23 02:51:51.849990: step: 1180/531, loss: 0.013539505191147327 2023-01-23 02:51:52.949980: step: 1184/531, loss: 0.00222358712926507 2023-01-23 02:51:54.039797: step: 1188/531, loss: 0.06036806106567383 2023-01-23 02:51:55.153056: step: 1192/531, loss: 0.02926473505795002 2023-01-23 02:51:56.272334: step: 1196/531, loss: 0.005973434541374445 2023-01-23 02:51:57.398049: step: 1200/531, loss: 0.07603234797716141 2023-01-23 02:51:58.527135: step: 1204/531, loss: 0.08309116214513779 2023-01-23 02:51:59.661426: step: 1208/531, loss: 0.001554298447445035 2023-01-23 02:52:00.770232: step: 1212/531, loss: 0.016304684802889824 2023-01-23 02:52:01.898902: step: 1216/531, loss: 0.0026255608536303043 2023-01-23 02:52:03.011969: step: 1220/531, loss: 0.005945563316345215 2023-01-23 02:52:04.148463: step: 1224/531, loss: 0.011342979036271572 2023-01-23 02:52:05.288709: step: 1228/531, loss: 0.0019277870887890458 2023-01-23 02:52:06.455313: step: 1232/531, loss: 0.004613018594682217 2023-01-23 02:52:07.568585: step: 1236/531, loss: 0.008495235815644264 2023-01-23 02:52:08.696772: step: 1240/531, loss: 0.0004703998565673828 2023-01-23 02:52:09.836495: step: 1244/531, loss: 0.009263992309570312 2023-01-23 02:52:10.950088: step: 1248/531, loss: 0.026507757604122162 2023-01-23 02:52:12.092782: step: 1252/531, loss: 0.0037362095899879932 2023-01-23 02:52:13.201287: step: 1256/531, loss: 0.00014877320791129023 2023-01-23 02:52:14.371734: step: 1260/531, loss: 0.025048065930604935 2023-01-23 02:52:15.495028: step: 1264/531, loss: 0.015803813934326172 2023-01-23 02:52:16.609549: step: 1268/531, loss: 0.019459737464785576 2023-01-23 02:52:17.720880: step: 1272/531, loss: 5.708222389221191 2023-01-23 02:52:18.846101: step: 1276/531, loss: 0.006019210908561945 2023-01-23 02:52:19.961345: step: 1280/531, loss: 0.0006554603460244834 2023-01-23 02:52:21.074828: step: 1284/531, loss: 0.025496290996670723 2023-01-23 02:52:22.201039: step: 1288/531, loss: 0.0019849776290357113 2023-01-23 02:52:23.306587: step: 1292/531, loss: 0.0039237975142896175 2023-01-23 02:52:24.413549: step: 1296/531, loss: 0.002945423126220703 2023-01-23 02:52:25.548803: step: 1300/531, loss: 0.0004101276572328061 2023-01-23 02:52:26.667334: step: 1304/531, loss: 0.01688413694500923 2023-01-23 02:52:27.791975: step: 1308/531, loss: 0.006579685490578413 2023-01-23 02:52:28.908164: step: 1312/531, loss: 0.0008119583362713456 2023-01-23 02:52:30.011255: step: 1316/531, loss: 0.03907661512494087 2023-01-23 02:52:31.123308: step: 1320/531, loss: 0.06547622382640839 2023-01-23 02:52:32.291232: step: 1324/531, loss: 0.0116767892614007 2023-01-23 02:52:33.416414: step: 1328/531, loss: 0.0016048431862145662 2023-01-23 02:52:34.538660: step: 1332/531, loss: 0.0022387029603123665 2023-01-23 02:52:35.661619: step: 1336/531, loss: 0.05269908905029297 2023-01-23 02:52:36.786069: step: 1340/531, loss: 0.00705718994140625 2023-01-23 02:52:37.903701: step: 1344/531, loss: 0.01972327195107937 2023-01-23 02:52:39.030294: step: 1348/531, loss: 0.010999679565429688 2023-01-23 02:52:40.156693: step: 1352/531, loss: 7.190704491222277e-05 2023-01-23 02:52:41.299634: step: 1356/531, loss: 0.05779323726892471 2023-01-23 02:52:42.434103: step: 1360/531, loss: 0.10737772285938263 2023-01-23 02:52:43.554286: step: 1364/531, loss: 0.011308384127914906 2023-01-23 02:52:44.666943: step: 1368/531, loss: 0.005716132931411266 2023-01-23 02:52:45.756406: step: 1372/531, loss: 0.002205276396125555 2023-01-23 02:52:46.884178: step: 1376/531, loss: 0.11157312989234924 2023-01-23 02:52:48.025517: step: 1380/531, loss: 0.039624977856874466 2023-01-23 02:52:49.143096: step: 1384/531, loss: 0.0033893585205078125 2023-01-23 02:52:50.267356: step: 1388/531, loss: 0.02301044389605522 2023-01-23 02:52:51.379875: step: 1392/531, loss: 1.4877320609230082e-05 2023-01-23 02:52:52.502917: step: 1396/531, loss: 0.016780473291873932 2023-01-23 02:52:53.610064: step: 1400/531, loss: 0.0001243591250386089 2023-01-23 02:52:54.732346: step: 1404/531, loss: 0.00256862654350698 2023-01-23 02:52:55.841530: step: 1408/531, loss: 0.02869243733584881 2023-01-23 02:52:56.990419: step: 1412/531, loss: 0.0808895081281662 2023-01-23 02:52:58.110892: step: 1416/531, loss: 0.005414771847426891 2023-01-23 02:52:59.216832: step: 1420/531, loss: 0.026924896985292435 2023-01-23 02:53:00.387611: step: 1424/531, loss: 0.005845832638442516 2023-01-23 02:53:01.522236: step: 1428/531, loss: 0.019009782001376152 2023-01-23 02:53:02.645513: step: 1432/531, loss: 0.010701370425522327 2023-01-23 02:53:03.752513: step: 1436/531, loss: 0.00855798739939928 2023-01-23 02:53:04.895052: step: 1440/531, loss: 0.07895880192518234 2023-01-23 02:53:06.010818: step: 1444/531, loss: 0.0001310408115386963 2023-01-23 02:53:07.090586: step: 1448/531, loss: 6.456374831032008e-05 2023-01-23 02:53:08.214318: step: 1452/531, loss: 0.027254678308963776 2023-01-23 02:53:09.325181: step: 1456/531, loss: 8.285045623779297e-06 2023-01-23 02:53:10.471948: step: 1460/531, loss: 0.0012935638660565019 2023-01-23 02:53:11.672700: step: 1464/531, loss: 5.92231735936366e-05 2023-01-23 02:53:12.784149: step: 1468/531, loss: 0.00257530203089118 2023-01-23 02:53:13.902211: step: 1472/531, loss: 0.0187561996281147 2023-01-23 02:53:15.008985: step: 1476/531, loss: 0.0022233964409679174 2023-01-23 02:53:16.117013: step: 1480/531, loss: 0.033853914588689804 2023-01-23 02:53:17.248910: step: 1484/531, loss: 0.008032608777284622 2023-01-23 02:53:18.368055: step: 1488/531, loss: 0.058172985911369324 2023-01-23 02:53:19.485705: step: 1492/531, loss: 0.01960144005715847 2023-01-23 02:53:20.602811: step: 1496/531, loss: 0.0006505012279376388 2023-01-23 02:53:21.751689: step: 1500/531, loss: 0.03543548658490181 2023-01-23 02:53:22.886957: step: 1504/531, loss: 0.020316410809755325 2023-01-23 02:53:24.026534: step: 1508/531, loss: 0.0008294105646200478 2023-01-23 02:53:25.165944: step: 1512/531, loss: 0.0038497925270348787 2023-01-23 02:53:26.285149: step: 1516/531, loss: 0.00017452239990234375 2023-01-23 02:53:27.379581: step: 1520/531, loss: 0.025977134704589844 2023-01-23 02:53:28.503096: step: 1524/531, loss: 0.19000402092933655 2023-01-23 02:53:29.621003: step: 1528/531, loss: 0.0007034302107058465 2023-01-23 02:53:30.709113: step: 1532/531, loss: 0.0004435062292031944 2023-01-23 02:53:31.838839: step: 1536/531, loss: 0.038680270314216614 2023-01-23 02:53:32.960084: step: 1540/531, loss: 0.0119050033390522 2023-01-23 02:53:34.065555: step: 1544/531, loss: 0.009246635250747204 2023-01-23 02:53:35.219944: step: 1548/531, loss: 0.0768747329711914 2023-01-23 02:53:36.342969: step: 1552/531, loss: 0.002993679139763117 2023-01-23 02:53:37.455246: step: 1556/531, loss: 0.009084701538085938 2023-01-23 02:53:38.571162: step: 1560/531, loss: 0.026015853509306908 2023-01-23 02:53:39.718325: step: 1564/531, loss: 0.025530051440000534 2023-01-23 02:53:40.869555: step: 1568/531, loss: 0.00481414794921875 2023-01-23 02:53:41.992082: step: 1572/531, loss: 0.0170959010720253 2023-01-23 02:53:43.111534: step: 1576/531, loss: 0.0021110535599291325 2023-01-23 02:53:44.247557: step: 1580/531, loss: 0.003279113909229636 2023-01-23 02:53:45.389640: step: 1584/531, loss: 0.013786125928163528 2023-01-23 02:53:46.530420: step: 1588/531, loss: 0.4355245530605316 2023-01-23 02:53:47.640498: step: 1592/531, loss: 0.02680683322250843 2023-01-23 02:53:48.769408: step: 1596/531, loss: 0.001596164656803012 2023-01-23 02:53:49.929340: step: 1600/531, loss: 0.044547270983457565 2023-01-23 02:53:51.078977: step: 1604/531, loss: 0.0033332600723952055 2023-01-23 02:53:52.172741: step: 1608/531, loss: 0.0007431983831338584 2023-01-23 02:53:53.317723: step: 1612/531, loss: 0.05588526651263237 2023-01-23 02:53:54.415466: step: 1616/531, loss: 0.01378555316478014 2023-01-23 02:53:55.579386: step: 1620/531, loss: 0.007518768310546875 2023-01-23 02:53:56.686982: step: 1624/531, loss: 0.5234982371330261 2023-01-23 02:53:57.812375: step: 1628/531, loss: 0.03787059709429741 2023-01-23 02:53:58.940588: step: 1632/531, loss: 0.033288098871707916 2023-01-23 02:54:00.044325: step: 1636/531, loss: 0.06309814751148224 2023-01-23 02:54:01.178522: step: 1640/531, loss: 0.0019058703910559416 2023-01-23 02:54:02.321678: step: 1644/531, loss: 0.00012168884131824598 2023-01-23 02:54:03.467918: step: 1648/531, loss: 0.01869392767548561 2023-01-23 02:54:04.604014: step: 1652/531, loss: 0.011718559078872204 2023-01-23 02:54:05.708130: step: 1656/531, loss: 0.08411464095115662 2023-01-23 02:54:06.843116: step: 1660/531, loss: 0.10163459926843643 2023-01-23 02:54:07.982151: step: 1664/531, loss: 0.0006858825217932463 2023-01-23 02:54:09.116567: step: 1668/531, loss: 0.00040068625821731985 2023-01-23 02:54:10.255877: step: 1672/531, loss: 0.006806755438446999 2023-01-23 02:54:11.411045: step: 1676/531, loss: 0.022788619622588158 2023-01-23 02:54:12.546503: step: 1680/531, loss: 0.0009676933404989541 2023-01-23 02:54:13.648458: step: 1684/531, loss: 0.0006309509626589715 2023-01-23 02:54:14.793044: step: 1688/531, loss: 0.021967411041259766 2023-01-23 02:54:15.931697: step: 1692/531, loss: 0.0069030760787427425 2023-01-23 02:54:17.021353: step: 1696/531, loss: 0.024108124896883965 2023-01-23 02:54:18.133015: step: 1700/531, loss: 0.011985397897660732 2023-01-23 02:54:19.245796: step: 1704/531, loss: 0.0008909702301025391 2023-01-23 02:54:20.354082: step: 1708/531, loss: 0.028653526678681374 2023-01-23 02:54:21.475451: step: 1712/531, loss: 0.0013363838661462069 2023-01-23 02:54:22.589061: step: 1716/531, loss: 0.0009044647449627519 2023-01-23 02:54:23.763193: step: 1720/531, loss: 0.40769681334495544 2023-01-23 02:54:24.851797: step: 1724/531, loss: 0.01176919974386692 2023-01-23 02:54:25.975953: step: 1728/531, loss: 0.0016537666087970138 2023-01-23 02:54:27.118844: step: 1732/531, loss: 0.002243518829345703 2023-01-23 02:54:28.262904: step: 1736/531, loss: 0.0081787109375 2023-01-23 02:54:29.404147: step: 1740/531, loss: 0.023046303540468216 2023-01-23 02:54:30.520591: step: 1744/531, loss: 0.0001750946103129536 2023-01-23 02:54:31.649832: step: 1748/531, loss: 0.018784141167998314 2023-01-23 02:54:32.748271: step: 1752/531, loss: 0.0007564544794149697 2023-01-23 02:54:33.860438: step: 1756/531, loss: 0.0015965461498126388 2023-01-23 02:54:35.004072: step: 1760/531, loss: 0.04813232645392418 2023-01-23 02:54:36.125708: step: 1764/531, loss: 0.01738262176513672 2023-01-23 02:54:37.241302: step: 1768/531, loss: 0.016205787658691406 2023-01-23 02:54:38.410516: step: 1772/531, loss: 0.015374946407973766 2023-01-23 02:54:39.538871: step: 1776/531, loss: 0.09460468590259552 2023-01-23 02:54:40.670306: step: 1780/531, loss: 0.03217830881476402 2023-01-23 02:54:41.787845: step: 1784/531, loss: 0.0019771577790379524 2023-01-23 02:54:42.897206: step: 1788/531, loss: 0.045255471020936966 2023-01-23 02:54:44.036541: step: 1792/531, loss: 0.5086870193481445 2023-01-23 02:54:45.159669: step: 1796/531, loss: 0.00323238386772573 2023-01-23 02:54:46.306210: step: 1800/531, loss: 0.01699838787317276 2023-01-23 02:54:47.456298: step: 1804/531, loss: 0.0033195496071130037 2023-01-23 02:54:48.586394: step: 1808/531, loss: 0.009302711114287376 2023-01-23 02:54:49.701212: step: 1812/531, loss: 0.01246786117553711 2023-01-23 02:54:50.843650: step: 1816/531, loss: 0.018048763275146484 2023-01-23 02:54:51.949394: step: 1820/531, loss: 0.021665286272764206 2023-01-23 02:54:53.056615: step: 1824/531, loss: 0.05903530493378639 2023-01-23 02:54:54.201535: step: 1828/531, loss: 0.002219820162281394 2023-01-23 02:54:55.327128: step: 1832/531, loss: 0.0009521216270513833 2023-01-23 02:54:56.475430: step: 1836/531, loss: 0.0267473217099905 2023-01-23 02:54:57.607098: step: 1840/531, loss: 3.442764500505291e-05 2023-01-23 02:54:58.735990: step: 1844/531, loss: 0.0003047943173442036 2023-01-23 02:54:59.871891: step: 1848/531, loss: 0.0029417993500828743 2023-01-23 02:55:01.000509: step: 1852/531, loss: 0.0022022249177098274 2023-01-23 02:55:02.095434: step: 1856/531, loss: 0.008856392465531826 2023-01-23 02:55:03.246809: step: 1860/531, loss: 0.024514103308320045 2023-01-23 02:55:04.364308: step: 1864/531, loss: 0.010797357186675072 2023-01-23 02:55:05.529262: step: 1868/531, loss: 0.008493900299072266 2023-01-23 02:55:06.668025: step: 1872/531, loss: 0.0013776780106127262 2023-01-23 02:55:07.803170: step: 1876/531, loss: 0.008048439398407936 2023-01-23 02:55:08.941537: step: 1880/531, loss: 0.011584472842514515 2023-01-23 02:55:10.042521: step: 1884/531, loss: 0.020602131262421608 2023-01-23 02:55:11.143420: step: 1888/531, loss: 0.000511550868395716 2023-01-23 02:55:12.285345: step: 1892/531, loss: 0.00052471156232059 2023-01-23 02:55:13.408233: step: 1896/531, loss: 0.023732900619506836 2023-01-23 02:55:14.515971: step: 1900/531, loss: 0.0006944656488485634 2023-01-23 02:55:15.647260: step: 1904/531, loss: 0.017507171258330345 2023-01-23 02:55:16.766991: step: 1908/531, loss: 0.0008890152093954384 2023-01-23 02:55:17.893937: step: 1912/531, loss: 0.521493136882782 2023-01-23 02:55:19.022702: step: 1916/531, loss: 0.006575965788215399 2023-01-23 02:55:20.174852: step: 1920/531, loss: 0.03630819171667099 2023-01-23 02:55:21.291615: step: 1924/531, loss: 0.0015388488536700606 2023-01-23 02:55:22.419281: step: 1928/531, loss: 0.0010128498543053865 2023-01-23 02:55:23.556702: step: 1932/531, loss: 9.243488602805883e-05 2023-01-23 02:55:24.672488: step: 1936/531, loss: 0.04502449184656143 2023-01-23 02:55:25.766956: step: 1940/531, loss: 0.024698257446289062 2023-01-23 02:55:26.866718: step: 1944/531, loss: 0.049653541296720505 2023-01-23 02:55:28.002358: step: 1948/531, loss: 0.007354449946433306 2023-01-23 02:55:29.097893: step: 1952/531, loss: 0.0004199981631245464 2023-01-23 02:55:30.229924: step: 1956/531, loss: 0.03657131269574165 2023-01-23 02:55:31.338565: step: 1960/531, loss: 0.02404041402041912 2023-01-23 02:55:32.479735: step: 1964/531, loss: 0.003380298614501953 2023-01-23 02:55:33.615523: step: 1968/531, loss: 0.04798908159136772 2023-01-23 02:55:34.742095: step: 1972/531, loss: 6.561279587913305e-05 2023-01-23 02:55:35.857640: step: 1976/531, loss: 0.06883364170789719 2023-01-23 02:55:36.984944: step: 1980/531, loss: 0.105682373046875 2023-01-23 02:55:38.107367: step: 1984/531, loss: 0.03601713478565216 2023-01-23 02:55:39.231692: step: 1988/531, loss: 0.05721559748053551 2023-01-23 02:55:40.371443: step: 1992/531, loss: 0.001248264336027205 2023-01-23 02:55:41.496869: step: 1996/531, loss: 0.004532671067863703 2023-01-23 02:55:42.606335: step: 2000/531, loss: 0.009381581097841263 2023-01-23 02:55:43.698075: step: 2004/531, loss: 0.007079505827277899 2023-01-23 02:55:44.836629: step: 2008/531, loss: 0.027020836248993874 2023-01-23 02:55:45.947431: step: 2012/531, loss: 0.005078315734863281 2023-01-23 02:55:47.049234: step: 2016/531, loss: 0.07335501164197922 2023-01-23 02:55:48.160065: step: 2020/531, loss: 0.0004953384632244706 2023-01-23 02:55:49.288062: step: 2024/531, loss: 0.015381241217255592 2023-01-23 02:55:50.413199: step: 2028/531, loss: 0.04582581669092178 2023-01-23 02:55:51.534049: step: 2032/531, loss: 0.00015506744966842234 2023-01-23 02:55:52.670380: step: 2036/531, loss: 0.0020740509498864412 2023-01-23 02:55:53.820043: step: 2040/531, loss: 0.731652021408081 2023-01-23 02:55:54.956758: step: 2044/531, loss: 0.016856957226991653 2023-01-23 02:55:56.098351: step: 2048/531, loss: 0.036249447613954544 2023-01-23 02:55:57.211295: step: 2052/531, loss: 0.3892022967338562 2023-01-23 02:55:58.344685: step: 2056/531, loss: 0.0001808166562113911 2023-01-23 02:55:59.474508: step: 2060/531, loss: 0.05925578996539116 2023-01-23 02:56:00.619162: step: 2064/531, loss: 0.002898311708122492 2023-01-23 02:56:01.733795: step: 2068/531, loss: 0.19325199723243713 2023-01-23 02:56:02.888711: step: 2072/531, loss: 0.006012630648910999 2023-01-23 02:56:04.013237: step: 2076/531, loss: 0.0014399291248992085 2023-01-23 02:56:05.134440: step: 2080/531, loss: 0.063079833984375 2023-01-23 02:56:06.254118: step: 2084/531, loss: 0.025150300934910774 2023-01-23 02:56:07.390599: step: 2088/531, loss: 0.0005260050529614091 2023-01-23 02:56:08.532282: step: 2092/531, loss: 0.06699495017528534 2023-01-23 02:56:09.649794: step: 2096/531, loss: 0.0049537657760083675 2023-01-23 02:56:10.785623: step: 2100/531, loss: 0.0001588821323821321 2023-01-23 02:56:11.933576: step: 2104/531, loss: 0.0003912449174094945 2023-01-23 02:56:13.018234: step: 2108/531, loss: 0.007337617687880993 2023-01-23 02:56:14.161976: step: 2112/531, loss: 0.10829510539770126 2023-01-23 02:56:15.321662: step: 2116/531, loss: 0.056516267359256744 2023-01-23 02:56:16.452795: step: 2120/531, loss: 0.05558471754193306 2023-01-23 02:56:17.556805: step: 2124/531, loss: 0.009617852978408337 ================================================== Loss: 0.042 -------------------- Dev: {'event': {'p': 0.5828343313373253, 'r': 0.7776298268974701, 'f1': 0.6662863662293211}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Test: {'event': {'p': 0.6200832947709394, 'r': 0.7990459153249851, 'f1': 0.6982803543512246}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Chinese: {'event': {'p': 0.5384615384615384, 'r': 0.9074074074074074, 'f1': 0.6758620689655174}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Korean: {'event': {'p': 0.68, 'r': 0.5396825396825397, 'f1': 0.6017699115044247}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Russian: {'event': {'p': 0.40476190476190477, 'r': 0.4722222222222222, 'f1': 0.4358974358974359}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 20 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:56:57.641100: step: 4/531, loss: 0.0042684078216552734 2023-01-23 02:56:58.766577: step: 8/531, loss: 0.00369839696213603 2023-01-23 02:56:59.896224: step: 12/531, loss: 0.005042648408561945 2023-01-23 02:57:01.028568: step: 16/531, loss: 0.0013494491577148438 2023-01-23 02:57:02.133325: step: 20/531, loss: 0.012805701233446598 2023-01-23 02:57:03.273844: step: 24/531, loss: 0.3416140079498291 2023-01-23 02:57:04.359331: step: 28/531, loss: 0.0012895584804937243 2023-01-23 02:57:05.476241: step: 32/531, loss: 0.012044906616210938 2023-01-23 02:57:06.577853: step: 36/531, loss: 0.0002877235529012978 2023-01-23 02:57:07.692622: step: 40/531, loss: 0.0004231453058309853 2023-01-23 02:57:08.810935: step: 44/531, loss: 0.0647466629743576 2023-01-23 02:57:09.948450: step: 48/531, loss: 0.029975654557347298 2023-01-23 02:57:11.086795: step: 52/531, loss: 0.0002577781560830772 2023-01-23 02:57:12.222639: step: 56/531, loss: 0.010384988971054554 2023-01-23 02:57:13.355636: step: 60/531, loss: 0.003957128617912531 2023-01-23 02:57:14.476973: step: 64/531, loss: 0.0008245467906817794 2023-01-23 02:57:15.608450: step: 68/531, loss: 0.0007278442499227822 2023-01-23 02:57:16.738954: step: 72/531, loss: 0.019107341766357422 2023-01-23 02:57:17.888887: step: 76/531, loss: 0.008124161511659622 2023-01-23 02:57:18.989402: step: 80/531, loss: 0.005074691958725452 2023-01-23 02:57:20.102365: step: 84/531, loss: 0.0038271904923021793 2023-01-23 02:57:21.211560: step: 88/531, loss: 0.0018447876209393144 2023-01-23 02:57:22.365884: step: 92/531, loss: 0.004069709684699774 2023-01-23 02:57:23.491564: step: 96/531, loss: 0.009900283999741077 2023-01-23 02:57:24.614191: step: 100/531, loss: 0.009450817480683327 2023-01-23 02:57:25.755929: step: 104/531, loss: 0.023413658142089844 2023-01-23 02:57:26.899170: step: 108/531, loss: 0.01458053570240736 2023-01-23 02:57:28.018936: step: 112/531, loss: 0.001660156180150807 2023-01-23 02:57:29.131433: step: 116/531, loss: 0.06835594028234482 2023-01-23 02:57:30.252812: step: 120/531, loss: 0.0029195547103881836 2023-01-23 02:57:31.399573: step: 124/531, loss: 0.020675089210271835 2023-01-23 02:57:32.538511: step: 128/531, loss: 0.0027666091918945312 2023-01-23 02:57:33.663000: step: 132/531, loss: 0.00020875930204056203 2023-01-23 02:57:34.771918: step: 136/531, loss: 0.0036311151925474405 2023-01-23 02:57:35.920583: step: 140/531, loss: 0.0023852349258959293 2023-01-23 02:57:37.048613: step: 144/531, loss: 0.0293103214353323 2023-01-23 02:57:38.172648: step: 148/531, loss: 0.014716720208525658 2023-01-23 02:57:39.336631: step: 152/531, loss: 2.727508581301663e-05 2023-01-23 02:57:40.432337: step: 156/531, loss: 0.001646280288696289 2023-01-23 02:57:41.552122: step: 160/531, loss: 0.00989227369427681 2023-01-23 02:57:42.729565: step: 164/531, loss: 3.228187779313885e-05 2023-01-23 02:57:43.884811: step: 168/531, loss: 0.013320541009306908 2023-01-23 02:57:45.010781: step: 172/531, loss: 0.019048595800995827 2023-01-23 02:57:46.133016: step: 176/531, loss: 0.0021677017211914062 2023-01-23 02:57:47.267648: step: 180/531, loss: 0.011674595065414906 2023-01-23 02:57:48.390807: step: 184/531, loss: 0.014705467969179153 2023-01-23 02:57:49.538740: step: 188/531, loss: 0.04396438971161842 2023-01-23 02:57:50.694775: step: 192/531, loss: 0.017233848571777344 2023-01-23 02:57:51.819129: step: 196/531, loss: 0.00020437240891624242 2023-01-23 02:57:52.936893: step: 200/531, loss: 0.006771183107048273 2023-01-23 02:57:54.065006: step: 204/531, loss: 0.030660247430205345 2023-01-23 02:57:55.171582: step: 208/531, loss: 0.039941027760505676 2023-01-23 02:57:56.281152: step: 212/531, loss: 0.017331313341856003 2023-01-23 02:57:57.406754: step: 216/531, loss: 0.0004978657234460115 2023-01-23 02:57:58.513790: step: 220/531, loss: 0.02194214053452015 2023-01-23 02:57:59.632433: step: 224/531, loss: 0.0006645202520303428 2023-01-23 02:58:00.778945: step: 228/531, loss: 0.0003487587091512978 2023-01-23 02:58:01.891868: step: 232/531, loss: 0.00278053293004632 2023-01-23 02:58:03.012142: step: 236/531, loss: 0.004224204923957586 2023-01-23 02:58:04.138400: step: 240/531, loss: 0.011733436957001686 2023-01-23 02:58:05.263635: step: 244/531, loss: 0.004566955845803022 2023-01-23 02:58:06.364566: step: 248/531, loss: 0.004250908270478249 2023-01-23 02:58:07.513620: step: 252/531, loss: 0.05702371522784233 2023-01-23 02:58:08.690630: step: 256/531, loss: 0.020575236529111862 2023-01-23 02:58:09.801874: step: 260/531, loss: 5.3596493671648204e-05 2023-01-23 02:58:10.944416: step: 264/531, loss: 0.03170504420995712 2023-01-23 02:58:12.099589: step: 268/531, loss: 0.0038485052064061165 2023-01-23 02:58:13.214567: step: 272/531, loss: 0.0015896796248853207 2023-01-23 02:58:14.312273: step: 276/531, loss: 9.32693510549143e-05 2023-01-23 02:58:15.455224: step: 280/531, loss: 0.020946789532899857 2023-01-23 02:58:16.603658: step: 284/531, loss: 0.006133079994469881 2023-01-23 02:58:17.730360: step: 288/531, loss: 0.00022163391986396164 2023-01-23 02:58:18.841182: step: 292/531, loss: 0.006408786866813898 2023-01-23 02:58:19.928528: step: 296/531, loss: 0.0061737061478197575 2023-01-23 02:58:21.072790: step: 300/531, loss: 0.012976265512406826 2023-01-23 02:58:22.219412: step: 304/531, loss: 0.007125473581254482 2023-01-23 02:58:23.353596: step: 308/531, loss: 0.0043090819381177425 2023-01-23 02:58:24.461010: step: 312/531, loss: 0.0018391608027741313 2023-01-23 02:58:25.591094: step: 316/531, loss: 0.03563044220209122 2023-01-23 02:58:26.714029: step: 320/531, loss: 0.012878989800810814 2023-01-23 02:58:27.845430: step: 324/531, loss: 0.0625690445303917 2023-01-23 02:58:28.973057: step: 328/531, loss: 0.012818765826523304 2023-01-23 02:58:30.105619: step: 332/531, loss: 0.00028896331787109375 2023-01-23 02:58:31.220712: step: 336/531, loss: 0.013665581122040749 2023-01-23 02:58:32.308132: step: 340/531, loss: 0.0013178824447095394 2023-01-23 02:58:33.425292: step: 344/531, loss: 0.05526390299201012 2023-01-23 02:58:34.547119: step: 348/531, loss: 0.1336648017168045 2023-01-23 02:58:35.686675: step: 352/531, loss: 0.02966899797320366 2023-01-23 02:58:36.831255: step: 356/531, loss: 0.01762561686336994 2023-01-23 02:58:37.958618: step: 360/531, loss: 3.24249276673072e-06 2023-01-23 02:58:39.103549: step: 364/531, loss: 0.03783607482910156 2023-01-23 02:58:40.211630: step: 368/531, loss: 0.0004861831839662045 2023-01-23 02:58:41.315684: step: 372/531, loss: 0.021983718499541283 2023-01-23 02:58:42.456512: step: 376/531, loss: 0.00438003521412611 2023-01-23 02:58:43.585375: step: 380/531, loss: 0.026051903143525124 2023-01-23 02:58:44.693518: step: 384/531, loss: 0.0002876281796488911 2023-01-23 02:58:45.798067: step: 388/531, loss: 3.805160304182209e-05 2023-01-23 02:58:46.898882: step: 392/531, loss: 0.0014912605984136462 2023-01-23 02:58:47.990013: step: 396/531, loss: 0.02807464636862278 2023-01-23 02:58:49.128636: step: 400/531, loss: 4.684925079345703e-05 2023-01-23 02:58:50.311497: step: 404/531, loss: -9.15527380129788e-06 2023-01-23 02:58:51.429466: step: 408/531, loss: 0.0007923126104287803 2023-01-23 02:58:52.584217: step: 412/531, loss: 0.06204424053430557 2023-01-23 02:58:53.690119: step: 416/531, loss: 0.03123779408633709 2023-01-23 02:58:54.796511: step: 420/531, loss: 0.03875160217285156 2023-01-23 02:58:55.926153: step: 424/531, loss: 0.022251605987548828 2023-01-23 02:58:57.057668: step: 428/531, loss: 0.0013034821022301912 2023-01-23 02:58:58.203944: step: 432/531, loss: 0.022576000541448593 2023-01-23 02:58:59.298554: step: 436/531, loss: 0.06838814914226532 2023-01-23 02:59:00.456479: step: 440/531, loss: 0.007851028814911842 2023-01-23 02:59:01.595183: step: 444/531, loss: 0.03305073082447052 2023-01-23 02:59:02.754068: step: 448/531, loss: 3.662109520519152e-05 2023-01-23 02:59:03.890035: step: 452/531, loss: 0.023189593106508255 2023-01-23 02:59:05.065874: step: 456/531, loss: 7.314681715797633e-05 2023-01-23 02:59:06.188402: step: 460/531, loss: 0.012070274911820889 2023-01-23 02:59:07.279096: step: 464/531, loss: 0.161982923746109 2023-01-23 02:59:08.407824: step: 468/531, loss: 0.01486282330006361 2023-01-23 02:59:09.538103: step: 472/531, loss: 0.04620418697595596 2023-01-23 02:59:10.687950: step: 476/531, loss: 0.034688565880060196 2023-01-23 02:59:11.874129: step: 480/531, loss: 0.00039467812166549265 2023-01-23 02:59:12.972716: step: 484/531, loss: 0.01094350777566433 2023-01-23 02:59:14.116904: step: 488/531, loss: 0.00190229422878474 2023-01-23 02:59:15.222954: step: 492/531, loss: 0.0003351211780682206 2023-01-23 02:59:16.346339: step: 496/531, loss: 0.00015544891357421875 2023-01-23 02:59:17.456384: step: 500/531, loss: 0.017740439623594284 2023-01-23 02:59:18.600374: step: 504/531, loss: 0.008106040768325329 2023-01-23 02:59:19.700430: step: 508/531, loss: 0.002338457154110074 2023-01-23 02:59:20.804334: step: 512/531, loss: 0.0009860038990154862 2023-01-23 02:59:21.936119: step: 516/531, loss: 0.004951191134750843 2023-01-23 02:59:23.048533: step: 520/531, loss: 0.004124474711716175 2023-01-23 02:59:24.184412: step: 524/531, loss: 0.005018138792365789 2023-01-23 02:59:25.299047: step: 528/531, loss: 0.030269861221313477 2023-01-23 02:59:26.404448: step: 532/531, loss: 0.0020617484115064144 2023-01-23 02:59:27.520637: step: 536/531, loss: 0.020287513732910156 2023-01-23 02:59:28.633585: step: 540/531, loss: 0.0015100956661626697 2023-01-23 02:59:29.775186: step: 544/531, loss: 0.010562137700617313 2023-01-23 02:59:30.951514: step: 548/531, loss: 0.022211646661162376 2023-01-23 02:59:32.070283: step: 552/531, loss: 0.02851085737347603 2023-01-23 02:59:33.218565: step: 556/531, loss: 0.013420294970273972 2023-01-23 02:59:34.343295: step: 560/531, loss: 0.007181072607636452 2023-01-23 02:59:35.478575: step: 564/531, loss: 0.0031265737488865852 2023-01-23 02:59:36.573556: step: 568/531, loss: 0.0005976438988000154 2023-01-23 02:59:37.683119: step: 572/531, loss: 0.029396438971161842 2023-01-23 02:59:38.794742: step: 576/531, loss: 0.017939282581210136 2023-01-23 02:59:39.951512: step: 580/531, loss: 0.0003444671747274697 2023-01-23 02:59:41.055468: step: 584/531, loss: 0.0017328262329101562 2023-01-23 02:59:42.193121: step: 588/531, loss: 0.03702230378985405 2023-01-23 02:59:43.324648: step: 592/531, loss: 0.03456936031579971 2023-01-23 02:59:44.445988: step: 596/531, loss: 7.944107346702367e-05 2023-01-23 02:59:45.562144: step: 600/531, loss: 2.0408631826285273e-05 2023-01-23 02:59:46.679851: step: 604/531, loss: 0.0016758800484240055 2023-01-23 02:59:47.793815: step: 608/531, loss: 7.820128666935489e-05 2023-01-23 02:59:48.909678: step: 612/531, loss: 0.7401914596557617 2023-01-23 02:59:50.009552: step: 616/531, loss: 1.3780593690171372e-05 2023-01-23 02:59:51.138424: step: 620/531, loss: 0.05583017319440842 2023-01-23 02:59:52.271256: step: 624/531, loss: 0.05132894963026047 2023-01-23 02:59:53.393149: step: 628/531, loss: 0.02747788466513157 2023-01-23 02:59:54.511195: step: 632/531, loss: 0.008495330810546875 2023-01-23 02:59:55.630065: step: 636/531, loss: 0.014155673794448376 2023-01-23 02:59:56.761633: step: 640/531, loss: 0.01801624335348606 2023-01-23 02:59:57.872572: step: 644/531, loss: 0.009556293487548828 2023-01-23 02:59:58.977327: step: 648/531, loss: 0.005221748258918524 2023-01-23 03:00:00.116606: step: 652/531, loss: 0.0032155036460608244 2023-01-23 03:00:01.221117: step: 656/531, loss: 0.0009287834400311112 2023-01-23 03:00:02.336988: step: 660/531, loss: 0.0001548767031636089 2023-01-23 03:00:03.456478: step: 664/531, loss: 0.14776574075222015 2023-01-23 03:00:04.613123: step: 668/531, loss: 0.009481430053710938 2023-01-23 03:00:05.726921: step: 672/531, loss: 0.0005083084106445312 2023-01-23 03:00:06.865011: step: 676/531, loss: 0.01818552054464817 2023-01-23 03:00:07.994777: step: 680/531, loss: 0.013166999444365501 2023-01-23 03:00:09.117271: step: 684/531, loss: 0.004916286561638117 2023-01-23 03:00:10.254572: step: 688/531, loss: 0.11361849308013916 2023-01-23 03:00:11.361691: step: 692/531, loss: 0.009963035583496094 2023-01-23 03:00:12.492663: step: 696/531, loss: 0.010951138101518154 2023-01-23 03:00:13.606587: step: 700/531, loss: 0.009215355850756168 2023-01-23 03:00:14.746815: step: 704/531, loss: 0.013612176291644573 2023-01-23 03:00:15.877707: step: 708/531, loss: 0.037805747240781784 2023-01-23 03:00:17.015601: step: 712/531, loss: 0.00729260453954339 2023-01-23 03:00:18.159230: step: 716/531, loss: 0.0005199909210205078 2023-01-23 03:00:19.309991: step: 720/531, loss: 0.04019484296441078 2023-01-23 03:00:20.425201: step: 724/531, loss: 0.0002541542053222656 2023-01-23 03:00:21.541643: step: 728/531, loss: 0.05018596723675728 2023-01-23 03:00:22.656342: step: 732/531, loss: 0.006511020474135876 2023-01-23 03:00:23.785066: step: 736/531, loss: 0.02578449249267578 2023-01-23 03:00:24.899582: step: 740/531, loss: 0.002951240399852395 2023-01-23 03:00:26.026145: step: 744/531, loss: 0.0661001205444336 2023-01-23 03:00:27.140195: step: 748/531, loss: 0.00022172929311636835 2023-01-23 03:00:28.279819: step: 752/531, loss: 0.01023883931338787 2023-01-23 03:00:29.415145: step: 756/531, loss: 0.010994339361786842 2023-01-23 03:00:30.537954: step: 760/531, loss: 0.08174996823072433 2023-01-23 03:00:31.681029: step: 764/531, loss: 0.0003194332239218056 2023-01-23 03:00:32.820929: step: 768/531, loss: 0.0016126633854582906 2023-01-23 03:00:33.956947: step: 772/531, loss: 0.0016916274325922132 2023-01-23 03:00:35.083800: step: 776/531, loss: 0.04539242014288902 2023-01-23 03:00:36.212982: step: 780/531, loss: 0.064208984375 2023-01-23 03:00:37.379908: step: 784/531, loss: 0.012694835662841797 2023-01-23 03:00:38.508705: step: 788/531, loss: 0.027092458680272102 2023-01-23 03:00:39.625849: step: 792/531, loss: 9.694100299384445e-05 2023-01-23 03:00:40.741579: step: 796/531, loss: 0.017609262838959694 2023-01-23 03:00:41.883756: step: 800/531, loss: 0.03239727020263672 2023-01-23 03:00:42.982991: step: 804/531, loss: 0.05240011215209961 2023-01-23 03:00:44.115208: step: 808/531, loss: 0.0032236098777502775 2023-01-23 03:00:45.199101: step: 812/531, loss: 0.004499530885368586 2023-01-23 03:00:46.303814: step: 816/531, loss: 0.07213011384010315 2023-01-23 03:00:47.407652: step: 820/531, loss: 4.110336521989666e-05 2023-01-23 03:00:48.522342: step: 824/531, loss: 0.10355091094970703 2023-01-23 03:00:49.621600: step: 828/531, loss: 0.028632069006562233 2023-01-23 03:00:50.754131: step: 832/531, loss: 0.0016375542618334293 2023-01-23 03:00:51.857403: step: 836/531, loss: 0.011282539926469326 2023-01-23 03:00:52.987881: step: 840/531, loss: 0.01265411451458931 2023-01-23 03:00:54.131318: step: 844/531, loss: 0.0001771926908986643 2023-01-23 03:00:55.258108: step: 848/531, loss: 0.0029514310881495476 2023-01-23 03:00:56.408421: step: 852/531, loss: 0.0058231353759765625 2023-01-23 03:00:57.521644: step: 856/531, loss: 0.045072462409734726 2023-01-23 03:00:58.629545: step: 860/531, loss: 0.0525270476937294 2023-01-23 03:00:59.744040: step: 864/531, loss: 0.053490832448005676 2023-01-23 03:01:00.858008: step: 868/531, loss: 0.024404335767030716 2023-01-23 03:01:01.985586: step: 872/531, loss: 0.01981201209127903 2023-01-23 03:01:03.109932: step: 876/531, loss: 7.743835885776207e-05 2023-01-23 03:01:04.257065: step: 880/531, loss: 0.022589493542909622 2023-01-23 03:01:05.397899: step: 884/531, loss: 0.02989177778363228 2023-01-23 03:01:06.509223: step: 888/531, loss: 0.009578084573149681 2023-01-23 03:01:07.627825: step: 892/531, loss: 0.0008292675483971834 2023-01-23 03:01:08.760762: step: 896/531, loss: 0.00026760101900435984 2023-01-23 03:01:09.892342: step: 900/531, loss: 0.00034728049649856985 2023-01-23 03:01:11.007960: step: 904/531, loss: 0.03461713716387749 2023-01-23 03:01:12.114634: step: 908/531, loss: 0.03369007259607315 2023-01-23 03:01:13.254895: step: 912/531, loss: 0.009516382589936256 2023-01-23 03:01:14.392468: step: 916/531, loss: 8.916855222196318e-06 2023-01-23 03:01:15.522764: step: 920/531, loss: 3.2234194804914296e-05 2023-01-23 03:01:16.628769: step: 924/531, loss: 0.0039116861298680305 2023-01-23 03:01:17.778941: step: 928/531, loss: 0.0034274100326001644 2023-01-23 03:01:18.916511: step: 932/531, loss: 0.025771331042051315 2023-01-23 03:01:20.051530: step: 936/531, loss: 6.332397606456652e-05 2023-01-23 03:01:21.173048: step: 940/531, loss: 0.03230180963873863 2023-01-23 03:01:22.317566: step: 944/531, loss: 0.04185495525598526 2023-01-23 03:01:23.430567: step: 948/531, loss: 0.0006002665031701326 2023-01-23 03:01:24.576375: step: 952/531, loss: 0.0015363693237304688 2023-01-23 03:01:25.726960: step: 956/531, loss: 0.001834869384765625 2023-01-23 03:01:26.856553: step: 960/531, loss: 0.0002623796754051 2023-01-23 03:01:27.999352: step: 964/531, loss: 0.004636573605239391 2023-01-23 03:01:29.115538: step: 968/531, loss: 0.005076503846794367 2023-01-23 03:01:30.243463: step: 972/531, loss: 0.0037137032486498356 2023-01-23 03:01:31.380600: step: 976/531, loss: 0.001048183417879045 2023-01-23 03:01:32.533201: step: 980/531, loss: 0.284993439912796 2023-01-23 03:01:33.669467: step: 984/531, loss: 0.05883808434009552 2023-01-23 03:01:34.794365: step: 988/531, loss: 0.05026283115148544 2023-01-23 03:01:35.939941: step: 992/531, loss: 7.514953904319555e-05 2023-01-23 03:01:37.073810: step: 996/531, loss: 0.0011873244075104594 2023-01-23 03:01:38.177130: step: 1000/531, loss: 0.0012302398681640625 2023-01-23 03:01:39.320524: step: 1004/531, loss: 0.04406299814581871 2023-01-23 03:01:40.425857: step: 1008/531, loss: 7.05719003235572e-06 2023-01-23 03:01:41.541343: step: 1012/531, loss: 0.0010879517067223787 2023-01-23 03:01:42.672602: step: 1016/531, loss: 0.04696397855877876 2023-01-23 03:01:43.828839: step: 1020/531, loss: 0.05769595876336098 2023-01-23 03:01:44.953412: step: 1024/531, loss: 0.011763953603804111 2023-01-23 03:01:46.119169: step: 1028/531, loss: 0.08096809685230255 2023-01-23 03:01:47.250734: step: 1032/531, loss: 0.0016600609524175525 2023-01-23 03:01:48.365974: step: 1036/531, loss: 0.0026065825950354338 2023-01-23 03:01:49.483639: step: 1040/531, loss: 0.044809531420469284 2023-01-23 03:01:50.580479: step: 1044/531, loss: -5.14984139954322e-06 2023-01-23 03:01:51.695486: step: 1048/531, loss: 0.01935424841940403 2023-01-23 03:01:52.827056: step: 1052/531, loss: 0.008493566885590553 2023-01-23 03:01:53.951348: step: 1056/531, loss: 0.027629852294921875 2023-01-23 03:01:55.066596: step: 1060/531, loss: 0.04375104978680611 2023-01-23 03:01:56.188523: step: 1064/531, loss: 0.05173168331384659 2023-01-23 03:01:57.293121: step: 1068/531, loss: 0.007929420098662376 2023-01-23 03:01:58.412184: step: 1072/531, loss: 0.005633068270981312 2023-01-23 03:01:59.512984: step: 1076/531, loss: 0.0002724170626606792 2023-01-23 03:02:00.632294: step: 1080/531, loss: 0.025803662836551666 2023-01-23 03:02:01.743146: step: 1084/531, loss: 0.02024364471435547 2023-01-23 03:02:02.866304: step: 1088/531, loss: 0.015231132507324219 2023-01-23 03:02:03.981882: step: 1092/531, loss: 0.0738285630941391 2023-01-23 03:02:05.094001: step: 1096/531, loss: 0.008661603555083275 2023-01-23 03:02:06.229460: step: 1100/531, loss: 0.04922294616699219 2023-01-23 03:02:07.357453: step: 1104/531, loss: 0.16663475334644318 2023-01-23 03:02:08.472559: step: 1108/531, loss: 0.023339366540312767 2023-01-23 03:02:09.591141: step: 1112/531, loss: 0.00014371871657203883 2023-01-23 03:02:10.746801: step: 1116/531, loss: 0.010211181826889515 2023-01-23 03:02:11.892167: step: 1120/531, loss: 0.005398273468017578 2023-01-23 03:02:13.007133: step: 1124/531, loss: 0.03627920150756836 2023-01-23 03:02:14.118284: step: 1128/531, loss: 0.0025251389015465975 2023-01-23 03:02:15.251399: step: 1132/531, loss: 0.05681133642792702 2023-01-23 03:02:16.364110: step: 1136/531, loss: 0.0036290884017944336 2023-01-23 03:02:17.467147: step: 1140/531, loss: 0.000954055751208216 2023-01-23 03:02:18.545604: step: 1144/531, loss: 0.0013440132606774569 2023-01-23 03:02:19.660609: step: 1148/531, loss: 0.09740272164344788 2023-01-23 03:02:20.774351: step: 1152/531, loss: 0.009726906195282936 2023-01-23 03:02:21.871466: step: 1156/531, loss: 0.01283731497824192 2023-01-23 03:02:23.025708: step: 1160/531, loss: 0.18539848923683167 2023-01-23 03:02:24.161658: step: 1164/531, loss: 0.0008284568903036416 2023-01-23 03:02:25.280788: step: 1168/531, loss: 0.03062448464334011 2023-01-23 03:02:26.404449: step: 1172/531, loss: 0.03205757215619087 2023-01-23 03:02:27.540118: step: 1176/531, loss: 0.0003143310605082661 2023-01-23 03:02:28.649046: step: 1180/531, loss: 0.001223659492097795 2023-01-23 03:02:29.767390: step: 1184/531, loss: 0.0013794898986816406 2023-01-23 03:02:30.886691: step: 1188/531, loss: 0.0566675178706646 2023-01-23 03:02:32.014829: step: 1192/531, loss: 0.005577277857810259 2023-01-23 03:02:33.160486: step: 1196/531, loss: 0.00086383824236691 2023-01-23 03:02:34.269120: step: 1200/531, loss: 0.0007397652370855212 2023-01-23 03:02:35.405861: step: 1204/531, loss: 0.013715553097426891 2023-01-23 03:02:36.539639: step: 1208/531, loss: 0.0007748603820800781 2023-01-23 03:02:37.639343: step: 1212/531, loss: 0.0014644861221313477 2023-01-23 03:02:38.749408: step: 1216/531, loss: 0.0001615524379303679 2023-01-23 03:02:39.874694: step: 1220/531, loss: 0.004193687811493874 2023-01-23 03:02:40.990518: step: 1224/531, loss: 0.027457524091005325 2023-01-23 03:02:42.132877: step: 1228/531, loss: 0.05862589180469513 2023-01-23 03:02:43.238176: step: 1232/531, loss: 0.00013465881056617945 2023-01-23 03:02:44.388519: step: 1236/531, loss: 0.009566117078065872 2023-01-23 03:02:45.545101: step: 1240/531, loss: 0.008403015322983265 2023-01-23 03:02:46.674766: step: 1244/531, loss: 0.12102308869361877 2023-01-23 03:02:47.791806: step: 1248/531, loss: 0.011609697714447975 2023-01-23 03:02:48.887507: step: 1252/531, loss: 0.00329170236364007 2023-01-23 03:02:50.017736: step: 1256/531, loss: 0.00012073516700183973 2023-01-23 03:02:51.169490: step: 1260/531, loss: 0.01811056211590767 2023-01-23 03:02:52.291028: step: 1264/531, loss: 0.0027015686500817537 2023-01-23 03:02:53.390264: step: 1268/531, loss: 0.030917834490537643 2023-01-23 03:02:54.556285: step: 1272/531, loss: 0.06088867038488388 2023-01-23 03:02:55.676914: step: 1276/531, loss: 0.004056167788803577 2023-01-23 03:02:56.812368: step: 1280/531, loss: 6.256103370105848e-05 2023-01-23 03:02:57.945302: step: 1284/531, loss: 0.00182933802716434 2023-01-23 03:02:59.063118: step: 1288/531, loss: 0.022030258551239967 2023-01-23 03:03:00.199843: step: 1292/531, loss: 0.01325597707182169 2023-01-23 03:03:01.309222: step: 1296/531, loss: 0.0031570433638989925 2023-01-23 03:03:02.422353: step: 1300/531, loss: 0.019679641351103783 2023-01-23 03:03:03.545632: step: 1304/531, loss: 0.019195174798369408 2023-01-23 03:03:04.647116: step: 1308/531, loss: 0.019798660650849342 2023-01-23 03:03:05.788537: step: 1312/531, loss: 0.06110658869147301 2023-01-23 03:03:06.921521: step: 1316/531, loss: 1.4257430848374497e-05 2023-01-23 03:03:08.026336: step: 1320/531, loss: 0.01452417392283678 2023-01-23 03:03:09.140879: step: 1324/531, loss: 0.0004093170282430947 2023-01-23 03:03:10.263223: step: 1328/531, loss: 0.0006435394170694053 2023-01-23 03:03:11.380716: step: 1332/531, loss: 0.002630615374073386 2023-01-23 03:03:12.527469: step: 1336/531, loss: 0.012231254950165749 2023-01-23 03:03:13.630118: step: 1340/531, loss: 0.009826661087572575 2023-01-23 03:03:14.735110: step: 1344/531, loss: 0.007898139767348766 2023-01-23 03:03:15.864202: step: 1348/531, loss: 0.0005296707386150956 2023-01-23 03:03:16.986719: step: 1352/531, loss: 0.0034811021760106087 2023-01-23 03:03:18.135597: step: 1356/531, loss: 0.00015001297288108617 2023-01-23 03:03:19.264909: step: 1360/531, loss: 5.035400317865424e-05 2023-01-23 03:03:20.405233: step: 1364/531, loss: 0.08050546795129776 2023-01-23 03:03:21.539088: step: 1368/531, loss: 0.00036306382389739156 2023-01-23 03:03:22.639528: step: 1372/531, loss: 0.007416200824081898 2023-01-23 03:03:23.755224: step: 1376/531, loss: 0.00243301410228014 2023-01-23 03:03:24.883556: step: 1380/531, loss: 0.0012866974575445056 2023-01-23 03:03:25.992350: step: 1384/531, loss: 0.005496311001479626 2023-01-23 03:03:27.123339: step: 1388/531, loss: 0.012728882022202015 2023-01-23 03:03:28.272626: step: 1392/531, loss: 0.008410453796386719 2023-01-23 03:03:29.379024: step: 1396/531, loss: 0.0006304740672931075 2023-01-23 03:03:30.462756: step: 1400/531, loss: 0.0028811455704271793 2023-01-23 03:03:31.576562: step: 1404/531, loss: 0.032659340649843216 2023-01-23 03:03:32.690355: step: 1408/531, loss: 6.4849853515625e-05 2023-01-23 03:03:33.818065: step: 1412/531, loss: 0.0002590179501567036 2023-01-23 03:03:34.942691: step: 1416/531, loss: 0.0003361702256370336 2023-01-23 03:03:36.036106: step: 1420/531, loss: 0.0027702334336936474 2023-01-23 03:03:37.173413: step: 1424/531, loss: 0.00035161973210051656 2023-01-23 03:03:38.335037: step: 1428/531, loss: 0.03153223916888237 2023-01-23 03:03:39.454962: step: 1432/531, loss: 0.1308523267507553 2023-01-23 03:03:40.558566: step: 1436/531, loss: 8.535385131835938e-05 2023-01-23 03:03:41.675971: step: 1440/531, loss: 0.002167320344597101 2023-01-23 03:03:42.787661: step: 1444/531, loss: 0.05468940734863281 2023-01-23 03:03:43.936756: step: 1448/531, loss: 0.0017781734932214022 2023-01-23 03:03:45.086376: step: 1452/531, loss: 0.00983057077974081 2023-01-23 03:03:46.197112: step: 1456/531, loss: 0.0014261245960369706 2023-01-23 03:03:47.403742: step: 1460/531, loss: 0.020213890820741653 2023-01-23 03:03:48.505334: step: 1464/531, loss: 0.0014238357543945312 2023-01-23 03:03:49.651332: step: 1468/531, loss: 0.0012582779163494706 2023-01-23 03:03:50.796587: step: 1472/531, loss: 0.06024932861328125 2023-01-23 03:03:51.946512: step: 1476/531, loss: 0.0026653290260583162 2023-01-23 03:03:53.073525: step: 1480/531, loss: 0.0003662109375 2023-01-23 03:03:54.205564: step: 1484/531, loss: 0.019578194245696068 2023-01-23 03:03:55.343150: step: 1488/531, loss: 0.7144562602043152 2023-01-23 03:03:56.453209: step: 1492/531, loss: 0.022637223824858665 2023-01-23 03:03:57.561219: step: 1496/531, loss: 0.0021830559708178043 2023-01-23 03:03:58.668918: step: 1500/531, loss: 0.06123180687427521 2023-01-23 03:03:59.804005: step: 1504/531, loss: 0.13065338134765625 2023-01-23 03:04:00.927354: step: 1508/531, loss: 0.009461641311645508 2023-01-23 03:04:02.060632: step: 1512/531, loss: 0.0020399093627929688 2023-01-23 03:04:03.172179: step: 1516/531, loss: 2.6512147087487392e-05 2023-01-23 03:04:04.331528: step: 1520/531, loss: 0.004387474153190851 2023-01-23 03:04:05.453085: step: 1524/531, loss: 8.02040085545741e-05 2023-01-23 03:04:06.585345: step: 1528/531, loss: 0.0429140105843544 2023-01-23 03:04:07.713571: step: 1532/531, loss: 0.026005173102021217 2023-01-23 03:04:08.850654: step: 1536/531, loss: 0.00029506682767532766 2023-01-23 03:04:10.010139: step: 1540/531, loss: 0.003241634229198098 2023-01-23 03:04:11.130054: step: 1544/531, loss: 0.013512182049453259 2023-01-23 03:04:12.259951: step: 1548/531, loss: 0.011196708306670189 2023-01-23 03:04:13.401190: step: 1552/531, loss: 0.03624091297388077 2023-01-23 03:04:14.538007: step: 1556/531, loss: 0.005675077438354492 2023-01-23 03:04:15.677243: step: 1560/531, loss: 0.389894962310791 2023-01-23 03:04:16.792240: step: 1564/531, loss: 0.05377388000488281 2023-01-23 03:04:17.921037: step: 1568/531, loss: 4.692077709478326e-05 2023-01-23 03:04:19.040890: step: 1572/531, loss: 0.005256080999970436 2023-01-23 03:04:20.206110: step: 1576/531, loss: 0.06646442413330078 2023-01-23 03:04:21.371469: step: 1580/531, loss: 0.025849809870123863 2023-01-23 03:04:22.520702: step: 1584/531, loss: 0.002214813372120261 2023-01-23 03:04:23.662172: step: 1588/531, loss: 0.010465431958436966 2023-01-23 03:04:24.773832: step: 1592/531, loss: 0.001987457275390625 2023-01-23 03:04:25.893800: step: 1596/531, loss: 0.03647336736321449 2023-01-23 03:04:27.020107: step: 1600/531, loss: 0.03838767856359482 2023-01-23 03:04:28.200511: step: 1604/531, loss: 0.0023769380059093237 2023-01-23 03:04:29.315767: step: 1608/531, loss: 0.0491090752184391 2023-01-23 03:04:30.423698: step: 1612/531, loss: 0.004688739776611328 2023-01-23 03:04:31.540848: step: 1616/531, loss: 0.02584237977862358 2023-01-23 03:04:32.643317: step: 1620/531, loss: 0.003821754362434149 2023-01-23 03:04:33.771453: step: 1624/531, loss: 0.0035602569114416838 2023-01-23 03:04:34.904035: step: 1628/531, loss: 0.02424316480755806 2023-01-23 03:04:35.993246: step: 1632/531, loss: 0.0003437995910644531 2023-01-23 03:04:37.117167: step: 1636/531, loss: 0.00293045025318861 2023-01-23 03:04:38.230491: step: 1640/531, loss: 0.009319783188402653 2023-01-23 03:04:39.373360: step: 1644/531, loss: 0.0001636505185160786 2023-01-23 03:04:40.523303: step: 1648/531, loss: 0.0028985023964196444 2023-01-23 03:04:41.644222: step: 1652/531, loss: 0.006133651826530695 2023-01-23 03:04:42.783510: step: 1656/531, loss: 0.07312975078821182 2023-01-23 03:04:43.916180: step: 1660/531, loss: 0.0003799438418354839 2023-01-23 03:04:45.042585: step: 1664/531, loss: 0.06162299960851669 2023-01-23 03:04:46.192509: step: 1668/531, loss: 0.2518285810947418 2023-01-23 03:04:47.341027: step: 1672/531, loss: 0.019063664600253105 2023-01-23 03:04:48.430471: step: 1676/531, loss: 0.0037837030831724405 2023-01-23 03:04:49.540974: step: 1680/531, loss: 0.006030035205185413 2023-01-23 03:04:50.654455: step: 1684/531, loss: 0.0010579109657555819 2023-01-23 03:04:51.769151: step: 1688/531, loss: 0.0010961532825604081 2023-01-23 03:04:52.886838: step: 1692/531, loss: 0.001509475870989263 2023-01-23 03:04:54.010747: step: 1696/531, loss: 0.0951637253165245 2023-01-23 03:04:55.111819: step: 1700/531, loss: 0.0021266937255859375 2023-01-23 03:04:56.244689: step: 1704/531, loss: 0.05188446119427681 2023-01-23 03:04:57.359473: step: 1708/531, loss: 0.015315246768295765 2023-01-23 03:04:58.489658: step: 1712/531, loss: 0.0017190932994708419 2023-01-23 03:04:59.614230: step: 1716/531, loss: 0.0050140381790697575 2023-01-23 03:05:00.729442: step: 1720/531, loss: 0.010280800983309746 2023-01-23 03:05:01.849674: step: 1724/531, loss: 0.001546669052913785 2023-01-23 03:05:02.953262: step: 1728/531, loss: 0.0005205154302529991 2023-01-23 03:05:04.095186: step: 1732/531, loss: 0.015781210735440254 2023-01-23 03:05:05.265534: step: 1736/531, loss: 0.0031764984596520662 2023-01-23 03:05:06.380929: step: 1740/531, loss: 0.00025262832059524953 2023-01-23 03:05:07.513029: step: 1744/531, loss: 0.0007285118335857987 2023-01-23 03:05:08.640809: step: 1748/531, loss: 0.009483146481215954 2023-01-23 03:05:09.723026: step: 1752/531, loss: 0.02316894568502903 2023-01-23 03:05:10.831066: step: 1756/531, loss: 0.002701664110645652 2023-01-23 03:05:11.945482: step: 1760/531, loss: 0.04713869094848633 2023-01-23 03:05:13.075173: step: 1764/531, loss: 0.02914752997457981 2023-01-23 03:05:14.192964: step: 1768/531, loss: 0.02108459547162056 2023-01-23 03:05:15.334554: step: 1772/531, loss: 0.009310532361268997 2023-01-23 03:05:16.470939: step: 1776/531, loss: 8.449554297840223e-05 2023-01-23 03:05:17.589468: step: 1780/531, loss: 0.014852524735033512 2023-01-23 03:05:18.699825: step: 1784/531, loss: 0.00016794205293990672 2023-01-23 03:05:19.808458: step: 1788/531, loss: 0.0005270958063192666 2023-01-23 03:05:20.916695: step: 1792/531, loss: 0.025333786383271217 2023-01-23 03:05:22.055481: step: 1796/531, loss: 0.004635669756680727 2023-01-23 03:05:23.181285: step: 1800/531, loss: 0.0591367706656456 2023-01-23 03:05:24.306817: step: 1804/531, loss: 0.002317118691280484 2023-01-23 03:05:25.449093: step: 1808/531, loss: 0.03819999843835831 2023-01-23 03:05:26.563317: step: 1812/531, loss: 0.004540347959846258 2023-01-23 03:05:27.691457: step: 1816/531, loss: 0.03649768978357315 2023-01-23 03:05:28.825416: step: 1820/531, loss: 0.0001979827939067036 2023-01-23 03:05:29.950316: step: 1824/531, loss: 0.0007092476007528603 2023-01-23 03:05:31.056429: step: 1828/531, loss: 0.05888576805591583 2023-01-23 03:05:32.202767: step: 1832/531, loss: 0.0001443863002350554 2023-01-23 03:05:33.315218: step: 1836/531, loss: 0.023372652009129524 2023-01-23 03:05:34.437407: step: 1840/531, loss: 0.005185222718864679 2023-01-23 03:05:35.543954: step: 1844/531, loss: 0.0047592162154614925 2023-01-23 03:05:36.679351: step: 1848/531, loss: 0.00039634708082303405 2023-01-23 03:05:37.800884: step: 1852/531, loss: 0.04666700214147568 2023-01-23 03:05:38.911982: step: 1856/531, loss: 0.036513231694698334 2023-01-23 03:05:40.036486: step: 1860/531, loss: 0.0015548706287518144 2023-01-23 03:05:41.158443: step: 1864/531, loss: 0.005625820253044367 2023-01-23 03:05:42.261484: step: 1868/531, loss: 0.0010433197021484375 2023-01-23 03:05:43.348600: step: 1872/531, loss: 0.0011726379161700606 2023-01-23 03:05:44.487963: step: 1876/531, loss: 0.002943325089290738 2023-01-23 03:05:45.604207: step: 1880/531, loss: 0.06522709131240845 2023-01-23 03:05:46.761886: step: 1884/531, loss: 0.01231012400239706 2023-01-23 03:05:47.903950: step: 1888/531, loss: 0.02914581261575222 2023-01-23 03:05:49.016722: step: 1892/531, loss: 0.011760901659727097 2023-01-23 03:05:50.104546: step: 1896/531, loss: 0.017786217853426933 2023-01-23 03:05:51.233570: step: 1900/531, loss: 0.0454469695687294 2023-01-23 03:05:52.352832: step: 1904/531, loss: 0.032624050974845886 2023-01-23 03:05:53.469200: step: 1908/531, loss: 0.0006206512334756553 2023-01-23 03:05:54.567857: step: 1912/531, loss: 1.1730194273695815e-05 2023-01-23 03:05:55.668839: step: 1916/531, loss: 0.002918291138485074 2023-01-23 03:05:56.768933: step: 1920/531, loss: 0.0014688491355627775 2023-01-23 03:05:57.886174: step: 1924/531, loss: 0.0140831945464015 2023-01-23 03:05:59.016922: step: 1928/531, loss: 0.047412872314453125 2023-01-23 03:06:00.158542: step: 1932/531, loss: 0.007380438037216663 2023-01-23 03:06:01.273251: step: 1936/531, loss: 0.1811823844909668 2023-01-23 03:06:02.423168: step: 1940/531, loss: 0.0011631011730059981 2023-01-23 03:06:03.587784: step: 1944/531, loss: 0.004854393191635609 2023-01-23 03:06:04.718764: step: 1948/531, loss: 0.04157309606671333 2023-01-23 03:06:05.853833: step: 1952/531, loss: 0.0320344939827919 2023-01-23 03:06:06.968444: step: 1956/531, loss: 0.00010437965829623863 2023-01-23 03:06:08.087085: step: 1960/531, loss: 0.04512915760278702 2023-01-23 03:06:09.164042: step: 1964/531, loss: 0.0011899947421625257 2023-01-23 03:06:10.294466: step: 1968/531, loss: 9.679794311523438e-05 2023-01-23 03:06:11.420096: step: 1972/531, loss: 0.04750671237707138 2023-01-23 03:06:12.552130: step: 1976/531, loss: 0.010211371816694736 2023-01-23 03:06:13.658939: step: 1980/531, loss: 0.050099946558475494 2023-01-23 03:06:14.784428: step: 1984/531, loss: 0.12275819480419159 2023-01-23 03:06:15.920324: step: 1988/531, loss: 0.005821800325065851 2023-01-23 03:06:17.032443: step: 1992/531, loss: 0.003816795302554965 2023-01-23 03:06:18.154675: step: 1996/531, loss: 0.04299984127283096 2023-01-23 03:06:19.271026: step: 2000/531, loss: 0.012952995486557484 2023-01-23 03:06:20.373482: step: 2004/531, loss: 0.008868217468261719 2023-01-23 03:06:21.545517: step: 2008/531, loss: 0.006070422939956188 2023-01-23 03:06:22.663030: step: 2012/531, loss: 0.02142333984375 2023-01-23 03:06:23.803059: step: 2016/531, loss: 0.04205293580889702 2023-01-23 03:06:24.929487: step: 2020/531, loss: 0.0023247720673680305 2023-01-23 03:06:26.029949: step: 2024/531, loss: 0.0001031875581247732 2023-01-23 03:06:27.162793: step: 2028/531, loss: 0.007774734869599342 2023-01-23 03:06:28.275276: step: 2032/531, loss: 0.023336218670010567 2023-01-23 03:06:29.409413: step: 2036/531, loss: 0.07117538154125214 2023-01-23 03:06:30.529436: step: 2040/531, loss: 9.72747784544481e-06 2023-01-23 03:06:31.685134: step: 2044/531, loss: 0.7680648565292358 2023-01-23 03:06:32.808523: step: 2048/531, loss: 0.01672239415347576 2023-01-23 03:06:33.917220: step: 2052/531, loss: 0.04720611870288849 2023-01-23 03:06:35.039118: step: 2056/531, loss: 0.003008556552231312 2023-01-23 03:06:36.135891: step: 2060/531, loss: 0.002344799228012562 2023-01-23 03:06:37.247911: step: 2064/531, loss: 0.003798294346779585 2023-01-23 03:06:38.358269: step: 2068/531, loss: 0.002208709716796875 2023-01-23 03:06:39.472301: step: 2072/531, loss: 0.0554235465824604 2023-01-23 03:06:40.574202: step: 2076/531, loss: 0.04692363739013672 2023-01-23 03:06:41.702486: step: 2080/531, loss: 0.01076283399015665 2023-01-23 03:06:42.819012: step: 2084/531, loss: 0.007159805856645107 2023-01-23 03:06:43.944475: step: 2088/531, loss: 0.013034534640610218 2023-01-23 03:06:45.061473: step: 2092/531, loss: 0.0003288745938334614 2023-01-23 03:06:46.162902: step: 2096/531, loss: 0.0013932227157056332 2023-01-23 03:06:47.260843: step: 2100/531, loss: 1.9073513612966053e-07 2023-01-23 03:06:48.388692: step: 2104/531, loss: 0.0057579996064305305 2023-01-23 03:06:49.499090: step: 2108/531, loss: 1.640319896978326e-05 2023-01-23 03:06:50.614032: step: 2112/531, loss: 0.04350471496582031 2023-01-23 03:06:51.769624: step: 2116/531, loss: 0.027050448581576347 2023-01-23 03:06:52.886165: step: 2120/531, loss: 0.01781024970114231 2023-01-23 03:06:54.005592: step: 2124/531, loss: 0.0002718210162129253 ================================================== Loss: 0.025 -------------------- Dev: {'event': {'p': 0.6014492753623188, 'r': 0.7736351531291611, 'f1': 0.6767617938264415}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Test: {'event': {'p': 0.6327710843373494, 'r': 0.7829457364341085, 'f1': 0.6998933901918976}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Chinese: {'event': {'p': 0.5697674418604651, 'r': 0.9074074074074074, 'f1': 0.7}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Korean: {'event': {'p': 0.6923076923076923, 'r': 0.5714285714285714, 'f1': 0.6260869565217392}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Russian: {'event': {'p': 0.4666666666666667, 'r': 0.5833333333333334, 'f1': 0.5185185185185186}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 21 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:07:34.047441: step: 4/531, loss: 0.00146484375 2023-01-23 03:07:35.151299: step: 8/531, loss: 0.0020875930786132812 2023-01-23 03:07:36.265269: step: 12/531, loss: 0.0012967109214514494 2023-01-23 03:07:37.405583: step: 16/531, loss: 0.013607836328446865 2023-01-23 03:07:38.509112: step: 20/531, loss: 0.00043334963265806437 2023-01-23 03:07:39.656936: step: 24/531, loss: 1.9073486328125e-05 2023-01-23 03:07:40.789966: step: 28/531, loss: 0.0003990173281636089 2023-01-23 03:07:41.939480: step: 32/531, loss: 0.0001104354887502268 2023-01-23 03:07:43.047791: step: 36/531, loss: 0.0005913734203204513 2023-01-23 03:07:44.170830: step: 40/531, loss: 0.13356667757034302 2023-01-23 03:07:45.274448: step: 44/531, loss: 0.005254173651337624 2023-01-23 03:07:46.435382: step: 48/531, loss: 0.051177024841308594 2023-01-23 03:07:47.601807: step: 52/531, loss: 0.023906327784061432 2023-01-23 03:07:48.712082: step: 56/531, loss: 0.0020660876762121916 2023-01-23 03:07:49.846663: step: 60/531, loss: 0.005906486418098211 2023-01-23 03:07:50.980214: step: 64/531, loss: 0.004739284515380859 2023-01-23 03:07:52.097497: step: 68/531, loss: 0.0002316475147381425 2023-01-23 03:07:53.237031: step: 72/531, loss: 0.0008271217811852694 2023-01-23 03:07:54.338097: step: 76/531, loss: 0.00032782554626464844 2023-01-23 03:07:55.451250: step: 80/531, loss: 0.0009369850158691406 2023-01-23 03:07:56.562252: step: 84/531, loss: 0.00010261536226607859 2023-01-23 03:07:57.710211: step: 88/531, loss: 6.47544875391759e-05 2023-01-23 03:07:58.810228: step: 92/531, loss: 0.0006538868183270097 2023-01-23 03:07:59.954694: step: 96/531, loss: 0.016814231872558594 2023-01-23 03:08:01.114128: step: 100/531, loss: 0.0439663901925087 2023-01-23 03:08:02.214199: step: 104/531, loss: 0.0002846717834472656 2023-01-23 03:08:03.343755: step: 108/531, loss: 0.020621873438358307 2023-01-23 03:08:04.491856: step: 112/531, loss: 0.012073660269379616 2023-01-23 03:08:05.619176: step: 116/531, loss: 6.141662743175402e-05 2023-01-23 03:08:06.748122: step: 120/531, loss: 0.022858619689941406 2023-01-23 03:08:07.855215: step: 124/531, loss: 0.0006330490577965975 2023-01-23 03:08:08.973517: step: 128/531, loss: 0.016435718163847923 2023-01-23 03:08:10.087880: step: 132/531, loss: 4.9591067181609105e-06 2023-01-23 03:08:11.244300: step: 136/531, loss: 0.0004067897971253842 2023-01-23 03:08:12.390732: step: 140/531, loss: 0.00202598562464118 2023-01-23 03:08:13.503041: step: 144/531, loss: 0.0033483505249023438 2023-01-23 03:08:14.613590: step: 148/531, loss: 0.014723777770996094 2023-01-23 03:08:15.754649: step: 152/531, loss: 0.04154505580663681 2023-01-23 03:08:16.864243: step: 156/531, loss: 0.001240205834619701 2023-01-23 03:08:17.973468: step: 160/531, loss: 0.0006565094226971269 2023-01-23 03:08:19.102460: step: 164/531, loss: 0.0018360138637945056 2023-01-23 03:08:20.252607: step: 168/531, loss: 0.006486702244728804 2023-01-23 03:08:21.402576: step: 172/531, loss: 0.014039230532944202 2023-01-23 03:08:22.532312: step: 176/531, loss: 0.018541861325502396 2023-01-23 03:08:23.678041: step: 180/531, loss: 0.026401232928037643 2023-01-23 03:08:24.826781: step: 184/531, loss: -4.5490265620173886e-05 2023-01-23 03:08:25.946342: step: 188/531, loss: 0.005261516664177179 2023-01-23 03:08:27.086820: step: 192/531, loss: 0.025089550763368607 2023-01-23 03:08:28.212158: step: 196/531, loss: 0.0013472557766363025 2023-01-23 03:08:29.330521: step: 200/531, loss: 0.0096772201359272 2023-01-23 03:08:30.513499: step: 204/531, loss: 0.040186215192079544 2023-01-23 03:08:31.605947: step: 208/531, loss: 0.00039920807466842234 2023-01-23 03:08:32.732196: step: 212/531, loss: 0.003230953123420477 2023-01-23 03:08:33.839623: step: 216/531, loss: 6.828307959949598e-05 2023-01-23 03:08:34.941542: step: 220/531, loss: 0.01564493216574192 2023-01-23 03:08:36.054710: step: 224/531, loss: 0.00013256072998046875 2023-01-23 03:08:37.147535: step: 228/531, loss: 1.0204315003647935e-05 2023-01-23 03:08:38.253651: step: 232/531, loss: 0.0025107385590672493 2023-01-23 03:08:39.371347: step: 236/531, loss: 0.0042258743196725845 2023-01-23 03:08:40.511645: step: 240/531, loss: 0.04343080148100853 2023-01-23 03:08:41.674750: step: 244/531, loss: 0.11086282879114151 2023-01-23 03:08:42.803131: step: 248/531, loss: 0.035529520362615585 2023-01-23 03:08:43.930955: step: 252/531, loss: 0.00010232925706077367 2023-01-23 03:08:45.045649: step: 256/531, loss: 7.162093970691785e-05 2023-01-23 03:08:46.121456: step: 260/531, loss: 5.91278076171875e-05 2023-01-23 03:08:47.239901: step: 264/531, loss: 0.047910213470458984 2023-01-23 03:08:48.389250: step: 268/531, loss: 0.02231750451028347 2023-01-23 03:08:49.513953: step: 272/531, loss: 0.02125406265258789 2023-01-23 03:08:50.681473: step: 276/531, loss: 0.05889244005084038 2023-01-23 03:08:51.797071: step: 280/531, loss: 0.0010015488369390368 2023-01-23 03:08:52.917314: step: 284/531, loss: 0.02187175862491131 2023-01-23 03:08:54.053550: step: 288/531, loss: 0.044797711074352264 2023-01-23 03:08:55.159061: step: 292/531, loss: 0.003364086151123047 2023-01-23 03:08:56.258954: step: 296/531, loss: 0.0004421234189067036 2023-01-23 03:08:57.371501: step: 300/531, loss: 0.010637665167450905 2023-01-23 03:08:58.479419: step: 304/531, loss: 0.00030364992562681437 2023-01-23 03:08:59.589692: step: 308/531, loss: 0.0019585611298680305 2023-01-23 03:09:00.724892: step: 312/531, loss: 0.0017821788787841797 2023-01-23 03:09:01.866096: step: 316/531, loss: 0.02088318020105362 2023-01-23 03:09:02.974038: step: 320/531, loss: 0.046460725367069244 2023-01-23 03:09:04.081155: step: 324/531, loss: 0.013900232501327991 2023-01-23 03:09:05.197551: step: 328/531, loss: 0.0014139175182208419 2023-01-23 03:09:06.349170: step: 332/531, loss: 0.0013911246787756681 2023-01-23 03:09:07.481413: step: 336/531, loss: 0.001390647841617465 2023-01-23 03:09:08.592189: step: 340/531, loss: 0.0015871047507971525 2023-01-23 03:09:09.691960: step: 344/531, loss: 0.03544578701257706 2023-01-23 03:09:10.796467: step: 348/531, loss: 0.003041553543880582 2023-01-23 03:09:11.927695: step: 352/531, loss: 0.0003403901937417686 2023-01-23 03:09:13.034741: step: 356/531, loss: 0.0008621216402389109 2023-01-23 03:09:14.134842: step: 360/531, loss: 0.004464340396225452 2023-01-23 03:09:15.250475: step: 364/531, loss: 0.01719536818563938 2023-01-23 03:09:16.367907: step: 368/531, loss: 0.00042705534724518657 2023-01-23 03:09:17.482038: step: 372/531, loss: 0.020365525037050247 2023-01-23 03:09:18.603024: step: 376/531, loss: 0.006990456487983465 2023-01-23 03:09:19.745968: step: 380/531, loss: 0.0030241012573242188 2023-01-23 03:09:20.872935: step: 384/531, loss: 0.003670120146125555 2023-01-23 03:09:22.011633: step: 388/531, loss: 0.0026265145279467106 2023-01-23 03:09:23.113179: step: 392/531, loss: 0.0006460666772909462 2023-01-23 03:09:24.237129: step: 396/531, loss: 0.01367797888815403 2023-01-23 03:09:25.373940: step: 400/531, loss: 0.0006935120327398181 2023-01-23 03:09:26.495909: step: 404/531, loss: 0.0001470565766794607 2023-01-23 03:09:27.619814: step: 408/531, loss: 0.0013921738136559725 2023-01-23 03:09:28.746693: step: 412/531, loss: 0.0008535385131835938 2023-01-23 03:09:29.916528: step: 416/531, loss: 0.0018232346046715975 2023-01-23 03:09:31.030781: step: 420/531, loss: 0.06915612518787384 2023-01-23 03:09:32.153388: step: 424/531, loss: 1.659393274167087e-05 2023-01-23 03:09:33.291152: step: 428/531, loss: 0.010183239355683327 2023-01-23 03:09:34.436288: step: 432/531, loss: 0.10106963664293289 2023-01-23 03:09:35.594666: step: 436/531, loss: 0.0005860805977135897 2023-01-23 03:09:36.699572: step: 440/531, loss: 0.000606536865234375 2023-01-23 03:09:37.834228: step: 444/531, loss: 0.003342056181281805 2023-01-23 03:09:38.989940: step: 448/531, loss: 0.02114391326904297 2023-01-23 03:09:40.128346: step: 452/531, loss: 0.010066414251923561 2023-01-23 03:09:41.277238: step: 456/531, loss: 0.00310516357421875 2023-01-23 03:09:42.427530: step: 460/531, loss: 0.009950828738510609 2023-01-23 03:09:43.555905: step: 464/531, loss: 0.013081550598144531 2023-01-23 03:09:44.672641: step: 468/531, loss: 0.02598094940185547 2023-01-23 03:09:45.816713: step: 472/531, loss: 0.004311180207878351 2023-01-23 03:09:46.951064: step: 476/531, loss: 0.0015502929454669356 2023-01-23 03:09:48.078042: step: 480/531, loss: 0.010531758889555931 2023-01-23 03:09:49.180834: step: 484/531, loss: 0.0016803741455078125 2023-01-23 03:09:50.267643: step: 488/531, loss: 0.0012009143829345703 2023-01-23 03:09:51.393235: step: 492/531, loss: 0.008922958746552467 2023-01-23 03:09:52.488972: step: 496/531, loss: 0.0038921355735510588 2023-01-23 03:09:53.625596: step: 500/531, loss: 0.03865957260131836 2023-01-23 03:09:54.746138: step: 504/531, loss: 0.061750032007694244 2023-01-23 03:09:55.863512: step: 508/531, loss: 0.05350237339735031 2023-01-23 03:09:56.999417: step: 512/531, loss: 0.0003196716425009072 2023-01-23 03:09:58.114252: step: 516/531, loss: 0.002222061390057206 2023-01-23 03:09:59.215915: step: 520/531, loss: 0.0030922412406653166 2023-01-23 03:10:00.353763: step: 524/531, loss: 0.006308854091912508 2023-01-23 03:10:01.458896: step: 528/531, loss: 0.00019273758516646922 2023-01-23 03:10:02.576472: step: 532/531, loss: 0.002003574278205633 2023-01-23 03:10:03.703459: step: 536/531, loss: 0.0009062767494469881 2023-01-23 03:10:04.827661: step: 540/531, loss: 0.019324254244565964 2023-01-23 03:10:05.977524: step: 544/531, loss: 0.05106038972735405 2023-01-23 03:10:07.090371: step: 548/531, loss: 0.005242157261818647 2023-01-23 03:10:08.218177: step: 552/531, loss: 0.006154107861220837 2023-01-23 03:10:09.325286: step: 556/531, loss: 0.00226593017578125 2023-01-23 03:10:10.458940: step: 560/531, loss: 0.0009725570562295616 2023-01-23 03:10:11.592071: step: 564/531, loss: 0.013335037976503372 2023-01-23 03:10:12.737921: step: 568/531, loss: 0.01642150990664959 2023-01-23 03:10:13.859048: step: 572/531, loss: 0.023996639996767044 2023-01-23 03:10:15.009410: step: 576/531, loss: 0.007940865121781826 2023-01-23 03:10:16.148758: step: 580/531, loss: 0.0250091552734375 2023-01-23 03:10:17.280425: step: 584/531, loss: 0.0006420135614462197 2023-01-23 03:10:18.405793: step: 588/531, loss: 0.003153037978336215 2023-01-23 03:10:19.518419: step: 592/531, loss: 0.015019607730209827 2023-01-23 03:10:20.614420: step: 596/531, loss: 0.0002541974245104939 2023-01-23 03:10:21.732014: step: 600/531, loss: 0.02427959442138672 2023-01-23 03:10:22.860966: step: 604/531, loss: 3.1948088690114673e-06 2023-01-23 03:10:23.998043: step: 608/531, loss: 0.007500267121940851 2023-01-23 03:10:25.130524: step: 612/531, loss: 0.029379844665527344 2023-01-23 03:10:26.268031: step: 616/531, loss: 0.015662861987948418 2023-01-23 03:10:27.354606: step: 620/531, loss: 9.031295485328883e-05 2023-01-23 03:10:28.488892: step: 624/531, loss: 0.0016520499484613538 2023-01-23 03:10:29.646077: step: 628/531, loss: 0.0005409240839071572 2023-01-23 03:10:30.805565: step: 632/531, loss: 6.103515261202119e-06 2023-01-23 03:10:31.929870: step: 636/531, loss: 0.00010414123971713707 2023-01-23 03:10:33.053304: step: 640/531, loss: 0.0018703535897657275 2023-01-23 03:10:34.161710: step: 644/531, loss: 0.1352323442697525 2023-01-23 03:10:35.306569: step: 648/531, loss: 0.00041370390681549907 2023-01-23 03:10:36.427791: step: 652/531, loss: 0.0060867308638989925 2023-01-23 03:10:37.548170: step: 656/531, loss: 0.005110740661621094 2023-01-23 03:10:38.705146: step: 660/531, loss: 0.006535148713737726 2023-01-23 03:10:39.844635: step: 664/531, loss: 6.866455805720761e-06 2023-01-23 03:10:40.959494: step: 668/531, loss: 0.001667022705078125 2023-01-23 03:10:42.104000: step: 672/531, loss: 0.0010066985851153731 2023-01-23 03:10:43.223405: step: 676/531, loss: 0.003857183502987027 2023-01-23 03:10:44.382306: step: 680/531, loss: 0.024642562493681908 2023-01-23 03:10:45.497439: step: 684/531, loss: 0.00031766892061568797 2023-01-23 03:10:46.598179: step: 688/531, loss: 0.001850032713264227 2023-01-23 03:10:47.719769: step: 692/531, loss: 0.0009451866499148309 2023-01-23 03:10:48.845769: step: 696/531, loss: 0.8067765235900879 2023-01-23 03:10:49.971836: step: 700/531, loss: 0.006209373474121094 2023-01-23 03:10:51.080025: step: 704/531, loss: 0.007160186767578125 2023-01-23 03:10:52.189250: step: 708/531, loss: 0.02107868157327175 2023-01-23 03:10:53.306363: step: 712/531, loss: 0.002664947649464011 2023-01-23 03:10:54.478876: step: 716/531, loss: 0.004686165135353804 2023-01-23 03:10:55.592979: step: 720/531, loss: 1.1253358024987392e-05 2023-01-23 03:10:56.701609: step: 724/531, loss: 0.00550346402451396 2023-01-23 03:10:57.817801: step: 728/531, loss: 0.004561042878776789 2023-01-23 03:10:58.946358: step: 732/531, loss: 9.5367431640625e-07 2023-01-23 03:11:00.076511: step: 736/531, loss: 0.01009903009980917 2023-01-23 03:11:01.178883: step: 740/531, loss: 0.004872035700827837 2023-01-23 03:11:02.330948: step: 744/531, loss: 0.0005254745483398438 2023-01-23 03:11:03.450820: step: 748/531, loss: 0.0020072937477380037 2023-01-23 03:11:04.566626: step: 752/531, loss: 0.24814629554748535 2023-01-23 03:11:05.691882: step: 756/531, loss: 0.047196581959724426 2023-01-23 03:11:06.813358: step: 760/531, loss: 0.01591968536376953 2023-01-23 03:11:07.947471: step: 764/531, loss: 0.06380748748779297 2023-01-23 03:11:09.052701: step: 768/531, loss: 0.0012100220192223787 2023-01-23 03:11:10.194745: step: 772/531, loss: 0.008951187133789062 2023-01-23 03:11:11.322031: step: 776/531, loss: 0.0027610778342932463 2023-01-23 03:11:12.456002: step: 780/531, loss: 5.474090721691027e-05 2023-01-23 03:11:13.602291: step: 784/531, loss: 0.017109109088778496 2023-01-23 03:11:14.737540: step: 788/531, loss: 0.03129778057336807 2023-01-23 03:11:15.842271: step: 792/531, loss: 6.790160841774195e-05 2023-01-23 03:11:16.986150: step: 796/531, loss: 0.10087509453296661 2023-01-23 03:11:18.095154: step: 800/531, loss: 0.0007387161022052169 2023-01-23 03:11:19.221871: step: 804/531, loss: 0.00988006591796875 2023-01-23 03:11:20.364221: step: 808/531, loss: 0.01801433600485325 2023-01-23 03:11:21.494066: step: 812/531, loss: 0.04567756503820419 2023-01-23 03:11:22.593616: step: 816/531, loss: 0.00035381317138671875 2023-01-23 03:11:23.715879: step: 820/531, loss: 0.0003025055048055947 2023-01-23 03:11:24.833700: step: 824/531, loss: 0.10238619148731232 2023-01-23 03:11:25.959648: step: 828/531, loss: 0.00135459890589118 2023-01-23 03:11:27.076331: step: 832/531, loss: 6.64711042190902e-05 2023-01-23 03:11:28.204484: step: 836/531, loss: 5.7220458984375e-05 2023-01-23 03:11:29.312947: step: 840/531, loss: 0.03538379818201065 2023-01-23 03:11:30.452437: step: 844/531, loss: 2.0694731574621983e-05 2023-01-23 03:11:31.584359: step: 848/531, loss: 0.0050491332076489925 2023-01-23 03:11:32.688388: step: 852/531, loss: 4.5490265620173886e-05 2023-01-23 03:11:33.838270: step: 856/531, loss: 0.007400035858154297 2023-01-23 03:11:34.949384: step: 860/531, loss: 0.003403949784114957 2023-01-23 03:11:36.061327: step: 864/531, loss: 0.0034372329246252775 2023-01-23 03:11:37.189466: step: 868/531, loss: 0.010037804022431374 2023-01-23 03:11:38.318933: step: 872/531, loss: 0.02905855141580105 2023-01-23 03:11:39.473817: step: 876/531, loss: 0.013191891834139824 2023-01-23 03:11:40.577150: step: 880/531, loss: 0.03658923879265785 2023-01-23 03:11:41.703463: step: 884/531, loss: 0.09703254699707031 2023-01-23 03:11:42.835333: step: 888/531, loss: 0.08679771423339844 2023-01-23 03:11:43.952027: step: 892/531, loss: 0.002456950955092907 2023-01-23 03:11:45.050851: step: 896/531, loss: 0.0008205414051190019 2023-01-23 03:11:46.183270: step: 900/531, loss: 0.0001329422084381804 2023-01-23 03:11:47.319355: step: 904/531, loss: 0.0007326126215048134 2023-01-23 03:11:48.444908: step: 908/531, loss: 0.0014490127796307206 2023-01-23 03:11:49.580373: step: 912/531, loss: 0.0018097878200933337 2023-01-23 03:11:50.692500: step: 916/531, loss: 0.011127281934022903 2023-01-23 03:11:51.803069: step: 920/531, loss: 0.0047990805469453335 2023-01-23 03:11:52.899859: step: 924/531, loss: 0.0009864807361736894 2023-01-23 03:11:54.010762: step: 928/531, loss: 0.00030226705712266266 2023-01-23 03:11:55.113752: step: 932/531, loss: 0.03904876857995987 2023-01-23 03:11:56.247471: step: 936/531, loss: 0.007645892910659313 2023-01-23 03:11:57.354129: step: 940/531, loss: 3.7860871088923886e-05 2023-01-23 03:11:58.495013: step: 944/531, loss: 0.0008703232160769403 2023-01-23 03:11:59.605072: step: 948/531, loss: 0.014857864007353783 2023-01-23 03:12:00.711277: step: 952/531, loss: 0.026413489133119583 2023-01-23 03:12:01.853629: step: 956/531, loss: 0.0027343749534338713 2023-01-23 03:12:02.961050: step: 960/531, loss: 0.0012426376342773438 2023-01-23 03:12:04.096736: step: 964/531, loss: 0.03441353142261505 2023-01-23 03:12:05.230234: step: 968/531, loss: 1.5926361811580136e-05 2023-01-23 03:12:06.334971: step: 972/531, loss: 0.00088415143545717 2023-01-23 03:12:07.463841: step: 976/531, loss: 0.002712822053581476 2023-01-23 03:12:08.584092: step: 980/531, loss: 0.002924347063526511 2023-01-23 03:12:09.733408: step: 984/531, loss: 0.0028717995155602694 2023-01-23 03:12:10.832685: step: 988/531, loss: 2.3078917365637608e-05 2023-01-23 03:12:11.956115: step: 992/531, loss: 0.007679557893425226 2023-01-23 03:12:13.066609: step: 996/531, loss: 2.384185791015625e-07 2023-01-23 03:12:14.178693: step: 1000/531, loss: 0.018398379907011986 2023-01-23 03:12:15.275946: step: 1004/531, loss: 0.005536174867302179 2023-01-23 03:12:16.407398: step: 1008/531, loss: 0.07396488636732101 2023-01-23 03:12:17.554570: step: 1012/531, loss: 0.01290912739932537 2023-01-23 03:12:18.709249: step: 1016/531, loss: 0.0480104461312294 2023-01-23 03:12:19.833737: step: 1020/531, loss: 0.004693222232162952 2023-01-23 03:12:20.944709: step: 1024/531, loss: 0.09185962378978729 2023-01-23 03:12:22.053966: step: 1028/531, loss: 0.03025207668542862 2023-01-23 03:12:23.157367: step: 1032/531, loss: 0.0014249802334234118 2023-01-23 03:12:24.253995: step: 1036/531, loss: 0.0010518074268475175 2023-01-23 03:12:25.377810: step: 1040/531, loss: 0.023694612085819244 2023-01-23 03:12:26.492348: step: 1044/531, loss: 0.009563731960952282 2023-01-23 03:12:27.678170: step: 1048/531, loss: 0.0575651153922081 2023-01-23 03:12:28.828348: step: 1052/531, loss: 0.0002812385791912675 2023-01-23 03:12:29.926760: step: 1056/531, loss: 0.02248263545334339 2023-01-23 03:12:31.031672: step: 1060/531, loss: 0.0036524771712720394 2023-01-23 03:12:32.147096: step: 1064/531, loss: 0.015282250009477139 2023-01-23 03:12:33.272902: step: 1068/531, loss: 0.00022683144197799265 2023-01-23 03:12:34.396776: step: 1072/531, loss: 0.004895114805549383 2023-01-23 03:12:35.511667: step: 1076/531, loss: 0.007122325710952282 2023-01-23 03:12:36.666041: step: 1080/531, loss: 0.003162384033203125 2023-01-23 03:12:37.796198: step: 1084/531, loss: 0.0004028320254292339 2023-01-23 03:12:38.969957: step: 1088/531, loss: 0.010314464569091797 2023-01-23 03:12:40.107172: step: 1092/531, loss: 0.03436090052127838 2023-01-23 03:12:41.197063: step: 1096/531, loss: 0.0002773284795694053 2023-01-23 03:12:42.365918: step: 1100/531, loss: 0.0539461150765419 2023-01-23 03:12:43.470358: step: 1104/531, loss: 0.013398217968642712 2023-01-23 03:12:44.598697: step: 1108/531, loss: 0.05912666395306587 2023-01-23 03:12:45.701879: step: 1112/531, loss: 0.00031495094299316406 2023-01-23 03:12:46.825119: step: 1116/531, loss: 0.00014991759962867945 2023-01-23 03:12:47.945452: step: 1120/531, loss: 0.014097308740019798 2023-01-23 03:12:49.057826: step: 1124/531, loss: 0.004456711001694202 2023-01-23 03:12:50.180778: step: 1128/531, loss: 0.00144023890607059 2023-01-23 03:12:51.338766: step: 1132/531, loss: 0.020263100042939186 2023-01-23 03:12:52.484744: step: 1136/531, loss: 0.0009884476894512773 2023-01-23 03:12:53.589841: step: 1140/531, loss: 0.0029548644088208675 2023-01-23 03:12:54.693009: step: 1144/531, loss: 0.00010752677917480469 2023-01-23 03:12:55.811942: step: 1148/531, loss: 0.0015525819035246968 2023-01-23 03:12:56.915425: step: 1152/531, loss: 0.009813498705625534 2023-01-23 03:12:58.034745: step: 1156/531, loss: 0.14392872154712677 2023-01-23 03:12:59.149640: step: 1160/531, loss: 0.016886424273252487 2023-01-23 03:13:00.268654: step: 1164/531, loss: 0.002191734267398715 2023-01-23 03:13:01.399162: step: 1168/531, loss: 0.4961088299751282 2023-01-23 03:13:02.542815: step: 1172/531, loss: 0.003760910127311945 2023-01-23 03:13:03.686861: step: 1176/531, loss: 0.04500961676239967 2023-01-23 03:13:04.813929: step: 1180/531, loss: 0.00046291350736282766 2023-01-23 03:13:05.939222: step: 1184/531, loss: 0.014890193939208984 2023-01-23 03:13:07.072362: step: 1188/531, loss: 0.003946161363273859 2023-01-23 03:13:08.191084: step: 1192/531, loss: 0.025676345452666283 2023-01-23 03:13:09.327593: step: 1196/531, loss: 0.00022430419630836695 2023-01-23 03:13:10.487045: step: 1200/531, loss: 0.0009365081787109375 2023-01-23 03:13:11.600353: step: 1204/531, loss: 0.09767065197229385 2023-01-23 03:13:12.715553: step: 1208/531, loss: 0.010264492593705654 2023-01-23 03:13:13.829904: step: 1212/531, loss: 0.0013503074878826737 2023-01-23 03:13:14.980353: step: 1216/531, loss: 0.0004410266992636025 2023-01-23 03:13:16.122054: step: 1220/531, loss: 0.013649463653564453 2023-01-23 03:13:17.247866: step: 1224/531, loss: 0.04093818739056587 2023-01-23 03:13:18.374711: step: 1228/531, loss: 0.0016511917347088456 2023-01-23 03:13:19.504456: step: 1232/531, loss: 0.00041027070255950093 2023-01-23 03:13:20.614454: step: 1236/531, loss: 0.0008877754444256425 2023-01-23 03:13:21.731783: step: 1240/531, loss: 0.0006544113275595009 2023-01-23 03:13:22.840089: step: 1244/531, loss: 0.0018414973746985197 2023-01-23 03:13:23.994972: step: 1248/531, loss: 0.022600937634706497 2023-01-23 03:13:25.128087: step: 1252/531, loss: 0.0005557060358114541 2023-01-23 03:13:26.271965: step: 1256/531, loss: 0.051259662955999374 2023-01-23 03:13:27.375201: step: 1260/531, loss: 0.012520790100097656 2023-01-23 03:13:28.492456: step: 1264/531, loss: 0.000640869140625 2023-01-23 03:13:29.612679: step: 1268/531, loss: 0.00015935898409225047 2023-01-23 03:13:30.738041: step: 1272/531, loss: 0.003366661025211215 2023-01-23 03:13:31.841348: step: 1276/531, loss: 0.008142232894897461 2023-01-23 03:13:32.955657: step: 1280/531, loss: 0.008933067321777344 2023-01-23 03:13:34.096603: step: 1284/531, loss: 0.0006323814741335809 2023-01-23 03:13:35.209130: step: 1288/531, loss: 0.0003190040588378906 2023-01-23 03:13:36.352976: step: 1292/531, loss: 0.0005426406860351562 2023-01-23 03:13:37.473854: step: 1296/531, loss: 0.029558563604950905 2023-01-23 03:13:38.585800: step: 1300/531, loss: 0.0012809752952307463 2023-01-23 03:13:39.720136: step: 1304/531, loss: 4.420280674821697e-05 2023-01-23 03:13:40.856899: step: 1308/531, loss: 0.0018699646461755037 2023-01-23 03:13:41.975809: step: 1312/531, loss: 0.011875820346176624 2023-01-23 03:13:43.111195: step: 1316/531, loss: 6.217956251930445e-05 2023-01-23 03:13:44.232946: step: 1320/531, loss: 0.016339685767889023 2023-01-23 03:13:45.324618: step: 1324/531, loss: 0.0027258514892309904 2023-01-23 03:13:46.478947: step: 1328/531, loss: 0.009500885382294655 2023-01-23 03:13:47.624416: step: 1332/531, loss: 0.06279821693897247 2023-01-23 03:13:48.764035: step: 1336/531, loss: 0.0006595611339434981 2023-01-23 03:13:49.903479: step: 1340/531, loss: 0.00013465881056617945 2023-01-23 03:13:51.035042: step: 1344/531, loss: 0.03590374067425728 2023-01-23 03:13:52.142181: step: 1348/531, loss: 0.010112190619111061 2023-01-23 03:13:53.249566: step: 1352/531, loss: 0.002113437745720148 2023-01-23 03:13:54.345653: step: 1356/531, loss: 0.004073619842529297 2023-01-23 03:13:55.471464: step: 1360/531, loss: 0.005312538240104914 2023-01-23 03:13:56.577047: step: 1364/531, loss: 0.00039082765579223633 2023-01-23 03:13:57.738575: step: 1368/531, loss: 0.02067432552576065 2023-01-23 03:13:58.869992: step: 1372/531, loss: 0.0008126497268676758 2023-01-23 03:13:59.976593: step: 1376/531, loss: 0.0003488540824037045 2023-01-23 03:14:01.110781: step: 1380/531, loss: 0.00210742955096066 2023-01-23 03:14:02.237720: step: 1384/531, loss: 0.04609685018658638 2023-01-23 03:14:03.363975: step: 1388/531, loss: 0.01796741597354412 2023-01-23 03:14:04.494361: step: 1392/531, loss: 0.4730847477912903 2023-01-23 03:14:05.602345: step: 1396/531, loss: 0.0002134323149221018 2023-01-23 03:14:06.731124: step: 1400/531, loss: 0.055722616612911224 2023-01-23 03:14:07.839124: step: 1404/531, loss: 0.00030803680419921875 2023-01-23 03:14:08.952963: step: 1408/531, loss: 0.0014691352844238281 2023-01-23 03:14:10.090430: step: 1412/531, loss: 0.029279422014951706 2023-01-23 03:14:11.225689: step: 1416/531, loss: 0.01132297609001398 2023-01-23 03:14:12.367424: step: 1420/531, loss: 0.007129383273422718 2023-01-23 03:14:13.529201: step: 1424/531, loss: 0.00130462646484375 2023-01-23 03:14:14.671578: step: 1428/531, loss: 0.009108830243349075 2023-01-23 03:14:15.795787: step: 1432/531, loss: 0.0004878044128417969 2023-01-23 03:14:16.925691: step: 1436/531, loss: 0.0058769225142896175 2023-01-23 03:14:18.076217: step: 1440/531, loss: 0.00014991761418059468 2023-01-23 03:14:19.180235: step: 1444/531, loss: 0.02115612104535103 2023-01-23 03:14:20.314254: step: 1448/531, loss: 0.0010305404430255294 2023-01-23 03:14:21.456671: step: 1452/531, loss: 0.018008803948760033 2023-01-23 03:14:22.587228: step: 1456/531, loss: 0.005129051394760609 2023-01-23 03:14:23.733052: step: 1460/531, loss: 0.10103468596935272 2023-01-23 03:14:24.874082: step: 1464/531, loss: 0.022390173748135567 2023-01-23 03:14:25.985564: step: 1468/531, loss: 0.00415878277271986 2023-01-23 03:14:27.141635: step: 1472/531, loss: 0.03391876071691513 2023-01-23 03:14:28.233706: step: 1476/531, loss: 0.00482788123190403 2023-01-23 03:14:29.332595: step: 1480/531, loss: 5.254745337879285e-05 2023-01-23 03:14:30.440744: step: 1484/531, loss: 0.03279914706945419 2023-01-23 03:14:31.564095: step: 1488/531, loss: 0.0030317306518554688 2023-01-23 03:14:32.679816: step: 1492/531, loss: 0.00019760131544899195 2023-01-23 03:14:33.796945: step: 1496/531, loss: 0.01295461691915989 2023-01-23 03:14:34.912963: step: 1500/531, loss: 0.0004896164173260331 2023-01-23 03:14:36.035229: step: 1504/531, loss: 0.00054889329476282 2023-01-23 03:14:37.126347: step: 1508/531, loss: 0.01773662678897381 2023-01-23 03:14:38.252055: step: 1512/531, loss: 0.00033740996150299907 2023-01-23 03:14:39.353238: step: 1516/531, loss: 0.004768943414092064 2023-01-23 03:14:40.469917: step: 1520/531, loss: 0.03475676104426384 2023-01-23 03:14:41.589958: step: 1524/531, loss: 0.0009329319000244141 2023-01-23 03:14:42.711091: step: 1528/531, loss: 0.00011625289334915578 2023-01-23 03:14:43.829598: step: 1532/531, loss: 0.0006427764892578125 2023-01-23 03:14:44.990039: step: 1536/531, loss: 0.06848545372486115 2023-01-23 03:14:46.143022: step: 1540/531, loss: 0.5803816914558411 2023-01-23 03:14:47.276071: step: 1544/531, loss: 7.615089998580515e-05 2023-01-23 03:14:48.405842: step: 1548/531, loss: 0.001247406005859375 2023-01-23 03:14:49.567279: step: 1552/531, loss: 0.024210358038544655 2023-01-23 03:14:50.682145: step: 1556/531, loss: 0.0819338783621788 2023-01-23 03:14:51.829957: step: 1560/531, loss: 0.00032300950260832906 2023-01-23 03:14:52.950570: step: 1564/531, loss: 0.00016479492478538305 2023-01-23 03:14:54.049247: step: 1568/531, loss: 0.008023262023925781 2023-01-23 03:14:55.140588: step: 1572/531, loss: 0.00020880700321868062 2023-01-23 03:14:56.272231: step: 1576/531, loss: 0.0007566452259197831 2023-01-23 03:14:57.410368: step: 1580/531, loss: 0.0019371032249182463 2023-01-23 03:14:58.544636: step: 1584/531, loss: 0.030925419181585312 2023-01-23 03:14:59.651812: step: 1588/531, loss: 0.0006134033319540322 2023-01-23 03:15:00.789175: step: 1592/531, loss: 0.010429573245346546 2023-01-23 03:15:01.939412: step: 1596/531, loss: 0.05681400001049042 2023-01-23 03:15:03.054953: step: 1600/531, loss: 0.00024051667423918843 2023-01-23 03:15:04.166332: step: 1604/531, loss: -7.534027190558845e-06 2023-01-23 03:15:05.299178: step: 1608/531, loss: 0.0036504745949059725 2023-01-23 03:15:06.420062: step: 1612/531, loss: 0.007227051071822643 2023-01-23 03:15:07.541241: step: 1616/531, loss: 0.0005006790161132812 2023-01-23 03:15:08.659125: step: 1620/531, loss: 0.004570198245346546 2023-01-23 03:15:09.753775: step: 1624/531, loss: 0.000118255615234375 2023-01-23 03:15:10.871483: step: 1628/531, loss: 0.004357719793915749 2023-01-23 03:15:12.021958: step: 1632/531, loss: 0.007411765865981579 2023-01-23 03:15:13.118714: step: 1636/531, loss: 0.0010627746814861894 2023-01-23 03:15:14.231363: step: 1640/531, loss: 0.010985566303133965 2023-01-23 03:15:15.364054: step: 1644/531, loss: 0.002540492918342352 2023-01-23 03:15:16.477237: step: 1648/531, loss: 0.003048801328986883 2023-01-23 03:15:17.601302: step: 1652/531, loss: 0.010051059536635876 2023-01-23 03:15:18.718087: step: 1656/531, loss: 0.0037695884238928556 2023-01-23 03:15:19.823146: step: 1660/531, loss: 0.35198134183883667 2023-01-23 03:15:20.930534: step: 1664/531, loss: 0.001451206160709262 2023-01-23 03:15:22.012831: step: 1668/531, loss: 8.831024024402723e-05 2023-01-23 03:15:23.119665: step: 1672/531, loss: 0.003943252377212048 2023-01-23 03:15:24.228092: step: 1676/531, loss: 0.006049108691513538 2023-01-23 03:15:25.338539: step: 1680/531, loss: 1.6689300537109375e-06 2023-01-23 03:15:26.489381: step: 1684/531, loss: 0.016969136893749237 2023-01-23 03:15:27.606285: step: 1688/531, loss: 0.013136005029082298 2023-01-23 03:15:28.720896: step: 1692/531, loss: 0.00012054443504894152 2023-01-23 03:15:29.837293: step: 1696/531, loss: 0.03838710859417915 2023-01-23 03:15:30.980509: step: 1700/531, loss: 0.10964298993349075 2023-01-23 03:15:32.099858: step: 1704/531, loss: 0.12334060668945312 2023-01-23 03:15:33.206252: step: 1708/531, loss: 0.002704620361328125 2023-01-23 03:15:34.313673: step: 1712/531, loss: 0.04771919175982475 2023-01-23 03:15:35.433032: step: 1716/531, loss: 0.0032807348761707544 2023-01-23 03:15:36.557406: step: 1720/531, loss: 0.00015978813462425023 2023-01-23 03:15:37.655017: step: 1724/531, loss: 0.014946985058486462 2023-01-23 03:15:38.783308: step: 1728/531, loss: 0.019005775451660156 2023-01-23 03:15:39.887361: step: 1732/531, loss: 0.10810118168592453 2023-01-23 03:15:41.002565: step: 1736/531, loss: 0.05720863491296768 2023-01-23 03:15:42.126022: step: 1740/531, loss: 0.006876516621559858 2023-01-23 03:15:43.237552: step: 1744/531, loss: 0.03406667709350586 2023-01-23 03:15:44.377234: step: 1748/531, loss: 0.03202657774090767 2023-01-23 03:15:45.474636: step: 1752/531, loss: 0.004449415020644665 2023-01-23 03:15:46.600770: step: 1756/531, loss: 0.008105278015136719 2023-01-23 03:15:47.710320: step: 1760/531, loss: 0.04930820316076279 2023-01-23 03:15:48.797651: step: 1764/531, loss: 0.002547550480812788 2023-01-23 03:15:49.921864: step: 1768/531, loss: 0.026983069255948067 2023-01-23 03:15:51.041034: step: 1772/531, loss: 0.002289199735969305 2023-01-23 03:15:52.193769: step: 1776/531, loss: 0.026494454592466354 2023-01-23 03:15:53.300331: step: 1780/531, loss: 0.05965833738446236 2023-01-23 03:15:54.432366: step: 1784/531, loss: 0.0032626150641590357 2023-01-23 03:15:55.590658: step: 1788/531, loss: 0.02072601392865181 2023-01-23 03:15:56.709534: step: 1792/531, loss: 0.028166770935058594 2023-01-23 03:15:57.825589: step: 1796/531, loss: 0.0029715539421886206 2023-01-23 03:15:58.939777: step: 1800/531, loss: 0.04965553060173988 2023-01-23 03:16:00.058942: step: 1804/531, loss: 1.1444091796875e-05 2023-01-23 03:16:01.183958: step: 1808/531, loss: 0.002735710237175226 2023-01-23 03:16:02.331144: step: 1812/531, loss: 0.016646387055516243 2023-01-23 03:16:03.454989: step: 1816/531, loss: 0.0002789497375488281 2023-01-23 03:16:04.584073: step: 1820/531, loss: 0.0028167725540697575 2023-01-23 03:16:05.715152: step: 1824/531, loss: 0.004387092776596546 2023-01-23 03:16:06.860166: step: 1828/531, loss: 0.008446693420410156 2023-01-23 03:16:07.975463: step: 1832/531, loss: 0.09166374802589417 2023-01-23 03:16:09.116465: step: 1836/531, loss: 0.021665288135409355 2023-01-23 03:16:10.262181: step: 1840/531, loss: 2.956390380859375e-05 2023-01-23 03:16:11.384120: step: 1844/531, loss: 0.003444194793701172 2023-01-23 03:16:12.503520: step: 1848/531, loss: 0.04512529447674751 2023-01-23 03:16:13.611903: step: 1852/531, loss: 0.0004996299976482987 2023-01-23 03:16:14.759306: step: 1856/531, loss: 0.0010347366333007812 2023-01-23 03:16:15.896618: step: 1860/531, loss: 0.051970481872558594 2023-01-23 03:16:17.029924: step: 1864/531, loss: 0.014038467779755592 2023-01-23 03:16:18.189844: step: 1868/531, loss: 0.008013535290956497 2023-01-23 03:16:19.314879: step: 1872/531, loss: 0.0016998292412608862 2023-01-23 03:16:20.478951: step: 1876/531, loss: 0.005942249670624733 2023-01-23 03:16:21.606942: step: 1880/531, loss: 0.02713184244930744 2023-01-23 03:16:22.757959: step: 1884/531, loss: 0.011976242065429688 2023-01-23 03:16:23.869716: step: 1888/531, loss: 4.4345855712890625e-05 2023-01-23 03:16:24.982258: step: 1892/531, loss: 0.000531101250089705 2023-01-23 03:16:26.125378: step: 1896/531, loss: 3.0040740966796875e-05 2023-01-23 03:16:27.246841: step: 1900/531, loss: 0.0017988205654546618 2023-01-23 03:16:28.391115: step: 1904/531, loss: 0.0015722275711596012 2023-01-23 03:16:29.528520: step: 1908/531, loss: 0.017586326226592064 2023-01-23 03:16:30.675645: step: 1912/531, loss: 0.002399945165961981 2023-01-23 03:16:31.818920: step: 1916/531, loss: 9.942054748535156e-05 2023-01-23 03:16:32.947974: step: 1920/531, loss: 0.00029621124849654734 2023-01-23 03:16:34.088496: step: 1924/531, loss: 0.000118255615234375 2023-01-23 03:16:35.235898: step: 1928/531, loss: 0.0016471862327307463 2023-01-23 03:16:36.350632: step: 1932/531, loss: 0.021920204162597656 2023-01-23 03:16:37.470051: step: 1936/531, loss: 0.005694103427231312 2023-01-23 03:16:38.604990: step: 1940/531, loss: 0.00018787384033203125 2023-01-23 03:16:39.718618: step: 1944/531, loss: 0.017901327461004257 2023-01-23 03:16:40.833793: step: 1948/531, loss: 0.04023037105798721 2023-01-23 03:16:41.967017: step: 1952/531, loss: 0.002662849612534046 2023-01-23 03:16:43.092568: step: 1956/531, loss: 0.03124532662332058 2023-01-23 03:16:44.239809: step: 1960/531, loss: 0.0007418632740154862 2023-01-23 03:16:45.393141: step: 1964/531, loss: 0.000194549560546875 2023-01-23 03:16:46.542904: step: 1968/531, loss: 0.012388801202178001 2023-01-23 03:16:47.674792: step: 1972/531, loss: 0.0242493636906147 2023-01-23 03:16:48.795054: step: 1976/531, loss: 0.017926692962646484 2023-01-23 03:16:49.907344: step: 1980/531, loss: 0.026798248291015625 2023-01-23 03:16:51.019948: step: 1984/531, loss: 0.014082526788115501 2023-01-23 03:16:52.150024: step: 1988/531, loss: 0.0021657943725585938 2023-01-23 03:16:53.269654: step: 1992/531, loss: 0.015585088171064854 2023-01-23 03:16:54.427608: step: 1996/531, loss: 0.0418248176574707 2023-01-23 03:16:55.557941: step: 2000/531, loss: 0.0003028869687113911 2023-01-23 03:16:56.703384: step: 2004/531, loss: 0.01108398474752903 2023-01-23 03:16:57.815796: step: 2008/531, loss: 9.651184518588707e-05 2023-01-23 03:16:58.924891: step: 2012/531, loss: 0.0002839088556356728 2023-01-23 03:17:00.055877: step: 2016/531, loss: 0.0951557606458664 2023-01-23 03:17:01.158755: step: 2020/531, loss: 0.1912635713815689 2023-01-23 03:17:02.272367: step: 2024/531, loss: 0.0003766060108318925 2023-01-23 03:17:03.369531: step: 2028/531, loss: 0.46143627166748047 2023-01-23 03:17:04.482284: step: 2032/531, loss: 0.0015483855968341231 2023-01-23 03:17:05.567932: step: 2036/531, loss: 0.0018707275157794356 2023-01-23 03:17:06.704068: step: 2040/531, loss: 0.00037803652230650187 2023-01-23 03:17:07.822560: step: 2044/531, loss: 0.0051177977584302425 2023-01-23 03:17:08.980542: step: 2048/531, loss: 1.8215179807157256e-05 2023-01-23 03:17:10.109188: step: 2052/531, loss: 0.0002663612540345639 2023-01-23 03:17:11.234776: step: 2056/531, loss: 0.0006105899810791016 2023-01-23 03:17:12.370152: step: 2060/531, loss: 6.809234764659777e-05 2023-01-23 03:17:13.488434: step: 2064/531, loss: 0.0005039215320721269 2023-01-23 03:17:14.591857: step: 2068/531, loss: 0.00024032594228629023 2023-01-23 03:17:15.709077: step: 2072/531, loss: 0.0005002021789550781 2023-01-23 03:17:16.819106: step: 2076/531, loss: 0.001376342843286693 2023-01-23 03:17:17.967725: step: 2080/531, loss: 0.011607170104980469 2023-01-23 03:17:19.103110: step: 2084/531, loss: 0.0532136932015419 2023-01-23 03:17:20.241134: step: 2088/531, loss: 0.0009836197132244706 2023-01-23 03:17:21.413974: step: 2092/531, loss: 0.013960391283035278 2023-01-23 03:17:22.560749: step: 2096/531, loss: 0.027484990656375885 2023-01-23 03:17:23.689308: step: 2100/531, loss: 0.01514358539134264 2023-01-23 03:17:24.815548: step: 2104/531, loss: 0.015422630123794079 2023-01-23 03:17:25.933220: step: 2108/531, loss: 0.007812690921127796 2023-01-23 03:17:27.060261: step: 2112/531, loss: 0.06453514844179153 2023-01-23 03:17:28.181067: step: 2116/531, loss: 0.007500458043068647 2023-01-23 03:17:29.283136: step: 2120/531, loss: 0.008971215225756168 2023-01-23 03:17:30.410762: step: 2124/531, loss: 0.0004103660467080772 ================================================== Loss: 0.021 -------------------- Dev: {'event': {'p': 0.5917948717948718, 'r': 0.7683089214380826, 'f1': 0.6685979142526071}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Test: {'event': {'p': 0.6333656644034917, 'r': 0.7787716159809183, 'f1': 0.6985825086921637}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Chinese: {'event': {'p': 0.5581395348837209, 'r': 0.8888888888888888, 'f1': 0.6857142857142857}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Korean: {'event': {'p': 0.6595744680851063, 'r': 0.49206349206349204, 'f1': 0.5636363636363635}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Russian: {'event': {'p': 0.4523809523809524, 'r': 0.5277777777777778, 'f1': 0.4871794871794871}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 22 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:18:10.230446: step: 4/531, loss: 0.0018997193546965718 2023-01-23 03:18:11.359744: step: 8/531, loss: 0.0044491770677268505 2023-01-23 03:18:12.516716: step: 12/531, loss: 0.0006845474126748741 2023-01-23 03:18:13.621748: step: 16/531, loss: 0.0003575325245037675 2023-01-23 03:18:14.737172: step: 20/531, loss: 0.0024196626618504524 2023-01-23 03:18:15.856870: step: 24/531, loss: 0.00042047502938658 2023-01-23 03:18:16.968301: step: 28/531, loss: 5.769729978055693e-05 2023-01-23 03:18:18.125003: step: 32/531, loss: 0.016727877780795097 2023-01-23 03:18:19.235266: step: 36/531, loss: 0.3487062454223633 2023-01-23 03:18:20.343343: step: 40/531, loss: 0.060029126703739166 2023-01-23 03:18:21.448595: step: 44/531, loss: 0.00387401576153934 2023-01-23 03:18:22.565350: step: 48/531, loss: 0.001007080078125 2023-01-23 03:18:23.665796: step: 52/531, loss: 0.0370633602142334 2023-01-23 03:18:24.767457: step: 56/531, loss: 0.006470871157944202 2023-01-23 03:18:25.930826: step: 60/531, loss: 0.0017126083839684725 2023-01-23 03:18:27.092454: step: 64/531, loss: 0.0011920928955078125 2023-01-23 03:18:28.214402: step: 68/531, loss: 9.5367431640625e-06 2023-01-23 03:18:29.329577: step: 72/531, loss: 0.07960548251867294 2023-01-23 03:18:30.446900: step: 76/531, loss: 0.0006762504926882684 2023-01-23 03:18:31.581166: step: 80/531, loss: 0.00035643577575683594 2023-01-23 03:18:32.698039: step: 84/531, loss: 4.1007992876984645e-06 2023-01-23 03:18:33.840088: step: 88/531, loss: 0.002665901090949774 2023-01-23 03:18:34.947571: step: 92/531, loss: 0.0012102127075195312 2023-01-23 03:18:36.080221: step: 96/531, loss: 0.010812587104737759 2023-01-23 03:18:37.189043: step: 100/531, loss: 0.0003010451910085976 2023-01-23 03:18:38.346868: step: 104/531, loss: 0.0054826741106808186 2023-01-23 03:18:39.489267: step: 108/531, loss: 0.019179726019501686 2023-01-23 03:18:40.613691: step: 112/531, loss: 0.0010495185852050781 2023-01-23 03:18:41.762635: step: 116/531, loss: 0.011016845703125 2023-01-23 03:18:42.923162: step: 120/531, loss: 0.16760531067848206 2023-01-23 03:18:44.059594: step: 124/531, loss: 0.0018293381435796618 2023-01-23 03:18:45.177710: step: 128/531, loss: 0.08307604491710663 2023-01-23 03:18:46.304031: step: 132/531, loss: 0.0032601356506347656 2023-01-23 03:18:47.430478: step: 136/531, loss: 0.0004682541184592992 2023-01-23 03:18:48.551100: step: 140/531, loss: 0.0013774872059002519 2023-01-23 03:18:49.684616: step: 144/531, loss: 0.00011558532423805445 2023-01-23 03:18:50.783723: step: 148/531, loss: 0.0033724785316735506 2023-01-23 03:18:51.906646: step: 152/531, loss: 0.00027179718017578125 2023-01-23 03:18:53.049134: step: 156/531, loss: 0.11645297706127167 2023-01-23 03:18:54.179217: step: 160/531, loss: 0.0010637283558025956 2023-01-23 03:18:55.288315: step: 164/531, loss: 0.0067164418287575245 2023-01-23 03:18:56.390151: step: 168/531, loss: 0.00036110877408646047 2023-01-23 03:18:57.551481: step: 172/531, loss: 0.014166069217026234 2023-01-23 03:18:58.668468: step: 176/531, loss: 9.822845458984375e-05 2023-01-23 03:18:59.797989: step: 180/531, loss: 0.08979358524084091 2023-01-23 03:19:00.940618: step: 184/531, loss: 0.0018795967334881425 2023-01-23 03:19:02.072101: step: 188/531, loss: 0.03762493282556534 2023-01-23 03:19:03.188971: step: 192/531, loss: 0.009085465222597122 2023-01-23 03:19:04.346029: step: 196/531, loss: 0.01636505126953125 2023-01-23 03:19:05.502875: step: 200/531, loss: 0.006860542576760054 2023-01-23 03:19:06.652837: step: 204/531, loss: 0.004508781712502241 2023-01-23 03:19:07.797453: step: 208/531, loss: 0.004056548699736595 2023-01-23 03:19:08.892537: step: 212/531, loss: 0.00035839079646393657 2023-01-23 03:19:10.058399: step: 216/531, loss: 0.00569496164098382 2023-01-23 03:19:11.185012: step: 220/531, loss: 0.0006122112390585244 2023-01-23 03:19:12.346960: step: 224/531, loss: 0.010429192334413528 2023-01-23 03:19:13.480329: step: 228/531, loss: 0.009557723999023438 2023-01-23 03:19:14.599825: step: 232/531, loss: 0.005618095397949219 2023-01-23 03:19:15.717362: step: 236/531, loss: 0.004314994905143976 2023-01-23 03:19:16.825176: step: 240/531, loss: 0.0004443645302671939 2023-01-23 03:19:17.967277: step: 244/531, loss: 0.0015382766723632812 2023-01-23 03:19:19.083285: step: 248/531, loss: 0.0931055098772049 2023-01-23 03:19:20.187581: step: 252/531, loss: 0.036287691444158554 2023-01-23 03:19:21.346732: step: 256/531, loss: 0.12040519714355469 2023-01-23 03:19:22.475048: step: 260/531, loss: 0.00010242462303722277 2023-01-23 03:19:23.569585: step: 264/531, loss: 0.016102219000458717 2023-01-23 03:19:24.671219: step: 268/531, loss: 0.009535598568618298 2023-01-23 03:19:25.781089: step: 272/531, loss: 0.00019993782916571945 2023-01-23 03:19:26.884784: step: 276/531, loss: 0.03116626851260662 2023-01-23 03:19:27.995836: step: 280/531, loss: 0.006656837649643421 2023-01-23 03:19:29.085482: step: 284/531, loss: 0.0061404225416481495 2023-01-23 03:19:30.250165: step: 288/531, loss: 0.04730252921581268 2023-01-23 03:19:31.388686: step: 292/531, loss: 0.044647980481386185 2023-01-23 03:19:32.512114: step: 296/531, loss: 0.04547290876507759 2023-01-23 03:19:33.637058: step: 300/531, loss: 0.012807703576982021 2023-01-23 03:19:34.768564: step: 304/531, loss: 0.0037442208267748356 2023-01-23 03:19:35.904600: step: 308/531, loss: 0.0045761107467114925 2023-01-23 03:19:37.030028: step: 312/531, loss: 0.03157777711749077 2023-01-23 03:19:38.163526: step: 316/531, loss: 0.002334976103156805 2023-01-23 03:19:39.303291: step: 320/531, loss: 0.005282497499138117 2023-01-23 03:19:40.448492: step: 324/531, loss: 0.03480219841003418 2023-01-23 03:19:41.570413: step: 328/531, loss: 0.0017469405429437757 2023-01-23 03:19:42.692978: step: 332/531, loss: 0.0015405655140057206 2023-01-23 03:19:43.823108: step: 336/531, loss: 9.860992577159777e-05 2023-01-23 03:19:44.951851: step: 340/531, loss: 0.0017723082564771175 2023-01-23 03:19:46.058115: step: 344/531, loss: 0.08843688666820526 2023-01-23 03:19:47.179612: step: 348/531, loss: 0.0003970146062783897 2023-01-23 03:19:48.312390: step: 352/531, loss: 8.964539119915571e-06 2023-01-23 03:19:49.460698: step: 356/531, loss: 0.00156402587890625 2023-01-23 03:19:50.580402: step: 360/531, loss: 0.0002193450927734375 2023-01-23 03:19:51.703945: step: 364/531, loss: 0.015962792560458183 2023-01-23 03:19:52.825300: step: 368/531, loss: 0.003342247102409601 2023-01-23 03:19:53.978151: step: 372/531, loss: 0.09523721039295197 2023-01-23 03:19:55.097101: step: 376/531, loss: -1.087188684323337e-05 2023-01-23 03:19:56.220333: step: 380/531, loss: 0.030991556122899055 2023-01-23 03:19:57.342230: step: 384/531, loss: 0.0007013797876425087 2023-01-23 03:19:58.440074: step: 388/531, loss: 0.00013599396334029734 2023-01-23 03:19:59.553009: step: 392/531, loss: 0.021628953516483307 2023-01-23 03:20:00.684305: step: 396/531, loss: 0.0026985169388353825 2023-01-23 03:20:01.777265: step: 400/531, loss: 0.000194549560546875 2023-01-23 03:20:02.899105: step: 404/531, loss: 0.0016930580604821444 2023-01-23 03:20:04.015200: step: 408/531, loss: 0.00022363662719726562 2023-01-23 03:20:05.148883: step: 412/531, loss: 0.003548908280208707 2023-01-23 03:20:06.292388: step: 416/531, loss: 0.22406716644763947 2023-01-23 03:20:07.398256: step: 420/531, loss: 0.037186816334724426 2023-01-23 03:20:08.528373: step: 424/531, loss: 0.00013580323138739914 2023-01-23 03:20:09.660240: step: 428/531, loss: 0.0006946563953533769 2023-01-23 03:20:10.749791: step: 432/531, loss: 1.602172778802924e-05 2023-01-23 03:20:11.897519: step: 436/531, loss: 0.03517146036028862 2023-01-23 03:20:13.015805: step: 440/531, loss: 0.018936539068818092 2023-01-23 03:20:14.155596: step: 444/531, loss: 0.028105545789003372 2023-01-23 03:20:15.269431: step: 448/531, loss: 0.0002340316859772429 2023-01-23 03:20:16.427269: step: 452/531, loss: 0.013898754492402077 2023-01-23 03:20:17.552271: step: 456/531, loss: 0.002858114195987582 2023-01-23 03:20:18.636114: step: 460/531, loss: 0.0003093719424214214 2023-01-23 03:20:19.775461: step: 464/531, loss: 0.0010966301197186112 2023-01-23 03:20:20.881653: step: 468/531, loss: 0.0002840042288880795 2023-01-23 03:20:22.035251: step: 472/531, loss: 0.01746678352355957 2023-01-23 03:20:23.173143: step: 476/531, loss: 0.012918663211166859 2023-01-23 03:20:24.301432: step: 480/531, loss: 0.0009792328346520662 2023-01-23 03:20:25.417003: step: 484/531, loss: 0.4155428111553192 2023-01-23 03:20:26.529303: step: 488/531, loss: 0.01971261575818062 2023-01-23 03:20:27.656342: step: 492/531, loss: 0.0010366439819335938 2023-01-23 03:20:28.768966: step: 496/531, loss: 0.00246009835973382 2023-01-23 03:20:29.917536: step: 500/531, loss: 0.0013407707447186112 2023-01-23 03:20:31.028289: step: 504/531, loss: 0.042410414665937424 2023-01-23 03:20:32.155010: step: 508/531, loss: 0.00012774467177223414 2023-01-23 03:20:33.292710: step: 512/531, loss: 0.035250473767519 2023-01-23 03:20:34.425797: step: 516/531, loss: 0.030462075024843216 2023-01-23 03:20:35.568156: step: 520/531, loss: 0.039939213544130325 2023-01-23 03:20:36.688807: step: 524/531, loss: 0.0749690979719162 2023-01-23 03:20:37.820597: step: 528/531, loss: 0.17607556283473969 2023-01-23 03:20:38.942481: step: 532/531, loss: 0.0008087158203125 2023-01-23 03:20:40.045275: step: 536/531, loss: 0.01723170280456543 2023-01-23 03:20:41.180109: step: 540/531, loss: 0.0003152370627503842 2023-01-23 03:20:42.308408: step: 544/531, loss: 0.03000025823712349 2023-01-23 03:20:43.403933: step: 548/531, loss: 0.0008251190301962197 2023-01-23 03:20:44.514187: step: 552/531, loss: 0.00015945434279274195 2023-01-23 03:20:45.630464: step: 556/531, loss: 0.028332805261015892 2023-01-23 03:20:46.792678: step: 560/531, loss: 0.0003824234299827367 2023-01-23 03:20:47.927654: step: 564/531, loss: 0.003098392393440008 2023-01-23 03:20:49.047673: step: 568/531, loss: 0.05826697498559952 2023-01-23 03:20:50.152061: step: 572/531, loss: 0.0016804696060717106 2023-01-23 03:20:51.303497: step: 576/531, loss: 0.6191143989562988 2023-01-23 03:20:52.437262: step: 580/531, loss: 0.011426257900893688 2023-01-23 03:20:53.541604: step: 584/531, loss: 0.010584450326859951 2023-01-23 03:20:54.651746: step: 588/531, loss: 0.07742176204919815 2023-01-23 03:20:55.779330: step: 592/531, loss: 0.0008602141751907766 2023-01-23 03:20:56.913895: step: 596/531, loss: 0.0007082939264364541 2023-01-23 03:20:58.027214: step: 600/531, loss: 0.017056941986083984 2023-01-23 03:20:59.175115: step: 604/531, loss: 0.00039281847421079874 2023-01-23 03:21:00.317204: step: 608/531, loss: 0.017569446936249733 2023-01-23 03:21:01.431566: step: 612/531, loss: 0.020808743312954903 2023-01-23 03:21:02.569833: step: 616/531, loss: 3.1042098271427676e-05 2023-01-23 03:21:03.661169: step: 620/531, loss: 0.003881740616634488 2023-01-23 03:21:04.781930: step: 624/531, loss: 0.014773941598832607 2023-01-23 03:21:05.902145: step: 628/531, loss: 0.04332322999835014 2023-01-23 03:21:07.042347: step: 632/531, loss: 0.033548545092344284 2023-01-23 03:21:08.184792: step: 636/531, loss: 0.0005468368763104081 2023-01-23 03:21:09.305286: step: 640/531, loss: 0.026336288079619408 2023-01-23 03:21:10.424385: step: 644/531, loss: 0.0011162757873535156 2023-01-23 03:21:11.550084: step: 648/531, loss: 0.026674319058656693 2023-01-23 03:21:12.711089: step: 652/531, loss: 0.003020334290340543 2023-01-23 03:21:13.817894: step: 656/531, loss: 0.0015174865256994963 2023-01-23 03:21:14.971540: step: 660/531, loss: 0.04860839992761612 2023-01-23 03:21:16.096545: step: 664/531, loss: 1.0395050594524946e-05 2023-01-23 03:21:17.197131: step: 668/531, loss: 0.0017058372031897306 2023-01-23 03:21:18.314197: step: 672/531, loss: 0.011866712011396885 2023-01-23 03:21:19.454531: step: 676/531, loss: 0.0011932372581213713 2023-01-23 03:21:20.559140: step: 680/531, loss: 0.0006091117975302041 2023-01-23 03:21:21.666609: step: 684/531, loss: 0.0790988951921463 2023-01-23 03:21:22.799137: step: 688/531, loss: 0.00023298263840842992 2023-01-23 03:21:23.956708: step: 692/531, loss: 0.00281696324236691 2023-01-23 03:21:25.079014: step: 696/531, loss: 0.000725269375834614 2023-01-23 03:21:26.184460: step: 700/531, loss: 0.0004669189511332661 2023-01-23 03:21:27.305625: step: 704/531, loss: 0.0002814293256960809 2023-01-23 03:21:28.427217: step: 708/531, loss: 0.0026995183434337378 2023-01-23 03:21:29.535863: step: 712/531, loss: 0.008170509710907936 2023-01-23 03:21:30.679503: step: 716/531, loss: 0.006976318545639515 2023-01-23 03:21:31.814206: step: 720/531, loss: 0.0005917549133300781 2023-01-23 03:21:32.915736: step: 724/531, loss: 4.882812572759576e-05 2023-01-23 03:21:34.052335: step: 728/531, loss: 0.0004124641418457031 2023-01-23 03:21:35.179886: step: 732/531, loss: 0.013591480441391468 2023-01-23 03:21:36.277684: step: 736/531, loss: 0.012377548962831497 2023-01-23 03:21:37.447052: step: 740/531, loss: 0.03476162254810333 2023-01-23 03:21:38.547936: step: 744/531, loss: 0.004398823250085115 2023-01-23 03:21:39.687345: step: 748/531, loss: 2.4604798454674892e-05 2023-01-23 03:21:40.826408: step: 752/531, loss: 0.013671494089066982 2023-01-23 03:21:41.949522: step: 756/531, loss: 0.0011943817371502519 2023-01-23 03:21:43.078453: step: 760/531, loss: 0.001507663750089705 2023-01-23 03:21:44.267987: step: 764/531, loss: 0.03401928022503853 2023-01-23 03:21:45.366391: step: 768/531, loss: 0.0713188648223877 2023-01-23 03:21:46.461550: step: 772/531, loss: 0.05470247566699982 2023-01-23 03:21:47.574059: step: 776/531, loss: 0.0004675865056924522 2023-01-23 03:21:48.671113: step: 780/531, loss: 7.171630568336695e-05 2023-01-23 03:21:49.829054: step: 784/531, loss: 0.03242187574505806 2023-01-23 03:21:50.953640: step: 788/531, loss: 0.058813001960515976 2023-01-23 03:21:52.078099: step: 792/531, loss: 4.472732325666584e-05 2023-01-23 03:21:53.190332: step: 796/531, loss: 0.0002915382501669228 2023-01-23 03:21:54.315171: step: 800/531, loss: 0.0014373778831213713 2023-01-23 03:21:55.437158: step: 804/531, loss: 2.2268297470873222e-05 2023-01-23 03:21:56.544168: step: 808/531, loss: 0.003915023989975452 2023-01-23 03:21:57.690421: step: 812/531, loss: 6.723403930664062e-05 2023-01-23 03:21:58.817491: step: 816/531, loss: 0.11374874413013458 2023-01-23 03:21:59.964719: step: 820/531, loss: 0.00211410503834486 2023-01-23 03:22:01.069700: step: 824/531, loss: 0.020637132227420807 2023-01-23 03:22:02.186548: step: 828/531, loss: 0.007857704535126686 2023-01-23 03:22:03.282010: step: 832/531, loss: 0.0005828857538290322 2023-01-23 03:22:04.395004: step: 836/531, loss: 0.0018834115471690893 2023-01-23 03:22:05.553228: step: 840/531, loss: 0.08020758628845215 2023-01-23 03:22:06.711575: step: 844/531, loss: 6.542205665027723e-05 2023-01-23 03:22:07.820427: step: 848/531, loss: 0.002821350237354636 2023-01-23 03:22:08.922669: step: 852/531, loss: 0.010622597299516201 2023-01-23 03:22:10.060356: step: 856/531, loss: 0.00035839079646393657 2023-01-23 03:22:11.157121: step: 860/531, loss: 0.05974989011883736 2023-01-23 03:22:12.310220: step: 864/531, loss: 0.0011190414661541581 2023-01-23 03:22:13.416051: step: 868/531, loss: 0.020493507385253906 2023-01-23 03:22:14.540636: step: 872/531, loss: 0.0014724732609465718 2023-01-23 03:22:15.656438: step: 876/531, loss: 0.013752556405961514 2023-01-23 03:22:16.814225: step: 880/531, loss: 0.1369609832763672 2023-01-23 03:22:17.917572: step: 884/531, loss: 0.00010938644845737144 2023-01-23 03:22:19.053872: step: 888/531, loss: 0.0010993957985192537 2023-01-23 03:22:20.173447: step: 892/531, loss: 0.3618907928466797 2023-01-23 03:22:21.282803: step: 896/531, loss: 0.0010310172801837325 2023-01-23 03:22:22.427149: step: 900/531, loss: 0.0013763427268713713 2023-01-23 03:22:23.534422: step: 904/531, loss: 0.00030961036100052297 2023-01-23 03:22:24.643109: step: 908/531, loss: 0.015498543158173561 2023-01-23 03:22:25.753287: step: 912/531, loss: 0.0022365569602698088 2023-01-23 03:22:26.907660: step: 916/531, loss: 0.00829381961375475 2023-01-23 03:22:28.038073: step: 920/531, loss: 0.00824518222361803 2023-01-23 03:22:29.169952: step: 924/531, loss: 0.00010433196439407766 2023-01-23 03:22:30.272340: step: 928/531, loss: 1.6498564946232364e-05 2023-01-23 03:22:31.377219: step: 932/531, loss: 0.0361081101000309 2023-01-23 03:22:32.480753: step: 936/531, loss: 0.0017801284557208419 2023-01-23 03:22:33.586790: step: 940/531, loss: 0.015580940060317516 2023-01-23 03:22:34.698346: step: 944/531, loss: 0.004696464631706476 2023-01-23 03:22:35.819761: step: 948/531, loss: 0.00025310515775345266 2023-01-23 03:22:36.947432: step: 952/531, loss: 0.051716484129428864 2023-01-23 03:22:38.087894: step: 956/531, loss: 0.0018837929237633944 2023-01-23 03:22:39.231900: step: 960/531, loss: 0.01030039880424738 2023-01-23 03:22:40.357183: step: 964/531, loss: 0.002150249434635043 2023-01-23 03:22:41.530860: step: 968/531, loss: 0.007288313005119562 2023-01-23 03:22:42.664418: step: 972/531, loss: 0.0005840301746502519 2023-01-23 03:22:43.800790: step: 976/531, loss: 0.0006574630970135331 2023-01-23 03:22:44.926682: step: 980/531, loss: 0.009013509377837181 2023-01-23 03:22:46.065876: step: 984/531, loss: 0.000600624131038785 2023-01-23 03:22:47.164349: step: 988/531, loss: 0.00046863555326126516 2023-01-23 03:22:48.269684: step: 992/531, loss: 0.0009137153974734247 2023-01-23 03:22:49.425637: step: 996/531, loss: 0.005797004792839289 2023-01-23 03:22:50.571135: step: 1000/531, loss: 0.0015172958374023438 2023-01-23 03:22:51.690847: step: 1004/531, loss: 0.030331803485751152 2023-01-23 03:22:52.803158: step: 1008/531, loss: 0.34022796154022217 2023-01-23 03:22:53.941917: step: 1012/531, loss: 0.00017375947209075093 2023-01-23 03:22:55.080486: step: 1016/531, loss: 0.00011119843111373484 2023-01-23 03:22:56.208666: step: 1020/531, loss: 0.025585317984223366 2023-01-23 03:22:57.333660: step: 1024/531, loss: 0.012732314877212048 2023-01-23 03:22:58.439886: step: 1028/531, loss: 0.014472484588623047 2023-01-23 03:22:59.591124: step: 1032/531, loss: 0.004925251007080078 2023-01-23 03:23:00.732619: step: 1036/531, loss: 0.012321091257035732 2023-01-23 03:23:01.869120: step: 1040/531, loss: 0.008662796579301357 2023-01-23 03:23:02.968157: step: 1044/531, loss: 0.0004496574401855469 2023-01-23 03:23:04.139745: step: 1048/531, loss: 0.0006292343023233116 2023-01-23 03:23:05.232426: step: 1052/531, loss: 0.011067581363022327 2023-01-23 03:23:06.355512: step: 1056/531, loss: 0.007999992929399014 2023-01-23 03:23:07.464850: step: 1060/531, loss: 1.3065338862361386e-05 2023-01-23 03:23:08.615700: step: 1064/531, loss: 0.007838821038603783 2023-01-23 03:23:09.759055: step: 1068/531, loss: 0.00381641392596066 2023-01-23 03:23:10.882886: step: 1072/531, loss: 0.0006130457040853798 2023-01-23 03:23:12.022396: step: 1076/531, loss: 6.65664701955393e-05 2023-01-23 03:23:13.138467: step: 1080/531, loss: -3.623962356869015e-06 2023-01-23 03:23:14.263914: step: 1084/531, loss: 0.005475044250488281 2023-01-23 03:23:15.367176: step: 1088/531, loss: 8.568763587391004e-05 2023-01-23 03:23:16.492300: step: 1092/531, loss: 0.006172323599457741 2023-01-23 03:23:17.611076: step: 1096/531, loss: 0.020245075225830078 2023-01-23 03:23:18.719643: step: 1100/531, loss: 0.0008183479658327997 2023-01-23 03:23:19.847645: step: 1104/531, loss: 0.03612127527594566 2023-01-23 03:23:20.990192: step: 1108/531, loss: 0.03242497146129608 2023-01-23 03:23:22.111436: step: 1112/531, loss: 0.0028014183044433594 2023-01-23 03:23:23.231698: step: 1116/531, loss: 0.001052951905876398 2023-01-23 03:23:24.370251: step: 1120/531, loss: 0.004205513279885054 2023-01-23 03:23:25.534607: step: 1124/531, loss: 0.01369323767721653 2023-01-23 03:23:26.663463: step: 1128/531, loss: 0.030561350286006927 2023-01-23 03:23:27.756536: step: 1132/531, loss: 0.009946252219378948 2023-01-23 03:23:28.885829: step: 1136/531, loss: 0.005287313833832741 2023-01-23 03:23:29.993846: step: 1140/531, loss: 0.002252006670460105 2023-01-23 03:23:31.100790: step: 1144/531, loss: 0.0007434844737872481 2023-01-23 03:23:32.203574: step: 1148/531, loss: 0.0003534912830218673 2023-01-23 03:23:33.317464: step: 1152/531, loss: 0.010028839111328125 2023-01-23 03:23:34.457075: step: 1156/531, loss: 0.003820228623226285 2023-01-23 03:23:35.593046: step: 1160/531, loss: 0.005708575714379549 2023-01-23 03:23:36.706694: step: 1164/531, loss: 0.0005479812389239669 2023-01-23 03:23:37.851147: step: 1168/531, loss: 0.0013969421852380037 2023-01-23 03:23:38.967552: step: 1172/531, loss: 0.002073001815006137 2023-01-23 03:23:40.056893: step: 1176/531, loss: 0.03377051651477814 2023-01-23 03:23:41.180782: step: 1180/531, loss: 0.1140756607055664 2023-01-23 03:23:42.314530: step: 1184/531, loss: 0.02752857282757759 2023-01-23 03:23:43.438372: step: 1188/531, loss: 0.03992490842938423 2023-01-23 03:23:44.558339: step: 1192/531, loss: 0.03566722944378853 2023-01-23 03:23:45.672941: step: 1196/531, loss: 0.08096656203269958 2023-01-23 03:23:46.806398: step: 1200/531, loss: 0.1128566786646843 2023-01-23 03:23:47.918383: step: 1204/531, loss: 0.00015373231144621968 2023-01-23 03:23:49.037348: step: 1208/531, loss: 0.0013044835068285465 2023-01-23 03:23:50.155870: step: 1212/531, loss: 0.002299833344295621 2023-01-23 03:23:51.282156: step: 1216/531, loss: 0.0016788482898846269 2023-01-23 03:23:52.403972: step: 1220/531, loss: 0.02349414862692356 2023-01-23 03:23:53.522245: step: 1224/531, loss: 0.002075767610222101 2023-01-23 03:23:54.645375: step: 1228/531, loss: 5.545616295421496e-05 2023-01-23 03:23:55.756188: step: 1232/531, loss: 0.00147924420889467 2023-01-23 03:23:56.853735: step: 1236/531, loss: 0.00024356841458939016 2023-01-23 03:23:57.982222: step: 1240/531, loss: 0.05311603471636772 2023-01-23 03:23:59.087369: step: 1244/531, loss: 0.0010235816007480025 2023-01-23 03:24:00.216940: step: 1248/531, loss: 0.0018507003551349044 2023-01-23 03:24:01.336151: step: 1252/531, loss: 0.00023155212693382055 2023-01-23 03:24:02.443993: step: 1256/531, loss: 0.0077453614212572575 2023-01-23 03:24:03.626280: step: 1260/531, loss: 0.00018708706193137914 2023-01-23 03:24:04.762275: step: 1264/531, loss: 0.002448654267936945 2023-01-23 03:24:05.900472: step: 1268/531, loss: 0.011070298962295055 2023-01-23 03:24:07.023779: step: 1272/531, loss: 0.00357818603515625 2023-01-23 03:24:08.158782: step: 1276/531, loss: 0.0006844520685262978 2023-01-23 03:24:09.274420: step: 1280/531, loss: 0.007412720005959272 2023-01-23 03:24:10.410279: step: 1284/531, loss: 0.018950367346405983 2023-01-23 03:24:11.546886: step: 1288/531, loss: 4.258155604475178e-05 2023-01-23 03:24:12.655608: step: 1292/531, loss: 0.02897520177066326 2023-01-23 03:24:13.779676: step: 1296/531, loss: 0.007234001066535711 2023-01-23 03:24:14.930257: step: 1300/531, loss: 0.09483995288610458 2023-01-23 03:24:16.066214: step: 1304/531, loss: 0.06255817413330078 2023-01-23 03:24:17.192272: step: 1308/531, loss: 0.00010280608694301918 2023-01-23 03:24:18.336551: step: 1312/531, loss: 0.004844760987907648 2023-01-23 03:24:19.444271: step: 1316/531, loss: 0.0015634536975994706 2023-01-23 03:24:20.571761: step: 1320/531, loss: 0.009601974859833717 2023-01-23 03:24:21.675580: step: 1324/531, loss: 0.005086708348244429 2023-01-23 03:24:22.856777: step: 1328/531, loss: 0.00031538010807707906 2023-01-23 03:24:23.989848: step: 1332/531, loss: 0.00020675659470725805 2023-01-23 03:24:25.078800: step: 1336/531, loss: 1.087188684323337e-05 2023-01-23 03:24:26.205807: step: 1340/531, loss: 0.005674839019775391 2023-01-23 03:24:27.345984: step: 1344/531, loss: 2.2983551389188506e-05 2023-01-23 03:24:28.468562: step: 1348/531, loss: 0.0052394866943359375 2023-01-23 03:24:29.585677: step: 1352/531, loss: 0.003440380096435547 2023-01-23 03:24:30.691971: step: 1356/531, loss: 0.10207786411046982 2023-01-23 03:24:31.837578: step: 1360/531, loss: 0.021319866180419922 2023-01-23 03:24:32.979428: step: 1364/531, loss: 0.0007732391823083162 2023-01-23 03:24:34.113765: step: 1368/531, loss: 0.009522723965346813 2023-01-23 03:24:35.246718: step: 1372/531, loss: 0.0037151335272938013 2023-01-23 03:24:36.392331: step: 1376/531, loss: 0.00010452270362293348 2023-01-23 03:24:37.508859: step: 1380/531, loss: 0.0198618546128273 2023-01-23 03:24:38.641299: step: 1384/531, loss: 0.0002891540643759072 2023-01-23 03:24:39.754075: step: 1388/531, loss: 0.02489948458969593 2023-01-23 03:24:40.848045: step: 1392/531, loss: 0.0014639378059655428 2023-01-23 03:24:41.954668: step: 1396/531, loss: 0.004904556553810835 2023-01-23 03:24:43.110749: step: 1400/531, loss: 0.004082775209099054 2023-01-23 03:24:44.241845: step: 1404/531, loss: 0.0001811981201171875 2023-01-23 03:24:45.350748: step: 1408/531, loss: 0.0020814896561205387 2023-01-23 03:24:46.481628: step: 1412/531, loss: 0.046598151326179504 2023-01-23 03:24:47.596914: step: 1416/531, loss: 3.5667417250806466e-05 2023-01-23 03:24:48.719868: step: 1420/531, loss: 0.04678173363208771 2023-01-23 03:24:49.818144: step: 1424/531, loss: 0.036341190338134766 2023-01-23 03:24:50.933714: step: 1428/531, loss: 0.013762093149125576 2023-01-23 03:24:52.079362: step: 1432/531, loss: 0.016721153631806374 2023-01-23 03:24:53.189661: step: 1436/531, loss: 0.013793754391372204 2023-01-23 03:24:54.312737: step: 1440/531, loss: 0.0004946708795614541 2023-01-23 03:24:55.430189: step: 1444/531, loss: 0.014902686700224876 2023-01-23 03:24:56.544414: step: 1448/531, loss: 0.004966163542121649 2023-01-23 03:24:57.667573: step: 1452/531, loss: 0.0028114321175962687 2023-01-23 03:24:58.818202: step: 1456/531, loss: 0.00023365020751953125 2023-01-23 03:24:59.994331: step: 1460/531, loss: 0.0013624191051349044 2023-01-23 03:25:01.131188: step: 1464/531, loss: 0.0048957825638353825 2023-01-23 03:25:02.293780: step: 1468/531, loss: 0.017310334369540215 2023-01-23 03:25:03.411844: step: 1472/531, loss: 0.00020084381685592234 2023-01-23 03:25:04.552097: step: 1476/531, loss: 2.202987889177166e-05 2023-01-23 03:25:05.673440: step: 1480/531, loss: 0.006887245457619429 2023-01-23 03:25:06.799174: step: 1484/531, loss: 0.0036351203452795744 2023-01-23 03:25:07.903394: step: 1488/531, loss: 0.0008919716347008944 2023-01-23 03:25:09.022945: step: 1492/531, loss: 1.4972686585679185e-05 2023-01-23 03:25:10.132116: step: 1496/531, loss: 0.00113677978515625 2023-01-23 03:25:11.241916: step: 1500/531, loss: 0.03809032589197159 2023-01-23 03:25:12.395109: step: 1504/531, loss: 5.149841672391631e-05 2023-01-23 03:25:13.523192: step: 1508/531, loss: 3.592967914300971e-05 2023-01-23 03:25:14.664444: step: 1512/531, loss: 0.00013780593872070312 2023-01-23 03:25:15.806380: step: 1516/531, loss: 0.0876707062125206 2023-01-23 03:25:16.948993: step: 1520/531, loss: 0.006524849217385054 2023-01-23 03:25:18.072596: step: 1524/531, loss: 0.0142395980656147 2023-01-23 03:25:19.219549: step: 1528/531, loss: 0.21990585327148438 2023-01-23 03:25:20.318752: step: 1532/531, loss: 0.05859050527215004 2023-01-23 03:25:21.427040: step: 1536/531, loss: 0.11898240447044373 2023-01-23 03:25:22.591878: step: 1540/531, loss: 0.023908233270049095 2023-01-23 03:25:23.741169: step: 1544/531, loss: 0.011330222710967064 2023-01-23 03:25:24.833004: step: 1548/531, loss: 0.0031680106185376644 2023-01-23 03:25:25.944152: step: 1552/531, loss: 0.0049527171067893505 2023-01-23 03:25:27.044110: step: 1556/531, loss: 0.013262081891298294 2023-01-23 03:25:28.203867: step: 1560/531, loss: 0.03152790293097496 2023-01-23 03:25:29.318877: step: 1564/531, loss: 0.005958748050034046 2023-01-23 03:25:30.452749: step: 1568/531, loss: 0.014830875210464 2023-01-23 03:25:31.567622: step: 1572/531, loss: 0.006167793646454811 2023-01-23 03:25:32.719147: step: 1576/531, loss: 0.006929779425263405 2023-01-23 03:25:33.833194: step: 1580/531, loss: 0.043218232691287994 2023-01-23 03:25:34.961237: step: 1584/531, loss: 0.04324054718017578 2023-01-23 03:25:36.097107: step: 1588/531, loss: 0.027252960950136185 2023-01-23 03:25:37.221651: step: 1592/531, loss: 0.004952907562255859 2023-01-23 03:25:38.334969: step: 1596/531, loss: 0.04693203419446945 2023-01-23 03:25:39.494902: step: 1600/531, loss: 0.0032629012130200863 2023-01-23 03:25:40.640964: step: 1604/531, loss: 0.004635715391486883 2023-01-23 03:25:41.769920: step: 1608/531, loss: 6.408691842807457e-05 2023-01-23 03:25:42.893197: step: 1612/531, loss: 0.0008629798539914191 2023-01-23 03:25:44.012881: step: 1616/531, loss: 0.0017566204769536853 2023-01-23 03:25:45.155165: step: 1620/531, loss: 0.026446105912327766 2023-01-23 03:25:46.295719: step: 1624/531, loss: 0.00035581589327193797 2023-01-23 03:25:47.418824: step: 1628/531, loss: 0.00023602842702530324 2023-01-23 03:25:48.519395: step: 1632/531, loss: 0.0008705139043740928 2023-01-23 03:25:49.660795: step: 1636/531, loss: 0.015746308490633965 2023-01-23 03:25:50.784926: step: 1640/531, loss: 0.0003379821719136089 2023-01-23 03:25:51.916875: step: 1644/531, loss: 0.0027925490867346525 2023-01-23 03:25:53.051184: step: 1648/531, loss: 0.022362733259797096 2023-01-23 03:25:54.194392: step: 1652/531, loss: 0.023240281268954277 2023-01-23 03:25:55.342486: step: 1656/531, loss: 0.00033321380033157766 2023-01-23 03:25:56.454192: step: 1660/531, loss: 4.062652442371473e-05 2023-01-23 03:25:57.600478: step: 1664/531, loss: 0.006613921839743853 2023-01-23 03:25:58.693491: step: 1668/531, loss: 0.0006512642139568925 2023-01-23 03:25:59.814248: step: 1672/531, loss: 0.0011952400673180819 2023-01-23 03:26:00.979024: step: 1676/531, loss: 0.0021736144553869963 2023-01-23 03:26:02.103806: step: 1680/531, loss: 0.0003326415899209678 2023-01-23 03:26:03.218565: step: 1684/531, loss: 0.0017589093185961246 2023-01-23 03:26:04.340015: step: 1688/531, loss: 0.022540735080838203 2023-01-23 03:26:05.440314: step: 1692/531, loss: 0.0006381988641805947 2023-01-23 03:26:06.559205: step: 1696/531, loss: 0.0010938644409179688 2023-01-23 03:26:07.693082: step: 1700/531, loss: 0.04744997248053551 2023-01-23 03:26:08.831398: step: 1704/531, loss: 0.006247234530746937 2023-01-23 03:26:09.972470: step: 1708/531, loss: 0.028505802154541016 2023-01-23 03:26:11.063270: step: 1712/531, loss: 0.004985189530998468 2023-01-23 03:26:12.199676: step: 1716/531, loss: 0.0009893417591229081 2023-01-23 03:26:13.321149: step: 1720/531, loss: 0.008517075330018997 2023-01-23 03:26:14.423275: step: 1724/531, loss: 0.010876084677875042 2023-01-23 03:26:15.544829: step: 1728/531, loss: 0.05873575061559677 2023-01-23 03:26:16.676563: step: 1732/531, loss: 0.0022992135491222143 2023-01-23 03:26:17.790662: step: 1736/531, loss: 0.013177204877138138 2023-01-23 03:26:18.931141: step: 1740/531, loss: 0.002798366593196988 2023-01-23 03:26:20.044533: step: 1744/531, loss: 0.002790260361507535 2023-01-23 03:26:21.166325: step: 1748/531, loss: 0.012932490557432175 2023-01-23 03:26:22.271506: step: 1752/531, loss: 0.012326211668550968 2023-01-23 03:26:23.386201: step: 1756/531, loss: 0.0008872032631188631 2023-01-23 03:26:24.535346: step: 1760/531, loss: 0.010002708993852139 2023-01-23 03:26:25.620648: step: 1764/531, loss: 0.0005664349300786853 2023-01-23 03:26:26.739291: step: 1768/531, loss: 0.003896522568538785 2023-01-23 03:26:27.845810: step: 1772/531, loss: 0.008837461471557617 2023-01-23 03:26:28.948744: step: 1776/531, loss: 0.01585855334997177 2023-01-23 03:26:30.064116: step: 1780/531, loss: 0.002283334732055664 2023-01-23 03:26:31.215124: step: 1784/531, loss: 0.03453655168414116 2023-01-23 03:26:32.357851: step: 1788/531, loss: 0.043459560722112656 2023-01-23 03:26:33.480109: step: 1792/531, loss: 0.033576298505067825 2023-01-23 03:26:34.603903: step: 1796/531, loss: 4.062652442371473e-05 2023-01-23 03:26:35.737605: step: 1800/531, loss: 0.009509564377367496 2023-01-23 03:26:36.822900: step: 1804/531, loss: 0.0009515762212686241 2023-01-23 03:26:37.956516: step: 1808/531, loss: 0.005295086186379194 2023-01-23 03:26:39.056372: step: 1812/531, loss: 0.10067968815565109 2023-01-23 03:26:40.162033: step: 1816/531, loss: 0.0003171921125613153 2023-01-23 03:26:41.255084: step: 1820/531, loss: 0.03629465028643608 2023-01-23 03:26:42.369121: step: 1824/531, loss: 0.0012722969986498356 2023-01-23 03:26:43.495633: step: 1828/531, loss: 0.013050603680312634 2023-01-23 03:26:44.626461: step: 1832/531, loss: 0.1384061872959137 2023-01-23 03:26:45.768863: step: 1836/531, loss: 0.14968930184841156 2023-01-23 03:26:46.895657: step: 1840/531, loss: 0.0395696684718132 2023-01-23 03:26:48.009772: step: 1844/531, loss: 2.6988982426701114e-05 2023-01-23 03:26:49.152146: step: 1848/531, loss: 0.4747430384159088 2023-01-23 03:26:50.263293: step: 1852/531, loss: 0.006366729736328125 2023-01-23 03:26:51.398347: step: 1856/531, loss: 7.24792471373803e-06 2023-01-23 03:26:52.514964: step: 1860/531, loss: 0.0019212722545489669 2023-01-23 03:26:53.632743: step: 1864/531, loss: 0.018688775599002838 2023-01-23 03:26:54.727408: step: 1868/531, loss: 6.561278860317543e-05 2023-01-23 03:26:55.838459: step: 1872/531, loss: 1.4591218132409267e-05 2023-01-23 03:26:56.941226: step: 1876/531, loss: 0.0025844573974609375 2023-01-23 03:26:58.047793: step: 1880/531, loss: 0.0009521484607830644 2023-01-23 03:26:59.188246: step: 1884/531, loss: 0.0005189895164221525 2023-01-23 03:27:00.291278: step: 1888/531, loss: 0.03042163886129856 2023-01-23 03:27:01.399516: step: 1892/531, loss: 0.0009130955440923572 2023-01-23 03:27:02.512654: step: 1896/531, loss: 0.03568878397345543 2023-01-23 03:27:03.624264: step: 1900/531, loss: 0.0004989624139852822 2023-01-23 03:27:04.753277: step: 1904/531, loss: 0.0001180648832814768 2023-01-23 03:27:05.874122: step: 1908/531, loss: 0.000634050345979631 2023-01-23 03:27:07.022659: step: 1912/531, loss: 0.002976703690364957 2023-01-23 03:27:08.150079: step: 1916/531, loss: 0.0048653604462742805 2023-01-23 03:27:09.269024: step: 1920/531, loss: 0.00113849644549191 2023-01-23 03:27:10.390088: step: 1924/531, loss: 0.00011630058725131676 2023-01-23 03:27:11.502167: step: 1928/531, loss: 0.003735160920768976 2023-01-23 03:27:12.665230: step: 1932/531, loss: 0.00038471221341751516 2023-01-23 03:27:13.783310: step: 1936/531, loss: 0.01751251146197319 2023-01-23 03:27:14.898596: step: 1940/531, loss: 0.0010505676036700606 2023-01-23 03:27:16.044501: step: 1944/531, loss: 0.0010538101196289062 2023-01-23 03:27:17.165814: step: 1948/531, loss: 0.09430637210607529 2023-01-23 03:27:18.287370: step: 1952/531, loss: 0.006407737731933594 2023-01-23 03:27:19.388540: step: 1956/531, loss: 0.015543365851044655 2023-01-23 03:27:20.521595: step: 1960/531, loss: 0.0006081700557842851 2023-01-23 03:27:21.633832: step: 1964/531, loss: 0.0005928039317950606 2023-01-23 03:27:22.759386: step: 1968/531, loss: 0.03567361831665039 2023-01-23 03:27:23.871800: step: 1972/531, loss: 0.03439774364233017 2023-01-23 03:27:24.995047: step: 1976/531, loss: 0.00010366439528297633 2023-01-23 03:27:26.092822: step: 1980/531, loss: 0.015834618359804153 2023-01-23 03:27:27.232283: step: 1984/531, loss: 0.002125310944393277 2023-01-23 03:27:28.381348: step: 1988/531, loss: 4.9591064453125e-05 2023-01-23 03:27:29.534927: step: 1992/531, loss: 0.0073604583740234375 2023-01-23 03:27:30.666423: step: 1996/531, loss: 0.004454994108527899 2023-01-23 03:27:31.815685: step: 2000/531, loss: 0.024219512939453125 2023-01-23 03:27:32.953793: step: 2004/531, loss: 0.0023954391945153475 2023-01-23 03:27:34.101192: step: 2008/531, loss: 0.02621944062411785 2023-01-23 03:27:35.218849: step: 2012/531, loss: 0.006780100055038929 2023-01-23 03:27:36.335012: step: 2016/531, loss: 0.1432262510061264 2023-01-23 03:27:37.485699: step: 2020/531, loss: 0.04514102637767792 2023-01-23 03:27:38.618073: step: 2024/531, loss: 0.02908172644674778 2023-01-23 03:27:39.709947: step: 2028/531, loss: 2.956390380859375e-05 2023-01-23 03:27:40.809305: step: 2032/531, loss: 0.00086126325186342 2023-01-23 03:27:41.928486: step: 2036/531, loss: 0.3446231782436371 2023-01-23 03:27:43.063027: step: 2040/531, loss: 4.768380676978268e-07 2023-01-23 03:27:44.179806: step: 2044/531, loss: 0.0012499808799475431 2023-01-23 03:27:45.301021: step: 2048/531, loss: 1.2969970157428179e-05 2023-01-23 03:27:46.424713: step: 2052/531, loss: 0.0001203536958200857 2023-01-23 03:27:47.552876: step: 2056/531, loss: 0.0004585743008647114 2023-01-23 03:27:48.642080: step: 2060/531, loss: 0.005822944920510054 2023-01-23 03:27:49.763785: step: 2064/531, loss: 0.00727424630895257 2023-01-23 03:27:50.879114: step: 2068/531, loss: 0.00022459030151367188 2023-01-23 03:27:51.987680: step: 2072/531, loss: 0.015064621344208717 2023-01-23 03:27:53.088935: step: 2076/531, loss: 0.025440121069550514 2023-01-23 03:27:54.212097: step: 2080/531, loss: 0.002681923098862171 2023-01-23 03:27:55.326269: step: 2084/531, loss: 0.007973670959472656 2023-01-23 03:27:56.449572: step: 2088/531, loss: 0.0054111480712890625 2023-01-23 03:27:57.576331: step: 2092/531, loss: 0.03980541229248047 2023-01-23 03:27:58.727336: step: 2096/531, loss: 0.02603158913552761 2023-01-23 03:27:59.869635: step: 2100/531, loss: 0.0002674102724995464 2023-01-23 03:28:01.017431: step: 2104/531, loss: 0.0009061813470907509 2023-01-23 03:28:02.117413: step: 2108/531, loss: 0.00402336148545146 2023-01-23 03:28:03.219591: step: 2112/531, loss: 0.0015176773304119706 2023-01-23 03:28:04.376847: step: 2116/531, loss: 0.00011763573274947703 2023-01-23 03:28:05.469535: step: 2120/531, loss: 0.00029010773869231343 2023-01-23 03:28:06.600652: step: 2124/531, loss: 0.5303184390068054 ================================================== Loss: 0.022 -------------------- Dev: {'event': {'p': 0.5696324951644101, 'r': 0.7842876165113183, 'f1': 0.6599439775910365}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Test: {'event': {'p': 0.6037906137184116, 'r': 0.7978533094812165, 'f1': 0.6873876188029797}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Chinese: {'event': {'p': 0.5333333333333333, 'r': 0.8888888888888888, 'f1': 0.6666666666666667}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Korean: {'event': {'p': 0.631578947368421, 'r': 0.5714285714285714, 'f1': 0.6}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Russian: {'event': {'p': 0.3488372093023256, 'r': 0.4166666666666667, 'f1': 0.379746835443038}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 23 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:28:46.815037: step: 4/531, loss: 0.014155960641801357 2023-01-23 03:28:47.926513: step: 8/531, loss: 0.018351102247834206 2023-01-23 03:28:49.067340: step: 12/531, loss: 0.00032978056697174907 2023-01-23 03:28:50.182677: step: 16/531, loss: 1.697540210443549e-05 2023-01-23 03:28:51.324636: step: 20/531, loss: 0.0007347107166424394 2023-01-23 03:28:52.461970: step: 24/531, loss: 0.003898429684340954 2023-01-23 03:28:53.588634: step: 28/531, loss: 0.004001712426543236 2023-01-23 03:28:54.702870: step: 32/531, loss: 0.00012407303438521922 2023-01-23 03:28:55.833696: step: 36/531, loss: 0.0002633094845805317 2023-01-23 03:28:56.972527: step: 40/531, loss: 0.01026611402630806 2023-01-23 03:28:58.079425: step: 44/531, loss: 0.009000015445053577 2023-01-23 03:28:59.213413: step: 48/531, loss: 0.001984548754990101 2023-01-23 03:29:00.341822: step: 52/531, loss: 0.009127807803452015 2023-01-23 03:29:01.476843: step: 56/531, loss: 0.01872711256146431 2023-01-23 03:29:02.605768: step: 60/531, loss: 0.005709266755729914 2023-01-23 03:29:03.736116: step: 64/531, loss: 0.0011022568214684725 2023-01-23 03:29:04.861527: step: 68/531, loss: 0.013421153649687767 2023-01-23 03:29:05.981898: step: 72/531, loss: 0.01851673237979412 2023-01-23 03:29:07.118520: step: 76/531, loss: 1.773834264895413e-05 2023-01-23 03:29:08.225095: step: 80/531, loss: 0.0001794815034372732 2023-01-23 03:29:09.335623: step: 84/531, loss: 0.0006766319274902344 2023-01-23 03:29:10.468199: step: 88/531, loss: 0.019933607429265976 2023-01-23 03:29:11.588362: step: 92/531, loss: 0.0012557983864098787 2023-01-23 03:29:12.741205: step: 96/531, loss: 0.0032648087944835424 2023-01-23 03:29:13.860107: step: 100/531, loss: 0.0245832446962595 2023-01-23 03:29:14.970976: step: 104/531, loss: 0.1237730011343956 2023-01-23 03:29:16.113712: step: 108/531, loss: 0.14036230742931366 2023-01-23 03:29:17.272137: step: 112/531, loss: 0.0012830734485760331 2023-01-23 03:29:18.416081: step: 116/531, loss: 0.007854461669921875 2023-01-23 03:29:19.554570: step: 120/531, loss: 8.525848534191027e-05 2023-01-23 03:29:20.712465: step: 124/531, loss: 0.08302974700927734 2023-01-23 03:29:21.848575: step: 128/531, loss: 0.013432217761874199 2023-01-23 03:29:23.013943: step: 132/531, loss: 0.0020078658126294613 2023-01-23 03:29:24.150520: step: 136/531, loss: 0.019022800028324127 2023-01-23 03:29:25.292907: step: 140/531, loss: 0.011188602074980736 2023-01-23 03:29:26.417463: step: 144/531, loss: 0.03532681614160538 2023-01-23 03:29:27.526274: step: 148/531, loss: 1.468658410885837e-05 2023-01-23 03:29:28.633265: step: 152/531, loss: 0.08642387390136719 2023-01-23 03:29:29.753199: step: 156/531, loss: 0.0011396408081054688 2023-01-23 03:29:30.876162: step: 160/531, loss: 0.01595134660601616 2023-01-23 03:29:31.990808: step: 164/531, loss: 0.005595779977738857 2023-01-23 03:29:33.110552: step: 168/531, loss: 0.0075889588333666325 2023-01-23 03:29:34.222539: step: 172/531, loss: 0.0066123963333666325 2023-01-23 03:29:35.355637: step: 176/531, loss: 0.212663933634758 2023-01-23 03:29:36.500490: step: 180/531, loss: 0.02056121826171875 2023-01-23 03:29:37.617509: step: 184/531, loss: 0.0004181861877441406 2023-01-23 03:29:38.726063: step: 188/531, loss: 0.0013456344604492188 2023-01-23 03:29:39.849845: step: 192/531, loss: 0.0006208419799804688 2023-01-23 03:29:40.950576: step: 196/531, loss: 4.951202392578125 2023-01-23 03:29:42.096475: step: 200/531, loss: 0.018161773681640625 2023-01-23 03:29:43.227242: step: 204/531, loss: 0.0003700733359437436 2023-01-23 03:29:44.362701: step: 208/531, loss: 0.003593635745346546 2023-01-23 03:29:45.515128: step: 212/531, loss: 0.000194549560546875 2023-01-23 03:29:46.643206: step: 216/531, loss: 0.0006067276117391884 2023-01-23 03:29:47.773713: step: 220/531, loss: 0.03721323236823082 2023-01-23 03:29:48.899440: step: 224/531, loss: 0.03303928300738335 2023-01-23 03:29:50.036211: step: 228/531, loss: 0.004172325599938631 2023-01-23 03:29:51.140828: step: 232/531, loss: 0.006166363134980202 2023-01-23 03:29:52.260283: step: 236/531, loss: 5.531310307560489e-06 2023-01-23 03:29:53.383481: step: 240/531, loss: 0.00019979476928710938 2023-01-23 03:29:54.520936: step: 244/531, loss: 8.292197890114039e-05 2023-01-23 03:29:55.706769: step: 248/531, loss: 0.013271713629364967 2023-01-23 03:29:56.824138: step: 252/531, loss: 6.28471389063634e-05 2023-01-23 03:29:57.958209: step: 256/531, loss: 0.005969524849206209 2023-01-23 03:29:59.098585: step: 260/531, loss: 0.015056610107421875 2023-01-23 03:30:00.228942: step: 264/531, loss: 0.0005757331382483244 2023-01-23 03:30:01.388472: step: 268/531, loss: 0.00018186570378020406 2023-01-23 03:30:02.489847: step: 272/531, loss: 0.00077228550799191 2023-01-23 03:30:03.618449: step: 276/531, loss: 0.0010875702137127519 2023-01-23 03:30:04.802873: step: 280/531, loss: 5.3215029765851796e-05 2023-01-23 03:30:05.971608: step: 284/531, loss: 0.01006469689309597 2023-01-23 03:30:07.094573: step: 288/531, loss: 0.048592664301395416 2023-01-23 03:30:08.218499: step: 292/531, loss: 0.00031981468782760203 2023-01-23 03:30:09.329352: step: 296/531, loss: -9.34600848268019e-06 2023-01-23 03:30:10.471769: step: 300/531, loss: 5.426407005870715e-05 2023-01-23 03:30:11.592132: step: 304/531, loss: 0.0022500993218272924 2023-01-23 03:30:12.727804: step: 308/531, loss: 0.0005060672992840409 2023-01-23 03:30:13.879403: step: 312/531, loss: 0.0074062347412109375 2023-01-23 03:30:15.023896: step: 316/531, loss: 0.03669862449169159 2023-01-23 03:30:16.122846: step: 320/531, loss: 0.0012549400562420487 2023-01-23 03:30:17.242485: step: 324/531, loss: 0.0030506134498864412 2023-01-23 03:30:18.356349: step: 328/531, loss: 0.0025323869194835424 2023-01-23 03:30:19.481921: step: 332/531, loss: 0.00105448963586241 2023-01-23 03:30:20.622627: step: 336/531, loss: 0.007851219736039639 2023-01-23 03:30:21.727306: step: 340/531, loss: 0.0020708085503429174 2023-01-23 03:30:22.812597: step: 344/531, loss: 0.0003681182861328125 2023-01-23 03:30:23.902154: step: 348/531, loss: 0.0027888298500329256 2023-01-23 03:30:25.033106: step: 352/531, loss: 5.2070619858568534e-05 2023-01-23 03:30:26.177926: step: 356/531, loss: 0.0018109322991222143 2023-01-23 03:30:27.309672: step: 360/531, loss: 8.678436643094756e-06 2023-01-23 03:30:28.410992: step: 364/531, loss: 0.08064880967140198 2023-01-23 03:30:29.529703: step: 368/531, loss: 0.02924184873700142 2023-01-23 03:30:30.654850: step: 372/531, loss: 0.0004991531604900956 2023-01-23 03:30:31.783093: step: 376/531, loss: 0.004985904786735773 2023-01-23 03:30:32.902259: step: 380/531, loss: 0.004901027772575617 2023-01-23 03:30:33.986414: step: 384/531, loss: 0.0015162467025220394 2023-01-23 03:30:35.122867: step: 388/531, loss: 0.0005450248718261719 2023-01-23 03:30:36.208593: step: 392/531, loss: 0.00048251153202727437 2023-01-23 03:30:37.329606: step: 396/531, loss: 0.000983762787654996 2023-01-23 03:30:38.483073: step: 400/531, loss: 0.00043621062650345266 2023-01-23 03:30:39.602000: step: 404/531, loss: 0.00012392997450660914 2023-01-23 03:30:40.716214: step: 408/531, loss: 0.03224897384643555 2023-01-23 03:30:41.817173: step: 412/531, loss: 0.0037012577522546053 2023-01-23 03:30:42.985237: step: 416/531, loss: 0.05334148555994034 2023-01-23 03:30:44.113485: step: 420/531, loss: 0.0002004623383982107 2023-01-23 03:30:45.224885: step: 424/531, loss: 0.038133811205625534 2023-01-23 03:30:46.352551: step: 428/531, loss: 0.009861373342573643 2023-01-23 03:30:47.473883: step: 432/531, loss: 0.004862594418227673 2023-01-23 03:30:48.589246: step: 436/531, loss: 0.0012729645241051912 2023-01-23 03:30:49.690172: step: 440/531, loss: 4.901886131847277e-05 2023-01-23 03:30:50.830598: step: 444/531, loss: 0.00227947230450809 2023-01-23 03:30:51.948032: step: 448/531, loss: 0.0020626068580895662 2023-01-23 03:30:53.064016: step: 452/531, loss: 0.0050210957415401936 2023-01-23 03:30:54.168299: step: 456/531, loss: 0.00011596680269576609 2023-01-23 03:30:55.312868: step: 460/531, loss: 0.0012561798794195056 2023-01-23 03:30:56.425969: step: 464/531, loss: 1.9073486328125e-05 2023-01-23 03:30:57.568828: step: 468/531, loss: 0.2448255568742752 2023-01-23 03:30:58.693244: step: 472/531, loss: 0.0027361870743334293 2023-01-23 03:30:59.802369: step: 476/531, loss: 0.0004818916495423764 2023-01-23 03:31:00.910786: step: 480/531, loss: 0.03991289436817169 2023-01-23 03:31:02.037200: step: 484/531, loss: 0.011562966741621494 2023-01-23 03:31:03.192215: step: 488/531, loss: 0.015391826629638672 2023-01-23 03:31:04.293430: step: 492/531, loss: 0.02075357548892498 2023-01-23 03:31:05.431059: step: 496/531, loss: 0.23147784173488617 2023-01-23 03:31:06.596875: step: 500/531, loss: 0.0007053375011309981 2023-01-23 03:31:07.699721: step: 504/531, loss: 0.08710562437772751 2023-01-23 03:31:08.832558: step: 508/531, loss: 0.00012264252291060984 2023-01-23 03:31:09.941514: step: 512/531, loss: 0.00115547189489007 2023-01-23 03:31:11.044830: step: 516/531, loss: 0.006896304897964001 2023-01-23 03:31:12.172322: step: 520/531, loss: 0.02773761935532093 2023-01-23 03:31:13.311642: step: 524/531, loss: 0.001696634222753346 2023-01-23 03:31:14.435864: step: 528/531, loss: 0.00768699636682868 2023-01-23 03:31:15.582986: step: 532/531, loss: 0.05229339748620987 2023-01-23 03:31:16.708816: step: 536/531, loss: 0.05944023281335831 2023-01-23 03:31:17.875705: step: 540/531, loss: 0.0003666877746582031 2023-01-23 03:31:19.003000: step: 544/531, loss: 0.0038587572053074837 2023-01-23 03:31:20.106456: step: 548/531, loss: 0.0013689040206372738 2023-01-23 03:31:21.210813: step: 552/531, loss: 2.517700158932712e-05 2023-01-23 03:31:22.327463: step: 556/531, loss: 0.018257809802889824 2023-01-23 03:31:23.442074: step: 560/531, loss: 3.471374657237902e-05 2023-01-23 03:31:24.564471: step: 564/531, loss: 0.0005690574762411416 2023-01-23 03:31:25.707119: step: 568/531, loss: 0.6041606664657593 2023-01-23 03:31:26.843725: step: 572/531, loss: 0.05984220653772354 2023-01-23 03:31:27.978876: step: 576/531, loss: 0.005154419224709272 2023-01-23 03:31:29.111460: step: 580/531, loss: 0.001689910888671875 2023-01-23 03:31:30.256762: step: 584/531, loss: 0.016596509143710136 2023-01-23 03:31:31.403428: step: 588/531, loss: 0.0020378590561449528 2023-01-23 03:31:32.540964: step: 592/531, loss: 0.10165248066186905 2023-01-23 03:31:33.645721: step: 596/531, loss: 0.002917909761890769 2023-01-23 03:31:34.772587: step: 600/531, loss: 0.0006826877943240106 2023-01-23 03:31:35.884715: step: 604/531, loss: 2.4557113647460938e-05 2023-01-23 03:31:37.012842: step: 608/531, loss: 6.86645489622606e-06 2023-01-23 03:31:38.127288: step: 612/531, loss: 8.869171324477065e-06 2023-01-23 03:31:39.276853: step: 616/531, loss: 0.0009454727405682206 2023-01-23 03:31:40.412817: step: 620/531, loss: 0.0007668494945392013 2023-01-23 03:31:41.546075: step: 624/531, loss: 0.00785064697265625 2023-01-23 03:31:42.709171: step: 628/531, loss: 0.011459732428193092 2023-01-23 03:31:43.836627: step: 632/531, loss: 0.0004564285627566278 2023-01-23 03:31:44.955206: step: 636/531, loss: 0.0001750946103129536 2023-01-23 03:31:46.089648: step: 640/531, loss: 7.734298560535535e-05 2023-01-23 03:31:47.205547: step: 644/531, loss: 0.0006910323863849044 2023-01-23 03:31:48.393126: step: 648/531, loss: 0.008035564795136452 2023-01-23 03:31:49.522667: step: 652/531, loss: 0.0004639625549316406 2023-01-23 03:31:50.676746: step: 656/531, loss: 0.0037055970169603825 2023-01-23 03:31:51.786524: step: 660/531, loss: 0.019382189959287643 2023-01-23 03:31:52.931719: step: 664/531, loss: 0.001360177993774414 2023-01-23 03:31:54.044791: step: 668/531, loss: 0.06304216384887695 2023-01-23 03:31:55.193811: step: 672/531, loss: 0.06542596966028214 2023-01-23 03:31:56.336458: step: 676/531, loss: 0.00964651070535183 2023-01-23 03:31:57.465099: step: 680/531, loss: 0.0011067390441894531 2023-01-23 03:31:58.586523: step: 684/531, loss: 0.00021018982806708664 2023-01-23 03:31:59.706638: step: 688/531, loss: 0.0006690978771075606 2023-01-23 03:32:00.822176: step: 692/531, loss: 0.011448478326201439 2023-01-23 03:32:01.917279: step: 696/531, loss: 0.00011162758164573461 2023-01-23 03:32:03.026005: step: 700/531, loss: 0.009367180056869984 2023-01-23 03:32:04.163543: step: 704/531, loss: 0.011383724398911 2023-01-23 03:32:05.309853: step: 708/531, loss: 0.003972912207245827 2023-01-23 03:32:06.438654: step: 712/531, loss: 0.00030007364694029093 2023-01-23 03:32:07.555234: step: 716/531, loss: 0.00017089843458961695 2023-01-23 03:32:08.693206: step: 720/531, loss: 0.00745391845703125 2023-01-23 03:32:09.809352: step: 724/531, loss: 0.016063308343291283 2023-01-23 03:32:10.950372: step: 728/531, loss: 0.0016088485717773438 2023-01-23 03:32:12.117389: step: 732/531, loss: 0.03598365560173988 2023-01-23 03:32:13.247137: step: 736/531, loss: 0.010575485415756702 2023-01-23 03:32:14.388748: step: 740/531, loss: 2.5558472771081142e-05 2023-01-23 03:32:15.516680: step: 744/531, loss: 0.05165262147784233 2023-01-23 03:32:16.639234: step: 748/531, loss: 1.792907642084174e-05 2023-01-23 03:32:17.759900: step: 752/531, loss: 0.0004368305380921811 2023-01-23 03:32:18.977093: step: 756/531, loss: 0.0033084868919104338 2023-01-23 03:32:20.106188: step: 760/531, loss: 0.028829099610447884 2023-01-23 03:32:21.243673: step: 764/531, loss: 7.567405555164441e-05 2023-01-23 03:32:22.371150: step: 768/531, loss: 0.00034708977909758687 2023-01-23 03:32:23.549491: step: 772/531, loss: 0.0001330375816905871 2023-01-23 03:32:24.676253: step: 776/531, loss: -5.662441253662109e-06 2023-01-23 03:32:25.807733: step: 780/531, loss: 0.0021949768997728825 2023-01-23 03:32:26.919350: step: 784/531, loss: 0.017023611813783646 2023-01-23 03:32:28.055905: step: 788/531, loss: 0.00017278196173720062 2023-01-23 03:32:29.161994: step: 792/531, loss: 0.00433616666123271 2023-01-23 03:32:30.261312: step: 796/531, loss: 0.023058509454131126 2023-01-23 03:32:31.402983: step: 800/531, loss: 4.8351288569392636e-05 2023-01-23 03:32:32.531817: step: 804/531, loss: 0.004530906677246094 2023-01-23 03:32:33.639892: step: 808/531, loss: 0.0002655029238667339 2023-01-23 03:32:34.779114: step: 812/531, loss: 0.005262470338493586 2023-01-23 03:32:35.924936: step: 816/531, loss: 0.0008194923866540194 2023-01-23 03:32:37.076307: step: 820/531, loss: 7.915496826171875e-05 2023-01-23 03:32:38.195480: step: 824/531, loss: 0.0032746316865086555 2023-01-23 03:32:39.298159: step: 828/531, loss: 0.002218961948528886 2023-01-23 03:32:40.420621: step: 832/531, loss: 0.0012674331665039062 2023-01-23 03:32:41.521966: step: 836/531, loss: -2.956390289909905e-06 2023-01-23 03:32:42.687935: step: 840/531, loss: 0.0005959570407867432 2023-01-23 03:32:43.817357: step: 844/531, loss: 4.920959327137098e-05 2023-01-23 03:32:44.944063: step: 848/531, loss: 0.003874874208122492 2023-01-23 03:32:46.071583: step: 852/531, loss: 0.03710046038031578 2023-01-23 03:32:47.184420: step: 856/531, loss: 0.40577390789985657 2023-01-23 03:32:48.331664: step: 860/531, loss: 0.009037780575454235 2023-01-23 03:32:49.450438: step: 864/531, loss: 0.0011695862049236894 2023-01-23 03:32:50.558344: step: 868/531, loss: 0.0027116776909679174 2023-01-23 03:32:51.697017: step: 872/531, loss: 0.008507300168275833 2023-01-23 03:32:52.780987: step: 876/531, loss: 0.0006824493175372481 2023-01-23 03:32:53.883221: step: 880/531, loss: 5.216598219703883e-05 2023-01-23 03:32:55.011201: step: 884/531, loss: 0.002790260361507535 2023-01-23 03:32:56.141705: step: 888/531, loss: 0.001773834228515625 2023-01-23 03:32:57.259243: step: 892/531, loss: 0.0009116172441281378 2023-01-23 03:32:58.373320: step: 896/531, loss: 0.006837082095444202 2023-01-23 03:32:59.473109: step: 900/531, loss: 6.69479341013357e-05 2023-01-23 03:33:00.577274: step: 904/531, loss: 0.009524155408143997 2023-01-23 03:33:01.706891: step: 908/531, loss: 0.0004962921375408769 2023-01-23 03:33:02.857909: step: 912/531, loss: 0.0024506093468517065 2023-01-23 03:33:04.007176: step: 916/531, loss: 0.010242462158203125 2023-01-23 03:33:05.126179: step: 920/531, loss: 0.0008728028042241931 2023-01-23 03:33:06.251135: step: 924/531, loss: 0.04297580569982529 2023-01-23 03:33:07.363394: step: 928/531, loss: 0.004692649934440851 2023-01-23 03:33:08.494238: step: 932/531, loss: 0.0034207822754979134 2023-01-23 03:33:09.606792: step: 936/531, loss: 0.001046848250553012 2023-01-23 03:33:10.739653: step: 940/531, loss: 0.0026942254044115543 2023-01-23 03:33:11.859236: step: 944/531, loss: 2.09808349609375e-05 2023-01-23 03:33:12.982128: step: 948/531, loss: 0.005017089657485485 2023-01-23 03:33:14.101534: step: 952/531, loss: 0.00486679095774889 2023-01-23 03:33:15.222707: step: 956/531, loss: 0.05214891582727432 2023-01-23 03:33:16.360881: step: 960/531, loss: 0.0012584686046466231 2023-01-23 03:33:17.477274: step: 964/531, loss: 1.8596649169921875e-05 2023-01-23 03:33:18.580048: step: 968/531, loss: 0.0005046844598837197 2023-01-23 03:33:19.702169: step: 972/531, loss: 1.3351440202313825e-06 2023-01-23 03:33:20.796434: step: 976/531, loss: 0.0004749298095703125 2023-01-23 03:33:21.907536: step: 980/531, loss: 0.020760728046298027 2023-01-23 03:33:23.019501: step: 984/531, loss: 0.0002468109014444053 2023-01-23 03:33:24.116301: step: 988/531, loss: 0.006173896603286266 2023-01-23 03:33:25.217319: step: 992/531, loss: 0.020803451538085938 2023-01-23 03:33:26.330003: step: 996/531, loss: 0.00044536590576171875 2023-01-23 03:33:27.467478: step: 1000/531, loss: 0.024645615369081497 2023-01-23 03:33:28.570539: step: 1004/531, loss: 0.00020122528076171875 2023-01-23 03:33:29.669862: step: 1008/531, loss: 6.10351571594947e-06 2023-01-23 03:33:30.811494: step: 1012/531, loss: 0.02942485921084881 2023-01-23 03:33:31.941335: step: 1016/531, loss: 0.0002265930233988911 2023-01-23 03:33:33.056128: step: 1020/531, loss: 0.0004514694446697831 2023-01-23 03:33:34.196522: step: 1024/531, loss: 0.706406831741333 2023-01-23 03:33:35.332414: step: 1028/531, loss: 0.0001472473086323589 2023-01-23 03:33:36.458448: step: 1032/531, loss: 0.04700794443488121 2023-01-23 03:33:37.590413: step: 1036/531, loss: 0.0004622936248779297 2023-01-23 03:33:38.702701: step: 1040/531, loss: 0.0005142212030477822 2023-01-23 03:33:39.829808: step: 1044/531, loss: 4.942416853737086e-05 2023-01-23 03:33:40.967177: step: 1048/531, loss: 0.0005645751953125 2023-01-23 03:33:42.127533: step: 1052/531, loss: 0.011251830495893955 2023-01-23 03:33:43.284334: step: 1056/531, loss: 0.00916380900889635 2023-01-23 03:33:44.388526: step: 1060/531, loss: 0.013793659396469593 2023-01-23 03:33:45.503679: step: 1064/531, loss: 0.04953594505786896 2023-01-23 03:33:46.607323: step: 1068/531, loss: 0.0032609940972179174 2023-01-23 03:33:47.727534: step: 1072/531, loss: 0.05135021358728409 2023-01-23 03:33:48.840572: step: 1076/531, loss: 0.024227142333984375 2023-01-23 03:33:49.991786: step: 1080/531, loss: 0.05784149095416069 2023-01-23 03:33:51.411241: step: 1084/531, loss: 0.049965668469667435 2023-01-23 03:33:52.527291: step: 1088/531, loss: 0.00086212158203125 2023-01-23 03:33:53.637908: step: 1092/531, loss: 0.0021753313485533 2023-01-23 03:33:54.776950: step: 1096/531, loss: -6.675720669591101e-07 2023-01-23 03:33:55.886822: step: 1100/531, loss: 0.00010528564598644152 2023-01-23 03:33:57.013977: step: 1104/531, loss: 0.00014543533325195312 2023-01-23 03:33:58.129899: step: 1108/531, loss: 0.0014732361305505037 2023-01-23 03:33:59.215983: step: 1112/531, loss: 5.435943421616685e-06 2023-01-23 03:34:00.324618: step: 1116/531, loss: 0.0009437561384402215 2023-01-23 03:34:01.440553: step: 1120/531, loss: 1.2111663636460435e-05 2023-01-23 03:34:02.612841: step: 1124/531, loss: 0.011473273858428001 2023-01-23 03:34:03.756165: step: 1128/531, loss: 0.004797745030373335 2023-01-23 03:34:04.856673: step: 1132/531, loss: 6.341934204101562e-05 2023-01-23 03:34:05.959198: step: 1136/531, loss: 0.006159878335893154 2023-01-23 03:34:07.069810: step: 1140/531, loss: 0.0014754296280443668 2023-01-23 03:34:08.159885: step: 1144/531, loss: 0.06059751659631729 2023-01-23 03:34:09.267583: step: 1148/531, loss: 0.0008047700393944979 2023-01-23 03:34:10.376021: step: 1152/531, loss: 0.0003410816425457597 2023-01-23 03:34:11.485830: step: 1156/531, loss: 0.022004509344697 2023-01-23 03:34:12.692886: step: 1160/531, loss: 0.001546669052913785 2023-01-23 03:34:13.789069: step: 1164/531, loss: 0.0003765106375794858 2023-01-23 03:34:14.906305: step: 1168/531, loss: 0.0021356584038585424 2023-01-23 03:34:16.047142: step: 1172/531, loss: 0.0011013986077159643 2023-01-23 03:34:17.184634: step: 1176/531, loss: 0.003407096955925226 2023-01-23 03:34:18.303174: step: 1180/531, loss: 9.16481003514491e-05 2023-01-23 03:34:19.441501: step: 1184/531, loss: 0.0174116138368845 2023-01-23 03:34:20.575250: step: 1188/531, loss: 0.01302433107048273 2023-01-23 03:34:21.696365: step: 1192/531, loss: -2.384185791015625e-06 2023-01-23 03:34:22.808662: step: 1196/531, loss: 0.020924091339111328 2023-01-23 03:34:23.942634: step: 1200/531, loss: 0.0019758224952965975 2023-01-23 03:34:25.068082: step: 1204/531, loss: 0.011371231637895107 2023-01-23 03:34:26.192048: step: 1208/531, loss: 0.0015244930982589722 2023-01-23 03:34:27.320919: step: 1212/531, loss: 0.00042953493539243937 2023-01-23 03:34:28.425879: step: 1216/531, loss: 1.7261505490751006e-05 2023-01-23 03:34:29.567916: step: 1220/531, loss: 1.8119811784345075e-06 2023-01-23 03:34:30.678826: step: 1224/531, loss: 0.03948669508099556 2023-01-23 03:34:31.813657: step: 1228/531, loss: 0.0005420685047283769 2023-01-23 03:34:32.914128: step: 1232/531, loss: 0.0049370769411325455 2023-01-23 03:34:34.024233: step: 1236/531, loss: 1.926422191900201e-05 2023-01-23 03:34:35.153670: step: 1240/531, loss: 0.003025245852768421 2023-01-23 03:34:36.294909: step: 1244/531, loss: 0.007724761962890625 2023-01-23 03:34:37.439525: step: 1248/531, loss: 0.004211998078972101 2023-01-23 03:34:38.564001: step: 1252/531, loss: 0.00016860962205100805 2023-01-23 03:34:39.701909: step: 1256/531, loss: 0.002432918641716242 2023-01-23 03:34:40.806600: step: 1260/531, loss: 0.0008771896245889366 2023-01-23 03:34:41.914372: step: 1264/531, loss: 0.015062427148222923 2023-01-23 03:34:43.028041: step: 1268/531, loss: 0.00035834312438964844 2023-01-23 03:34:44.139651: step: 1272/531, loss: 0.0025428771041333675 2023-01-23 03:34:45.264200: step: 1276/531, loss: 0.00023069381131790578 2023-01-23 03:34:46.397477: step: 1280/531, loss: 0.0004006385861430317 2023-01-23 03:34:47.550142: step: 1284/531, loss: 0.0011306763626635075 2023-01-23 03:34:48.667039: step: 1288/531, loss: 0.0016908645629882812 2023-01-23 03:34:49.797268: step: 1292/531, loss: 0.0010279655689373612 2023-01-23 03:34:50.942689: step: 1296/531, loss: 3.9768219721736386e-05 2023-01-23 03:34:52.063920: step: 1300/531, loss: 0.003213882679119706 2023-01-23 03:34:53.211478: step: 1304/531, loss: 0.005150032229721546 2023-01-23 03:34:54.321584: step: 1308/531, loss: 0.0001581192045705393 2023-01-23 03:34:55.466406: step: 1312/531, loss: 0.007987165823578835 2023-01-23 03:34:56.587984: step: 1316/531, loss: 0.000255584716796875 2023-01-23 03:34:57.737132: step: 1320/531, loss: 0.002286529401317239 2023-01-23 03:34:58.848889: step: 1324/531, loss: 0.0013268471229821444 2023-01-23 03:34:59.979298: step: 1328/531, loss: 0.6295409798622131 2023-01-23 03:35:01.094125: step: 1332/531, loss: 0.014552021399140358 2023-01-23 03:35:02.249317: step: 1336/531, loss: 0.0006862640148028731 2023-01-23 03:35:03.374584: step: 1340/531, loss: 0.0015329361194744706 2023-01-23 03:35:04.484900: step: 1344/531, loss: 2.803802453854587e-05 2023-01-23 03:35:05.615210: step: 1348/531, loss: 0.015396691858768463 2023-01-23 03:35:06.731922: step: 1352/531, loss: 0.0010770796798169613 2023-01-23 03:35:07.878301: step: 1356/531, loss: 0.0003342151758261025 2023-01-23 03:35:09.035362: step: 1360/531, loss: 0.0010797501308843493 2023-01-23 03:35:10.153078: step: 1364/531, loss: 0.014882469549775124 2023-01-23 03:35:11.296875: step: 1368/531, loss: 0.0029285431373864412 2023-01-23 03:35:12.412059: step: 1372/531, loss: 0.008823776617646217 2023-01-23 03:35:13.528245: step: 1376/531, loss: 6.50405854685232e-05 2023-01-23 03:35:14.648803: step: 1380/531, loss: 0.002798462053760886 2023-01-23 03:35:15.749644: step: 1384/531, loss: 0.0006093024858273566 2023-01-23 03:35:16.880618: step: 1388/531, loss: 0.0018859029514715075 2023-01-23 03:35:18.006420: step: 1392/531, loss: 0.0017148017650470138 2023-01-23 03:35:19.140806: step: 1396/531, loss: 0.15394507348537445 2023-01-23 03:35:20.291686: step: 1400/531, loss: 0.0023052217438817024 2023-01-23 03:35:21.429225: step: 1404/531, loss: 1.1301040103717241e-05 2023-01-23 03:35:22.545444: step: 1408/531, loss: 0.0001277923583984375 2023-01-23 03:35:23.660398: step: 1412/531, loss: 0.02342100255191326 2023-01-23 03:35:24.789194: step: 1416/531, loss: 0.006754875183105469 2023-01-23 03:35:25.897328: step: 1420/531, loss: 0.00017118453979492188 2023-01-23 03:35:27.021249: step: 1424/531, loss: 0.025450468063354492 2023-01-23 03:35:28.163802: step: 1428/531, loss: 0.0002709388791117817 2023-01-23 03:35:29.288512: step: 1432/531, loss: 0.09664926677942276 2023-01-23 03:35:30.391736: step: 1436/531, loss: 0.004719638731330633 2023-01-23 03:35:31.508847: step: 1440/531, loss: 0.0011406898265704513 2023-01-23 03:35:32.638954: step: 1444/531, loss: 0.0016133309109136462 2023-01-23 03:35:33.765503: step: 1448/531, loss: 0.006688308902084827 2023-01-23 03:35:34.882217: step: 1452/531, loss: 0.000404167192755267 2023-01-23 03:35:36.008112: step: 1456/531, loss: 0.09490318596363068 2023-01-23 03:35:37.126575: step: 1460/531, loss: 7.953643944347277e-05 2023-01-23 03:35:38.277016: step: 1464/531, loss: 0.2200479507446289 2023-01-23 03:35:39.381843: step: 1468/531, loss: 0.0008084297878667712 2023-01-23 03:35:40.516966: step: 1472/531, loss: 0.02826089784502983 2023-01-23 03:35:41.641233: step: 1476/531, loss: 0.08072948455810547 2023-01-23 03:35:42.746619: step: 1480/531, loss: 0.010100364685058594 2023-01-23 03:35:43.854387: step: 1484/531, loss: 0.001026153564453125 2023-01-23 03:35:44.969585: step: 1488/531, loss: 0.000878667866345495 2023-01-23 03:35:46.092731: step: 1492/531, loss: 0.0006102085462771356 2023-01-23 03:35:47.221168: step: 1496/531, loss: 0.03264160454273224 2023-01-23 03:35:48.337579: step: 1500/531, loss: 0.001094055245630443 2023-01-23 03:35:49.446762: step: 1504/531, loss: 0.0006366729503497481 2023-01-23 03:35:50.572204: step: 1508/531, loss: 6.799698167014867e-05 2023-01-23 03:35:51.653996: step: 1512/531, loss: 0.007296538446098566 2023-01-23 03:35:52.754945: step: 1516/531, loss: 9.813308861339465e-05 2023-01-23 03:35:53.904291: step: 1520/531, loss: 0.004828357603400946 2023-01-23 03:35:55.021907: step: 1524/531, loss: 0.001416969345882535 2023-01-23 03:35:56.157823: step: 1528/531, loss: 0.049439240247011185 2023-01-23 03:35:57.251358: step: 1532/531, loss: 0.0028072833083570004 2023-01-23 03:35:58.383885: step: 1536/531, loss: 2.0790101189049892e-05 2023-01-23 03:35:59.500555: step: 1540/531, loss: 0.0069519043900072575 2023-01-23 03:36:00.634216: step: 1544/531, loss: 0.0017322540516033769 2023-01-23 03:36:01.765490: step: 1548/531, loss: 0.002494430635124445 2023-01-23 03:36:02.909209: step: 1552/531, loss: 0.0015844255685806274 2023-01-23 03:36:04.021724: step: 1556/531, loss: 0.0004020690976176411 2023-01-23 03:36:05.174473: step: 1560/531, loss: 0.0031066895462572575 2023-01-23 03:36:06.311646: step: 1564/531, loss: 0.07174170017242432 2023-01-23 03:36:07.409221: step: 1568/531, loss: 0.01071691419929266 2023-01-23 03:36:08.522021: step: 1572/531, loss: 0.013907814398407936 2023-01-23 03:36:09.633385: step: 1576/531, loss: 0.002496433211490512 2023-01-23 03:36:10.766904: step: 1580/531, loss: 0.0029046060517430305 2023-01-23 03:36:11.888676: step: 1584/531, loss: 0.018844319507479668 2023-01-23 03:36:12.997631: step: 1588/531, loss: 6.351470801746473e-05 2023-01-23 03:36:14.125591: step: 1592/531, loss: 0.028675103560090065 2023-01-23 03:36:15.244198: step: 1596/531, loss: 0.04164304956793785 2023-01-23 03:36:16.358709: step: 1600/531, loss: 0.02446146123111248 2023-01-23 03:36:17.489999: step: 1604/531, loss: 0.007489681243896484 2023-01-23 03:36:18.609164: step: 1608/531, loss: 0.16683831810951233 2023-01-23 03:36:19.760638: step: 1612/531, loss: 0.00019459724717307836 2023-01-23 03:36:20.858053: step: 1616/531, loss: 0.05150318145751953 2023-01-23 03:36:22.002985: step: 1620/531, loss: 4.2629246308933944e-05 2023-01-23 03:36:23.161942: step: 1624/531, loss: 0.0036334991455078125 2023-01-23 03:36:24.287789: step: 1628/531, loss: 0.01684246025979519 2023-01-23 03:36:25.398718: step: 1632/531, loss: 3.3378603347955504e-06 2023-01-23 03:36:26.528022: step: 1636/531, loss: 0.0028789518401026726 2023-01-23 03:36:27.627451: step: 1640/531, loss: 0.0023194788955152035 2023-01-23 03:36:28.746835: step: 1644/531, loss: 0.01586456410586834 2023-01-23 03:36:29.863277: step: 1648/531, loss: 0.09746570140123367 2023-01-23 03:36:30.935282: step: 1652/531, loss: 0.06596937030553818 2023-01-23 03:36:32.052949: step: 1656/531, loss: 0.022096730768680573 2023-01-23 03:36:33.192559: step: 1660/531, loss: 0.0021263123489916325 2023-01-23 03:36:34.327742: step: 1664/531, loss: 8.296966552734375e-05 2023-01-23 03:36:35.454383: step: 1668/531, loss: 0.00015087128849700093 2023-01-23 03:36:36.590991: step: 1672/531, loss: 0.018308257684111595 2023-01-23 03:36:37.707798: step: 1676/531, loss: 9.279251389671117e-05 2023-01-23 03:36:38.841247: step: 1680/531, loss: 0.0003056526475120336 2023-01-23 03:36:39.985096: step: 1684/531, loss: 0.000118255615234375 2023-01-23 03:36:41.103636: step: 1688/531, loss: 0.489303320646286 2023-01-23 03:36:42.206093: step: 1692/531, loss: 0.000506496406160295 2023-01-23 03:36:43.345340: step: 1696/531, loss: 0.4020484983921051 2023-01-23 03:36:44.453454: step: 1700/531, loss: 0.017195701599121094 2023-01-23 03:36:45.581490: step: 1704/531, loss: 1.659393274167087e-05 2023-01-23 03:36:46.698181: step: 1708/531, loss: 0.00032138824462890625 2023-01-23 03:36:47.810597: step: 1712/531, loss: 0.11515121161937714 2023-01-23 03:36:48.936751: step: 1716/531, loss: 0.000286293012322858 2023-01-23 03:36:50.056552: step: 1720/531, loss: 0.0013742446899414062 2023-01-23 03:36:51.196689: step: 1724/531, loss: 0.004477310460060835 2023-01-23 03:36:52.310209: step: 1728/531, loss: 0.004259681794792414 2023-01-23 03:36:53.440093: step: 1732/531, loss: 0.05674934387207031 2023-01-23 03:36:54.530857: step: 1736/531, loss: 0.03366789594292641 2023-01-23 03:36:55.673673: step: 1740/531, loss: 0.0017023086547851562 2023-01-23 03:36:56.782424: step: 1744/531, loss: 0.007914924994111061 2023-01-23 03:36:57.918263: step: 1748/531, loss: 0.016933251172304153 2023-01-23 03:36:59.029579: step: 1752/531, loss: 0.00015182494826149195 2023-01-23 03:37:00.136243: step: 1756/531, loss: 9.107589721679688e-05 2023-01-23 03:37:01.244577: step: 1760/531, loss: 0.00017179847054649144 2023-01-23 03:37:02.406550: step: 1764/531, loss: 0.00015707015700172633 2023-01-23 03:37:03.497475: step: 1768/531, loss: 2.021789623540826e-05 2023-01-23 03:37:04.650320: step: 1772/531, loss: 0.08329629898071289 2023-01-23 03:37:05.804177: step: 1776/531, loss: 0.0007694244850426912 2023-01-23 03:37:06.916478: step: 1780/531, loss: 0.0012290955055505037 2023-01-23 03:37:08.068779: step: 1784/531, loss: 0.10531673580408096 2023-01-23 03:37:09.198208: step: 1788/531, loss: 0.021512603387236595 2023-01-23 03:37:10.338825: step: 1792/531, loss: 0.09801311790943146 2023-01-23 03:37:11.465559: step: 1796/531, loss: 5.898475501453504e-05 2023-01-23 03:37:12.646542: step: 1800/531, loss: 0.0384368896484375 2023-01-23 03:37:13.778805: step: 1804/531, loss: 0.00011668205843307078 2023-01-23 03:37:14.905257: step: 1808/531, loss: 0.0009202957153320312 2023-01-23 03:37:16.004180: step: 1812/531, loss: 0.011274528689682484 2023-01-23 03:37:17.176174: step: 1816/531, loss: 0.03567848354578018 2023-01-23 03:37:18.288931: step: 1820/531, loss: 0.011447524651885033 2023-01-23 03:37:19.445428: step: 1824/531, loss: 0.0008705139043740928 2023-01-23 03:37:20.578564: step: 1828/531, loss: 0.011373138055205345 2023-01-23 03:37:21.688931: step: 1832/531, loss: 0.019310856238007545 2023-01-23 03:37:22.794181: step: 1836/531, loss: 0.002705478807911277 2023-01-23 03:37:23.932431: step: 1840/531, loss: 0.00416412390768528 2023-01-23 03:37:25.069721: step: 1844/531, loss: 0.0006786346202716231 2023-01-23 03:37:26.219318: step: 1848/531, loss: 0.27540796995162964 2023-01-23 03:37:27.353349: step: 1852/531, loss: 7.247925168485381e-06 2023-01-23 03:37:28.498413: step: 1856/531, loss: 0.011501980014145374 2023-01-23 03:37:29.627716: step: 1860/531, loss: 0.00017232894606422633 2023-01-23 03:37:30.749957: step: 1864/531, loss: 0.0001307010679738596 2023-01-23 03:37:31.877134: step: 1868/531, loss: 0.10891075432300568 2023-01-23 03:37:32.998046: step: 1872/531, loss: 0.0023916244972497225 2023-01-23 03:37:34.120896: step: 1876/531, loss: 0.006252288818359375 2023-01-23 03:37:35.251341: step: 1880/531, loss: 0.027922438457608223 2023-01-23 03:37:36.375417: step: 1884/531, loss: 0.03657875210046768 2023-01-23 03:37:37.476953: step: 1888/531, loss: 0.07212285697460175 2023-01-23 03:37:38.578362: step: 1892/531, loss: 0.0004203796270303428 2023-01-23 03:37:39.698807: step: 1896/531, loss: 0.0017456054920330644 2023-01-23 03:37:40.822181: step: 1900/531, loss: 0.02304067648947239 2023-01-23 03:37:41.960391: step: 1904/531, loss: 0.14250606298446655 2023-01-23 03:37:43.107423: step: 1908/531, loss: 0.005561447236686945 2023-01-23 03:37:44.257783: step: 1912/531, loss: 0.011288642883300781 2023-01-23 03:37:45.393156: step: 1916/531, loss: 0.018338680267333984 2023-01-23 03:37:46.510132: step: 1920/531, loss: 0.0005675316206179559 2023-01-23 03:37:47.623156: step: 1924/531, loss: 0.002506256103515625 2023-01-23 03:37:48.775166: step: 1928/531, loss: 0.023679541423916817 2023-01-23 03:37:49.902954: step: 1932/531, loss: 0.0019109725253656507 2023-01-23 03:37:51.012741: step: 1936/531, loss: 0.00011405944678699598 2023-01-23 03:37:52.161926: step: 1940/531, loss: 0.00012311934551689774 2023-01-23 03:37:53.277792: step: 1944/531, loss: 0.03940429911017418 2023-01-23 03:37:54.392090: step: 1948/531, loss: 0.0012498856522142887 2023-01-23 03:37:55.533805: step: 1952/531, loss: 0.0001930236758198589 2023-01-23 03:37:56.669345: step: 1956/531, loss: 0.010274887084960938 2023-01-23 03:37:57.791716: step: 1960/531, loss: 0.005497169215232134 2023-01-23 03:37:58.895522: step: 1964/531, loss: 5.7506560551701114e-05 2023-01-23 03:37:59.998325: step: 1968/531, loss: 2.6702882678364404e-06 2023-01-23 03:38:01.116772: step: 1972/531, loss: 0.00031719208345748484 2023-01-23 03:38:02.248942: step: 1976/531, loss: 0.0002713203430175781 2023-01-23 03:38:03.368385: step: 1980/531, loss: 0.05206799507141113 2023-01-23 03:38:04.484042: step: 1984/531, loss: 0.0017215729458257556 2023-01-23 03:38:05.596246: step: 1988/531, loss: 0.0010931015713140368 2023-01-23 03:38:06.712962: step: 1992/531, loss: 0.01668539084494114 2023-01-23 03:38:07.796938: step: 1996/531, loss: 0.00020408631826285273 2023-01-23 03:38:08.896883: step: 2000/531, loss: 0.004532241728156805 2023-01-23 03:38:10.022797: step: 2004/531, loss: 0.010374260134994984 2023-01-23 03:38:11.149151: step: 2008/531, loss: 0.01088948268443346 2023-01-23 03:38:12.279545: step: 2012/531, loss: 0.005203056149184704 2023-01-23 03:38:13.394479: step: 2016/531, loss: 9.832382784225047e-05 2023-01-23 03:38:14.546402: step: 2020/531, loss: 0.0008849144214764237 2023-01-23 03:38:15.686853: step: 2024/531, loss: 0.0002753257576841861 2023-01-23 03:38:16.777935: step: 2028/531, loss: 0.0008844375261105597 2023-01-23 03:38:17.899874: step: 2032/531, loss: 0.05789985880255699 2023-01-23 03:38:19.025768: step: 2036/531, loss: 0.0009868622291833162 2023-01-23 03:38:20.132607: step: 2040/531, loss: 0.0005660175811499357 2023-01-23 03:38:21.295203: step: 2044/531, loss: 0.01151733472943306 2023-01-23 03:38:22.422296: step: 2048/531, loss: 0.001330471015535295 2023-01-23 03:38:23.558253: step: 2052/531, loss: 0.0026617050170898438 2023-01-23 03:38:24.672712: step: 2056/531, loss: 0.004637623205780983 2023-01-23 03:38:25.771234: step: 2060/531, loss: 0.06763839721679688 2023-01-23 03:38:26.857023: step: 2064/531, loss: 0.002140092896297574 2023-01-23 03:38:27.963207: step: 2068/531, loss: 0.004965877626091242 2023-01-23 03:38:29.072790: step: 2072/531, loss: 0.0002796054177451879 2023-01-23 03:38:30.187780: step: 2076/531, loss: 1.3732910701946821e-05 2023-01-23 03:38:31.357353: step: 2080/531, loss: 0.008048057556152344 2023-01-23 03:38:32.475258: step: 2084/531, loss: 0.004024315159767866 2023-01-23 03:38:33.590082: step: 2088/531, loss: 0.006915092468261719 2023-01-23 03:38:34.716530: step: 2092/531, loss: 0.0008516311645507812 2023-01-23 03:38:35.849116: step: 2096/531, loss: 0.00036945342435501516 2023-01-23 03:38:36.959225: step: 2100/531, loss: 0.01400852296501398 2023-01-23 03:38:38.073435: step: 2104/531, loss: 0.0004715919494628906 2023-01-23 03:38:39.190731: step: 2108/531, loss: 0.05077171325683594 2023-01-23 03:38:40.288918: step: 2112/531, loss: 0.012525559403002262 2023-01-23 03:38:41.430687: step: 2116/531, loss: 0.020716095343232155 2023-01-23 03:38:42.589421: step: 2120/531, loss: 0.016623878851532936 2023-01-23 03:38:43.736887: step: 2124/531, loss: 0.0005946159362792969 ================================================== Loss: 0.030 -------------------- Dev: {'event': {'p': 0.5730994152046783, 'r': 0.7829560585885486, 'f1': 0.6617895329206528}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Test: {'event': {'p': 0.6226240148354195, 'r': 0.800834824090638, 'f1': 0.7005738132498694}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Chinese: {'event': {'p': 0.550561797752809, 'r': 0.9074074074074074, 'f1': 0.6853146853146853}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Korean: {'event': {'p': 0.576271186440678, 'r': 0.5396825396825397, 'f1': 0.5573770491803278}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Russian: {'event': {'p': 0.3333333333333333, 'r': 0.4166666666666667, 'f1': 0.3703703703703704}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 24 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:39:23.867129: step: 4/531, loss: 5.91278076171875e-05 2023-01-23 03:39:24.975905: step: 8/531, loss: 1.831054760259576e-05 2023-01-23 03:39:26.093663: step: 12/531, loss: 0.0008469581371173263 2023-01-23 03:39:27.200817: step: 16/531, loss: 0.0002536773681640625 2023-01-23 03:39:28.304170: step: 20/531, loss: 0.009461021982133389 2023-01-23 03:39:29.410752: step: 24/531, loss: 0.012678338214755058 2023-01-23 03:39:30.572292: step: 28/531, loss: 0.003930473234504461 2023-01-23 03:39:31.699529: step: 32/531, loss: 0.013451588340103626 2023-01-23 03:39:32.817562: step: 36/531, loss: 0.001220703125 2023-01-23 03:39:33.952221: step: 40/531, loss: 0.02112560346722603 2023-01-23 03:39:35.065268: step: 44/531, loss: 0.01291732769459486 2023-01-23 03:39:36.201566: step: 48/531, loss: 2.6035308110294864e-05 2023-01-23 03:39:37.309100: step: 52/531, loss: 2.269744800287299e-05 2023-01-23 03:39:38.433486: step: 56/531, loss: 0.000653171562589705 2023-01-23 03:39:39.548687: step: 60/531, loss: 0.0018260478973388672 2023-01-23 03:39:40.701326: step: 64/531, loss: 0.0030193328857421875 2023-01-23 03:39:41.816732: step: 68/531, loss: 0.0008449554443359375 2023-01-23 03:39:42.944677: step: 72/531, loss: 0.0014793395530432463 2023-01-23 03:39:44.063163: step: 76/531, loss: 0.0010267734760418534 2023-01-23 03:39:45.192328: step: 80/531, loss: 0.0005099296686239541 2023-01-23 03:39:46.349086: step: 84/531, loss: 0.021720124408602715 2023-01-23 03:39:47.462491: step: 88/531, loss: 0.004712295718491077 2023-01-23 03:39:48.611911: step: 92/531, loss: 0.006507682614028454 2023-01-23 03:39:49.723962: step: 96/531, loss: 0.035111621022224426 2023-01-23 03:39:50.834452: step: 100/531, loss: 0.008625412359833717 2023-01-23 03:39:51.979711: step: 104/531, loss: 0.007600593380630016 2023-01-23 03:39:53.139376: step: 108/531, loss: 0.00042738913907669485 2023-01-23 03:39:54.270388: step: 112/531, loss: 0.00487442035228014 2023-01-23 03:39:55.409424: step: 116/531, loss: 1.716613724056515e-06 2023-01-23 03:39:56.526771: step: 120/531, loss: 0.0002675056457519531 2023-01-23 03:39:57.665981: step: 124/531, loss: 0.0005996704567223787 2023-01-23 03:39:58.780783: step: 128/531, loss: 0.0005227089277468622 2023-01-23 03:39:59.883627: step: 132/531, loss: 0.0004280090506654233 2023-01-23 03:40:01.004596: step: 136/531, loss: 0.0025522231590002775 2023-01-23 03:40:02.130132: step: 140/531, loss: 0.00040922165499068797 2023-01-23 03:40:03.248634: step: 144/531, loss: 0.0004783660115208477 2023-01-23 03:40:04.341680: step: 148/531, loss: 0.00023488998704124242 2023-01-23 03:40:05.456465: step: 152/531, loss: 0.5094490647315979 2023-01-23 03:40:06.555944: step: 156/531, loss: 0.036812786012887955 2023-01-23 03:40:07.692652: step: 160/531, loss: 0.0022897720336914062 2023-01-23 03:40:08.808785: step: 164/531, loss: 0.000346946733770892 2023-01-23 03:40:09.929577: step: 168/531, loss: 0.01707305945456028 2023-01-23 03:40:11.083329: step: 172/531, loss: 0.001058387802913785 2023-01-23 03:40:12.248877: step: 176/531, loss: 0.002761268522590399 2023-01-23 03:40:13.374205: step: 180/531, loss: 0.017061805352568626 2023-01-23 03:40:14.490922: step: 184/531, loss: 3.6430355976335704e-05 2023-01-23 03:40:15.639488: step: 188/531, loss: 0.009377861395478249 2023-01-23 03:40:16.771919: step: 192/531, loss: 0.018456079065799713 2023-01-23 03:40:17.886517: step: 196/531, loss: 0.010912704281508923 2023-01-23 03:40:19.015940: step: 200/531, loss: 2.1553041733568534e-05 2023-01-23 03:40:20.125919: step: 204/531, loss: 0.006747436244040728 2023-01-23 03:40:21.244177: step: 208/531, loss: 0.025938605889678 2023-01-23 03:40:22.372690: step: 212/531, loss: 0.007895183749496937 2023-01-23 03:40:23.489461: step: 216/531, loss: 0.22869530320167542 2023-01-23 03:40:24.646639: step: 220/531, loss: 0.0002742767392192036 2023-01-23 03:40:25.798682: step: 224/531, loss: 0.011216926388442516 2023-01-23 03:40:26.916466: step: 228/531, loss: 0.00012130737741244957 2023-01-23 03:40:28.050458: step: 232/531, loss: 0.004027175717055798 2023-01-23 03:40:29.162119: step: 236/531, loss: 0.0002709388791117817 2023-01-23 03:40:30.301626: step: 240/531, loss: 1.277923547604587e-05 2023-01-23 03:40:31.448566: step: 244/531, loss: 0.0001714706450002268 2023-01-23 03:40:32.574833: step: 248/531, loss: 0.1385120451450348 2023-01-23 03:40:33.672848: step: 252/531, loss: 0.00979690533131361 2023-01-23 03:40:34.792420: step: 256/531, loss: 0.011391067877411842 2023-01-23 03:40:35.945333: step: 260/531, loss: 0.0003238678036723286 2023-01-23 03:40:37.100102: step: 264/531, loss: 0.002680647186934948 2023-01-23 03:40:38.237237: step: 268/531, loss: 0.00028934478177689016 2023-01-23 03:40:39.351579: step: 272/531, loss: 0.00019693374633789062 2023-01-23 03:40:40.475610: step: 276/531, loss: 0.00013113021850585938 2023-01-23 03:40:41.630446: step: 280/531, loss: 0.004318142309784889 2023-01-23 03:40:42.768116: step: 284/531, loss: 0.016063785180449486 2023-01-23 03:40:43.899574: step: 288/531, loss: 0.006782626733183861 2023-01-23 03:40:45.068118: step: 292/531, loss: 0.0017033576732501388 2023-01-23 03:40:46.201013: step: 296/531, loss: 0.23460941016674042 2023-01-23 03:40:47.320679: step: 300/531, loss: 0.0007954597822390497 2023-01-23 03:40:48.447629: step: 304/531, loss: 0.002703094622120261 2023-01-23 03:40:49.584285: step: 308/531, loss: 0.060344792902469635 2023-01-23 03:40:50.715479: step: 312/531, loss: 0.00044622423592954874 2023-01-23 03:40:51.820716: step: 316/531, loss: 0.0005597114795818925 2023-01-23 03:40:52.963308: step: 320/531, loss: 0.015132714062929153 2023-01-23 03:40:54.082838: step: 324/531, loss: 0.019302034750580788 2023-01-23 03:40:55.217346: step: 328/531, loss: 0.0009077072609215975 2023-01-23 03:40:56.367180: step: 332/531, loss: 0.00030555727425962687 2023-01-23 03:40:57.488712: step: 336/531, loss: 0.0007661819690838456 2023-01-23 03:40:58.618740: step: 340/531, loss: 0.01809101179242134 2023-01-23 03:40:59.734678: step: 344/531, loss: -5.722046125811175e-07 2023-01-23 03:41:00.873859: step: 348/531, loss: 0.0018939971923828125 2023-01-23 03:41:01.968432: step: 352/531, loss: 0.0004322051827330142 2023-01-23 03:41:03.112379: step: 356/531, loss: 0.0002918243408203125 2023-01-23 03:41:04.257193: step: 360/531, loss: 0.0002749919949565083 2023-01-23 03:41:05.364188: step: 364/531, loss: 3.361701965332031e-05 2023-01-23 03:41:06.481231: step: 368/531, loss: 0.02384319342672825 2023-01-23 03:41:07.671817: step: 372/531, loss: 0.0008242130279541016 2023-01-23 03:41:08.791104: step: 376/531, loss: 0.00022363662719726562 2023-01-23 03:41:09.908686: step: 380/531, loss: 0.0006729125743731856 2023-01-23 03:41:11.018026: step: 384/531, loss: 9.012223017634824e-06 2023-01-23 03:41:12.110467: step: 388/531, loss: 8.58306884765625e-06 2023-01-23 03:41:13.226118: step: 392/531, loss: 0.0009639739873819053 2023-01-23 03:41:14.371691: step: 396/531, loss: 2.861026587197557e-06 2023-01-23 03:41:15.517103: step: 400/531, loss: 0.627214789390564 2023-01-23 03:41:16.670071: step: 404/531, loss: 0.00015325547428801656 2023-01-23 03:41:17.771676: step: 408/531, loss: 6.866455805720761e-06 2023-01-23 03:41:18.883415: step: 412/531, loss: 0.0007921218639239669 2023-01-23 03:41:20.007853: step: 416/531, loss: 0.00011711120896507055 2023-01-23 03:41:21.128874: step: 420/531, loss: 0.00011200905282748863 2023-01-23 03:41:22.248211: step: 424/531, loss: 0.00010223388380836695 2023-01-23 03:41:23.396076: step: 428/531, loss: 0.003694153157994151 2023-01-23 03:41:24.490473: step: 432/531, loss: 0.01503686886280775 2023-01-23 03:41:25.596976: step: 436/531, loss: 0.036676835268735886 2023-01-23 03:41:26.733263: step: 440/531, loss: 0.05137920752167702 2023-01-23 03:41:27.873821: step: 444/531, loss: 1.5068053471622989e-05 2023-01-23 03:41:29.014362: step: 448/531, loss: 0.0003145694499835372 2023-01-23 03:41:30.153708: step: 452/531, loss: 4.281997826183215e-05 2023-01-23 03:41:31.278382: step: 456/531, loss: 0.0053497315384447575 2023-01-23 03:41:32.404468: step: 460/531, loss: 2.4795533590804553e-06 2023-01-23 03:41:33.550122: step: 464/531, loss: 0.014058399014174938 2023-01-23 03:41:34.674393: step: 468/531, loss: 4.00543194700731e-06 2023-01-23 03:41:35.811628: step: 472/531, loss: 0.017551803961396217 2023-01-23 03:41:36.919423: step: 476/531, loss: 0.005578565876930952 2023-01-23 03:41:38.072870: step: 480/531, loss: 0.061332669109106064 2023-01-23 03:41:39.212704: step: 484/531, loss: 0.008698082529008389 2023-01-23 03:41:40.338742: step: 488/531, loss: 0.002961730817332864 2023-01-23 03:41:41.471475: step: 492/531, loss: 0.08084259182214737 2023-01-23 03:41:42.616749: step: 496/531, loss: 0.18942995369434357 2023-01-23 03:41:43.771823: step: 500/531, loss: 0.0003366470627952367 2023-01-23 03:41:44.884548: step: 504/531, loss: 0.0028450011741369963 2023-01-23 03:41:46.016516: step: 508/531, loss: 0.009164047427475452 2023-01-23 03:41:47.144526: step: 512/531, loss: 0.44605007767677307 2023-01-23 03:41:48.284957: step: 516/531, loss: 7.25746140233241e-05 2023-01-23 03:41:49.409962: step: 520/531, loss: 0.002776241395622492 2023-01-23 03:41:50.537072: step: 524/531, loss: 0.0012580871116369963 2023-01-23 03:41:51.644816: step: 528/531, loss: 0.00284652691334486 2023-01-23 03:41:52.760105: step: 532/531, loss: 0.0011589049827307463 2023-01-23 03:41:53.894445: step: 536/531, loss: 0.002253913786262274 2023-01-23 03:41:55.011741: step: 540/531, loss: 0.002480888506397605 2023-01-23 03:41:56.136860: step: 544/531, loss: 0.018663406372070312 2023-01-23 03:41:57.267301: step: 548/531, loss: 9.34600830078125e-05 2023-01-23 03:41:58.392125: step: 552/531, loss: 0.0072227478958666325 2023-01-23 03:41:59.515577: step: 556/531, loss: 0.0007403374183923006 2023-01-23 03:42:00.644548: step: 560/531, loss: 6.141662743175402e-05 2023-01-23 03:42:01.750649: step: 564/531, loss: 5.054473876953125e-05 2023-01-23 03:42:02.893955: step: 568/531, loss: 0.0018516541458666325 2023-01-23 03:42:04.015803: step: 572/531, loss: 6.67572021484375e-06 2023-01-23 03:42:05.122272: step: 576/531, loss: 0.001562654972076416 2023-01-23 03:42:06.248704: step: 580/531, loss: 0.09368829429149628 2023-01-23 03:42:07.399258: step: 584/531, loss: 0.018031883984804153 2023-01-23 03:42:08.539905: step: 588/531, loss: 7.858276512706652e-05 2023-01-23 03:42:09.646894: step: 592/531, loss: 0.002198505448177457 2023-01-23 03:42:10.777465: step: 596/531, loss: 1.0395050594524946e-05 2023-01-23 03:42:11.928900: step: 600/531, loss: 0.006389045622199774 2023-01-23 03:42:13.044724: step: 604/531, loss: 5.73158249608241e-05 2023-01-23 03:42:14.157874: step: 608/531, loss: 7.705688767600805e-05 2023-01-23 03:42:15.313984: step: 612/531, loss: 0.005303001496940851 2023-01-23 03:42:16.429476: step: 616/531, loss: 0.004048157017678022 2023-01-23 03:42:17.540628: step: 620/531, loss: 0.008164596743881702 2023-01-23 03:42:18.658292: step: 624/531, loss: 5.7697296142578125e-05 2023-01-23 03:42:19.790548: step: 628/531, loss: 0.004396772477775812 2023-01-23 03:42:20.893576: step: 632/531, loss: 0.018535137176513672 2023-01-23 03:42:22.009670: step: 636/531, loss: 0.00033063889713957906 2023-01-23 03:42:23.130812: step: 640/531, loss: 0.17800946533679962 2023-01-23 03:42:24.247071: step: 644/531, loss: 0.00014419556828215718 2023-01-23 03:42:25.385627: step: 648/531, loss: 0.03164225071668625 2023-01-23 03:42:26.520736: step: 652/531, loss: 0.0004584789276123047 2023-01-23 03:42:27.643416: step: 656/531, loss: 0.00013151168241165578 2023-01-23 03:42:28.754330: step: 660/531, loss: 0.00012903213792014867 2023-01-23 03:42:29.876868: step: 664/531, loss: 3.62396240234375e-05 2023-01-23 03:42:31.031232: step: 668/531, loss: 0.01884288713335991 2023-01-23 03:42:32.140646: step: 672/531, loss: 0.0045454977080225945 2023-01-23 03:42:33.273545: step: 676/531, loss: 0.0008653640979900956 2023-01-23 03:42:34.388457: step: 680/531, loss: 0.0001649379701120779 2023-01-23 03:42:35.516515: step: 684/531, loss: 0.0012868881458416581 2023-01-23 03:42:36.653466: step: 688/531, loss: 0.000354766845703125 2023-01-23 03:42:37.797996: step: 692/531, loss: 0.0005753517034463584 2023-01-23 03:42:38.916322: step: 696/531, loss: 0.020474813878536224 2023-01-23 03:42:40.060355: step: 700/531, loss: 0.001491117523983121 2023-01-23 03:42:41.188461: step: 704/531, loss: 3.461838059592992e-05 2023-01-23 03:42:42.306006: step: 708/531, loss: 0.0020816803444176912 2023-01-23 03:42:43.409402: step: 712/531, loss: 0.033225249499082565 2023-01-23 03:42:44.525126: step: 716/531, loss: 0.0010701180435717106 2023-01-23 03:42:45.641475: step: 720/531, loss: 0.014149665832519531 2023-01-23 03:42:46.759095: step: 724/531, loss: 0.0013815879356116056 2023-01-23 03:42:47.915875: step: 728/531, loss: 0.013937091454863548 2023-01-23 03:42:49.053591: step: 732/531, loss: 7.858276512706652e-05 2023-01-23 03:42:50.171010: step: 736/531, loss: 0.0002490997430868447 2023-01-23 03:42:51.303165: step: 740/531, loss: 0.0001737594575388357 2023-01-23 03:42:52.426257: step: 744/531, loss: 0.004236412234604359 2023-01-23 03:42:53.532257: step: 748/531, loss: 0.013752556405961514 2023-01-23 03:42:54.632436: step: 752/531, loss: 7.07626313669607e-05 2023-01-23 03:42:55.751410: step: 756/531, loss: 0.0006228744750842452 2023-01-23 03:42:56.848692: step: 760/531, loss: 0.0013320923317223787 2023-01-23 03:42:58.013107: step: 764/531, loss: 0.004911232274025679 2023-01-23 03:42:59.150085: step: 768/531, loss: 0.016339445486664772 2023-01-23 03:43:00.268606: step: 772/531, loss: 7.705688767600805e-05 2023-01-23 03:43:01.426055: step: 776/531, loss: 8.049011375987902e-05 2023-01-23 03:43:02.539362: step: 780/531, loss: 0.001172399497590959 2023-01-23 03:43:03.653834: step: 784/531, loss: 0.0005744933732785285 2023-01-23 03:43:04.771697: step: 788/531, loss: 0.00043392181396484375 2023-01-23 03:43:05.900610: step: 792/531, loss: 0.0024810314644128084 2023-01-23 03:43:07.021138: step: 796/531, loss: 0.0003286361461505294 2023-01-23 03:43:08.137514: step: 800/531, loss: 0.0010199546813964844 2023-01-23 03:43:09.264852: step: 804/531, loss: 0.00018186568922828883 2023-01-23 03:43:10.406107: step: 808/531, loss: 0.002832603408023715 2023-01-23 03:43:11.514540: step: 812/531, loss: 0.011869430541992188 2023-01-23 03:43:12.625407: step: 816/531, loss: 0.06693868339061737 2023-01-23 03:43:13.775550: step: 820/531, loss: 0.007252788636833429 2023-01-23 03:43:14.922580: step: 824/531, loss: 0.006060886662453413 2023-01-23 03:43:16.035398: step: 828/531, loss: 0.0031021118629723787 2023-01-23 03:43:17.143760: step: 832/531, loss: 0.017304040491580963 2023-01-23 03:43:18.249512: step: 836/531, loss: 0.06982836872339249 2023-01-23 03:43:19.383477: step: 840/531, loss: 0.004384422209113836 2023-01-23 03:43:20.509857: step: 844/531, loss: 0.005603218451142311 2023-01-23 03:43:21.637219: step: 848/531, loss: 0.001747727394104004 2023-01-23 03:43:22.769707: step: 852/531, loss: 0.011030388064682484 2023-01-23 03:43:23.925118: step: 856/531, loss: 0.0003007889026775956 2023-01-23 03:43:25.045888: step: 860/531, loss: 0.00020572246285155416 2023-01-23 03:43:26.199425: step: 864/531, loss: 0.0020354269072413445 2023-01-23 03:43:27.314568: step: 868/531, loss: 0.21678677201271057 2023-01-23 03:43:28.440615: step: 872/531, loss: 0.0005395889165811241 2023-01-23 03:43:29.586437: step: 876/531, loss: 0.0001865387021098286 2023-01-23 03:43:30.701780: step: 880/531, loss: -2.28881845032447e-06 2023-01-23 03:43:31.814629: step: 884/531, loss: 7.772445678710938e-05 2023-01-23 03:43:32.924672: step: 888/531, loss: 0.002215576358139515 2023-01-23 03:43:34.033982: step: 892/531, loss: 4.4345855712890625e-05 2023-01-23 03:43:35.153254: step: 896/531, loss: 0.07345886528491974 2023-01-23 03:43:36.270437: step: 900/531, loss: 0.03985557705163956 2023-01-23 03:43:37.414961: step: 904/531, loss: 8.392333984375e-05 2023-01-23 03:43:38.535156: step: 908/531, loss: -6.86645489622606e-06 2023-01-23 03:43:39.667483: step: 912/531, loss: 0.008864021860063076 2023-01-23 03:43:40.789845: step: 916/531, loss: 0.004531288519501686 2023-01-23 03:43:41.938749: step: 920/531, loss: 0.00027437208336777985 2023-01-23 03:43:43.033059: step: 924/531, loss: 0.0005645751953125 2023-01-23 03:43:44.152077: step: 928/531, loss: 0.0003319740353617817 2023-01-23 03:43:45.283776: step: 932/531, loss: 0.005953979212790728 2023-01-23 03:43:46.455835: step: 936/531, loss: 0.013526917435228825 2023-01-23 03:43:47.569300: step: 940/531, loss: 0.0007686614990234375 2023-01-23 03:43:48.701410: step: 944/531, loss: 0.001796674681827426 2023-01-23 03:43:49.854622: step: 948/531, loss: 0.01682434231042862 2023-01-23 03:43:51.013158: step: 952/531, loss: 0.02594909630715847 2023-01-23 03:43:52.116454: step: 956/531, loss: 0.0012048721546307206 2023-01-23 03:43:53.254579: step: 960/531, loss: 0.0005853652837686241 2023-01-23 03:43:54.440843: step: 964/531, loss: 0.0023124695289880037 2023-01-23 03:43:55.555955: step: 968/531, loss: 0.0025557042099535465 2023-01-23 03:43:56.697247: step: 972/531, loss: 0.002075147582218051 2023-01-23 03:43:57.815497: step: 976/531, loss: 0.0009582997299730778 2023-01-23 03:43:58.929302: step: 980/531, loss: 0.000590419746004045 2023-01-23 03:44:00.040043: step: 984/531, loss: 0.008892536163330078 2023-01-23 03:44:01.158630: step: 988/531, loss: 0.0005836010095663369 2023-01-23 03:44:02.255630: step: 992/531, loss: 0.008130836300551891 2023-01-23 03:44:03.364504: step: 996/531, loss: 0.003543853759765625 2023-01-23 03:44:04.493919: step: 1000/531, loss: 0.0013212204212322831 2023-01-23 03:44:05.580587: step: 1004/531, loss: 7.43865984986769e-06 2023-01-23 03:44:06.704846: step: 1008/531, loss: 0.00164794921875 2023-01-23 03:44:07.831639: step: 1012/531, loss: 0.14066238701343536 2023-01-23 03:44:08.958653: step: 1016/531, loss: 3.7527082895394415e-05 2023-01-23 03:44:10.046408: step: 1020/531, loss: -1.8119811784345075e-06 2023-01-23 03:44:11.205385: step: 1024/531, loss: 0.00037288665771484375 2023-01-23 03:44:12.340934: step: 1028/531, loss: 0.009696769528090954 2023-01-23 03:44:13.465232: step: 1032/531, loss: 0.03124275431036949 2023-01-23 03:44:14.590388: step: 1036/531, loss: 7.41004987503402e-05 2023-01-23 03:44:15.730213: step: 1040/531, loss: 0.00027179718017578125 2023-01-23 03:44:16.841019: step: 1044/531, loss: 3.7670135498046875e-05 2023-01-23 03:44:17.972799: step: 1048/531, loss: 0.0005243778578005731 2023-01-23 03:44:19.107940: step: 1052/531, loss: 0.0004273414670024067 2023-01-23 03:44:20.216648: step: 1056/531, loss: 0.0010843276977539062 2023-01-23 03:44:21.332313: step: 1060/531, loss: 3.24249276673072e-06 2023-01-23 03:44:22.467281: step: 1064/531, loss: 0.11719933152198792 2023-01-23 03:44:23.562811: step: 1068/531, loss: 0.0021984099876135588 2023-01-23 03:44:24.702949: step: 1072/531, loss: 0.004708480555564165 2023-01-23 03:44:25.825348: step: 1076/531, loss: 0.0026920319069176912 2023-01-23 03:44:26.967956: step: 1080/531, loss: 0.13162268698215485 2023-01-23 03:44:28.072867: step: 1084/531, loss: 0.02029895968735218 2023-01-23 03:44:29.190002: step: 1088/531, loss: 1.5163422176556196e-05 2023-01-23 03:44:30.309552: step: 1092/531, loss: 0.028029512614011765 2023-01-23 03:44:31.438618: step: 1096/531, loss: 0.011315584182739258 2023-01-23 03:44:32.554482: step: 1100/531, loss: 0.005497408099472523 2023-01-23 03:44:33.658808: step: 1104/531, loss: 0.005373812280595303 2023-01-23 03:44:34.787164: step: 1108/531, loss: 0.022137021645903587 2023-01-23 03:44:35.911442: step: 1112/531, loss: 0.23715955018997192 2023-01-23 03:44:37.027583: step: 1116/531, loss: 0.0008046150323934853 2023-01-23 03:44:38.144117: step: 1120/531, loss: 0.0012140274047851562 2023-01-23 03:44:39.291893: step: 1124/531, loss: 0.006359672639518976 2023-01-23 03:44:40.422178: step: 1128/531, loss: 0.021683311089873314 2023-01-23 03:44:41.557037: step: 1132/531, loss: 0.003791427705436945 2023-01-23 03:44:42.668254: step: 1136/531, loss: 0.0020242691971361637 2023-01-23 03:44:43.784736: step: 1140/531, loss: 0.00011944771540584043 2023-01-23 03:44:44.902767: step: 1144/531, loss: 0.02358236536383629 2023-01-23 03:44:46.025780: step: 1148/531, loss: 0.13448219001293182 2023-01-23 03:44:47.146929: step: 1152/531, loss: 0.16106128692626953 2023-01-23 03:44:48.262673: step: 1156/531, loss: 4.9591064453125e-05 2023-01-23 03:44:49.376616: step: 1160/531, loss: 0.005859756376594305 2023-01-23 03:44:50.473886: step: 1164/531, loss: 5.817413693876006e-06 2023-01-23 03:44:51.565656: step: 1168/531, loss: 0.00014743805513717234 2023-01-23 03:44:52.694157: step: 1172/531, loss: 0.02166595496237278 2023-01-23 03:44:53.794170: step: 1176/531, loss: 0.017145730555057526 2023-01-23 03:44:54.886917: step: 1180/531, loss: 6.961822691664565e-06 2023-01-23 03:44:55.984396: step: 1184/531, loss: 0.003286743303760886 2023-01-23 03:44:57.079131: step: 1188/531, loss: 0.0010373115073889494 2023-01-23 03:44:58.193711: step: 1192/531, loss: 0.0002922058047261089 2023-01-23 03:44:59.331465: step: 1196/531, loss: 0.03240489959716797 2023-01-23 03:45:00.479994: step: 1200/531, loss: 0.0322783961892128 2023-01-23 03:45:01.618666: step: 1204/531, loss: 1.888275073724799e-05 2023-01-23 03:45:02.743882: step: 1208/531, loss: 0.023116111755371094 2023-01-23 03:45:03.879127: step: 1212/531, loss: 0.0017627716297283769 2023-01-23 03:45:05.002638: step: 1216/531, loss: 5.474090721691027e-05 2023-01-23 03:45:06.106655: step: 1220/531, loss: 0.006732320878654718 2023-01-23 03:45:07.236121: step: 1224/531, loss: 0.0022884116042405367 2023-01-23 03:45:08.366803: step: 1228/531, loss: 0.0002704620419535786 2023-01-23 03:45:09.492839: step: 1232/531, loss: 0.00037488937960006297 2023-01-23 03:45:10.592273: step: 1236/531, loss: 0.012486266903579235 2023-01-23 03:45:11.700069: step: 1240/531, loss: 2.1934511096333154e-05 2023-01-23 03:45:12.790333: step: 1244/531, loss: 0.010217857547104359 2023-01-23 03:45:13.914323: step: 1248/531, loss: 2.560615394031629e-05 2023-01-23 03:45:15.031070: step: 1252/531, loss: 3.14712519866589e-06 2023-01-23 03:45:16.157394: step: 1256/531, loss: 0.019257163628935814 2023-01-23 03:45:17.299104: step: 1260/531, loss: 9.431838407181203e-05 2023-01-23 03:45:18.391935: step: 1264/531, loss: 0.002758169313892722 2023-01-23 03:45:19.519399: step: 1268/531, loss: 0.05671043321490288 2023-01-23 03:45:20.624543: step: 1272/531, loss: 3.261566234868951e-05 2023-01-23 03:45:21.740319: step: 1276/531, loss: 4.673004241340095e-06 2023-01-23 03:45:22.855250: step: 1280/531, loss: 4.291534423828125e-05 2023-01-23 03:45:23.958254: step: 1284/531, loss: -8.01086389401462e-06 2023-01-23 03:45:25.051573: step: 1288/531, loss: 0.0027475357055664062 2023-01-23 03:45:26.178503: step: 1292/531, loss: 0.00011177063424838707 2023-01-23 03:45:27.327899: step: 1296/531, loss: 0.08426089584827423 2023-01-23 03:45:28.487082: step: 1300/531, loss: 0.07674475014209747 2023-01-23 03:45:29.634914: step: 1304/531, loss: 0.004261589143425226 2023-01-23 03:45:30.746449: step: 1308/531, loss: 0.007454085163772106 2023-01-23 03:45:31.874683: step: 1312/531, loss: 0.0021406172309070826 2023-01-23 03:45:33.001079: step: 1316/531, loss: 0.0002738952753134072 2023-01-23 03:45:34.139951: step: 1320/531, loss: 0.023512747138738632 2023-01-23 03:45:35.273770: step: 1324/531, loss: 0.020349694415926933 2023-01-23 03:45:36.420828: step: 1328/531, loss: 0.0031551362480968237 2023-01-23 03:45:37.549856: step: 1332/531, loss: 0.00012092590623069555 2023-01-23 03:45:38.685487: step: 1336/531, loss: 0.004311466123908758 2023-01-23 03:45:39.806574: step: 1340/531, loss: 0.00036487579927779734 2023-01-23 03:45:40.923644: step: 1344/531, loss: 0.04719598591327667 2023-01-23 03:45:42.025575: step: 1348/531, loss: 0.007465839851647615 2023-01-23 03:45:43.166006: step: 1352/531, loss: 9.250640869140625e-05 2023-01-23 03:45:44.292772: step: 1356/531, loss: -1.826286461437121e-05 2023-01-23 03:45:45.412123: step: 1360/531, loss: 0.0006107331137172878 2023-01-23 03:45:46.524018: step: 1364/531, loss: 0.001025342964567244 2023-01-23 03:45:47.617380: step: 1368/531, loss: 0.026270676404237747 2023-01-23 03:45:48.757528: step: 1372/531, loss: 0.007968711666762829 2023-01-23 03:45:49.884173: step: 1376/531, loss: 0.014845657162368298 2023-01-23 03:45:50.987086: step: 1380/531, loss: 1.4400482541532256e-05 2023-01-23 03:45:52.097565: step: 1384/531, loss: 0.014618396759033203 2023-01-23 03:45:53.225829: step: 1388/531, loss: 0.014489746652543545 2023-01-23 03:45:54.349214: step: 1392/531, loss: 0.0010992049938067794 2023-01-23 03:45:55.486880: step: 1396/531, loss: 0.003054332919418812 2023-01-23 03:45:56.627908: step: 1400/531, loss: 0.024099349975585938 2023-01-23 03:45:57.734971: step: 1404/531, loss: 0.00011539459228515625 2023-01-23 03:45:58.819839: step: 1408/531, loss: 1.1920930774067529e-05 2023-01-23 03:45:59.956412: step: 1412/531, loss: 0.001798439072445035 2023-01-23 03:46:01.086574: step: 1416/531, loss: 0.0023027898278087378 2023-01-23 03:46:02.192751: step: 1420/531, loss: 2.250671423098538e-05 2023-01-23 03:46:03.325339: step: 1424/531, loss: 0.017932415008544922 2023-01-23 03:46:04.444222: step: 1428/531, loss: 2.117157055181451e-05 2023-01-23 03:46:05.573955: step: 1432/531, loss: 0.09610423445701599 2023-01-23 03:46:06.687352: step: 1436/531, loss: 0.0016328811179846525 2023-01-23 03:46:07.797636: step: 1440/531, loss: 0.0014927983283996582 2023-01-23 03:46:08.888837: step: 1444/531, loss: 3.2615658710710704e-05 2023-01-23 03:46:10.021645: step: 1448/531, loss: 0.0029414177406579256 2023-01-23 03:46:11.124415: step: 1452/531, loss: 0.00042066574678756297 2023-01-23 03:46:12.271146: step: 1456/531, loss: 0.17343387007713318 2023-01-23 03:46:13.387516: step: 1460/531, loss: 0.5168283581733704 2023-01-23 03:46:14.499772: step: 1464/531, loss: 0.0032763960771262646 2023-01-23 03:46:15.620306: step: 1468/531, loss: 1.9978411197662354 2023-01-23 03:46:16.727753: step: 1472/531, loss: 7.886887033237144e-05 2023-01-23 03:46:17.877210: step: 1476/531, loss: 0.0017276763683184981 2023-01-23 03:46:18.972742: step: 1480/531, loss: 0.008297347463667393 2023-01-23 03:46:20.102687: step: 1484/531, loss: 5.2261355449445546e-05 2023-01-23 03:46:21.239506: step: 1488/531, loss: 0.01113500539213419 2023-01-23 03:46:22.373006: step: 1492/531, loss: 0.05220966413617134 2023-01-23 03:46:23.528290: step: 1496/531, loss: 0.00022125244140625 2023-01-23 03:46:24.659806: step: 1500/531, loss: 0.04092588648200035 2023-01-23 03:46:25.772169: step: 1504/531, loss: 0.0023328305687755346 2023-01-23 03:46:26.891190: step: 1508/531, loss: 0.06254033744335175 2023-01-23 03:46:28.054386: step: 1512/531, loss: 0.0006333351484499872 2023-01-23 03:46:29.186922: step: 1516/531, loss: 0.049677420407533646 2023-01-23 03:46:30.315230: step: 1520/531, loss: 0.0010841370094567537 2023-01-23 03:46:31.441574: step: 1524/531, loss: 0.003187978407368064 2023-01-23 03:46:32.559315: step: 1528/531, loss: 0.015378189273178577 2023-01-23 03:46:33.677315: step: 1532/531, loss: 0.0010898590553551912 2023-01-23 03:46:34.801499: step: 1536/531, loss: 0.003979777917265892 2023-01-23 03:46:35.906843: step: 1540/531, loss: 0.00020428001880645752 2023-01-23 03:46:37.022074: step: 1544/531, loss: 0.00026788710965774953 2023-01-23 03:46:38.148742: step: 1548/531, loss: 0.00018596649169921875 2023-01-23 03:46:39.282303: step: 1552/531, loss: 5.836486889165826e-05 2023-01-23 03:46:40.385430: step: 1556/531, loss: 4.062652442371473e-05 2023-01-23 03:46:41.501705: step: 1560/531, loss: 0.0003044128534384072 2023-01-23 03:46:42.655106: step: 1564/531, loss: 0.016448449343442917 2023-01-23 03:46:43.759029: step: 1568/531, loss: 0.0006078720325604081 2023-01-23 03:46:44.886604: step: 1572/531, loss: 0.2753753960132599 2023-01-23 03:46:46.008041: step: 1576/531, loss: 0.013185406103730202 2023-01-23 03:46:47.155472: step: 1580/531, loss: 0.0004893302684649825 2023-01-23 03:46:48.267976: step: 1584/531, loss: 0.03410310670733452 2023-01-23 03:46:49.419303: step: 1588/531, loss: 0.6755965352058411 2023-01-23 03:46:50.566571: step: 1592/531, loss: 0.004021358676254749 2023-01-23 03:46:51.678069: step: 1596/531, loss: 0.00010814667621161789 2023-01-23 03:46:52.788478: step: 1600/531, loss: 0.02467174455523491 2023-01-23 03:46:53.923661: step: 1604/531, loss: 0.004039192106574774 2023-01-23 03:46:55.038697: step: 1608/531, loss: 0.0001583099365234375 2023-01-23 03:46:56.166399: step: 1612/531, loss: 0.021172380074858665 2023-01-23 03:46:57.331330: step: 1616/531, loss: 0.006282710935920477 2023-01-23 03:46:58.463489: step: 1620/531, loss: 0.0017760753398761153 2023-01-23 03:46:59.594638: step: 1624/531, loss: 0.0035886764526367188 2023-01-23 03:47:00.746041: step: 1628/531, loss: 0.0011241913307458162 2023-01-23 03:47:01.867443: step: 1632/531, loss: 0.021419525146484375 2023-01-23 03:47:02.986603: step: 1636/531, loss: 0.0009461403242312372 2023-01-23 03:47:04.101539: step: 1640/531, loss: 0.0006842613220214844 2023-01-23 03:47:05.227435: step: 1644/531, loss: 0.1723162680864334 2023-01-23 03:47:06.330975: step: 1648/531, loss: 0.020807411521673203 2023-01-23 03:47:07.457414: step: 1652/531, loss: 0.00020484924607444555 2023-01-23 03:47:08.564212: step: 1656/531, loss: 0.0029966356232762337 2023-01-23 03:47:09.680113: step: 1660/531, loss: 0.003917026799172163 2023-01-23 03:47:10.816512: step: 1664/531, loss: 0.014870263636112213 2023-01-23 03:47:11.952617: step: 1668/531, loss: 0.02199258841574192 2023-01-23 03:47:13.118706: step: 1672/531, loss: 0.00030956268892623484 2023-01-23 03:47:14.263006: step: 1676/531, loss: 8.77380352903856e-06 2023-01-23 03:47:15.380618: step: 1680/531, loss: 0.006533431820571423 2023-01-23 03:47:16.521350: step: 1684/531, loss: 0.004490470979362726 2023-01-23 03:47:17.639319: step: 1688/531, loss: 0.06544437259435654 2023-01-23 03:47:18.766082: step: 1692/531, loss: 0.002435016678646207 2023-01-23 03:47:19.868848: step: 1696/531, loss: 0.0013275147648528218 2023-01-23 03:47:21.008412: step: 1700/531, loss: 0.0035051347222179174 2023-01-23 03:47:22.163294: step: 1704/531, loss: 2.975463939947076e-05 2023-01-23 03:47:23.279905: step: 1708/531, loss: 0.01707991398870945 2023-01-23 03:47:24.432478: step: 1712/531, loss: 0.012486553750932217 2023-01-23 03:47:25.553838: step: 1716/531, loss: 0.0023081779945641756 2023-01-23 03:47:26.673824: step: 1720/531, loss: 0.0003156662278342992 2023-01-23 03:47:27.796163: step: 1724/531, loss: 3.833770824712701e-05 2023-01-23 03:47:28.929206: step: 1728/531, loss: 0.0001930236758198589 2023-01-23 03:47:30.033168: step: 1732/531, loss: 0.057274624705314636 2023-01-23 03:47:31.158096: step: 1736/531, loss: 0.013962173834443092 2023-01-23 03:47:32.305324: step: 1740/531, loss: 0.022211171686649323 2023-01-23 03:47:33.431391: step: 1744/531, loss: 0.012823772616684437 2023-01-23 03:47:34.569991: step: 1748/531, loss: 0.012158584780991077 2023-01-23 03:47:35.688330: step: 1752/531, loss: 7.24792471373803e-06 2023-01-23 03:47:36.802911: step: 1756/531, loss: 0.0038221837021410465 2023-01-23 03:47:37.903915: step: 1760/531, loss: 0.00025424957857467234 2023-01-23 03:47:39.044573: step: 1764/531, loss: 0.015503883361816406 2023-01-23 03:47:40.150975: step: 1768/531, loss: 0.003568649524822831 2023-01-23 03:47:41.277912: step: 1772/531, loss: 0.0033905983436852694 2023-01-23 03:47:42.397420: step: 1776/531, loss: 0.10421008616685867 2023-01-23 03:47:43.524338: step: 1780/531, loss: 0.000776100205257535 2023-01-23 03:47:44.632882: step: 1784/531, loss: 0.0011583329178392887 2023-01-23 03:47:45.768080: step: 1788/531, loss: 0.010939407162368298 2023-01-23 03:47:46.893435: step: 1792/531, loss: 0.0007143020629882812 2023-01-23 03:47:47.998602: step: 1796/531, loss: 0.01907653920352459 2023-01-23 03:47:49.116245: step: 1800/531, loss: 0.00012941360182594508 2023-01-23 03:47:50.238081: step: 1804/531, loss: 0.0002992272493429482 2023-01-23 03:47:51.354350: step: 1808/531, loss: 0.011938858777284622 2023-01-23 03:47:52.472133: step: 1812/531, loss: 0.016828538849949837 2023-01-23 03:47:53.629837: step: 1816/531, loss: 0.01117630023509264 2023-01-23 03:47:54.763101: step: 1820/531, loss: 0.24527034163475037 2023-01-23 03:47:55.883356: step: 1824/531, loss: 0.005370235536247492 2023-01-23 03:47:57.027677: step: 1828/531, loss: 0.0003772735653910786 2023-01-23 03:47:58.136217: step: 1832/531, loss: 0.00011901855759788305 2023-01-23 03:47:59.268489: step: 1836/531, loss: 0.0060482025146484375 2023-01-23 03:48:00.394363: step: 1840/531, loss: 9.52720656641759e-05 2023-01-23 03:48:01.497920: step: 1844/531, loss: 0.0031517029274255037 2023-01-23 03:48:02.630359: step: 1848/531, loss: 0.007832765579223633 2023-01-23 03:48:03.808464: step: 1852/531, loss: 0.0004680633428506553 2023-01-23 03:48:04.948541: step: 1856/531, loss: 0.020852327346801758 2023-01-23 03:48:06.082880: step: 1860/531, loss: 0.2709079682826996 2023-01-23 03:48:07.201336: step: 1864/531, loss: 0.03138594329357147 2023-01-23 03:48:08.327189: step: 1868/531, loss: 0.02486400678753853 2023-01-23 03:48:09.425878: step: 1872/531, loss: 0.00231170654296875 2023-01-23 03:48:10.590744: step: 1876/531, loss: 0.005629729945212603 2023-01-23 03:48:11.707161: step: 1880/531, loss: 0.0009407043689861894 2023-01-23 03:48:12.863012: step: 1884/531, loss: 2.059936559817288e-05 2023-01-23 03:48:14.028894: step: 1888/531, loss: 0.01094665564596653 2023-01-23 03:48:15.185561: step: 1892/531, loss: 0.002063274383544922 2023-01-23 03:48:16.304740: step: 1896/531, loss: 0.0023759843315929174 2023-01-23 03:48:17.438519: step: 1900/531, loss: 0.0014505386352539062 2023-01-23 03:48:18.565121: step: 1904/531, loss: 0.0006941794999875128 2023-01-23 03:48:19.702022: step: 1908/531, loss: 0.1358344554901123 2023-01-23 03:48:20.826218: step: 1912/531, loss: 0.0018725395202636719 2023-01-23 03:48:21.964763: step: 1916/531, loss: 0.00012922286987304688 2023-01-23 03:48:23.078404: step: 1920/531, loss: 3.337860107421875e-05 2023-01-23 03:48:24.203328: step: 1924/531, loss: -2.47955313170678e-06 2023-01-23 03:48:25.352503: step: 1928/531, loss: 0.0037004470359534025 2023-01-23 03:48:26.475483: step: 1932/531, loss: 0.00874023512005806 2023-01-23 03:48:27.601958: step: 1936/531, loss: 0.02477288246154785 2023-01-23 03:48:28.711474: step: 1940/531, loss: 4.76837158203125e-06 2023-01-23 03:48:29.829683: step: 1944/531, loss: 0.03425197675824165 2023-01-23 03:48:30.938183: step: 1948/531, loss: 0.011546325869858265 2023-01-23 03:48:32.083915: step: 1952/531, loss: 0.01705913618206978 2023-01-23 03:48:33.233783: step: 1956/531, loss: 0.0004614829958882183 2023-01-23 03:48:34.349561: step: 1960/531, loss: 0.0015539169544354081 2023-01-23 03:48:35.470965: step: 1964/531, loss: 0.08339511603116989 2023-01-23 03:48:36.589883: step: 1968/531, loss: 0.06810970604419708 2023-01-23 03:48:37.717016: step: 1972/531, loss: 0.006753015331923962 2023-01-23 03:48:38.830492: step: 1976/531, loss: 0.0014304905198514462 2023-01-23 03:48:39.973056: step: 1980/531, loss: 0.00022920667834114283 2023-01-23 03:48:41.088139: step: 1984/531, loss: 0.00021257401385810226 2023-01-23 03:48:42.211738: step: 1988/531, loss: 0.005514240358024836 2023-01-23 03:48:43.340407: step: 1992/531, loss: 0.042029574513435364 2023-01-23 03:48:44.456535: step: 1996/531, loss: 8.459090895485133e-05 2023-01-23 03:48:45.571783: step: 2000/531, loss: 0.06822490692138672 2023-01-23 03:48:46.699252: step: 2004/531, loss: 0.003777408739551902 2023-01-23 03:48:47.821047: step: 2008/531, loss: 0.0009963036281988025 2023-01-23 03:48:48.946903: step: 2012/531, loss: 0.00019874573627021164 2023-01-23 03:48:50.107948: step: 2016/531, loss: 0.13633279502391815 2023-01-23 03:48:51.204458: step: 2020/531, loss: 0.01930980756878853 2023-01-23 03:48:52.336005: step: 2024/531, loss: 0.004443645477294922 2023-01-23 03:48:53.466048: step: 2028/531, loss: 0.04711952432990074 2023-01-23 03:48:54.661235: step: 2032/531, loss: 0.02450275421142578 2023-01-23 03:48:55.783229: step: 2036/531, loss: 0.00010251998901367188 2023-01-23 03:48:56.904115: step: 2040/531, loss: 0.026918604969978333 2023-01-23 03:48:58.026097: step: 2044/531, loss: 0.1010231003165245 2023-01-23 03:48:59.152658: step: 2048/531, loss: 5.818209171295166 2023-01-23 03:49:00.278212: step: 2052/531, loss: 0.003371572820469737 2023-01-23 03:49:01.403317: step: 2056/531, loss: 0.00259475689381361 2023-01-23 03:49:02.516598: step: 2060/531, loss: 0.015487289056181908 2023-01-23 03:49:03.657268: step: 2064/531, loss: 0.0007555008050985634 2023-01-23 03:49:04.763845: step: 2068/531, loss: 0.11212190985679626 2023-01-23 03:49:05.899966: step: 2072/531, loss: 0.015349293127655983 2023-01-23 03:49:07.013112: step: 2076/531, loss: 0.01198873482644558 2023-01-23 03:49:08.147926: step: 2080/531, loss: 4.4345861169858836e-06 2023-01-23 03:49:09.280438: step: 2084/531, loss: 0.03282823786139488 2023-01-23 03:49:10.392049: step: 2088/531, loss: 0.002299356274306774 2023-01-23 03:49:11.541778: step: 2092/531, loss: 0.028997136279940605 2023-01-23 03:49:12.663429: step: 2096/531, loss: 0.0005409240256994963 2023-01-23 03:49:13.803840: step: 2100/531, loss: 0.0026992796920239925 2023-01-23 03:49:14.918651: step: 2104/531, loss: 0.0022093772422522306 2023-01-23 03:49:16.059638: step: 2108/531, loss: 0.00027599334134720266 2023-01-23 03:49:17.153895: step: 2112/531, loss: 0.005663490388542414 2023-01-23 03:49:18.287940: step: 2116/531, loss: 0.001531410263851285 2023-01-23 03:49:19.427647: step: 2120/531, loss: 0.000476837158203125 2023-01-23 03:49:20.573284: step: 2124/531, loss: 0.022632265463471413 ================================================== Loss: 0.035 -------------------- Dev: {'event': {'p': 0.6110520722635494, 'r': 0.7656458055925432, 'f1': 0.6796690307328604}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Test: {'event': {'p': 0.6368015414258189, 'r': 0.7883124627310674, 'f1': 0.7045030642152945}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Chinese: {'event': {'p': 0.5925925925925926, 'r': 0.8888888888888888, 'f1': 0.711111111111111}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Korean: {'event': {'p': 0.6585365853658537, 'r': 0.42857142857142855, 'f1': 0.5192307692307693}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Russian: {'event': {'p': 0.4444444444444444, 'r': 0.5555555555555556, 'f1': 0.49382716049382713}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 25 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:50:00.247262: step: 4/531, loss: 0.032988931983709335 2023-01-23 03:50:01.393446: step: 8/531, loss: 3.5762786865234375e-05 2023-01-23 03:50:02.527689: step: 12/531, loss: 0.0005677223089151084 2023-01-23 03:50:03.640359: step: 16/531, loss: 0.006481218617409468 2023-01-23 03:50:04.737959: step: 20/531, loss: 0.02497086673974991 2023-01-23 03:50:05.857124: step: 24/531, loss: 0.0042969705536961555 2023-01-23 03:50:06.997804: step: 28/531, loss: 0.0009853362571448088 2023-01-23 03:50:08.102886: step: 32/531, loss: 0.0012187004322186112 2023-01-23 03:50:09.220789: step: 36/531, loss: 0.006720161531120539 2023-01-23 03:50:10.349272: step: 40/531, loss: 8.57353225001134e-05 2023-01-23 03:50:11.468594: step: 44/531, loss: 0.010281180962920189 2023-01-23 03:50:12.630580: step: 48/531, loss: 0.0014828682178631425 2023-01-23 03:50:13.761674: step: 52/531, loss: 0.000152587890625 2023-01-23 03:50:14.866943: step: 56/531, loss: 0.0025002481415867805 2023-01-23 03:50:15.976436: step: 60/531, loss: 9.72747784544481e-06 2023-01-23 03:50:17.099429: step: 64/531, loss: 0.00015115737915039062 2023-01-23 03:50:18.190576: step: 68/531, loss: 0.0012115478748455644 2023-01-23 03:50:19.306410: step: 72/531, loss: 0.026049185544252396 2023-01-23 03:50:20.440126: step: 76/531, loss: 0.00029411318246275187 2023-01-23 03:50:21.613007: step: 80/531, loss: 0.03990078344941139 2023-01-23 03:50:22.752088: step: 84/531, loss: 1.8787384760798886e-05 2023-01-23 03:50:23.876239: step: 88/531, loss: 0.006272793281823397 2023-01-23 03:50:25.047833: step: 92/531, loss: 1.125335711549269e-05 2023-01-23 03:50:26.179635: step: 96/531, loss: 0.0016862868797034025 2023-01-23 03:50:27.304510: step: 100/531, loss: 0.009882260113954544 2023-01-23 03:50:28.401405: step: 104/531, loss: 0.06958713382482529 2023-01-23 03:50:29.558749: step: 108/531, loss: 0.1126624122262001 2023-01-23 03:50:30.677405: step: 112/531, loss: 0.0014297484885901213 2023-01-23 03:50:31.784255: step: 116/531, loss: 0.0012443542946130037 2023-01-23 03:50:32.931040: step: 120/531, loss: -2.8610202207346447e-07 2023-01-23 03:50:34.053851: step: 124/531, loss: 0.05489387363195419 2023-01-23 03:50:35.201799: step: 128/531, loss: 0.0013509751297533512 2023-01-23 03:50:36.332223: step: 132/531, loss: 1.2588501704158261e-05 2023-01-23 03:50:37.492383: step: 136/531, loss: 0.0018597602611407638 2023-01-23 03:50:38.630417: step: 140/531, loss: 0.0075054168701171875 2023-01-23 03:50:39.758626: step: 144/531, loss: 0.0011123657459393144 2023-01-23 03:50:40.873287: step: 148/531, loss: 8.392333256779239e-06 2023-01-23 03:50:42.014442: step: 152/531, loss: 0.009091091342270374 2023-01-23 03:50:43.127252: step: 156/531, loss: 0.00015382767014671117 2023-01-23 03:50:44.241353: step: 160/531, loss: 3.328323145979084e-05 2023-01-23 03:50:45.372009: step: 164/531, loss: 1.9931792849092744e-05 2023-01-23 03:50:46.521252: step: 168/531, loss: 0.03233089670538902 2023-01-23 03:50:47.670946: step: 172/531, loss: 0.0481935515999794 2023-01-23 03:50:48.816056: step: 176/531, loss: 0.00025272369384765625 2023-01-23 03:50:49.926200: step: 180/531, loss: 0.013467979617416859 2023-01-23 03:50:51.042802: step: 184/531, loss: 0.006554984953254461 2023-01-23 03:50:52.159944: step: 188/531, loss: 1.3446808225126006e-05 2023-01-23 03:50:53.308015: step: 192/531, loss: 0.02058105543255806 2023-01-23 03:50:54.451851: step: 196/531, loss: 0.006235265638679266 2023-01-23 03:50:55.596888: step: 200/531, loss: 0.021974945440888405 2023-01-23 03:50:56.730569: step: 204/531, loss: 0.00020799637422896922 2023-01-23 03:50:57.852763: step: 208/531, loss: 0.002738761715590954 2023-01-23 03:50:58.969969: step: 212/531, loss: 1.5544890629826114e-05 2023-01-23 03:51:00.106842: step: 216/531, loss: 0.22516824305057526 2023-01-23 03:51:01.226393: step: 220/531, loss: 0.00028567315894179046 2023-01-23 03:51:02.372431: step: 224/531, loss: 0.016933728009462357 2023-01-23 03:51:03.481662: step: 228/531, loss: 0.0013299941783770919 2023-01-23 03:51:04.606044: step: 232/531, loss: 2.0122528439969756e-05 2023-01-23 03:51:05.697090: step: 236/531, loss: 0.0017433167668059468 2023-01-23 03:51:06.865245: step: 240/531, loss: 0.22139672935009003 2023-01-23 03:51:08.009292: step: 244/531, loss: 7.07626313669607e-05 2023-01-23 03:51:09.129990: step: 248/531, loss: 0.003290367079898715 2023-01-23 03:51:10.228615: step: 252/531, loss: 0.00015249251737259328 2023-01-23 03:51:11.380507: step: 256/531, loss: 0.0002323150692973286 2023-01-23 03:51:12.486096: step: 260/531, loss: 0.111589714884758 2023-01-23 03:51:13.623700: step: 264/531, loss: 0.017264558002352715 2023-01-23 03:51:14.753405: step: 268/531, loss: 0.006297302432358265 2023-01-23 03:51:15.903564: step: 272/531, loss: 0.0006596565362997353 2023-01-23 03:51:17.026479: step: 276/531, loss: 7.882118370616809e-05 2023-01-23 03:51:18.157941: step: 280/531, loss: 0.000331878662109375 2023-01-23 03:51:19.335446: step: 284/531, loss: 0.018483351916074753 2023-01-23 03:51:20.460444: step: 288/531, loss: 1.640319896978326e-05 2023-01-23 03:51:21.608713: step: 292/531, loss: 8.010864803509321e-06 2023-01-23 03:51:22.712207: step: 296/531, loss: 6.380081322276965e-05 2023-01-23 03:51:23.854232: step: 300/531, loss: 0.002920818515121937 2023-01-23 03:51:24.984246: step: 304/531, loss: 0.002807808108627796 2023-01-23 03:51:26.143274: step: 308/531, loss: 7.047654071357101e-05 2023-01-23 03:51:27.273992: step: 312/531, loss: 4.749298022943549e-05 2023-01-23 03:51:28.403551: step: 316/531, loss: 0.09273949265480042 2023-01-23 03:51:29.511136: step: 320/531, loss: 6.198883056640625e-06 2023-01-23 03:51:30.646499: step: 324/531, loss: 0.015950489789247513 2023-01-23 03:51:31.771934: step: 328/531, loss: 0.0003574848233256489 2023-01-23 03:51:32.899010: step: 332/531, loss: 6.523132469737902e-05 2023-01-23 03:51:34.023624: step: 336/531, loss: 0.0012815475929528475 2023-01-23 03:51:35.128969: step: 340/531, loss: 0.00044374464778229594 2023-01-23 03:51:36.256669: step: 344/531, loss: 0.00048694611177779734 2023-01-23 03:51:37.367996: step: 348/531, loss: 2.6416779292048886e-05 2023-01-23 03:51:38.506083: step: 352/531, loss: 0.0003872871457133442 2023-01-23 03:51:39.625936: step: 356/531, loss: 1.2207032341393642e-05 2023-01-23 03:51:40.756735: step: 360/531, loss: 0.00017385483079124242 2023-01-23 03:51:41.871373: step: 364/531, loss: 0.00020160674466751516 2023-01-23 03:51:42.994367: step: 368/531, loss: 0.11150521785020828 2023-01-23 03:51:44.157531: step: 372/531, loss: 0.004104232881218195 2023-01-23 03:51:45.266723: step: 376/531, loss: 0.002468585968017578 2023-01-23 03:51:46.387703: step: 380/531, loss: 0.001498317695222795 2023-01-23 03:51:47.494753: step: 384/531, loss: 0.030260277912020683 2023-01-23 03:51:48.625574: step: 388/531, loss: 0.01050491351634264 2023-01-23 03:51:49.755305: step: 392/531, loss: 0.10444565117359161 2023-01-23 03:51:50.853241: step: 396/531, loss: 0.0016338349087163806 2023-01-23 03:51:51.996450: step: 400/531, loss: 5.245209194981726e-06 2023-01-23 03:51:53.120635: step: 404/531, loss: 0.008989429101347923 2023-01-23 03:51:54.239449: step: 408/531, loss: 0.010001182556152344 2023-01-23 03:51:55.375838: step: 412/531, loss: 0.015207194723188877 2023-01-23 03:51:56.496656: step: 416/531, loss: 0.018520642071962357 2023-01-23 03:51:57.598299: step: 420/531, loss: 0.036756038665771484 2023-01-23 03:51:58.730445: step: 424/531, loss: 0.015356063842773438 2023-01-23 03:51:59.868200: step: 428/531, loss: 0.00039386749267578125 2023-01-23 03:52:00.988725: step: 432/531, loss: 0.0002860069216694683 2023-01-23 03:52:02.101974: step: 436/531, loss: 0.0005212783580645919 2023-01-23 03:52:03.281401: step: 440/531, loss: 0.0030637739691883326 2023-01-23 03:52:04.407784: step: 444/531, loss: 0.0009532928233966231 2023-01-23 03:52:05.558312: step: 448/531, loss: 0.02402172051370144 2023-01-23 03:52:06.691217: step: 452/531, loss: 0.021465493366122246 2023-01-23 03:52:07.829115: step: 456/531, loss: 0.026410698890686035 2023-01-23 03:52:08.945465: step: 460/531, loss: 0.018983269110322 2023-01-23 03:52:10.074380: step: 464/531, loss: 0.0003779888211283833 2023-01-23 03:52:11.201991: step: 468/531, loss: 0.03937859460711479 2023-01-23 03:52:12.340101: step: 472/531, loss: 0.002205467317253351 2023-01-23 03:52:13.451172: step: 476/531, loss: 0.00021839141845703125 2023-01-23 03:52:14.556870: step: 480/531, loss: 0.005312061402946711 2023-01-23 03:52:15.695396: step: 484/531, loss: 0.00025768281193450093 2023-01-23 03:52:16.840214: step: 488/531, loss: 0.044963642954826355 2023-01-23 03:52:17.964222: step: 492/531, loss: 0.0008768081897869706 2023-01-23 03:52:19.088245: step: 496/531, loss: 4.119873119634576e-05 2023-01-23 03:52:20.238823: step: 500/531, loss: 0.035985566675662994 2023-01-23 03:52:21.365433: step: 504/531, loss: 0.0539371520280838 2023-01-23 03:52:22.474021: step: 508/531, loss: 0.005373192019760609 2023-01-23 03:52:23.578359: step: 512/531, loss: 0.06608381122350693 2023-01-23 03:52:24.733374: step: 516/531, loss: 8.39233416627394e-06 2023-01-23 03:52:25.836664: step: 520/531, loss: -2.4795535864541307e-06 2023-01-23 03:52:26.944369: step: 524/531, loss: 0.00039033888606354594 2023-01-23 03:52:28.070533: step: 528/531, loss: 1.2874603271484375e-05 2023-01-23 03:52:29.191988: step: 532/531, loss: 0.0033060074783861637 2023-01-23 03:52:30.325955: step: 536/531, loss: 0.00011219978478038684 2023-01-23 03:52:31.449672: step: 540/531, loss: 0.012881851755082607 2023-01-23 03:52:32.549770: step: 544/531, loss: 7.362366159213707e-05 2023-01-23 03:52:33.665185: step: 548/531, loss: 0.0037229538429528475 2023-01-23 03:52:34.767933: step: 552/531, loss: 0.0003684043767862022 2023-01-23 03:52:35.908713: step: 556/531, loss: 0.05353298410773277 2023-01-23 03:52:37.015699: step: 560/531, loss: 2.7418136596679688e-05 2023-01-23 03:52:38.123646: step: 564/531, loss: -1.1444091796875e-05 2023-01-23 03:52:39.239837: step: 568/531, loss: 0.0003900528245139867 2023-01-23 03:52:40.356617: step: 572/531, loss: 0.011457158252596855 2023-01-23 03:52:41.467025: step: 576/531, loss: 2.0694733393611386e-05 2023-01-23 03:52:42.621782: step: 580/531, loss: 0.0008852005121298134 2023-01-23 03:52:43.736784: step: 584/531, loss: 0.0017203331226482987 2023-01-23 03:52:44.830810: step: 588/531, loss: 0.00034694670466706157 2023-01-23 03:52:45.993800: step: 592/531, loss: 8.034706843318418e-05 2023-01-23 03:52:47.107106: step: 596/531, loss: -4.482268195715733e-06 2023-01-23 03:52:48.259698: step: 600/531, loss: 0.005230140872299671 2023-01-23 03:52:49.388001: step: 604/531, loss: 0.0014334202278405428 2023-01-23 03:52:50.498605: step: 608/531, loss: 0.019589615985751152 2023-01-23 03:52:51.637063: step: 612/531, loss: 0.0001410484401276335 2023-01-23 03:52:52.736617: step: 616/531, loss: 0.0009629249689169228 2023-01-23 03:52:53.866751: step: 620/531, loss: 0.0028882978949695826 2023-01-23 03:52:54.970659: step: 624/531, loss: 0.004574775695800781 2023-01-23 03:52:56.112590: step: 628/531, loss: 0.004901457112282515 2023-01-23 03:52:57.219819: step: 632/531, loss: -9.536743306171047e-08 2023-01-23 03:52:58.327639: step: 636/531, loss: 0.02161111868917942 2023-01-23 03:52:59.449575: step: 640/531, loss: 0.0019255639053881168 2023-01-23 03:53:00.544125: step: 644/531, loss: 0.005345821380615234 2023-01-23 03:53:01.637545: step: 648/531, loss: 0.006084251217544079 2023-01-23 03:53:02.755536: step: 652/531, loss: 0.00032253266545012593 2023-01-23 03:53:03.837212: step: 656/531, loss: 0.011688184924423695 2023-01-23 03:53:04.981245: step: 660/531, loss: 0.00028862952603958547 2023-01-23 03:53:06.136525: step: 664/531, loss: 0.04719047620892525 2023-01-23 03:53:07.269494: step: 668/531, loss: 0.00037722886190749705 2023-01-23 03:53:08.388904: step: 672/531, loss: 0.0001333236723439768 2023-01-23 03:53:09.513662: step: 676/531, loss: 0.0027381896506994963 2023-01-23 03:53:10.672372: step: 680/531, loss: 0.011364174075424671 2023-01-23 03:53:11.799697: step: 684/531, loss: 0.00041828156099654734 2023-01-23 03:53:12.898110: step: 688/531, loss: 7.171630568336695e-05 2023-01-23 03:53:14.017957: step: 692/531, loss: 0.001556777860969305 2023-01-23 03:53:15.156178: step: 696/531, loss: 0.00251255021430552 2023-01-23 03:53:16.271755: step: 700/531, loss: 0.0028877260629087687 2023-01-23 03:53:17.432117: step: 704/531, loss: 0.004551267717033625 2023-01-23 03:53:18.553137: step: 708/531, loss: 1.5449522834387608e-05 2023-01-23 03:53:19.704254: step: 712/531, loss: 0.0038997652009129524 2023-01-23 03:53:20.816545: step: 716/531, loss: 0.02510681003332138 2023-01-23 03:53:21.921211: step: 720/531, loss: 0.003014731453731656 2023-01-23 03:53:23.138412: step: 724/531, loss: 0.0022031785920262337 2023-01-23 03:53:24.322401: step: 728/531, loss: 0.018454933539032936 2023-01-23 03:53:25.412217: step: 732/531, loss: 0.0005566597101278603 2023-01-23 03:53:26.525288: step: 736/531, loss: 0.0005632400861941278 2023-01-23 03:53:27.647928: step: 740/531, loss: 9.098053124034777e-05 2023-01-23 03:53:28.759531: step: 744/531, loss: 0.003599548479542136 2023-01-23 03:53:29.892091: step: 748/531, loss: 2.212524486822076e-05 2023-01-23 03:53:31.029507: step: 752/531, loss: 0.049063682556152344 2023-01-23 03:53:32.166849: step: 756/531, loss: 0.0011266708606854081 2023-01-23 03:53:33.301193: step: 760/531, loss: 1.014467716217041 2023-01-23 03:53:34.448219: step: 764/531, loss: 0.0012990952236577868 2023-01-23 03:53:35.561223: step: 768/531, loss: 0.01313924789428711 2023-01-23 03:53:36.677309: step: 772/531, loss: 0.00031824110192246735 2023-01-23 03:53:37.799527: step: 776/531, loss: 0.04759189859032631 2023-01-23 03:53:38.891670: step: 780/531, loss: 0.0013168335426598787 2023-01-23 03:53:40.015072: step: 784/531, loss: 0.0008058547973632812 2023-01-23 03:53:41.116902: step: 788/531, loss: 0.0012062669266015291 2023-01-23 03:53:42.213745: step: 792/531, loss: 0.0012069225776940584 2023-01-23 03:53:43.356786: step: 796/531, loss: 0.0011704444186761975 2023-01-23 03:53:44.463145: step: 800/531, loss: 0.00019512177095748484 2023-01-23 03:53:45.577152: step: 804/531, loss: 5.6076052715070546e-05 2023-01-23 03:53:46.684236: step: 808/531, loss: 0.01063003484159708 2023-01-23 03:53:47.802284: step: 812/531, loss: 0.0019748688209801912 2023-01-23 03:53:48.932667: step: 816/531, loss: 5.91278057981981e-06 2023-01-23 03:53:50.050600: step: 820/531, loss: 0.0033419609535485506 2023-01-23 03:53:51.175044: step: 824/531, loss: 0.0016244889702647924 2023-01-23 03:53:52.306953: step: 828/531, loss: 0.0007963180541992188 2023-01-23 03:53:53.415763: step: 832/531, loss: 0.01685648038983345 2023-01-23 03:53:54.543053: step: 836/531, loss: 0.004652786068618298 2023-01-23 03:53:55.684344: step: 840/531, loss: 0.47675180435180664 2023-01-23 03:53:56.806132: step: 844/531, loss: 0.031041432172060013 2023-01-23 03:53:57.896114: step: 848/531, loss: 0.0008979797712527215 2023-01-23 03:53:59.032784: step: 852/531, loss: 0.0057926177978515625 2023-01-23 03:54:00.139281: step: 856/531, loss: 0.000316619873046875 2023-01-23 03:54:01.261690: step: 860/531, loss: 0.00021085739717818797 2023-01-23 03:54:02.379586: step: 864/531, loss: 0.04318409040570259 2023-01-23 03:54:03.505009: step: 868/531, loss: 0.00518035888671875 2023-01-23 03:54:04.619623: step: 872/531, loss: 0.004272746853530407 2023-01-23 03:54:05.717930: step: 876/531, loss: 0.013213921338319778 2023-01-23 03:54:06.835033: step: 880/531, loss: 2.326965295651462e-05 2023-01-23 03:54:07.961524: step: 884/531, loss: 1.7881393432617188e-05 2023-01-23 03:54:09.054337: step: 888/531, loss: 0.00012092590623069555 2023-01-23 03:54:10.167665: step: 892/531, loss: 0.0015928269131109118 2023-01-23 03:54:11.281674: step: 896/531, loss: 0.0016504288651049137 2023-01-23 03:54:12.398587: step: 900/531, loss: 0.0006597518804483116 2023-01-23 03:54:13.535797: step: 904/531, loss: 0.006245994474738836 2023-01-23 03:54:14.647214: step: 908/531, loss: -2.822876012942288e-05 2023-01-23 03:54:15.754777: step: 912/531, loss: 8.55445905472152e-05 2023-01-23 03:54:16.885779: step: 916/531, loss: 0.00015182494826149195 2023-01-23 03:54:17.993891: step: 920/531, loss: 0.001562786172144115 2023-01-23 03:54:19.136844: step: 924/531, loss: 0.008929251693189144 2023-01-23 03:54:20.234086: step: 928/531, loss: 1.1634827387752011e-05 2023-01-23 03:54:21.347483: step: 932/531, loss: 0.011922646313905716 2023-01-23 03:54:22.442861: step: 936/531, loss: 0.00032491685124114156 2023-01-23 03:54:23.549287: step: 940/531, loss: 0.020729923620820045 2023-01-23 03:54:24.672992: step: 944/531, loss: 0.0006745338323526084 2023-01-23 03:54:25.798715: step: 948/531, loss: 0.008918190374970436 2023-01-23 03:54:26.920096: step: 952/531, loss: 0.0005529403570108116 2023-01-23 03:54:28.047583: step: 956/531, loss: 0.0022972107399255037 2023-01-23 03:54:29.170278: step: 960/531, loss: 0.005454063415527344 2023-01-23 03:54:30.292734: step: 964/531, loss: 0.0020036697387695312 2023-01-23 03:54:31.413975: step: 968/531, loss: 0.0004918098566122353 2023-01-23 03:54:32.540467: step: 972/531, loss: 0.2412470281124115 2023-01-23 03:54:33.661066: step: 976/531, loss: 5.4931642807787284e-05 2023-01-23 03:54:34.755951: step: 980/531, loss: 0.0005280360346660018 2023-01-23 03:54:35.903212: step: 984/531, loss: 0.010000323876738548 2023-01-23 03:54:36.998281: step: 988/531, loss: 0.002407550811767578 2023-01-23 03:54:38.129738: step: 992/531, loss: 0.005508804228156805 2023-01-23 03:54:39.230977: step: 996/531, loss: 2.0885468984488398e-05 2023-01-23 03:54:40.372495: step: 1000/531, loss: 0.023827172815799713 2023-01-23 03:54:41.509040: step: 1004/531, loss: 0.023612594231963158 2023-01-23 03:54:42.626879: step: 1008/531, loss: 0.00010824203491210938 2023-01-23 03:54:43.786136: step: 1012/531, loss: 0.0015560149913653731 2023-01-23 03:54:44.904498: step: 1016/531, loss: 0.007282542996108532 2023-01-23 03:54:46.012015: step: 1020/531, loss: 0.040325406938791275 2023-01-23 03:54:47.133740: step: 1024/531, loss: 5.264282299322076e-05 2023-01-23 03:54:48.263148: step: 1028/531, loss: 0.009829758666455746 2023-01-23 03:54:49.387946: step: 1032/531, loss: 0.00015773772611282766 2023-01-23 03:54:50.499479: step: 1036/531, loss: 0.0020146372262388468 2023-01-23 03:54:51.638497: step: 1040/531, loss: 0.005702400580048561 2023-01-23 03:54:52.783107: step: 1044/531, loss: 0.000550901866517961 2023-01-23 03:54:53.928611: step: 1048/531, loss: 0.026949310675263405 2023-01-23 03:54:55.046437: step: 1052/531, loss: 0.00015797615924384445 2023-01-23 03:54:56.145612: step: 1056/531, loss: 0.00018539429584052414 2023-01-23 03:54:57.275894: step: 1060/531, loss: 0.0007648468017578125 2023-01-23 03:54:58.411752: step: 1064/531, loss: 3.337860562169226e-06 2023-01-23 03:54:59.531742: step: 1068/531, loss: 0.07659760117530823 2023-01-23 03:55:00.663779: step: 1072/531, loss: 0.001273679779842496 2023-01-23 03:55:01.790335: step: 1076/531, loss: 0.0001922607480082661 2023-01-23 03:55:02.913958: step: 1080/531, loss: 0.026928521692752838 2023-01-23 03:55:04.100198: step: 1084/531, loss: 0.00029792787972837687 2023-01-23 03:55:05.228360: step: 1088/531, loss: 0.01058197021484375 2023-01-23 03:55:06.369295: step: 1092/531, loss: 6.88552827341482e-05 2023-01-23 03:55:07.483006: step: 1096/531, loss: 0.0004398346063680947 2023-01-23 03:55:08.620110: step: 1100/531, loss: 0.004431343171745539 2023-01-23 03:55:09.756190: step: 1104/531, loss: 0.012441063299775124 2023-01-23 03:55:10.913018: step: 1108/531, loss: 0.00021057129197288305 2023-01-23 03:55:12.043604: step: 1112/531, loss: 0.0002458572562318295 2023-01-23 03:55:13.156299: step: 1116/531, loss: 0.0171648021787405 2023-01-23 03:55:14.283435: step: 1120/531, loss: 2.517700158932712e-05 2023-01-23 03:55:15.380338: step: 1124/531, loss: 3.2424929941043956e-06 2023-01-23 03:55:16.493618: step: 1128/531, loss: 0.0006754875648766756 2023-01-23 03:55:17.638747: step: 1132/531, loss: -1.9073486328125e-06 2023-01-23 03:55:18.743335: step: 1136/531, loss: 1.182556115963962e-05 2023-01-23 03:55:19.865188: step: 1140/531, loss: 0.00010833739361260086 2023-01-23 03:55:20.971197: step: 1144/531, loss: 0.002923393389210105 2023-01-23 03:55:22.086433: step: 1148/531, loss: 0.3141302168369293 2023-01-23 03:55:23.187017: step: 1152/531, loss: 0.027521325275301933 2023-01-23 03:55:24.312449: step: 1156/531, loss: 0.0006405830499716103 2023-01-23 03:55:25.419872: step: 1160/531, loss: 0.02390575408935547 2023-01-23 03:55:26.525685: step: 1164/531, loss: 0.011572551913559437 2023-01-23 03:55:27.633296: step: 1168/531, loss: 0.06204252317547798 2023-01-23 03:55:28.757915: step: 1172/531, loss: 0.009407997131347656 2023-01-23 03:55:29.901107: step: 1176/531, loss: 8.773804438533261e-06 2023-01-23 03:55:30.989938: step: 1180/531, loss: 0.015708064660429955 2023-01-23 03:55:32.120243: step: 1184/531, loss: 0.0008949279435910285 2023-01-23 03:55:33.245406: step: 1188/531, loss: 0.0006538390880450606 2023-01-23 03:55:34.344473: step: 1192/531, loss: 6.9618222369172145e-06 2023-01-23 03:55:35.468464: step: 1196/531, loss: 0.002063560765236616 2023-01-23 03:55:36.574454: step: 1200/531, loss: 0.0012581349583342671 2023-01-23 03:55:37.708293: step: 1204/531, loss: 9.661913645686582e-05 2023-01-23 03:55:38.835115: step: 1208/531, loss: 1.773834264895413e-05 2023-01-23 03:55:39.991780: step: 1212/531, loss: 0.03893623128533363 2023-01-23 03:55:41.131489: step: 1216/531, loss: -1.4066696166992188e-05 2023-01-23 03:55:42.266448: step: 1220/531, loss: 0.00017309188842773438 2023-01-23 03:55:43.381853: step: 1224/531, loss: 0.022786427289247513 2023-01-23 03:55:44.490255: step: 1228/531, loss: 0.002492666244506836 2023-01-23 03:55:45.592279: step: 1232/531, loss: 0.00020456314086914062 2023-01-23 03:55:46.679752: step: 1236/531, loss: 0.0006899833679199219 2023-01-23 03:55:47.834607: step: 1240/531, loss: 0.0014776230091229081 2023-01-23 03:55:48.945240: step: 1244/531, loss: 0.004159402567893267 2023-01-23 03:55:50.067039: step: 1248/531, loss: 0.012343978509306908 2023-01-23 03:55:51.196137: step: 1252/531, loss: 0.002485656877979636 2023-01-23 03:55:52.345797: step: 1256/531, loss: 0.08677548915147781 2023-01-23 03:55:53.465866: step: 1260/531, loss: 0.0012414931552484632 2023-01-23 03:55:54.602821: step: 1264/531, loss: 1.33514404296875e-05 2023-01-23 03:55:55.708296: step: 1268/531, loss: 0.00011601448932196945 2023-01-23 03:55:56.836701: step: 1272/531, loss: 0.023260975256562233 2023-01-23 03:55:57.972458: step: 1276/531, loss: 0.00025663376436568797 2023-01-23 03:55:59.078450: step: 1280/531, loss: 0.0023916244972497225 2023-01-23 03:56:00.217172: step: 1284/531, loss: 0.000912189541850239 2023-01-23 03:56:01.354258: step: 1288/531, loss: 0.00010032653517555445 2023-01-23 03:56:02.462010: step: 1292/531, loss: 0.0008419037330895662 2023-01-23 03:56:03.586318: step: 1296/531, loss: 3.24249267578125e-05 2023-01-23 03:56:04.718329: step: 1300/531, loss: 0.05444631725549698 2023-01-23 03:56:05.851969: step: 1304/531, loss: 0.03429603576660156 2023-01-23 03:56:06.983362: step: 1308/531, loss: 3.814697265625e-06 2023-01-23 03:56:08.111810: step: 1312/531, loss: 0.002637767931446433 2023-01-23 03:56:09.233177: step: 1316/531, loss: 0.0008985042804852128 2023-01-23 03:56:10.374046: step: 1320/531, loss: 0.00013408661470748484 2023-01-23 03:56:11.515984: step: 1324/531, loss: 0.0054336548782885075 2023-01-23 03:56:12.631821: step: 1328/531, loss: 1.106262243411038e-05 2023-01-23 03:56:13.752632: step: 1332/531, loss: 0.013496018014848232 2023-01-23 03:56:14.895445: step: 1336/531, loss: 0.007743263617157936 2023-01-23 03:56:16.010491: step: 1340/531, loss: 0.015787268057465553 2023-01-23 03:56:17.136125: step: 1344/531, loss: 1.144409225162235e-06 2023-01-23 03:56:18.231062: step: 1348/531, loss: 0.0005481719854287803 2023-01-23 03:56:19.341650: step: 1352/531, loss: 4.787445141118951e-05 2023-01-23 03:56:20.534234: step: 1356/531, loss: 6.50405854685232e-05 2023-01-23 03:56:21.655509: step: 1360/531, loss: 0.004395985510200262 2023-01-23 03:56:22.805105: step: 1364/531, loss: 0.00157337193377316 2023-01-23 03:56:23.934038: step: 1368/531, loss: 0.005836129654198885 2023-01-23 03:56:25.041465: step: 1372/531, loss: 4.9114227294921875e-05 2023-01-23 03:56:26.159376: step: 1376/531, loss: 0.013924216851592064 2023-01-23 03:56:27.297120: step: 1380/531, loss: 0.00018959045701194555 2023-01-23 03:56:28.403157: step: 1384/531, loss: 7.266998727573082e-05 2023-01-23 03:56:29.539880: step: 1388/531, loss: 0.0009172439458779991 2023-01-23 03:56:30.661220: step: 1392/531, loss: 0.0034059525933116674 2023-01-23 03:56:31.785053: step: 1396/531, loss: 0.0017912863986566663 2023-01-23 03:56:32.884029: step: 1400/531, loss: 0.0004226684686727822 2023-01-23 03:56:34.018273: step: 1404/531, loss: 0.00026454924955032766 2023-01-23 03:56:35.130050: step: 1408/531, loss: 0.21079564094543457 2023-01-23 03:56:36.224528: step: 1412/531, loss: 4.711151268566027e-05 2023-01-23 03:56:37.370546: step: 1416/531, loss: 0.0005231857649050653 2023-01-23 03:56:38.489318: step: 1420/531, loss: 0.01396102923899889 2023-01-23 03:56:39.651674: step: 1424/531, loss: 0.07530689239501953 2023-01-23 03:56:40.802647: step: 1428/531, loss: 0.006221294868737459 2023-01-23 03:56:41.933952: step: 1432/531, loss: 0.011235427111387253 2023-01-23 03:56:43.073750: step: 1436/531, loss: 0.011354828253388405 2023-01-23 03:56:44.218651: step: 1440/531, loss: 0.015910720452666283 2023-01-23 03:56:45.342506: step: 1444/531, loss: 0.007373619358986616 2023-01-23 03:56:46.473369: step: 1448/531, loss: 0.014717578887939453 2023-01-23 03:56:47.591077: step: 1452/531, loss: 0.010570145212113857 2023-01-23 03:56:48.706721: step: 1456/531, loss: 0.006151771638542414 2023-01-23 03:56:49.861871: step: 1460/531, loss: 0.00028104783268645406 2023-01-23 03:56:51.021760: step: 1464/531, loss: 0.0011564254527911544 2023-01-23 03:56:52.136646: step: 1468/531, loss: 0.0002611160452943295 2023-01-23 03:56:53.263498: step: 1472/531, loss: 0.03071126900613308 2023-01-23 03:56:54.384557: step: 1476/531, loss: 0.3324892222881317 2023-01-23 03:56:55.500037: step: 1480/531, loss: 0.00020713805861305445 2023-01-23 03:56:56.641922: step: 1484/531, loss: 0.07565231621265411 2023-01-23 03:56:57.769119: step: 1488/531, loss: 0.006416225340217352 2023-01-23 03:56:58.870745: step: 1492/531, loss: 0.007015132810920477 2023-01-23 03:57:00.010915: step: 1496/531, loss: 0.00022058487229514867 2023-01-23 03:57:01.098913: step: 1500/531, loss: 0.00217094412073493 2023-01-23 03:57:02.212518: step: 1504/531, loss: 0.00021066665067337453 2023-01-23 03:57:03.353250: step: 1508/531, loss: 1.430511474609375e-05 2023-01-23 03:57:04.457509: step: 1512/531, loss: 0.0016808509826660156 2023-01-23 03:57:05.562978: step: 1516/531, loss: 0.0004983901744708419 2023-01-23 03:57:06.658440: step: 1520/531, loss: 0.05423259735107422 2023-01-23 03:57:07.773521: step: 1524/531, loss: 0.0007426739321090281 2023-01-23 03:57:08.881871: step: 1528/531, loss: 0.02074751816689968 2023-01-23 03:57:09.992275: step: 1532/531, loss: 0.0033749579451978207 2023-01-23 03:57:11.104190: step: 1536/531, loss: 0.0012913703685626388 2023-01-23 03:57:12.210857: step: 1540/531, loss: 0.0019307136535644531 2023-01-23 03:57:13.345546: step: 1544/531, loss: 0.0008337974431924522 2023-01-23 03:57:14.458766: step: 1548/531, loss: 0.01390991173684597 2023-01-23 03:57:15.598085: step: 1552/531, loss: 0.001444149063900113 2023-01-23 03:57:16.723105: step: 1556/531, loss: 0.03240060806274414 2023-01-23 03:57:17.826896: step: 1560/531, loss: 0.006424617953598499 2023-01-23 03:57:18.980837: step: 1564/531, loss: 0.0003888130304403603 2023-01-23 03:57:20.123114: step: 1568/531, loss: 0.0005902290577068925 2023-01-23 03:57:21.259554: step: 1572/531, loss: 0.008753872476518154 2023-01-23 03:57:22.381013: step: 1576/531, loss: 0.0009785651927813888 2023-01-23 03:57:23.518611: step: 1580/531, loss: 0.031662750989198685 2023-01-23 03:57:24.651191: step: 1584/531, loss: 0.036309242248535156 2023-01-23 03:57:25.802640: step: 1588/531, loss: 4.9877166020451114e-05 2023-01-23 03:57:26.897279: step: 1592/531, loss: 0.002708530519157648 2023-01-23 03:57:28.036534: step: 1596/531, loss: 0.041326142847537994 2023-01-23 03:57:29.205675: step: 1600/531, loss: 0.006421852391213179 2023-01-23 03:57:30.320503: step: 1604/531, loss: 2.0408631826285273e-05 2023-01-23 03:57:31.453014: step: 1608/531, loss: 0.00016663075075484812 2023-01-23 03:57:32.584458: step: 1612/531, loss: 0.0023912431206554174 2023-01-23 03:57:33.691242: step: 1616/531, loss: 0.0002895355282817036 2023-01-23 03:57:34.797615: step: 1620/531, loss: 0.04916572570800781 2023-01-23 03:57:35.896137: step: 1624/531, loss: 0.008287524804472923 2023-01-23 03:57:37.008784: step: 1628/531, loss: 0.0009645462268963456 2023-01-23 03:57:38.170012: step: 1632/531, loss: 0.05920391157269478 2023-01-23 03:57:39.283598: step: 1636/531, loss: 0.0001069068894139491 2023-01-23 03:57:40.407253: step: 1640/531, loss: 0.04209880903363228 2023-01-23 03:57:41.540585: step: 1644/531, loss: 0.0009475707774981856 2023-01-23 03:57:42.645717: step: 1648/531, loss: 0.000211620339541696 2023-01-23 03:57:43.792210: step: 1652/531, loss: 0.000320243852911517 2023-01-23 03:57:44.943684: step: 1656/531, loss: 0.23246383666992188 2023-01-23 03:57:46.052852: step: 1660/531, loss: 0.1031288206577301 2023-01-23 03:57:47.199135: step: 1664/531, loss: 0.06617667526006699 2023-01-23 03:57:48.306474: step: 1668/531, loss: 0.0009390831692144275 2023-01-23 03:57:49.421368: step: 1672/531, loss: 0.11262092739343643 2023-01-23 03:57:50.558148: step: 1676/531, loss: 0.0010297298431396484 2023-01-23 03:57:51.700397: step: 1680/531, loss: 0.004846763797104359 2023-01-23 03:57:52.805724: step: 1684/531, loss: 3.175735764671117e-05 2023-01-23 03:57:53.921498: step: 1688/531, loss: 0.0006113052368164062 2023-01-23 03:57:55.033605: step: 1692/531, loss: 0.0007187515147961676 2023-01-23 03:57:56.112463: step: 1696/531, loss: 5.226135181146674e-05 2023-01-23 03:57:57.234942: step: 1700/531, loss: 7.371902756858617e-05 2023-01-23 03:57:58.368983: step: 1704/531, loss: 0.00794219970703125 2023-01-23 03:57:59.494248: step: 1708/531, loss: 0.007822847925126553 2023-01-23 03:58:00.613409: step: 1712/531, loss: 0.01797313801944256 2023-01-23 03:58:01.736069: step: 1716/531, loss: 0.002474498702213168 2023-01-23 03:58:02.859426: step: 1720/531, loss: 0.0002582549932412803 2023-01-23 03:58:03.974774: step: 1724/531, loss: 0.022548770532011986 2023-01-23 03:58:05.114672: step: 1728/531, loss: 0.01644744910299778 2023-01-23 03:58:06.284608: step: 1732/531, loss: 0.0008003234979696572 2023-01-23 03:58:07.404248: step: 1736/531, loss: 0.005929755978286266 2023-01-23 03:58:08.537849: step: 1740/531, loss: 5.53131121705519e-06 2023-01-23 03:58:09.714504: step: 1744/531, loss: 0.0031376841943711042 2023-01-23 03:58:10.839485: step: 1748/531, loss: 5.5313107623078395e-06 2023-01-23 03:58:11.953592: step: 1752/531, loss: 3.161430504405871e-05 2023-01-23 03:58:13.053234: step: 1756/531, loss: 0.027098752558231354 2023-01-23 03:58:14.149871: step: 1760/531, loss: 5.540847996599041e-05 2023-01-23 03:58:15.265869: step: 1764/531, loss: 9.910017979564145e-05 2023-01-23 03:58:16.382769: step: 1768/531, loss: 0.00046882632886990905 2023-01-23 03:58:17.489223: step: 1772/531, loss: 0.0038802148774266243 2023-01-23 03:58:18.613957: step: 1776/531, loss: 0.0005563736194744706 2023-01-23 03:58:19.740249: step: 1780/531, loss: 0.0007347107166424394 2023-01-23 03:58:20.876622: step: 1784/531, loss: 0.02941417694091797 2023-01-23 03:58:22.005550: step: 1788/531, loss: 0.0036274672020226717 2023-01-23 03:58:23.108477: step: 1792/531, loss: 0.015027904883027077 2023-01-23 03:58:24.261426: step: 1796/531, loss: 0.015047645196318626 2023-01-23 03:58:25.383341: step: 1800/531, loss: 0.00018138886662200093 2023-01-23 03:58:26.493038: step: 1804/531, loss: 0.009407997131347656 2023-01-23 03:58:27.645841: step: 1808/531, loss: 0.002166652586311102 2023-01-23 03:58:28.793145: step: 1812/531, loss: 0.0004493713495321572 2023-01-23 03:58:29.918331: step: 1816/531, loss: 0.010866832919418812 2023-01-23 03:58:31.040502: step: 1820/531, loss: 0.11321182548999786 2023-01-23 03:58:32.171004: step: 1824/531, loss: 0.0016396522987633944 2023-01-23 03:58:33.282638: step: 1828/531, loss: 0.00016479493933729827 2023-01-23 03:58:34.414960: step: 1832/531, loss: 0.0028811455704271793 2023-01-23 03:58:35.529007: step: 1836/531, loss: 0.0001412391575286165 2023-01-23 03:58:36.636450: step: 1840/531, loss: 0.0001396656152792275 2023-01-23 03:58:37.727337: step: 1844/531, loss: 0.003260803408920765 2023-01-23 03:58:38.850762: step: 1848/531, loss: 1.4019012269272935e-05 2023-01-23 03:58:39.936318: step: 1852/531, loss: 0.016576098278164864 2023-01-23 03:58:41.078318: step: 1856/531, loss: 1.4781951904296875e-05 2023-01-23 03:58:42.222299: step: 1860/531, loss: 0.024943161755800247 2023-01-23 03:58:43.376278: step: 1864/531, loss: 0.044161032885313034 2023-01-23 03:58:44.522340: step: 1868/531, loss: 0.029897499829530716 2023-01-23 03:58:45.638032: step: 1872/531, loss: 0.00079517369158566 2023-01-23 03:58:46.750198: step: 1876/531, loss: 0.0002688407839741558 2023-01-23 03:58:47.856271: step: 1880/531, loss: 0.000251340854447335 2023-01-23 03:58:48.988656: step: 1884/531, loss: 0.009488009847700596 2023-01-23 03:58:50.124312: step: 1888/531, loss: 0.0004185676807537675 2023-01-23 03:58:51.258191: step: 1892/531, loss: 0.0014828682178631425 2023-01-23 03:58:52.399364: step: 1896/531, loss: 0.010054397396743298 2023-01-23 03:58:53.527617: step: 1900/531, loss: 0.00030498503474518657 2023-01-23 03:58:54.623542: step: 1904/531, loss: 0.00819253921508789 2023-01-23 03:58:55.731480: step: 1908/531, loss: 0.0027527809143066406 2023-01-23 03:58:56.870637: step: 1912/531, loss: 0.0002662658807821572 2023-01-23 03:58:57.988055: step: 1916/531, loss: 0.018985271453857422 2023-01-23 03:58:59.136542: step: 1920/531, loss: 1.7452241081628017e-05 2023-01-23 03:59:00.227366: step: 1924/531, loss: 5.016327486373484e-05 2023-01-23 03:59:01.346054: step: 1928/531, loss: 0.04257850721478462 2023-01-23 03:59:02.462201: step: 1932/531, loss: 0.0003082275507040322 2023-01-23 03:59:03.589879: step: 1936/531, loss: 0.0008853912586346269 2023-01-23 03:59:04.756358: step: 1940/531, loss: 0.003740501357242465 2023-01-23 03:59:05.870633: step: 1944/531, loss: 5.245208740234375e-05 2023-01-23 03:59:06.996872: step: 1948/531, loss: 0.0013359070289880037 2023-01-23 03:59:08.108475: step: 1952/531, loss: 0.0005120754358358681 2023-01-23 03:59:09.217316: step: 1956/531, loss: 0.002509784884750843 2023-01-23 03:59:10.335351: step: 1960/531, loss: 6.785392906749621e-05 2023-01-23 03:59:11.476021: step: 1964/531, loss: 7.591248140670359e-05 2023-01-23 03:59:12.653401: step: 1968/531, loss: 0.0034355639945715666 2023-01-23 03:59:13.769513: step: 1972/531, loss: 0.07772121578454971 2023-01-23 03:59:14.889009: step: 1976/531, loss: 0.02805786207318306 2023-01-23 03:59:16.008886: step: 1980/531, loss: 0.003248119493946433 2023-01-23 03:59:17.137537: step: 1984/531, loss: 0.0015300750965252519 2023-01-23 03:59:18.263941: step: 1988/531, loss: 0.021108437329530716 2023-01-23 03:59:19.380584: step: 1992/531, loss: 0.005775833036750555 2023-01-23 03:59:20.495260: step: 1996/531, loss: 0.007290506269782782 2023-01-23 03:59:21.637546: step: 2000/531, loss: 9.021758887683973e-05 2023-01-23 03:59:22.751090: step: 2004/531, loss: 0.0032418249174952507 2023-01-23 03:59:23.859127: step: 2008/531, loss: 0.013883685693144798 2023-01-23 03:59:25.000431: step: 2012/531, loss: 0.0002465248107910156 2023-01-23 03:59:26.104369: step: 2016/531, loss: 0.0011563062435016036 2023-01-23 03:59:27.250771: step: 2020/531, loss: 0.004359436221420765 2023-01-23 03:59:28.372533: step: 2024/531, loss: 0.0018100739689543843 2023-01-23 03:59:29.495403: step: 2028/531, loss: 0.0007214070064947009 2023-01-23 03:59:30.665941: step: 2032/531, loss: 0.012652111239731312 2023-01-23 03:59:31.782134: step: 2036/531, loss: 0.0005445242277346551 2023-01-23 03:59:32.917863: step: 2040/531, loss: 0.0241335891187191 2023-01-23 03:59:34.051233: step: 2044/531, loss: 0.0007307052728720009 2023-01-23 03:59:35.165871: step: 2048/531, loss: 0.0012901306618005037 2023-01-23 03:59:36.279880: step: 2052/531, loss: 0.0036148070357739925 2023-01-23 03:59:37.399874: step: 2056/531, loss: 0.04683256149291992 2023-01-23 03:59:38.518251: step: 2060/531, loss: 0.0006697654607705772 2023-01-23 03:59:39.637815: step: 2064/531, loss: 0.0007224082946777344 2023-01-23 03:59:40.764342: step: 2068/531, loss: 0.000293731689453125 2023-01-23 03:59:41.888933: step: 2072/531, loss: 0.002260398818179965 2023-01-23 03:59:42.994458: step: 2076/531, loss: 0.0008077621459960938 2023-01-23 03:59:44.122300: step: 2080/531, loss: 7.25746140233241e-05 2023-01-23 03:59:45.259551: step: 2084/531, loss: 0.0035633088555186987 2023-01-23 03:59:46.375097: step: 2088/531, loss: 0.00157337193377316 2023-01-23 03:59:47.502037: step: 2092/531, loss: 0.00011596679541980848 2023-01-23 03:59:48.602072: step: 2096/531, loss: 1.3828278497385327e-05 2023-01-23 03:59:49.700118: step: 2100/531, loss: 1.62124638336536e-06 2023-01-23 03:59:50.834843: step: 2104/531, loss: 0.011295795440673828 2023-01-23 03:59:51.966081: step: 2108/531, loss: 0.003174114041030407 2023-01-23 03:59:53.072447: step: 2112/531, loss: 1.068115216185106e-05 2023-01-23 03:59:54.163976: step: 2116/531, loss: 0.00024347304133698344 2023-01-23 03:59:55.325833: step: 2120/531, loss: 0.00036296845064498484 2023-01-23 03:59:56.455207: step: 2124/531, loss: 6.67572021484375e-06 ================================================== Loss: 0.015 -------------------- Dev: {'event': {'p': 0.5973360655737705, 'r': 0.7762982689747004, 'f1': 0.6751592356687898}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Test: {'event': {'p': 0.6263632053105738, 'r': 0.7877161598091831, 'f1': 0.6978341257263604}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Chinese: {'event': {'p': 0.5697674418604651, 'r': 0.9074074074074074, 'f1': 0.7}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Korean: {'event': {'p': 0.5344827586206896, 'r': 0.49206349206349204, 'f1': 0.512396694214876}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Russian: {'event': {'p': 0.4666666666666667, 'r': 0.5833333333333334, 'f1': 0.5185185185185186}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 26 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 04:00:36.647947: step: 4/531, loss: 0.00045347216655500233 2023-01-23 04:00:37.780741: step: 8/531, loss: 0.04642972722649574 2023-01-23 04:00:38.934241: step: 12/531, loss: 0.0029207230545580387 2023-01-23 04:00:40.074885: step: 16/531, loss: 0.03157081454992294 2023-01-23 04:00:41.198883: step: 20/531, loss: 0.001370334648527205 2023-01-23 04:00:42.344591: step: 24/531, loss: 0.0006361007690429688 2023-01-23 04:00:43.454531: step: 28/531, loss: 2.6226043701171875e-05 2023-01-23 04:00:44.564936: step: 32/531, loss: 0.00036277773324400187 2023-01-23 04:00:45.698635: step: 36/531, loss: 0.0066581727005541325 2023-01-23 04:00:46.829390: step: 40/531, loss: 0.01384277455508709 2023-01-23 04:00:47.931196: step: 44/531, loss: 0.007408905308693647 2023-01-23 04:00:49.079196: step: 48/531, loss: 0.018971920013427734 2023-01-23 04:00:50.185639: step: 52/531, loss: 6.866455078125e-05 2023-01-23 04:00:51.278027: step: 56/531, loss: 7.629394076502649e-06 2023-01-23 04:00:52.391132: step: 60/531, loss: 0.004829817917197943 2023-01-23 04:00:53.536068: step: 64/531, loss: 0.03000030666589737 2023-01-23 04:00:54.679992: step: 68/531, loss: 0.0023136138916015625 2023-01-23 04:00:55.804076: step: 72/531, loss: 1.5354156857938506e-05 2023-01-23 04:00:56.899828: step: 76/531, loss: 1.2397767022775952e-05 2023-01-23 04:00:58.031080: step: 80/531, loss: -9.5367431640625e-07 2023-01-23 04:00:59.154195: step: 84/531, loss: 0.0015115260612219572 2023-01-23 04:01:00.272400: step: 88/531, loss: 0.0003132820129394531 2023-01-23 04:01:01.413810: step: 92/531, loss: 2.765655517578125e-05 2023-01-23 04:01:02.539826: step: 96/531, loss: 0.12018032371997833 2023-01-23 04:01:03.646232: step: 100/531, loss: 0.06277842819690704 2023-01-23 04:01:04.801886: step: 104/531, loss: 0.0002703666687011719 2023-01-23 04:01:05.914786: step: 108/531, loss: 0.015691854059696198 2023-01-23 04:01:07.042454: step: 112/531, loss: 0.3374547064304352 2023-01-23 04:01:08.163033: step: 116/531, loss: 0.00046200753422454 2023-01-23 04:01:09.313833: step: 120/531, loss: 0.0026158334221690893 2023-01-23 04:01:10.431670: step: 124/531, loss: 0.0001748085196595639 2023-01-23 04:01:11.574715: step: 128/531, loss: 0.0006259918445721269 2023-01-23 04:01:12.714387: step: 132/531, loss: 0.004377841949462891 2023-01-23 04:01:13.833422: step: 136/531, loss: 0.0008360862266272306 2023-01-23 04:01:14.945188: step: 140/531, loss: 0.011205673217773438 2023-01-23 04:01:16.071760: step: 144/531, loss: 2.1022558939876035e-05 2023-01-23 04:01:17.228587: step: 148/531, loss: 0.0019003868801519275 2023-01-23 04:01:18.338027: step: 152/531, loss: 3.814697720372351e-06 2023-01-23 04:01:19.488641: step: 156/531, loss: 0.00014352799917105585 2023-01-23 04:01:20.622815: step: 160/531, loss: 0.022900773212313652 2023-01-23 04:01:21.728700: step: 164/531, loss: 7.2479248046875e-05 2023-01-23 04:01:22.837634: step: 168/531, loss: 0.00036110877408646047 2023-01-23 04:01:23.966044: step: 172/531, loss: 0.016541291028261185 2023-01-23 04:01:25.101600: step: 176/531, loss: 0.0001334190455963835 2023-01-23 04:01:26.227569: step: 180/531, loss: 0.06560993194580078 2023-01-23 04:01:27.363205: step: 184/531, loss: 0.009336281567811966 2023-01-23 04:01:28.488288: step: 188/531, loss: 0.00011510849435580894 2023-01-23 04:01:29.632065: step: 192/531, loss: 4.0054324017546605e-06 2023-01-23 04:01:30.770367: step: 196/531, loss: 0.11933369934558868 2023-01-23 04:01:31.917957: step: 200/531, loss: 4.75883498438634e-05 2023-01-23 04:01:33.015418: step: 204/531, loss: 0.011086845770478249 2023-01-23 04:01:34.133124: step: 208/531, loss: 0.000162029275088571 2023-01-23 04:01:35.263340: step: 212/531, loss: 0.13398189842700958 2023-01-23 04:01:36.401536: step: 216/531, loss: 4.3582916987361386e-05 2023-01-23 04:01:37.536602: step: 220/531, loss: 0.0003517150762490928 2023-01-23 04:01:38.645031: step: 224/531, loss: -3.8146970382513246e-07 2023-01-23 04:01:39.776007: step: 228/531, loss: 0.00023288727970793843 2023-01-23 04:01:40.909704: step: 232/531, loss: 0.0023923872504383326 2023-01-23 04:01:42.010890: step: 236/531, loss: 0.0022612097673118114 2023-01-23 04:01:43.118981: step: 240/531, loss: 0.0009933114051818848 2023-01-23 04:01:44.223043: step: 244/531, loss: 0.0006024837493896484 2023-01-23 04:01:45.329399: step: 248/531, loss: 0.0012908459175378084 2023-01-23 04:01:46.462870: step: 252/531, loss: 4.816055661649443e-05 2023-01-23 04:01:47.582961: step: 256/531, loss: 0.01306462287902832 2023-01-23 04:01:48.726496: step: 260/531, loss: 0.008605623617768288 2023-01-23 04:01:49.824336: step: 264/531, loss: 0.011499309912323952 2023-01-23 04:01:50.967493: step: 268/531, loss: 1.640319896978326e-05 2023-01-23 04:01:52.085096: step: 272/531, loss: 0.00666274968534708 2023-01-23 04:01:53.212229: step: 276/531, loss: 4.8255922592943534e-05 2023-01-23 04:01:54.332090: step: 280/531, loss: 0.006525421515107155 2023-01-23 04:01:55.439939: step: 284/531, loss: 5.6838991440599784e-05 2023-01-23 04:01:56.544870: step: 288/531, loss: 3.070831371587701e-05 2023-01-23 04:01:57.684387: step: 292/531, loss: 0.014255332760512829 2023-01-23 04:01:58.819354: step: 296/531, loss: 0.0013267993927001953 2023-01-23 04:01:59.961940: step: 300/531, loss: 0.000812530517578125 2023-01-23 04:02:01.128976: step: 304/531, loss: 0.017866231501102448 2023-01-23 04:02:02.245586: step: 308/531, loss: 0.04280805587768555 2023-01-23 04:02:03.370480: step: 312/531, loss: 0.0016048431862145662 2023-01-23 04:02:04.509733: step: 316/531, loss: 0.016740798950195312 2023-01-23 04:02:05.667984: step: 320/531, loss: 0.03598659113049507 2023-01-23 04:02:06.786668: step: 324/531, loss: 0.0008739471668377519 2023-01-23 04:02:07.904301: step: 328/531, loss: 0.0016384958289563656 2023-01-23 04:02:09.034504: step: 332/531, loss: 0.013918494805693626 2023-01-23 04:02:10.124828: step: 336/531, loss: 0.00021538735018111765 2023-01-23 04:02:11.250909: step: 340/531, loss: 0.002173042157664895 2023-01-23 04:02:12.397511: step: 344/531, loss: 0.016356898471713066 2023-01-23 04:02:13.504556: step: 348/531, loss: 0.006673621945083141 2023-01-23 04:02:14.604321: step: 352/531, loss: 0.007123851682990789 2023-01-23 04:02:15.714818: step: 356/531, loss: 0.0312746986746788 2023-01-23 04:02:16.838933: step: 360/531, loss: 0.0008863449329510331 2023-01-23 04:02:17.971566: step: 364/531, loss: 0.009633064270019531 2023-01-23 04:02:19.087015: step: 368/531, loss: 3.71456153516192e-05 2023-01-23 04:02:20.213832: step: 372/531, loss: 0.07318115234375 2023-01-23 04:02:21.337646: step: 376/531, loss: 0.007025718688964844 2023-01-23 04:02:22.472544: step: 380/531, loss: 0.033063411712646484 2023-01-23 04:02:23.595766: step: 384/531, loss: 0.0004489898565225303 2023-01-23 04:02:24.743224: step: 388/531, loss: 0.01707463338971138 2023-01-23 04:02:25.867433: step: 392/531, loss: 0.0017786026000976562 2023-01-23 04:02:27.019747: step: 396/531, loss: 0.03592662885785103 2023-01-23 04:02:28.140473: step: 400/531, loss: 0.004657649900764227 2023-01-23 04:02:29.319667: step: 404/531, loss: 0.0020380020141601562 2023-01-23 04:02:30.450735: step: 408/531, loss: 0.0003277778741903603 2023-01-23 04:02:31.555320: step: 412/531, loss: 0.6080552935600281 2023-01-23 04:02:32.674269: step: 416/531, loss: 0.011705970391631126 2023-01-23 04:02:33.802909: step: 420/531, loss: 7.247924258990679e-06 2023-01-23 04:02:34.928290: step: 424/531, loss: 0.0031394960824400187 2023-01-23 04:02:36.029874: step: 428/531, loss: 0.007942582480609417 2023-01-23 04:02:37.166624: step: 432/531, loss: 0.012568164616823196 2023-01-23 04:02:38.281421: step: 436/531, loss: 3.089904930675402e-05 2023-01-23 04:02:39.411831: step: 440/531, loss: 0.007097339723259211 2023-01-23 04:02:40.514214: step: 444/531, loss: 0.0030575632117688656 2023-01-23 04:02:41.667750: step: 448/531, loss: 7.286071922862902e-05 2023-01-23 04:02:42.787396: step: 452/531, loss: 0.00032863617525435984 2023-01-23 04:02:43.891152: step: 456/531, loss: 0.00044736862764693797 2023-01-23 04:02:45.034071: step: 460/531, loss: 0.00019073486328125 2023-01-23 04:02:46.162172: step: 464/531, loss: 1.354217511106981e-05 2023-01-23 04:02:47.287312: step: 468/531, loss: 0.016560936346650124 2023-01-23 04:02:48.393484: step: 472/531, loss: 0.002007007598876953 2023-01-23 04:02:49.534308: step: 476/531, loss: 1.3351440202313825e-06 2023-01-23 04:02:50.643572: step: 480/531, loss: 0.00781702995300293 2023-01-23 04:02:51.775916: step: 484/531, loss: 0.00022134781465865672 2023-01-23 04:02:52.904278: step: 488/531, loss: 0.00020151138596702367 2023-01-23 04:02:54.034129: step: 492/531, loss: 0.003185463137924671 2023-01-23 04:02:55.154576: step: 496/531, loss: 5.0067901611328125e-06 2023-01-23 04:02:56.279023: step: 500/531, loss: 0.0003032684326171875 2023-01-23 04:02:57.419393: step: 504/531, loss: 0.0001671314239501953 2023-01-23 04:02:58.531382: step: 508/531, loss: 0.00022974015155341476 2023-01-23 04:02:59.636589: step: 512/531, loss: 0.00025300978450104594 2023-01-23 04:03:00.741747: step: 516/531, loss: 4.4155120122013614e-05 2023-01-23 04:03:01.887669: step: 520/531, loss: 0.00039443973219022155 2023-01-23 04:03:03.018894: step: 524/531, loss: 9.918212890625e-05 2023-01-23 04:03:04.110231: step: 528/531, loss: 0.00037636756314896047 2023-01-23 04:03:05.265619: step: 532/531, loss: 0.0008621931774541736 2023-01-23 04:03:06.362640: step: 536/531, loss: 0.0004096984921488911 2023-01-23 04:03:07.499699: step: 540/531, loss: 0.0005216598510742188 2023-01-23 04:03:08.601175: step: 544/531, loss: 3.643035961431451e-05 2023-01-23 04:03:09.715802: step: 548/531, loss: 0.06685161590576172 2023-01-23 04:03:10.866300: step: 552/531, loss: 0.0007217407692223787 2023-01-23 04:03:11.981494: step: 556/531, loss: 0.00027179718017578125 2023-01-23 04:03:13.116822: step: 560/531, loss: 3.9577484130859375e-05 2023-01-23 04:03:14.236784: step: 564/531, loss: 0.03394961357116699 2023-01-23 04:03:15.359151: step: 568/531, loss: 0.036714743822813034 2023-01-23 04:03:16.499360: step: 572/531, loss: 0.00029969215393066406 2023-01-23 04:03:17.646756: step: 576/531, loss: 0.008076095953583717 2023-01-23 04:03:18.810097: step: 580/531, loss: 0.09984445571899414 2023-01-23 04:03:19.961484: step: 584/531, loss: 0.0019512176513671875 2023-01-23 04:03:21.095253: step: 588/531, loss: 0.004755592904984951 2023-01-23 04:03:22.200162: step: 592/531, loss: 0.00016365050396416336 2023-01-23 04:03:23.322766: step: 596/531, loss: 0.01605072058737278 2023-01-23 04:03:24.404771: step: 600/531, loss: 0.06844277679920197 2023-01-23 04:03:25.552632: step: 604/531, loss: 0.011874770745635033 2023-01-23 04:03:26.688790: step: 608/531, loss: 9.269714064430445e-05 2023-01-23 04:03:27.798569: step: 612/531, loss: 0.0005998134147375822 2023-01-23 04:03:28.936453: step: 616/531, loss: 0.0004364013730082661 2023-01-23 04:03:30.072574: step: 620/531, loss: 0.001398277236148715 2023-01-23 04:03:31.222419: step: 624/531, loss: 1.0776519957289565e-05 2023-01-23 04:03:32.340065: step: 628/531, loss: 0.0007604599231854081 2023-01-23 04:03:33.479279: step: 632/531, loss: 2.689361645025201e-05 2023-01-23 04:03:34.622293: step: 636/531, loss: 0.02329440228641033 2023-01-23 04:03:35.762605: step: 640/531, loss: 0.0021452903747558594 2023-01-23 04:03:36.904598: step: 644/531, loss: 0.0023331642150878906 2023-01-23 04:03:38.020583: step: 648/531, loss: 0.00031533243600279093 2023-01-23 04:03:39.135134: step: 652/531, loss: 0.0074738506227731705 2023-01-23 04:03:40.279593: step: 656/531, loss: 0.0002904892317019403 2023-01-23 04:03:41.387588: step: 660/531, loss: 2.09808349609375e-05 2023-01-23 04:03:42.504439: step: 664/531, loss: 5.91278076171875e-05 2023-01-23 04:03:43.615142: step: 668/531, loss: 0.24091139435768127 2023-01-23 04:03:44.740956: step: 672/531, loss: 0.0017959356773644686 2023-01-23 04:03:45.899576: step: 676/531, loss: 0.0005227088695392013 2023-01-23 04:03:47.049332: step: 680/531, loss: 0.012591361999511719 2023-01-23 04:03:48.163281: step: 684/531, loss: 0.052199557423591614 2023-01-23 04:03:49.288230: step: 688/531, loss: 0.013944435864686966 2023-01-23 04:03:50.406902: step: 692/531, loss: 0.0065622334368526936 2023-01-23 04:03:51.539266: step: 696/531, loss: 0.0008890152093954384 2023-01-23 04:03:52.655385: step: 700/531, loss: 0.0008566856267862022 2023-01-23 04:03:53.779139: step: 704/531, loss: 6.065368506824598e-05 2023-01-23 04:03:54.940788: step: 708/531, loss: 6.637573096668348e-05 2023-01-23 04:03:56.046617: step: 712/531, loss: 9.469986252952367e-05 2023-01-23 04:03:57.174721: step: 716/531, loss: 0.021500492468476295 2023-01-23 04:03:58.294805: step: 720/531, loss: 0.00020332337589934468 2023-01-23 04:03:59.410289: step: 724/531, loss: 0.1680024415254593 2023-01-23 04:04:00.517613: step: 728/531, loss: 0.0004405975341796875 2023-01-23 04:04:01.648314: step: 732/531, loss: 2.8038026357535273e-05 2023-01-23 04:04:02.793149: step: 736/531, loss: 0.0002522468566894531 2023-01-23 04:04:03.911433: step: 740/531, loss: 0.011303901672363281 2023-01-23 04:04:05.027990: step: 744/531, loss: 5.14984130859375e-05 2023-01-23 04:04:06.176217: step: 748/531, loss: 8.39233416627394e-06 2023-01-23 04:04:07.307065: step: 752/531, loss: 0.0006374359363690019 2023-01-23 04:04:08.443431: step: 756/531, loss: 4.005432856502011e-06 2023-01-23 04:04:09.577007: step: 760/531, loss: 0.01929759979248047 2023-01-23 04:04:10.738614: step: 764/531, loss: 0.010619450360536575 2023-01-23 04:04:11.886798: step: 768/531, loss: 9.70840483205393e-05 2023-01-23 04:04:13.005076: step: 772/531, loss: 0.005786705296486616 2023-01-23 04:04:14.161357: step: 776/531, loss: 0.013478660956025124 2023-01-23 04:04:15.353302: step: 780/531, loss: 0.004138183780014515 2023-01-23 04:04:16.466015: step: 784/531, loss: 0.0006269455188885331 2023-01-23 04:04:17.585826: step: 788/531, loss: 0.012637711130082607 2023-01-23 04:04:18.736401: step: 792/531, loss: 1.640319896978326e-05 2023-01-23 04:04:19.823170: step: 796/531, loss: 0.012727499008178711 2023-01-23 04:04:20.950115: step: 800/531, loss: 0.056548118591308594 2023-01-23 04:04:22.034545: step: 804/531, loss: 0.017975712195038795 2023-01-23 04:04:23.149209: step: 808/531, loss: 0.00017213822866324335 2023-01-23 04:04:24.268168: step: 812/531, loss: 0.00028123855008743703 2023-01-23 04:04:25.403227: step: 816/531, loss: 0.00037679672823287547 2023-01-23 04:04:26.537498: step: 820/531, loss: 1.5354156857938506e-05 2023-01-23 04:04:27.630681: step: 824/531, loss: 1.31011011035298e-05 2023-01-23 04:04:28.747035: step: 828/531, loss: 0.00274314871057868 2023-01-23 04:04:29.852524: step: 832/531, loss: 0.011080265045166016 2023-01-23 04:04:30.982850: step: 836/531, loss: 0.003657436463981867 2023-01-23 04:04:32.083506: step: 840/531, loss: 0.0004027366521768272 2023-01-23 04:04:33.218337: step: 844/531, loss: 0.00470733642578125 2023-01-23 04:04:34.342488: step: 848/531, loss: 0.0006496429559774697 2023-01-23 04:04:35.474689: step: 852/531, loss: 0.003203868865966797 2023-01-23 04:04:36.589547: step: 856/531, loss: 0.009075308218598366 2023-01-23 04:04:37.698402: step: 860/531, loss: 0.019313620403409004 2023-01-23 04:04:38.831901: step: 864/531, loss: 0.008193779736757278 2023-01-23 04:04:39.949273: step: 868/531, loss: 0.03574848175048828 2023-01-23 04:04:41.078827: step: 872/531, loss: 0.0023950578179210424 2023-01-23 04:04:42.193813: step: 876/531, loss: 0.0006591796409338713 2023-01-23 04:04:43.339719: step: 880/531, loss: 0.00881881732493639 2023-01-23 04:04:44.495296: step: 884/531, loss: 0.07266216725111008 2023-01-23 04:04:45.606272: step: 888/531, loss: 0.011291885748505592 2023-01-23 04:04:46.755675: step: 892/531, loss: 0.002259636064991355 2023-01-23 04:04:47.840895: step: 896/531, loss: 0.00216255197301507 2023-01-23 04:04:48.976918: step: 900/531, loss: 5.5646894907113165e-05 2023-01-23 04:04:50.122908: step: 904/531, loss: 0.0024394034408032894 2023-01-23 04:04:51.263644: step: 908/531, loss: 0.020668601617217064 2023-01-23 04:04:52.401138: step: 912/531, loss: 0.00051116943359375 2023-01-23 04:04:53.502543: step: 916/531, loss: 8.20159912109375e-05 2023-01-23 04:04:54.619799: step: 920/531, loss: 7.057190487103071e-06 2023-01-23 04:04:55.732072: step: 924/531, loss: 3.318786548334174e-05 2023-01-23 04:04:56.872487: step: 928/531, loss: 0.0051863668486475945 2023-01-23 04:04:57.999317: step: 932/531, loss: 5.9795380366267636e-05 2023-01-23 04:04:59.166232: step: 936/531, loss: 0.02184162102639675 2023-01-23 04:05:00.266910: step: 940/531, loss: 0.0004673004150390625 2023-01-23 04:05:01.387042: step: 944/531, loss: 0.01511306781321764 2023-01-23 04:05:02.499197: step: 948/531, loss: -1.5258788153005298e-06 2023-01-23 04:05:03.598688: step: 952/531, loss: 1.621246337890625e-05 2023-01-23 04:05:04.721161: step: 956/531, loss: 5.197525388211943e-05 2023-01-23 04:05:05.810856: step: 960/531, loss: 0.0028282166458666325 2023-01-23 04:05:06.904360: step: 964/531, loss: 0.004936886020004749 2023-01-23 04:05:07.989295: step: 968/531, loss: 5.741119457525201e-05 2023-01-23 04:05:09.109940: step: 972/531, loss: 1.316070574830519e-05 2023-01-23 04:05:10.233493: step: 976/531, loss: 0.0014938354725018144 2023-01-23 04:05:11.342871: step: 980/531, loss: 0.003910636994987726 2023-01-23 04:05:12.478671: step: 984/531, loss: 0.0003662109375 2023-01-23 04:05:13.610460: step: 988/531, loss: 0.0031481743790209293 2023-01-23 04:05:14.741770: step: 992/531, loss: 4.482269287109375e-05 2023-01-23 04:05:15.878354: step: 996/531, loss: 0.0003397941472940147 2023-01-23 04:05:17.017067: step: 1000/531, loss: 0.027684977278113365 2023-01-23 04:05:18.114199: step: 1004/531, loss: 0.009808349423110485 2023-01-23 04:05:19.209612: step: 1008/531, loss: 0.00030994415283203125 2023-01-23 04:05:20.359089: step: 1012/531, loss: 7.867813110351562e-05 2023-01-23 04:05:21.468257: step: 1016/531, loss: 0.00014672279939986765 2023-01-23 04:05:22.603277: step: 1020/531, loss: 0.0056816102005541325 2023-01-23 04:05:23.765023: step: 1024/531, loss: 0.005644512362778187 2023-01-23 04:05:24.856723: step: 1028/531, loss: 0.0015291214222088456 2023-01-23 04:05:25.969655: step: 1032/531, loss: 0.0031047819647938013 2023-01-23 04:05:27.113132: step: 1036/531, loss: 0.004734706599265337 2023-01-23 04:05:28.204116: step: 1040/531, loss: 0.0012608527904376388 2023-01-23 04:05:29.317203: step: 1044/531, loss: 0.0011785507667809725 2023-01-23 04:05:30.441452: step: 1048/531, loss: 0.00010185241990257055 2023-01-23 04:05:31.571467: step: 1052/531, loss: 0.00018005371384788305 2023-01-23 04:05:32.712973: step: 1056/531, loss: 0.00399932824075222 2023-01-23 04:05:33.833656: step: 1060/531, loss: 0.0003186225949320942 2023-01-23 04:05:34.958167: step: 1064/531, loss: 0.021273041144013405 2023-01-23 04:05:36.076651: step: 1068/531, loss: 0.00015754700871184468 2023-01-23 04:05:37.184399: step: 1072/531, loss: 0.002571582794189453 2023-01-23 04:05:38.340268: step: 1076/531, loss: 0.015506600961089134 2023-01-23 04:05:39.468377: step: 1080/531, loss: 6.408691115211695e-05 2023-01-23 04:05:40.619410: step: 1084/531, loss: 0.0038730620872229338 2023-01-23 04:05:41.742981: step: 1088/531, loss: 0.0008067131275311112 2023-01-23 04:05:42.867137: step: 1092/531, loss: 8.869171324477065e-06 2023-01-23 04:05:44.018013: step: 1096/531, loss: 0.004353523254394531 2023-01-23 04:05:45.093807: step: 1100/531, loss: 1.5735626220703125e-05 2023-01-23 04:05:46.207006: step: 1104/531, loss: 0.0033899785485118628 2023-01-23 04:05:47.328880: step: 1108/531, loss: 3.1900406611384824e-05 2023-01-23 04:05:48.445000: step: 1112/531, loss: 0.007274532224982977 2023-01-23 04:05:49.586164: step: 1116/531, loss: 4.673004514188506e-05 2023-01-23 04:05:50.721508: step: 1120/531, loss: 0.0005188941722735763 2023-01-23 04:05:51.806724: step: 1124/531, loss: 1.9073486328125e-05 2023-01-23 04:05:52.947375: step: 1128/531, loss: 0.0006818771362304688 2023-01-23 04:05:54.059779: step: 1132/531, loss: 0.007822990417480469 2023-01-23 04:05:55.201178: step: 1136/531, loss: 7.99179106252268e-05 2023-01-23 04:05:56.315855: step: 1140/531, loss: 0.019560718908905983 2023-01-23 04:05:57.429406: step: 1144/531, loss: 0.03733978420495987 2023-01-23 04:05:58.523167: step: 1148/531, loss: 0.0001755714329192415 2023-01-23 04:05:59.663175: step: 1152/531, loss: 0.000896453857421875 2023-01-23 04:06:00.757309: step: 1156/531, loss: 1.792907642084174e-05 2023-01-23 04:06:01.867331: step: 1160/531, loss: 0.13368311524391174 2023-01-23 04:06:02.991794: step: 1164/531, loss: 0.0001253128139069304 2023-01-23 04:06:04.103158: step: 1168/531, loss: 5.245209194981726e-06 2023-01-23 04:06:05.204761: step: 1172/531, loss: 0.0005786657566204667 2023-01-23 04:06:06.332472: step: 1176/531, loss: 0.00032787321833893657 2023-01-23 04:06:07.449342: step: 1180/531, loss: 0.000876569771207869 2023-01-23 04:06:08.567842: step: 1184/531, loss: 0.010364532470703125 2023-01-23 04:06:09.692767: step: 1188/531, loss: 0.017445851117372513 2023-01-23 04:06:10.804060: step: 1192/531, loss: 0.0006666183471679688 2023-01-23 04:06:11.932422: step: 1196/531, loss: 0.0006077766302041709 2023-01-23 04:06:13.065083: step: 1200/531, loss: 0.016824819147586823 2023-01-23 04:06:14.196248: step: 1204/531, loss: 0.0013669967884197831 2023-01-23 04:06:15.310007: step: 1208/531, loss: 0.00171489710919559 2023-01-23 04:06:16.417187: step: 1212/531, loss: 0.0003632068692240864 2023-01-23 04:06:17.549484: step: 1216/531, loss: 0.006293964572250843 2023-01-23 04:06:18.676302: step: 1220/531, loss: 0.026813222095370293 2023-01-23 04:06:19.808603: step: 1224/531, loss: 0.0004943371168337762 2023-01-23 04:06:20.938239: step: 1228/531, loss: 0.0018658638000488281 2023-01-23 04:06:22.047626: step: 1232/531, loss: 0.00010814666893566027 2023-01-23 04:06:23.182789: step: 1236/531, loss: 0.0005912781343795359 2023-01-23 04:06:24.275779: step: 1240/531, loss: 5.257129669189453e-05 2023-01-23 04:06:25.380785: step: 1244/531, loss: 0.0009036064147949219 2023-01-23 04:06:26.512966: step: 1248/531, loss: 0.0002368926943745464 2023-01-23 04:06:27.631711: step: 1252/531, loss: 0.0025201798416674137 2023-01-23 04:06:28.782784: step: 1256/531, loss: 0.008607483468949795 2023-01-23 04:06:29.914946: step: 1260/531, loss: 0.004907465074211359 2023-01-23 04:06:31.016625: step: 1264/531, loss: 5.14984130859375e-05 2023-01-23 04:06:32.124082: step: 1268/531, loss: 3.185272362316027e-05 2023-01-23 04:06:33.251576: step: 1272/531, loss: 0.0006665230030193925 2023-01-23 04:06:34.373453: step: 1276/531, loss: 0.0028783800080418587 2023-01-23 04:06:35.495412: step: 1280/531, loss: 0.012724113650619984 2023-01-23 04:06:36.617058: step: 1284/531, loss: 1.6689300537109375e-05 2023-01-23 04:06:37.716462: step: 1288/531, loss: 5.14984139954322e-06 2023-01-23 04:06:38.848016: step: 1292/531, loss: 0.00038313865661621094 2023-01-23 04:06:39.965008: step: 1296/531, loss: 0.0003246307314839214 2023-01-23 04:06:41.055451: step: 1300/531, loss: 0.0014455795753747225 2023-01-23 04:06:42.204301: step: 1304/531, loss: 1.1444094525359105e-06 2023-01-23 04:06:43.327123: step: 1308/531, loss: 0.011186599731445312 2023-01-23 04:06:44.458179: step: 1312/531, loss: 7.591247413074598e-05 2023-01-23 04:06:45.576531: step: 1316/531, loss: 9.603500075172633e-05 2023-01-23 04:06:46.695484: step: 1320/531, loss: 0.00023784636869095266 2023-01-23 04:06:47.828160: step: 1324/531, loss: 0.005619430914521217 2023-01-23 04:06:48.953375: step: 1328/531, loss: 4.844665454584174e-05 2023-01-23 04:06:50.113744: step: 1332/531, loss: 3.99589553126134e-05 2023-01-23 04:06:51.280203: step: 1336/531, loss: 0.0009550094837322831 2023-01-23 04:06:52.430113: step: 1340/531, loss: 0.00047512055607512593 2023-01-23 04:06:53.604108: step: 1344/531, loss: 7.22885160939768e-05 2023-01-23 04:06:54.720548: step: 1348/531, loss: 0.00516121368855238 2023-01-23 04:06:55.838991: step: 1352/531, loss: 0.008965874090790749 2023-01-23 04:06:56.981434: step: 1356/531, loss: 0.0012616158928722143 2023-01-23 04:06:58.089669: step: 1360/531, loss: 3.0994415283203125e-05 2023-01-23 04:06:59.228773: step: 1364/531, loss: 8.487701052217744e-06 2023-01-23 04:07:00.348237: step: 1368/531, loss: 0.00011940002877963707 2023-01-23 04:07:01.505041: step: 1372/531, loss: 0.0003047943173442036 2023-01-23 04:07:02.646205: step: 1376/531, loss: 0.000244140625 2023-01-23 04:07:03.743328: step: 1380/531, loss: 0.0003014564572367817 2023-01-23 04:07:04.876447: step: 1384/531, loss: 1.850128137448337e-05 2023-01-23 04:07:05.979288: step: 1388/531, loss: 4.0054324017546605e-06 2023-01-23 04:07:07.116198: step: 1392/531, loss: 0.0003837585391011089 2023-01-23 04:07:08.231293: step: 1396/531, loss: 0.0009661674266681075 2023-01-23 04:07:09.377555: step: 1400/531, loss: 0.004594612400978804 2023-01-23 04:07:10.479063: step: 1404/531, loss: 0.00023975371732376516 2023-01-23 04:07:11.601648: step: 1408/531, loss: 0.0028760910499840975 2023-01-23 04:07:12.701023: step: 1412/531, loss: 0.002426338382065296 2023-01-23 04:07:13.812590: step: 1416/531, loss: 0.005239391699433327 2023-01-23 04:07:14.928356: step: 1420/531, loss: 0.0023806095123291016 2023-01-23 04:07:16.063879: step: 1424/531, loss: 0.0005450248718261719 2023-01-23 04:07:17.203661: step: 1428/531, loss: 0.05249834060668945 2023-01-23 04:07:18.355582: step: 1432/531, loss: 0.007484626956284046 2023-01-23 04:07:19.477523: step: 1436/531, loss: 0.0014860153896734118 2023-01-23 04:07:20.599641: step: 1440/531, loss: 0.00012235641770530492 2023-01-23 04:07:21.743446: step: 1444/531, loss: 0.04383678361773491 2023-01-23 04:07:22.832288: step: 1448/531, loss: 8.602142770541832e-05 2023-01-23 04:07:23.973102: step: 1452/531, loss: 0.00787649117410183 2023-01-23 04:07:25.092301: step: 1456/531, loss: 0.0013366700150072575 2023-01-23 04:07:26.222408: step: 1460/531, loss: 9.51766996877268e-05 2023-01-23 04:07:27.348332: step: 1464/531, loss: 0.0008792877197265625 2023-01-23 04:07:28.494450: step: 1468/531, loss: 0.01833334006369114 2023-01-23 04:07:29.595381: step: 1472/531, loss: 0.00472869910299778 2023-01-23 04:07:30.745430: step: 1476/531, loss: 0.0004978656652383506 2023-01-23 04:07:31.853433: step: 1480/531, loss: 0.00022697450185660273 2023-01-23 04:07:32.992086: step: 1484/531, loss: 0.014154244214296341 2023-01-23 04:07:34.139211: step: 1488/531, loss: 9.059906005859375e-06 2023-01-23 04:07:35.243129: step: 1492/531, loss: 0.0001750946103129536 2023-01-23 04:07:36.380324: step: 1496/531, loss: 0.001508522080257535 2023-01-23 04:07:37.503420: step: 1500/531, loss: 0.020079948008060455 2023-01-23 04:07:38.610417: step: 1504/531, loss: 0.001806068466976285 2023-01-23 04:07:39.762841: step: 1508/531, loss: 0.0003389358753338456 2023-01-23 04:07:40.874325: step: 1512/531, loss: 0.00014200211444403976 2023-01-23 04:07:42.024027: step: 1516/531, loss: 0.0082848547026515 2023-01-23 04:07:43.115052: step: 1520/531, loss: 1.5735627130197827e-06 2023-01-23 04:07:44.260780: step: 1524/531, loss: 8.792877633823082e-05 2023-01-23 04:07:45.358283: step: 1528/531, loss: 1.2493133908719756e-05 2023-01-23 04:07:46.513117: step: 1532/531, loss: 0.0059944153763353825 2023-01-23 04:07:47.626823: step: 1536/531, loss: 1.3685226804227568e-05 2023-01-23 04:07:48.762169: step: 1540/531, loss: 0.0004153251647949219 2023-01-23 04:07:49.858584: step: 1544/531, loss: 6.8664553509734105e-06 2023-01-23 04:07:50.985687: step: 1548/531, loss: 9.565354412188753e-05 2023-01-23 04:07:52.118794: step: 1552/531, loss: 0.00021209717669989914 2023-01-23 04:07:53.229494: step: 1556/531, loss: 0.00229053501971066 2023-01-23 04:07:54.357287: step: 1560/531, loss: 5.8650970458984375e-05 2023-01-23 04:07:55.474191: step: 1564/531, loss: 0.25569456815719604 2023-01-23 04:07:56.596590: step: 1568/531, loss: 0.0006315231439657509 2023-01-23 04:07:57.686150: step: 1572/531, loss: 4.091262962901965e-05 2023-01-23 04:07:58.848419: step: 1576/531, loss: 0.03510742262005806 2023-01-23 04:07:59.978975: step: 1580/531, loss: 0.00010614394705044106 2023-01-23 04:08:01.113421: step: 1584/531, loss: 0.020201491191983223 2023-01-23 04:08:02.266698: step: 1588/531, loss: 0.05397796630859375 2023-01-23 04:08:03.398289: step: 1592/531, loss: 0.00043249133159406483 2023-01-23 04:08:04.515577: step: 1596/531, loss: 0.0014174937969073653 2023-01-23 04:08:05.658747: step: 1600/531, loss: 0.051859185099601746 2023-01-23 04:08:06.790341: step: 1604/531, loss: 0.0003941536124330014 2023-01-23 04:08:07.911058: step: 1608/531, loss: 0.0037469866219908 2023-01-23 04:08:09.012173: step: 1612/531, loss: 5.168914867681451e-05 2023-01-23 04:08:10.126736: step: 1616/531, loss: 0.00011749268014682457 2023-01-23 04:08:11.253803: step: 1620/531, loss: 1.9073486328125e-06 2023-01-23 04:08:12.371995: step: 1624/531, loss: 0.00046076776925474405 2023-01-23 04:08:13.522795: step: 1628/531, loss: -2.498626781743951e-05 2023-01-23 04:08:14.625173: step: 1632/531, loss: 0.0126069076359272 2023-01-23 04:08:15.746721: step: 1636/531, loss: 0.00014972686767578125 2023-01-23 04:08:16.855591: step: 1640/531, loss: 0.010813714005053043 2023-01-23 04:08:18.005836: step: 1644/531, loss: 0.00025177001953125 2023-01-23 04:08:19.143021: step: 1648/531, loss: 0.0017105102306231856 2023-01-23 04:08:20.246756: step: 1652/531, loss: 0.0006741523975506425 2023-01-23 04:08:21.344979: step: 1656/531, loss: 0.00034923554630950093 2023-01-23 04:08:22.482194: step: 1660/531, loss: 0.004012489225715399 2023-01-23 04:08:23.590495: step: 1664/531, loss: -1.1444091114753974e-06 2023-01-23 04:08:24.705120: step: 1668/531, loss: 0.03929634019732475 2023-01-23 04:08:25.822715: step: 1672/531, loss: 0.005597400479018688 2023-01-23 04:08:26.932323: step: 1676/531, loss: 0.029470443725585938 2023-01-23 04:08:28.085487: step: 1680/531, loss: 6.866455078125e-05 2023-01-23 04:08:29.186367: step: 1684/531, loss: 0.02064990997314453 2023-01-23 04:08:30.294295: step: 1688/531, loss: 0.0011292457347735763 2023-01-23 04:08:31.436095: step: 1692/531, loss: 0.18194931745529175 2023-01-23 04:08:32.580674: step: 1696/531, loss: 0.0006795883527956903 2023-01-23 04:08:33.689467: step: 1700/531, loss: 0.06164684519171715 2023-01-23 04:08:34.763914: step: 1704/531, loss: 0.00022420883760787547 2023-01-23 04:08:35.881407: step: 1708/531, loss: 6.12258882028982e-05 2023-01-23 04:08:37.026558: step: 1712/531, loss: 0.5242816805839539 2023-01-23 04:08:38.175289: step: 1716/531, loss: 0.00068836216814816 2023-01-23 04:08:39.307915: step: 1720/531, loss: 0.00018177033052779734 2023-01-23 04:08:40.404735: step: 1724/531, loss: 0.0023394108284264803 2023-01-23 04:08:41.542303: step: 1728/531, loss: 0.001183509943075478 2023-01-23 04:08:42.642109: step: 1732/531, loss: 0.027262257412075996 2023-01-23 04:08:43.753166: step: 1736/531, loss: 0.0010611058678478003 2023-01-23 04:08:44.895417: step: 1740/531, loss: 0.017176246270537376 2023-01-23 04:08:45.998403: step: 1744/531, loss: 0.00023422243248205632 2023-01-23 04:08:47.136486: step: 1748/531, loss: 0.06627864390611649 2023-01-23 04:08:48.259292: step: 1752/531, loss: 0.04366188123822212 2023-01-23 04:08:49.353035: step: 1756/531, loss: 6.0653688706224784e-05 2023-01-23 04:08:50.472547: step: 1760/531, loss: 8.878708467818797e-05 2023-01-23 04:08:51.630927: step: 1764/531, loss: 0.006194115150719881 2023-01-23 04:08:52.731490: step: 1768/531, loss: 9.5367431640625e-07 2023-01-23 04:08:53.844683: step: 1772/531, loss: 0.0012831687927246094 2023-01-23 04:08:54.967836: step: 1776/531, loss: 0.000175046909134835 2023-01-23 04:08:56.091414: step: 1780/531, loss: 2.6702882678364404e-06 2023-01-23 04:08:57.192179: step: 1784/531, loss: 0.0063908579759299755 2023-01-23 04:08:58.320156: step: 1788/531, loss: 0.013032151386141777 2023-01-23 04:08:59.454441: step: 1792/531, loss: 0.00047588348388671875 2023-01-23 04:09:00.591181: step: 1796/531, loss: 0.0012002944713458419 2023-01-23 04:09:01.698391: step: 1800/531, loss: 0.0009578705066815019 2023-01-23 04:09:02.825130: step: 1804/531, loss: 0.004888630472123623 2023-01-23 04:09:03.931168: step: 1808/531, loss: 0.011440277099609375 2023-01-23 04:09:05.044503: step: 1812/531, loss: 0.0028636932838708162 2023-01-23 04:09:06.183608: step: 1816/531, loss: 0.002319050021469593 2023-01-23 04:09:07.294859: step: 1820/531, loss: 0.010092162527143955 2023-01-23 04:09:08.437918: step: 1824/531, loss: 0.012401724234223366 2023-01-23 04:09:09.556490: step: 1828/531, loss: 0.05125083774328232 2023-01-23 04:09:10.685173: step: 1832/531, loss: 0.13479185104370117 2023-01-23 04:09:11.816193: step: 1836/531, loss: 0.00039329531136900187 2023-01-23 04:09:12.941139: step: 1840/531, loss: 6.29425039733178e-06 2023-01-23 04:09:14.075329: step: 1844/531, loss: 0.006687641143798828 2023-01-23 04:09:15.188150: step: 1848/531, loss: 0.010218525305390358 2023-01-23 04:09:16.331026: step: 1852/531, loss: 0.0022821426391601562 2023-01-23 04:09:17.474209: step: 1856/531, loss: 0.0009283066028729081 2023-01-23 04:09:18.609946: step: 1860/531, loss: 0.003015375230461359 2023-01-23 04:09:19.732113: step: 1864/531, loss: 0.017287828028202057 2023-01-23 04:09:20.863959: step: 1868/531, loss: 0.00264072441495955 2023-01-23 04:09:22.003104: step: 1872/531, loss: 4.57763671875e-05 2023-01-23 04:09:23.147994: step: 1876/531, loss: 9.80854092631489e-05 2023-01-23 04:09:24.253394: step: 1880/531, loss: 4.863738922722405e-06 2023-01-23 04:09:25.385563: step: 1884/531, loss: 6.389617738022935e-06 2023-01-23 04:09:26.491721: step: 1888/531, loss: 0.0016894340515136719 2023-01-23 04:09:27.613788: step: 1892/531, loss: 0.02133622020483017 2023-01-23 04:09:28.737513: step: 1896/531, loss: 2.9468537832144648e-05 2023-01-23 04:09:29.868884: step: 1900/531, loss: 0.0552794449031353 2023-01-23 04:09:30.981984: step: 1904/531, loss: 0.12874050438404083 2023-01-23 04:09:32.107850: step: 1908/531, loss: 0.01023874245584011 2023-01-23 04:09:33.256582: step: 1912/531, loss: 0.0005775452009402215 2023-01-23 04:09:34.416047: step: 1916/531, loss: 7.963180541992188e-05 2023-01-23 04:09:35.550784: step: 1920/531, loss: 0.000913197873160243 2023-01-23 04:09:36.669661: step: 1924/531, loss: 9.446144395042211e-05 2023-01-23 04:09:37.783787: step: 1928/531, loss: 0.04133796691894531 2023-01-23 04:09:38.923577: step: 1932/531, loss: 4.372596595203504e-05 2023-01-23 04:09:40.065197: step: 1936/531, loss: 0.04000205919146538 2023-01-23 04:09:41.185425: step: 1940/531, loss: 0.0007588386652059853 2023-01-23 04:09:42.350317: step: 1944/531, loss: 0.00013265610323287547 2023-01-23 04:09:43.463565: step: 1948/531, loss: 0.021369459107518196 2023-01-23 04:09:44.597042: step: 1952/531, loss: 0.0002723693905863911 2023-01-23 04:09:45.712600: step: 1956/531, loss: 0.0004154682392254472 2023-01-23 04:09:46.853562: step: 1960/531, loss: 0.0014532089699059725 2023-01-23 04:09:47.990712: step: 1964/531, loss: 0.08775182068347931 2023-01-23 04:09:49.103137: step: 1968/531, loss: 0.026155663654208183 2023-01-23 04:09:50.203304: step: 1972/531, loss: 0.0004389762762002647 2023-01-23 04:09:51.315667: step: 1976/531, loss: 0.026421070098876953 2023-01-23 04:09:52.426692: step: 1980/531, loss: 0.003487825393676758 2023-01-23 04:09:53.545755: step: 1984/531, loss: 0.0011960983974859118 2023-01-23 04:09:54.665660: step: 1988/531, loss: 0.00344505300745368 2023-01-23 04:09:55.779983: step: 1992/531, loss: 0.0004194736829958856 2023-01-23 04:09:56.890738: step: 1996/531, loss: 0.07059655338525772 2023-01-23 04:09:57.993888: step: 2000/531, loss: 0.00010774135444080457 2023-01-23 04:09:59.112288: step: 2004/531, loss: 0.0015066147316247225 2023-01-23 04:10:00.254101: step: 2008/531, loss: 6.198883056640625e-05 2023-01-23 04:10:01.409272: step: 2012/531, loss: 0.00017194748215842992 2023-01-23 04:10:02.548875: step: 2016/531, loss: 0.00011730194091796875 2023-01-23 04:10:03.667117: step: 2020/531, loss: 0.00019559860811568797 2023-01-23 04:10:04.783958: step: 2024/531, loss: 0.0018524168990552425 2023-01-23 04:10:05.975555: step: 2028/531, loss: -3.910064606316155e-06 2023-01-23 04:10:07.099863: step: 2032/531, loss: 0.03407545015215874 2023-01-23 04:10:08.203229: step: 2036/531, loss: 0.00023360253544524312 2023-01-23 04:10:09.310942: step: 2040/531, loss: 0.0008902549743652344 2023-01-23 04:10:10.448065: step: 2044/531, loss: 4.568099757307209e-05 2023-01-23 04:10:11.586792: step: 2048/531, loss: 0.0017230988014489412 2023-01-23 04:10:12.734191: step: 2052/531, loss: 0.5555196404457092 2023-01-23 04:10:13.849453: step: 2056/531, loss: 0.02086200751364231 2023-01-23 04:10:14.987091: step: 2060/531, loss: 0.00061798095703125 2023-01-23 04:10:16.155312: step: 2064/531, loss: 0.04857692867517471 2023-01-23 04:10:17.279676: step: 2068/531, loss: 3.2711028325138614e-05 2023-01-23 04:10:18.419457: step: 2072/531, loss: 6.771087555534905e-06 2023-01-23 04:10:19.562594: step: 2076/531, loss: 0.0002229690580861643 2023-01-23 04:10:20.680896: step: 2080/531, loss: 0.00030436518136411905 2023-01-23 04:10:21.853349: step: 2084/531, loss: 0.019772911444306374 2023-01-23 04:10:22.995032: step: 2088/531, loss: 0.0003482818428892642 2023-01-23 04:10:24.142020: step: 2092/531, loss: 0.008302878588438034 2023-01-23 04:10:25.250217: step: 2096/531, loss: 0.03240451589226723 2023-01-23 04:10:26.377417: step: 2100/531, loss: 0.014634991064667702 2023-01-23 04:10:27.467435: step: 2104/531, loss: 0.00015134812565520406 2023-01-23 04:10:28.601015: step: 2108/531, loss: 0.004219627473503351 2023-01-23 04:10:29.727229: step: 2112/531, loss: 0.01053011417388916 2023-01-23 04:10:30.829759: step: 2116/531, loss: 5.607604907709174e-05 2023-01-23 04:10:31.972451: step: 2120/531, loss: 0.00022482872009277344 2023-01-23 04:10:33.108825: step: 2124/531, loss: 0.027836419641971588 ================================================== Loss: 0.014 -------------------- Dev: {'event': {'p': 0.5681381957773513, 'r': 0.7882822902796272, 'f1': 0.660345789180145}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Test: {'event': {'p': 0.6137299771167049, 'r': 0.7996422182468694, 'f1': 0.6944588296219576}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Chinese: {'event': {'p': 0.5679012345679012, 'r': 0.8518518518518519, 'f1': 0.6814814814814814}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Korean: {'event': {'p': 0.5714285714285714, 'r': 0.6349206349206349, 'f1': 0.6015037593984962}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Russian: {'event': {'p': 0.28846153846153844, 'r': 0.4166666666666667, 'f1': 0.3409090909090909}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 27 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 04:11:13.296366: step: 4/531, loss: 0.032869912683963776 2023-01-23 04:11:14.417710: step: 8/531, loss: 0.01137313898652792 2023-01-23 04:11:15.545609: step: 12/531, loss: 0.0008121490245684981 2023-01-23 04:11:16.654442: step: 16/531, loss: 1.640319896978326e-05 2023-01-23 04:11:17.775363: step: 20/531, loss: 0.011584949679672718 2023-01-23 04:11:18.886805: step: 24/531, loss: 0.007736015599220991 2023-01-23 04:11:20.000078: step: 28/531, loss: 0.013990783132612705 2023-01-23 04:11:21.176344: step: 32/531, loss: 0.0017811775906011462 2023-01-23 04:11:22.280153: step: 36/531, loss: 0.003659153124317527 2023-01-23 04:11:23.435074: step: 40/531, loss: 0.00013484954251907766 2023-01-23 04:11:24.568463: step: 44/531, loss: 0.12456703186035156 2023-01-23 04:11:25.674813: step: 48/531, loss: 0.001987648196518421 2023-01-23 04:11:26.820051: step: 52/531, loss: 0.00011539459228515625 2023-01-23 04:11:27.921509: step: 56/531, loss: 0.15163154900074005 2023-01-23 04:11:29.047313: step: 60/531, loss: 0.00109100341796875 2023-01-23 04:11:30.158080: step: 64/531, loss: 0.003566217375919223 2023-01-23 04:11:31.287944: step: 68/531, loss: 0.003479576203972101 2023-01-23 04:11:32.433425: step: 72/531, loss: 3.914833359885961e-05 2023-01-23 04:11:33.568610: step: 76/531, loss: 0.010857963934540749 2023-01-23 04:11:34.678825: step: 80/531, loss: 0.011627960950136185 2023-01-23 04:11:35.816818: step: 84/531, loss: 0.00428009033203125 2023-01-23 04:11:36.960045: step: 88/531, loss: -3.2424925393570447e-06 2023-01-23 04:11:38.071576: step: 92/531, loss: 0.006854820065200329 2023-01-23 04:11:39.178481: step: 96/531, loss: 0.007114696316421032 2023-01-23 04:11:40.308307: step: 100/531, loss: 0.0016622543334960938 2023-01-23 04:11:41.423840: step: 104/531, loss: 0.0006792068597860634 2023-01-23 04:11:42.510284: step: 108/531, loss: 0.0022104859817773104 2023-01-23 04:11:43.649743: step: 112/531, loss: 0.039315417408943176 2023-01-23 04:11:44.742670: step: 116/531, loss: 0.00018577575974632055 2023-01-23 04:11:45.875547: step: 120/531, loss: 0.006172371096909046 2023-01-23 04:11:46.991211: step: 124/531, loss: 0.0001924514799611643 2023-01-23 04:11:48.083069: step: 128/531, loss: 0.053377628326416016 2023-01-23 04:11:49.189443: step: 132/531, loss: 0.0035102844703942537 2023-01-23 04:11:50.298526: step: 136/531, loss: 1.163482647825731e-05 2023-01-23 04:11:51.409083: step: 140/531, loss: 0.0008392333984375 2023-01-23 04:11:52.521285: step: 144/531, loss: 2.708435022213962e-05 2023-01-23 04:11:53.643271: step: 148/531, loss: 0.00012254714965820312 2023-01-23 04:11:54.780148: step: 152/531, loss: 3.43322744811303e-06 2023-01-23 04:11:55.916069: step: 156/531, loss: 0.00011100769916083664 2023-01-23 04:11:57.041213: step: 160/531, loss: 0.03128385543823242 2023-01-23 04:11:58.175033: step: 164/531, loss: 0.03144221007823944 2023-01-23 04:11:59.290210: step: 168/531, loss: 6.828308687545359e-05 2023-01-23 04:12:00.401390: step: 172/531, loss: 0.024213124066591263 2023-01-23 04:12:01.526657: step: 176/531, loss: 3.8480757211800665e-05 2023-01-23 04:12:02.668207: step: 180/531, loss: 0.00016117097402457148 2023-01-23 04:12:03.765398: step: 184/531, loss: 0.00038757326547056437 2023-01-23 04:12:04.896622: step: 188/531, loss: 0.03155650943517685 2023-01-23 04:12:06.010134: step: 192/531, loss: 0.0013959885109215975 2023-01-23 04:12:07.129147: step: 196/531, loss: 0.002661085221916437 2023-01-23 04:12:08.217863: step: 200/531, loss: 0.020292092114686966 2023-01-23 04:12:09.344472: step: 204/531, loss: 0.005685615353286266 2023-01-23 04:12:10.466514: step: 208/531, loss: 0.33903443813323975 2023-01-23 04:12:11.586805: step: 212/531, loss: 1.869201696536038e-05 2023-01-23 04:12:12.711901: step: 216/531, loss: 0.0003068924124818295 2023-01-23 04:12:13.876281: step: 220/531, loss: 0.0036836625076830387 2023-01-23 04:12:15.019821: step: 224/531, loss: 0.0013276099925860763 2023-01-23 04:12:16.132559: step: 228/531, loss: 0.0012371063930913806 2023-01-23 04:12:17.269072: step: 232/531, loss: 0.01437530480325222 2023-01-23 04:12:18.417358: step: 236/531, loss: 0.030065536499023438 2023-01-23 04:12:19.561235: step: 240/531, loss: 0.012996578589081764 2023-01-23 04:12:20.660482: step: 244/531, loss: 0.0002751350693870336 2023-01-23 04:12:21.781048: step: 248/531, loss: 0.01992177963256836 2023-01-23 04:12:22.902152: step: 252/531, loss: 0.0002990722714457661 2023-01-23 04:12:24.046946: step: 256/531, loss: 0.026076793670654297 2023-01-23 04:12:25.161650: step: 260/531, loss: 0.015161419287323952 2023-01-23 04:12:26.285924: step: 264/531, loss: 0.012009048834443092 2023-01-23 04:12:27.436997: step: 268/531, loss: 0.00163946149405092 2023-01-23 04:12:28.563441: step: 272/531, loss: 0.02469616010785103 2023-01-23 04:12:29.705368: step: 276/531, loss: 0.014821052551269531 2023-01-23 04:12:30.823309: step: 280/531, loss: 0.018169784918427467 2023-01-23 04:12:31.949334: step: 284/531, loss: 0.006551647558808327 2023-01-23 04:12:33.108799: step: 288/531, loss: 0.0009578705066815019 2023-01-23 04:12:34.219966: step: 292/531, loss: 3.4141543437726796e-05 2023-01-23 04:12:35.378760: step: 296/531, loss: 0.022285079583525658 2023-01-23 04:12:36.486251: step: 300/531, loss: 0.0014776230091229081 2023-01-23 04:12:37.606077: step: 304/531, loss: 0.00114860525354743 2023-01-23 04:12:38.741419: step: 308/531, loss: 0.0004520416259765625 2023-01-23 04:12:39.883782: step: 312/531, loss: 9.450913057662547e-05 2023-01-23 04:12:41.013638: step: 316/531, loss: 0.2662227749824524 2023-01-23 04:12:42.142212: step: 320/531, loss: 0.0005800247308798134 2023-01-23 04:12:43.249108: step: 324/531, loss: 0.018753720447421074 2023-01-23 04:12:44.398173: step: 328/531, loss: 0.0412229560315609 2023-01-23 04:12:45.531294: step: 332/531, loss: 2.6130677724722773e-05 2023-01-23 04:12:46.685280: step: 336/531, loss: -4.7683716530855236e-08 2023-01-23 04:12:47.793929: step: 340/531, loss: 1.7786025637178682e-05 2023-01-23 04:12:48.886007: step: 344/531, loss: 6.663352542091161e-05 2023-01-23 04:12:50.013543: step: 348/531, loss: 0.12062511593103409 2023-01-23 04:12:51.121677: step: 352/531, loss: 0.023876190185546875 2023-01-23 04:12:52.235569: step: 356/531, loss: 0.005062866024672985 2023-01-23 04:12:53.336530: step: 360/531, loss: 6.581246998393908e-05 2023-01-23 04:12:54.473045: step: 364/531, loss: 0.05022259056568146 2023-01-23 04:12:55.586072: step: 368/531, loss: 0.00013942719670012593 2023-01-23 04:12:56.718602: step: 372/531, loss: 0.0028100013732910156 2023-01-23 04:12:57.871522: step: 376/531, loss: -7.534026735811494e-06 2023-01-23 04:12:59.010798: step: 380/531, loss: 0.0001866340753622353 2023-01-23 04:13:00.163567: step: 384/531, loss: 0.0005877495277673006 2023-01-23 04:13:01.301352: step: 388/531, loss: 0.007878398522734642 2023-01-23 04:13:02.422583: step: 392/531, loss: 0.010847663506865501 2023-01-23 04:13:03.583520: step: 396/531, loss: 0.08948802947998047 2023-01-23 04:13:04.707477: step: 400/531, loss: 0.0006517409929074347 2023-01-23 04:13:05.870817: step: 404/531, loss: 0.0004644394211936742 2023-01-23 04:13:06.982573: step: 408/531, loss: 0.00035839079646393657 2023-01-23 04:13:08.114178: step: 412/531, loss: 1.3828278042637976e-06 2023-01-23 04:13:09.243068: step: 416/531, loss: 0.009991645812988281 2023-01-23 04:13:10.356875: step: 420/531, loss: 0.00010013580322265625 2023-01-23 04:13:11.485334: step: 424/531, loss: 0.007972145453095436 2023-01-23 04:13:12.623974: step: 428/531, loss: 0.003837204072624445 2023-01-23 04:13:13.747665: step: 432/531, loss: 0.014476395212113857 2023-01-23 04:13:14.862638: step: 436/531, loss: 0.013527489267289639 2023-01-23 04:13:15.992439: step: 440/531, loss: 0.006007385440170765 2023-01-23 04:13:17.115869: step: 444/531, loss: 1.411438006471144e-05 2023-01-23 04:13:18.232324: step: 448/531, loss: 0.0007270813221111894 2023-01-23 04:13:19.327396: step: 452/531, loss: 0.01282720547169447 2023-01-23 04:13:20.480955: step: 456/531, loss: 0.000610732997301966 2023-01-23 04:13:21.629026: step: 460/531, loss: 0.00016469955153297633 2023-01-23 04:13:22.731775: step: 464/531, loss: 0.009271049872040749 2023-01-23 04:13:23.849649: step: 468/531, loss: 0.005040311720222235 2023-01-23 04:13:24.967187: step: 472/531, loss: 0.00562477158382535 2023-01-23 04:13:26.082391: step: 476/531, loss: 0.0023674010299146175 2023-01-23 04:13:27.201744: step: 480/531, loss: 0.008716393262147903 2023-01-23 04:13:28.344705: step: 484/531, loss: 1.3637542906508315e-05 2023-01-23 04:13:29.467139: step: 488/531, loss: 0.0015808106400072575 2023-01-23 04:13:30.599976: step: 492/531, loss: 0.00034332278301008046 2023-01-23 04:13:31.725121: step: 496/531, loss: 0.008898163214325905 2023-01-23 04:13:32.864865: step: 500/531, loss: 0.021915435791015625 2023-01-23 04:13:33.953139: step: 504/531, loss: 8.163452002918348e-05 2023-01-23 04:13:35.055464: step: 508/531, loss: 0.00023155214148573577 2023-01-23 04:13:36.167040: step: 512/531, loss: 0.00039711000863462687 2023-01-23 04:13:37.261929: step: 516/531, loss: 0.0008734703296795487 2023-01-23 04:13:38.409033: step: 520/531, loss: 0.00014181136793922633 2023-01-23 04:13:39.530505: step: 524/531, loss: 8.39531458041165e-06 2023-01-23 04:13:40.657694: step: 528/531, loss: 8.20159948489163e-06 2023-01-23 04:13:41.799149: step: 532/531, loss: 2.6512147087487392e-05 2023-01-23 04:13:42.912749: step: 536/531, loss: 0.0020344748627394438 2023-01-23 04:13:44.039600: step: 540/531, loss: 0.05070533603429794 2023-01-23 04:13:45.184356: step: 544/531, loss: 0.006438541691750288 2023-01-23 04:13:46.305628: step: 548/531, loss: 0.00014414788165595382 2023-01-23 04:13:47.418367: step: 552/531, loss: 0.028077125549316406 2023-01-23 04:13:48.551605: step: 556/531, loss: 0.20700938999652863 2023-01-23 04:13:49.710119: step: 560/531, loss: 0.09249468147754669 2023-01-23 04:13:50.838152: step: 564/531, loss: 0.0003253936592955142 2023-01-23 04:13:51.941653: step: 568/531, loss: 0.00022964477830100805 2023-01-23 04:13:53.062168: step: 572/531, loss: 3.471374657237902e-05 2023-01-23 04:13:54.202822: step: 576/531, loss: 0.010111141949892044 2023-01-23 04:13:55.334233: step: 580/531, loss: 2.4223329091910273e-05 2023-01-23 04:13:56.453575: step: 584/531, loss: 0.0017568588955327868 2023-01-23 04:13:57.550143: step: 588/531, loss: 3.547668529790826e-05 2023-01-23 04:13:58.665217: step: 592/531, loss: 5.569458153331652e-05 2023-01-23 04:13:59.784199: step: 596/531, loss: 0.018486596643924713 2023-01-23 04:14:00.885349: step: 600/531, loss: 3.719329833984375e-05 2023-01-23 04:14:02.004270: step: 604/531, loss: 0.0002512931823730469 2023-01-23 04:14:03.122474: step: 608/531, loss: 0.010549736209213734 2023-01-23 04:14:04.240139: step: 612/531, loss: 0.03506297990679741 2023-01-23 04:14:05.349194: step: 616/531, loss: 7.209778414107859e-05 2023-01-23 04:14:06.474901: step: 620/531, loss: 3.108978125965223e-05 2023-01-23 04:14:07.611391: step: 624/531, loss: 0.009072494693100452 2023-01-23 04:14:08.766335: step: 628/531, loss: 0.007215404417365789 2023-01-23 04:14:09.902020: step: 632/531, loss: 0.0004233360232319683 2023-01-23 04:14:11.021503: step: 636/531, loss: 0.044142913073301315 2023-01-23 04:14:12.148190: step: 640/531, loss: 0.00036535260733217 2023-01-23 04:14:13.250719: step: 644/531, loss: 0.002957153134047985 2023-01-23 04:14:14.378286: step: 648/531, loss: 0.00016708375187590718 2023-01-23 04:14:15.525052: step: 652/531, loss: 0.00025196076603606343 2023-01-23 04:14:16.648709: step: 656/531, loss: 0.00043201446533203125 2023-01-23 04:14:17.784033: step: 660/531, loss: 0.0026784895453602076 2023-01-23 04:14:18.914050: step: 664/531, loss: 0.0004905700916424394 2023-01-23 04:14:20.051103: step: 668/531, loss: 0.0006551742553710938 2023-01-23 04:14:21.164429: step: 672/531, loss: -2.7179717108083423e-06 2023-01-23 04:14:22.322316: step: 676/531, loss: 5.316734313964844e-05 2023-01-23 04:14:23.432256: step: 680/531, loss: 0.00174541468732059 2023-01-23 04:14:24.548279: step: 684/531, loss: 0.000821018242277205 2023-01-23 04:14:25.686255: step: 688/531, loss: 0.00010328293137717992 2023-01-23 04:14:26.779455: step: 692/531, loss: 0.007507896516472101 2023-01-23 04:14:27.911493: step: 696/531, loss: 0.00809936597943306 2023-01-23 04:14:29.040422: step: 700/531, loss: 0.00024790765019133687 2023-01-23 04:14:30.156666: step: 704/531, loss: 8.39233416627394e-06 2023-01-23 04:14:31.281705: step: 708/531, loss: 4.696846190199722e-06 2023-01-23 04:14:32.423874: step: 712/531, loss: 0.011433983221650124 2023-01-23 04:14:33.563041: step: 716/531, loss: 9.431838407181203e-05 2023-01-23 04:14:34.673152: step: 720/531, loss: 5.7220458984375e-06 2023-01-23 04:14:35.816422: step: 724/531, loss: 0.00012941360182594508 2023-01-23 04:14:36.951346: step: 728/531, loss: 0.016379930078983307 2023-01-23 04:14:38.051498: step: 732/531, loss: 0.0003966331423725933 2023-01-23 04:14:39.164665: step: 736/531, loss: 0.0005819320795126259 2023-01-23 04:14:40.297430: step: 740/531, loss: 0.00239906320348382 2023-01-23 04:14:41.411590: step: 744/531, loss: 7.863045175326988e-05 2023-01-23 04:14:42.545514: step: 748/531, loss: 0.00263385777361691 2023-01-23 04:14:43.654975: step: 752/531, loss: 0.013336754404008389 2023-01-23 04:14:44.775947: step: 756/531, loss: 0.017293168231844902 2023-01-23 04:14:45.939910: step: 760/531, loss: 0.0017330170376226306 2023-01-23 04:14:47.056135: step: 764/531, loss: 0.0007416725275106728 2023-01-23 04:14:48.208953: step: 768/531, loss: 0.03218469396233559 2023-01-23 04:14:49.328044: step: 772/531, loss: 1.8119812921213452e-06 2023-01-23 04:14:50.449720: step: 776/531, loss: 5.970001075183973e-05 2023-01-23 04:14:51.589032: step: 780/531, loss: 2.9349326723604463e-05 2023-01-23 04:14:52.727538: step: 784/531, loss: 0.29564762115478516 2023-01-23 04:14:53.843240: step: 788/531, loss: 0.027411652728915215 2023-01-23 04:14:54.927196: step: 792/531, loss: 3.814697265625e-06 2023-01-23 04:14:56.052837: step: 796/531, loss: 8.916854312701616e-06 2023-01-23 04:14:57.153537: step: 800/531, loss: 0.0001122474714065902 2023-01-23 04:14:58.259056: step: 804/531, loss: 1.182556115963962e-05 2023-01-23 04:14:59.382397: step: 808/531, loss: 0.0025780678261071444 2023-01-23 04:15:00.498638: step: 812/531, loss: 0.0016448020469397306 2023-01-23 04:15:01.625873: step: 816/531, loss: 0.002602481748908758 2023-01-23 04:15:02.761504: step: 820/531, loss: 0.027302933856844902 2023-01-23 04:15:03.923418: step: 824/531, loss: 0.005278587341308594 2023-01-23 04:15:05.006023: step: 828/531, loss: 0.007113552186638117 2023-01-23 04:15:06.119207: step: 832/531, loss: 0.02209172397851944 2023-01-23 04:15:07.263853: step: 836/531, loss: 0.00026569367037154734 2023-01-23 04:15:08.421452: step: 840/531, loss: 0.00014038085646461695 2023-01-23 04:15:09.511684: step: 844/531, loss: 7.758141146041453e-05 2023-01-23 04:15:10.678459: step: 848/531, loss: 0.008357048965990543 2023-01-23 04:15:11.805723: step: 852/531, loss: 0.006354332435876131 2023-01-23 04:15:12.914340: step: 856/531, loss: 0.0002624511835165322 2023-01-23 04:15:14.049086: step: 860/531, loss: 0.02940206602215767 2023-01-23 04:15:15.169797: step: 864/531, loss: 0.0004516601620707661 2023-01-23 04:15:16.298262: step: 868/531, loss: 0.018115997314453125 2023-01-23 04:15:17.417164: step: 872/531, loss: 0.027750682085752487 2023-01-23 04:15:18.553769: step: 876/531, loss: 6.961822509765625e-05 2023-01-23 04:15:19.667770: step: 880/531, loss: 0.003585338592529297 2023-01-23 04:15:20.798074: step: 884/531, loss: -1.7833710444392636e-05 2023-01-23 04:15:21.917462: step: 888/531, loss: 0.005775928497314453 2023-01-23 04:15:23.052346: step: 892/531, loss: 0.0016241073608398438 2023-01-23 04:15:24.186049: step: 896/531, loss: 0.02279062382876873 2023-01-23 04:15:25.285698: step: 900/531, loss: 2.1219253540039062e-05 2023-01-23 04:15:26.405589: step: 904/531, loss: 8.678436279296875e-05 2023-01-23 04:15:27.524915: step: 908/531, loss: 0.000606155430432409 2023-01-23 04:15:28.639720: step: 912/531, loss: 7.4386593951203395e-06 2023-01-23 04:15:29.764529: step: 916/531, loss: 0.008005904965102673 2023-01-23 04:15:30.913983: step: 920/531, loss: 0.0012835502857342362 2023-01-23 04:15:32.020982: step: 924/531, loss: 0.00035848619882017374 2023-01-23 04:15:33.122650: step: 928/531, loss: 2.86102294921875e-06 2023-01-23 04:15:34.260581: step: 932/531, loss: 9.069442603504285e-05 2023-01-23 04:15:35.390692: step: 936/531, loss: 0.00040578845073468983 2023-01-23 04:15:36.531905: step: 940/531, loss: 0.01350097730755806 2023-01-23 04:15:37.629804: step: 944/531, loss: 0.0004211425839457661 2023-01-23 04:15:38.750979: step: 948/531, loss: 0.0013070107670500875 2023-01-23 04:15:39.894006: step: 952/531, loss: 0.0015855790115892887 2023-01-23 04:15:41.019947: step: 956/531, loss: 0.004326820373535156 2023-01-23 04:15:42.155671: step: 960/531, loss: 0.041608620434999466 2023-01-23 04:15:43.294828: step: 964/531, loss: 0.014751816168427467 2023-01-23 04:15:44.405862: step: 968/531, loss: 0.009824848733842373 2023-01-23 04:15:45.565043: step: 972/531, loss: 0.003799057099968195 2023-01-23 04:15:46.693805: step: 976/531, loss: 0.022680187597870827 2023-01-23 04:15:47.803363: step: 980/531, loss: 0.0005342483636923134 2023-01-23 04:15:48.903640: step: 984/531, loss: 0.014588928781449795 2023-01-23 04:15:49.999224: step: 988/531, loss: 7.915497008070815e-06 2023-01-23 04:15:51.127489: step: 992/531, loss: 0.003313350724056363 2023-01-23 04:15:52.260211: step: 996/531, loss: 0.20702151954174042 2023-01-23 04:15:53.422758: step: 1000/531, loss: 2.536773718020413e-05 2023-01-23 04:15:54.567845: step: 1004/531, loss: 0.0031054497230798006 2023-01-23 04:15:55.696629: step: 1008/531, loss: 0.017128562554717064 2023-01-23 04:15:56.815398: step: 1012/531, loss: 0.0001161575346486643 2023-01-23 04:15:57.933309: step: 1016/531, loss: 0.025709105655550957 2023-01-23 04:15:59.057401: step: 1020/531, loss: 0.0026129246689379215 2023-01-23 04:16:00.157459: step: 1024/531, loss: 0.0002593994140625 2023-01-23 04:16:01.293911: step: 1028/531, loss: 0.0020751000847667456 2023-01-23 04:16:02.430198: step: 1032/531, loss: 0.01820697821676731 2023-01-23 04:16:03.575785: step: 1036/531, loss: 0.0007259369012899697 2023-01-23 04:16:04.695831: step: 1040/531, loss: 0.015400601550936699 2023-01-23 04:16:05.825221: step: 1044/531, loss: 7.9107288911473e-05 2023-01-23 04:16:06.922470: step: 1048/531, loss: 5.3024294174974784e-05 2023-01-23 04:16:08.052292: step: 1052/531, loss: 0.00101556780282408 2023-01-23 04:16:09.225413: step: 1056/531, loss: 0.02218799665570259 2023-01-23 04:16:10.343637: step: 1060/531, loss: 0.026259994134306908 2023-01-23 04:16:11.480518: step: 1064/531, loss: 0.013966751284897327 2023-01-23 04:16:12.602146: step: 1068/531, loss: 0.00045413972111418843 2023-01-23 04:16:13.735148: step: 1072/531, loss: 0.0023661614395678043 2023-01-23 04:16:14.857323: step: 1076/531, loss: 0.005993461702018976 2023-01-23 04:16:15.982149: step: 1080/531, loss: 0.02153310924768448 2023-01-23 04:16:17.118069: step: 1084/531, loss: 0.004776477813720703 2023-01-23 04:16:18.223749: step: 1088/531, loss: 0.0001930236758198589 2023-01-23 04:16:19.348848: step: 1092/531, loss: -4.148483185417717e-06 2023-01-23 04:16:20.468212: step: 1096/531, loss: 9.479522850597277e-05 2023-01-23 04:16:21.616432: step: 1100/531, loss: 0.00266265869140625 2023-01-23 04:16:22.723907: step: 1104/531, loss: 0.007720566354691982 2023-01-23 04:16:23.855776: step: 1108/531, loss: 0.004929161164909601 2023-01-23 04:16:24.963571: step: 1112/531, loss: 0.0001089096040232107 2023-01-23 04:16:26.072773: step: 1116/531, loss: 0.07059125602245331 2023-01-23 04:16:27.181588: step: 1120/531, loss: 2.3269654775504023e-05 2023-01-23 04:16:28.299280: step: 1124/531, loss: 0.01633920706808567 2023-01-23 04:16:29.419558: step: 1128/531, loss: 0.0043392181396484375 2023-01-23 04:16:30.534856: step: 1132/531, loss: 0.00028929710970260203 2023-01-23 04:16:31.634864: step: 1136/531, loss: 0.00821685791015625 2023-01-23 04:16:32.766577: step: 1140/531, loss: 0.004209041595458984 2023-01-23 04:16:33.881195: step: 1144/531, loss: 0.00011234283010708168 2023-01-23 04:16:34.997993: step: 1148/531, loss: 0.0002739906485658139 2023-01-23 04:16:36.138410: step: 1152/531, loss: 0.015117360278964043 2023-01-23 04:16:37.250307: step: 1156/531, loss: 0.0002511024649720639 2023-01-23 04:16:38.364685: step: 1160/531, loss: 0.0004574775812216103 2023-01-23 04:16:39.457332: step: 1164/531, loss: 0.004498672671616077 2023-01-23 04:16:40.578185: step: 1168/531, loss: 0.02213726006448269 2023-01-23 04:16:41.684873: step: 1172/531, loss: 0.00012111663818359375 2023-01-23 04:16:42.823856: step: 1176/531, loss: 2.098083541568485e-06 2023-01-23 04:16:43.971506: step: 1180/531, loss: 0.00018043519230559468 2023-01-23 04:16:45.067969: step: 1184/531, loss: 1.23977656585339e-06 2023-01-23 04:16:46.197274: step: 1188/531, loss: 8.401871309615672e-05 2023-01-23 04:16:47.340268: step: 1192/531, loss: 0.018071364611387253 2023-01-23 04:16:48.461095: step: 1196/531, loss: 8.58306884765625e-06 2023-01-23 04:16:49.556181: step: 1200/531, loss: 0.0033739092759788036 2023-01-23 04:16:50.656515: step: 1204/531, loss: 0.0014198303688317537 2023-01-23 04:16:51.793577: step: 1208/531, loss: 0.007958030328154564 2023-01-23 04:16:52.922526: step: 1212/531, loss: 1.678466833254788e-05 2023-01-23 04:16:54.041465: step: 1216/531, loss: 0.0005546570173464715 2023-01-23 04:16:55.159705: step: 1220/531, loss: 0.0019398690201342106 2023-01-23 04:16:56.294677: step: 1224/531, loss: 0.0013425827492028475 2023-01-23 04:16:57.442761: step: 1228/531, loss: 0.023987198248505592 2023-01-23 04:16:58.575917: step: 1232/531, loss: 0.006106758490204811 2023-01-23 04:16:59.707989: step: 1236/531, loss: 0.004833793733268976 2023-01-23 04:17:00.846733: step: 1240/531, loss: 0.0038857460021972656 2023-01-23 04:17:01.990146: step: 1244/531, loss: 0.025882530957460403 2023-01-23 04:17:03.098817: step: 1248/531, loss: 0.002964019775390625 2023-01-23 04:17:04.244561: step: 1252/531, loss: 6.10351571594947e-06 2023-01-23 04:17:05.421317: step: 1256/531, loss: 0.03321528807282448 2023-01-23 04:17:06.543431: step: 1260/531, loss: 7.686615572310984e-05 2023-01-23 04:17:07.678130: step: 1264/531, loss: 5.7220458984375e-06 2023-01-23 04:17:08.785741: step: 1268/531, loss: 0.0004215240478515625 2023-01-23 04:17:09.912352: step: 1272/531, loss: 2.28881845032447e-06 2023-01-23 04:17:11.042261: step: 1276/531, loss: 5.626678466796875e-05 2023-01-23 04:17:12.175646: step: 1280/531, loss: 0.006225490476936102 2023-01-23 04:17:13.297206: step: 1284/531, loss: 0.04358420521020889 2023-01-23 04:17:14.416581: step: 1288/531, loss: 0.013238477520644665 2023-01-23 04:17:15.569440: step: 1292/531, loss: 5.2833554946118966e-05 2023-01-23 04:17:16.689306: step: 1296/531, loss: 2.155303991457913e-05 2023-01-23 04:17:17.828297: step: 1300/531, loss: 0.004207992926239967 2023-01-23 04:17:18.931499: step: 1304/531, loss: 0.0008137702825479209 2023-01-23 04:17:20.053052: step: 1308/531, loss: 2.3412703740177676e-05 2023-01-23 04:17:21.176676: step: 1312/531, loss: 8.840561349643394e-05 2023-01-23 04:17:22.304775: step: 1316/531, loss: 0.0001884460507426411 2023-01-23 04:17:23.452261: step: 1320/531, loss: 4.57763690064894e-06 2023-01-23 04:17:24.588097: step: 1324/531, loss: 0.00011520386033225805 2023-01-23 04:17:25.724712: step: 1328/531, loss: 0.0070549012161791325 2023-01-23 04:17:26.872966: step: 1332/531, loss: 0.0007776260608807206 2023-01-23 04:17:28.047351: step: 1336/531, loss: 7.677823305130005e-06 2023-01-23 04:17:29.174868: step: 1340/531, loss: 1.5068053471622989e-05 2023-01-23 04:17:30.306611: step: 1344/531, loss: 0.0001941680966410786 2023-01-23 04:17:31.438512: step: 1348/531, loss: 0.04260826110839844 2023-01-23 04:17:32.562544: step: 1352/531, loss: 0.00030994415283203125 2023-01-23 04:17:33.713613: step: 1356/531, loss: 0.00022239686222746968 2023-01-23 04:17:34.823145: step: 1360/531, loss: 0.022145235911011696 2023-01-23 04:17:35.931218: step: 1364/531, loss: 0.02117137983441353 2023-01-23 04:17:37.055454: step: 1368/531, loss: 0.0033872604835778475 2023-01-23 04:17:38.186853: step: 1372/531, loss: 0.03773879632353783 2023-01-23 04:17:39.297688: step: 1376/531, loss: 0.0007390021928586066 2023-01-23 04:17:40.407693: step: 1380/531, loss: 0.0001729965297272429 2023-01-23 04:17:41.536899: step: 1384/531, loss: 0.0005800247308798134 2023-01-23 04:17:42.627814: step: 1388/531, loss: 2.6226043701171875e-05 2023-01-23 04:17:43.798522: step: 1392/531, loss: 0.00010404587374068797 2023-01-23 04:17:44.977635: step: 1396/531, loss: 0.0001596450892975554 2023-01-23 04:17:46.069122: step: 1400/531, loss: 0.011488009244203568 2023-01-23 04:17:47.174664: step: 1404/531, loss: 0.0032346725929528475 2023-01-23 04:17:48.344802: step: 1408/531, loss: 0.060723211616277695 2023-01-23 04:17:49.467879: step: 1412/531, loss: 0.00080108642578125 2023-01-23 04:17:50.602137: step: 1416/531, loss: 0.008340835571289062 2023-01-23 04:17:51.707696: step: 1420/531, loss: 0.00047836307203397155 2023-01-23 04:17:52.816879: step: 1424/531, loss: 0.0009883880848065019 2023-01-23 04:17:53.965614: step: 1428/531, loss: 0.00024509429931640625 2023-01-23 04:17:55.089793: step: 1432/531, loss: 0.0003125190851278603 2023-01-23 04:17:56.217192: step: 1436/531, loss: 4.673004150390625e-05 2023-01-23 04:17:57.378273: step: 1440/531, loss: 0.005370807833969593 2023-01-23 04:17:58.521609: step: 1444/531, loss: 0.0233046542853117 2023-01-23 04:17:59.646775: step: 1448/531, loss: 0.00013008118548896164 2023-01-23 04:18:00.741522: step: 1452/531, loss: 0.0004642486455850303 2023-01-23 04:18:01.853449: step: 1456/531, loss: 0.0006960868486203253 2023-01-23 04:18:02.984920: step: 1460/531, loss: 0.023892974480986595 2023-01-23 04:18:04.122595: step: 1464/531, loss: 2.765656063274946e-06 2023-01-23 04:18:05.252862: step: 1468/531, loss: 0.01681060716509819 2023-01-23 04:18:06.381502: step: 1472/531, loss: 0.0021217346657067537 2023-01-23 04:18:07.520575: step: 1476/531, loss: 7.2479248046875e-05 2023-01-23 04:18:08.653554: step: 1480/531, loss: 0.018645094707608223 2023-01-23 04:18:09.753972: step: 1484/531, loss: 1.0490417707842425e-06 2023-01-23 04:18:10.875084: step: 1488/531, loss: 6.389617919921875e-05 2023-01-23 04:18:11.977372: step: 1492/531, loss: 0.0037463190965354443 2023-01-23 04:18:13.121758: step: 1496/531, loss: 0.0002151489316020161 2023-01-23 04:18:14.268784: step: 1500/531, loss: 0.0007448196993209422 2023-01-23 04:18:15.404892: step: 1504/531, loss: 0.00015525816706940532 2023-01-23 04:18:16.525750: step: 1508/531, loss: 1.1157989320054185e-05 2023-01-23 04:18:17.664710: step: 1512/531, loss: 0.0005368232959881425 2023-01-23 04:18:18.784498: step: 1516/531, loss: 0.002760982373729348 2023-01-23 04:18:19.900370: step: 1520/531, loss: 0.0002831935998983681 2023-01-23 04:18:21.021670: step: 1524/531, loss: 5.927085658186115e-05 2023-01-23 04:18:22.163158: step: 1528/531, loss: 0.012388181872665882 2023-01-23 04:18:23.279812: step: 1532/531, loss: 0.0011230468517169356 2023-01-23 04:18:24.403800: step: 1536/531, loss: 0.002504062606021762 2023-01-23 04:18:25.524481: step: 1540/531, loss: 0.007088852114975452 2023-01-23 04:18:26.643465: step: 1544/531, loss: 0.001753950142301619 2023-01-23 04:18:27.803333: step: 1548/531, loss: 0.00023450850858353078 2023-01-23 04:18:28.942715: step: 1552/531, loss: 0.002294349716976285 2023-01-23 04:18:30.071490: step: 1556/531, loss: 3.128051685052924e-05 2023-01-23 04:18:31.181072: step: 1560/531, loss: -1.144409225162235e-06 2023-01-23 04:18:32.326718: step: 1564/531, loss: 0.00031647682772018015 2023-01-23 04:18:33.402322: step: 1568/531, loss: 0.03847246244549751 2023-01-23 04:18:34.539917: step: 1572/531, loss: 1.2397766795402276e-06 2023-01-23 04:18:35.686196: step: 1576/531, loss: 0.054741859436035156 2023-01-23 04:18:36.839445: step: 1580/531, loss: 0.0008403778774663806 2023-01-23 04:18:37.956584: step: 1584/531, loss: 0.0003098964807577431 2023-01-23 04:18:39.072688: step: 1588/531, loss: 4.1627885366324335e-05 2023-01-23 04:18:40.174642: step: 1592/531, loss: 0.006285286042839289 2023-01-23 04:18:41.320297: step: 1596/531, loss: 0.00282115931622684 2023-01-23 04:18:42.460467: step: 1600/531, loss: 0.08352365344762802 2023-01-23 04:18:43.597410: step: 1604/531, loss: 0.0003783702850341797 2023-01-23 04:18:44.701733: step: 1608/531, loss: 5.435943603515625e-05 2023-01-23 04:18:45.827454: step: 1612/531, loss: 2.574920654296875e-05 2023-01-23 04:18:46.941880: step: 1616/531, loss: -4.100799742445815e-06 2023-01-23 04:18:48.050962: step: 1620/531, loss: 0.00026025774423033 2023-01-23 04:18:49.183949: step: 1624/531, loss: 2.384185791015625e-06 2023-01-23 04:18:50.311149: step: 1628/531, loss: 3.380775160621852e-05 2023-01-23 04:18:51.456335: step: 1632/531, loss: 7.400512549793348e-05 2023-01-23 04:18:52.588768: step: 1636/531, loss: 4.692077709478326e-05 2023-01-23 04:18:53.707976: step: 1640/531, loss: 0.42092058062553406 2023-01-23 04:18:54.822130: step: 1644/531, loss: 0.24106641113758087 2023-01-23 04:18:55.938109: step: 1648/531, loss: 0.0036483765579760075 2023-01-23 04:18:57.083901: step: 1652/531, loss: 0.0003770828479900956 2023-01-23 04:18:58.184716: step: 1656/531, loss: 0.0006292343023233116 2023-01-23 04:18:59.311033: step: 1660/531, loss: 0.013663101010024548 2023-01-23 04:19:00.423834: step: 1664/531, loss: 0.020683003589510918 2023-01-23 04:19:01.529969: step: 1668/531, loss: 0.009380340576171875 2023-01-23 04:19:02.656150: step: 1672/531, loss: 6.675720669591101e-06 2023-01-23 04:19:03.793560: step: 1676/531, loss: 0.007296848110854626 2023-01-23 04:19:04.933072: step: 1680/531, loss: 0.0007396697765216231 2023-01-23 04:19:06.070050: step: 1684/531, loss: 4.0626528061693534e-05 2023-01-23 04:19:07.194001: step: 1688/531, loss: 0.010335540398955345 2023-01-23 04:19:08.331213: step: 1692/531, loss: 7.82012939453125e-05 2023-01-23 04:19:09.456000: step: 1696/531, loss: 0.0024898527190089226 2023-01-23 04:19:10.550372: step: 1700/531, loss: 0.0002401351957814768 2023-01-23 04:19:11.671436: step: 1704/531, loss: 0.00022721290588378906 2023-01-23 04:19:12.796489: step: 1708/531, loss: 0.002048683352768421 2023-01-23 04:19:13.933532: step: 1712/531, loss: 8.964539119915571e-06 2023-01-23 04:19:15.049828: step: 1716/531, loss: 0.0006032944074831903 2023-01-23 04:19:16.191253: step: 1720/531, loss: 0.010449791327118874 2023-01-23 04:19:17.279987: step: 1724/531, loss: 3.52859501617786e-06 2023-01-23 04:19:18.373442: step: 1728/531, loss: 0.0011196136474609375 2023-01-23 04:19:19.479160: step: 1732/531, loss: 0.00024547576322220266 2023-01-23 04:19:20.576807: step: 1736/531, loss: 5.149840944795869e-06 2023-01-23 04:19:21.729701: step: 1740/531, loss: 0.3806924819946289 2023-01-23 04:19:22.887642: step: 1744/531, loss: 0.045430660247802734 2023-01-23 04:19:23.996998: step: 1748/531, loss: 4.901886131847277e-05 2023-01-23 04:19:25.130062: step: 1752/531, loss: 0.00022697450185660273 2023-01-23 04:19:26.242955: step: 1756/531, loss: 0.000573706638533622 2023-01-23 04:19:27.377234: step: 1760/531, loss: 0.0004383087216410786 2023-01-23 04:19:28.518394: step: 1764/531, loss: 2.5653840566519648e-05 2023-01-23 04:19:29.688506: step: 1768/531, loss: 4.2629242670955136e-05 2023-01-23 04:19:30.817357: step: 1772/531, loss: 9.62108388193883e-05 2023-01-23 04:19:31.953730: step: 1776/531, loss: 0.01283888891339302 2023-01-23 04:19:33.104559: step: 1780/531, loss: 2.0885467165498994e-05 2023-01-23 04:19:34.213912: step: 1784/531, loss: 5.14984139954322e-06 2023-01-23 04:19:35.328573: step: 1788/531, loss: 6.518364534713328e-05 2023-01-23 04:19:36.436046: step: 1792/531, loss: 0.0001871108979685232 2023-01-23 04:19:37.570136: step: 1796/531, loss: 0.10783214867115021 2023-01-23 04:19:38.682772: step: 1800/531, loss: 0.0047248839400708675 2023-01-23 04:19:39.791892: step: 1804/531, loss: 0.0028430938255041838 2023-01-23 04:19:40.928724: step: 1808/531, loss: 0.09369969367980957 2023-01-23 04:19:42.039804: step: 1812/531, loss: 0.00015335081843659282 2023-01-23 04:19:43.174139: step: 1816/531, loss: 0.04500322416424751 2023-01-23 04:19:44.304799: step: 1820/531, loss: 0.0003167152462992817 2023-01-23 04:19:45.412247: step: 1824/531, loss: 3.051757857974735e-06 2023-01-23 04:19:46.519419: step: 1828/531, loss: 0.00010747909982455894 2023-01-23 04:19:47.641687: step: 1832/531, loss: 0.0003711700555868447 2023-01-23 04:19:48.773842: step: 1836/531, loss: 3.871917579090223e-05 2023-01-23 04:19:49.876150: step: 1840/531, loss: 0.02488117292523384 2023-01-23 04:19:50.968575: step: 1844/531, loss: 0.0032421110663563013 2023-01-23 04:19:52.139140: step: 1848/531, loss: 0.0074443817138671875 2023-01-23 04:19:53.273353: step: 1852/531, loss: 7.305145118152723e-05 2023-01-23 04:19:54.397043: step: 1856/531, loss: 0.004403972532600164 2023-01-23 04:19:55.531626: step: 1860/531, loss: 0.06438350677490234 2023-01-23 04:19:56.657204: step: 1864/531, loss: 0.0074637411162257195 2023-01-23 04:19:57.790429: step: 1868/531, loss: 0.004278755281120539 2023-01-23 04:19:58.920867: step: 1872/531, loss: 0.0014993667136877775 2023-01-23 04:20:00.048582: step: 1876/531, loss: 1.316070574830519e-05 2023-01-23 04:20:01.159206: step: 1880/531, loss: 5.912781125516631e-05 2023-01-23 04:20:02.281924: step: 1884/531, loss: 0.0011793137528002262 2023-01-23 04:20:03.384573: step: 1888/531, loss: -1.621246337890625e-05 2023-01-23 04:20:04.530921: step: 1892/531, loss: 4.4345855712890625e-05 2023-01-23 04:20:05.643619: step: 1896/531, loss: 0.012486649677157402 2023-01-23 04:20:06.783990: step: 1900/531, loss: 0.004736042115837336 2023-01-23 04:20:07.917786: step: 1904/531, loss: 0.00022182465181685984 2023-01-23 04:20:09.033075: step: 1908/531, loss: 2.174377368646674e-05 2023-01-23 04:20:10.145196: step: 1912/531, loss: 2.3365020751953125e-05 2023-01-23 04:20:11.273593: step: 1916/531, loss: 0.008999919518828392 2023-01-23 04:20:12.403480: step: 1920/531, loss: 0.0002548217889852822 2023-01-23 04:20:13.504241: step: 1924/531, loss: 0.02043762430548668 2023-01-23 04:20:14.646215: step: 1928/531, loss: 0.010158729739487171 2023-01-23 04:20:15.886293: step: 1932/531, loss: 0.027324294671416283 2023-01-23 04:20:17.004407: step: 1936/531, loss: 8.22067231638357e-05 2023-01-23 04:20:18.103650: step: 1940/531, loss: 0.0005418777000159025 2023-01-23 04:20:19.216821: step: 1944/531, loss: 1.373290979245212e-05 2023-01-23 04:20:20.371026: step: 1948/531, loss: 0.00910201109945774 2023-01-23 04:20:21.506788: step: 1952/531, loss: 0.032892417162656784 2023-01-23 04:20:22.603605: step: 1956/531, loss: 0.0015090942615643144 2023-01-23 04:20:23.730808: step: 1960/531, loss: 0.00014009476581122726 2023-01-23 04:20:24.851267: step: 1964/531, loss: 0.00041751860408112407 2023-01-23 04:20:25.997991: step: 1968/531, loss: 0.06038255617022514 2023-01-23 04:20:27.135919: step: 1972/531, loss: 0.01559219416230917 2023-01-23 04:20:28.243430: step: 1976/531, loss: 0.004004573915153742 2023-01-23 04:20:29.367576: step: 1980/531, loss: 0.0007836341974325478 2023-01-23 04:20:30.460226: step: 1984/531, loss: 0.0003679275687318295 2023-01-23 04:20:31.566964: step: 1988/531, loss: 0.0022205354180186987 2023-01-23 04:20:32.713010: step: 1992/531, loss: 0.0010107994312420487 2023-01-23 04:20:33.874760: step: 1996/531, loss: 0.0005348205449990928 2023-01-23 04:20:34.984167: step: 2000/531, loss: 0.0001381397305522114 2023-01-23 04:20:36.121330: step: 2004/531, loss: 0.018889904022216797 2023-01-23 04:20:37.249544: step: 2008/531, loss: 0.00020360946655273438 2023-01-23 04:20:38.382698: step: 2012/531, loss: 0.012942076660692692 2023-01-23 04:20:39.522110: step: 2016/531, loss: -5.722046125811175e-07 2023-01-23 04:20:40.629462: step: 2020/531, loss: 0.03893766552209854 2023-01-23 04:20:41.773049: step: 2024/531, loss: 0.011793326586484909 2023-01-23 04:20:42.926061: step: 2028/531, loss: 0.0036993026733398438 2023-01-23 04:20:44.054528: step: 2032/531, loss: -8.296966370835435e-06 2023-01-23 04:20:45.172031: step: 2036/531, loss: 0.009547615423798561 2023-01-23 04:20:46.327527: step: 2040/531, loss: 0.0011316300369799137 2023-01-23 04:20:47.473982: step: 2044/531, loss: 0.03160962834954262 2023-01-23 04:20:48.605533: step: 2048/531, loss: 0.00853576697409153 2023-01-23 04:20:49.731779: step: 2052/531, loss: 7.104873930074973e-06 2023-01-23 04:20:50.858074: step: 2056/531, loss: 0.007495594210922718 2023-01-23 04:20:51.967316: step: 2060/531, loss: 0.00033202170743606985 2023-01-23 04:20:53.083856: step: 2064/531, loss: 0.012041855603456497 2023-01-23 04:20:54.182738: step: 2068/531, loss: 0.0003266334533691406 2023-01-23 04:20:55.285040: step: 2072/531, loss: 3.890991138177924e-05 2023-01-23 04:20:56.375050: step: 2076/531, loss: 2.1648407710017636e-05 2023-01-23 04:20:57.480985: step: 2080/531, loss: 1.902580333990045e-05 2023-01-23 04:20:58.580107: step: 2084/531, loss: 0.018280887976288795 2023-01-23 04:20:59.683464: step: 2088/531, loss: 0.012325095944106579 2023-01-23 04:21:00.833252: step: 2092/531, loss: 0.00011081696720793843 2023-01-23 04:21:01.924232: step: 2096/531, loss: 0.0012516023125499487 2023-01-23 04:21:03.039959: step: 2100/531, loss: 0.0034067153465002775 2023-01-23 04:21:04.151350: step: 2104/531, loss: 0.019527912139892578 2023-01-23 04:21:05.307657: step: 2108/531, loss: 0.0006746292347088456 2023-01-23 04:21:06.450388: step: 2112/531, loss: 1.8978118532686494e-05 2023-01-23 04:21:07.570144: step: 2116/531, loss: 0.00045690534170717 2023-01-23 04:21:08.714836: step: 2120/531, loss: 0.0008192539680749178 2023-01-23 04:21:09.862833: step: 2124/531, loss: 0.026607418432831764 ================================================== Loss: 0.013 -------------------- Dev: {'event': {'p': 0.5611164581328201, 'r': 0.7762982689747004, 'f1': 0.6513966480446928}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Test: {'event': {'p': 0.6030534351145038, 'r': 0.800834824090638, 'f1': 0.6880122950819673}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Chinese: {'event': {'p': 0.5681818181818182, 'r': 0.9259259259259259, 'f1': 0.7042253521126761}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Korean: {'event': {'p': 0.5423728813559322, 'r': 0.5079365079365079, 'f1': 0.5245901639344263}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Russian: {'event': {'p': 0.32653061224489793, 'r': 0.4444444444444444, 'f1': 0.3764705882352941}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 28 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 04:21:50.190534: step: 4/531, loss: 0.0006275177001953125 2023-01-23 04:21:51.287507: step: 8/531, loss: 0.028791142627596855 2023-01-23 04:21:52.403912: step: 12/531, loss: 0.0001349449303233996 2023-01-23 04:21:53.500225: step: 16/531, loss: 0.0019812583923339844 2023-01-23 04:21:54.615255: step: 20/531, loss: 8.983612497104332e-05 2023-01-23 04:21:55.737019: step: 24/531, loss: 0.00016198158846236765 2023-01-23 04:21:56.848264: step: 28/531, loss: 0.0012340545654296875 2023-01-23 04:21:57.984564: step: 32/531, loss: 0.00019979476928710938 2023-01-23 04:21:59.089819: step: 36/531, loss: 0.007952691055834293 2023-01-23 04:22:00.175237: step: 40/531, loss: 0.0002736568567343056 2023-01-23 04:22:01.308353: step: 44/531, loss: 0.0031127932015806437 2023-01-23 04:22:02.417960: step: 48/531, loss: 0.002771759172901511 2023-01-23 04:22:03.542614: step: 52/531, loss: 0.00010395050048828125 2023-01-23 04:22:04.659621: step: 56/531, loss: 0.0006320953834801912 2023-01-23 04:22:05.795859: step: 60/531, loss: 0.027740001678466797 2023-01-23 04:22:06.943092: step: 64/531, loss: 0.03337583690881729 2023-01-23 04:22:08.070086: step: 68/531, loss: 0.036821987479925156 2023-01-23 04:22:09.199888: step: 72/531, loss: 0.00028929710970260203 2023-01-23 04:22:10.317007: step: 76/531, loss: 0.006862735841423273 2023-01-23 04:22:11.433132: step: 80/531, loss: 0.00020446778216864914 2023-01-23 04:22:12.554336: step: 84/531, loss: 0.009822655469179153 2023-01-23 04:22:13.704681: step: 88/531, loss: 0.0024022103752940893 2023-01-23 04:22:14.802502: step: 92/531, loss: 7.214546349132434e-05 2023-01-23 04:22:15.954426: step: 96/531, loss: 0.0005019187810830772 2023-01-23 04:22:17.127739: step: 100/531, loss: 9.994507126975805e-05 2023-01-23 04:22:18.258037: step: 104/531, loss: -1.52587890625e-05 2023-01-23 04:22:19.397097: step: 108/531, loss: 0.00011463165719760582 2023-01-23 04:22:20.507147: step: 112/531, loss: 0.026998519897460938 2023-01-23 04:22:21.632200: step: 116/531, loss: 2.7656553811539197e-06 2023-01-23 04:22:22.739040: step: 120/531, loss: 0.0004839897155761719 2023-01-23 04:22:23.865291: step: 124/531, loss: 1.1920928955078125e-06 2023-01-23 04:22:25.052829: step: 128/531, loss: 6.30378708592616e-05 2023-01-23 04:22:26.180159: step: 132/531, loss: 0.0050300597213208675 2023-01-23 04:22:27.337296: step: 136/531, loss: 0.0007247925386764109 2023-01-23 04:22:28.478259: step: 140/531, loss: 0.006578660104423761 2023-01-23 04:22:29.595736: step: 144/531, loss: 8.277893357444555e-05 2023-01-23 04:22:30.702887: step: 148/531, loss: 0.0004604339774232358 2023-01-23 04:22:31.858178: step: 152/531, loss: 0.006082343868911266 2023-01-23 04:22:32.962048: step: 156/531, loss: 4.158019874012098e-05 2023-01-23 04:22:34.075751: step: 160/531, loss: 8.535384949936997e-06 2023-01-23 04:22:35.232720: step: 164/531, loss: 0.0004722595331259072 2023-01-23 04:22:36.339282: step: 168/531, loss: 0.0003643989621195942 2023-01-23 04:22:37.490133: step: 172/531, loss: 0.0011552810901775956 2023-01-23 04:22:38.631843: step: 176/531, loss: 0.007882309146225452 2023-01-23 04:22:39.762887: step: 180/531, loss: 0.0005466461298055947 2023-01-23 04:22:40.889358: step: 184/531, loss: 0.0002628326474223286 2023-01-23 04:22:41.977249: step: 188/531, loss: 5.578995114774443e-06 2023-01-23 04:22:43.082606: step: 192/531, loss: 9.822845640883315e-06 2023-01-23 04:22:44.223241: step: 196/531, loss: 0.00012459754361771047 2023-01-23 04:22:45.354150: step: 200/531, loss: 4.08649466407951e-05 2023-01-23 04:22:46.465814: step: 204/531, loss: 0.0016778946155682206 2023-01-23 04:22:47.566501: step: 208/531, loss: 0.0025581358931958675 2023-01-23 04:22:48.680144: step: 212/531, loss: 0.0029953003395348787 2023-01-23 04:22:49.808534: step: 216/531, loss: 2.47955313170678e-06 2023-01-23 04:22:50.938831: step: 220/531, loss: 0.008331107906997204 2023-01-23 04:22:52.048870: step: 224/531, loss: 0.0006005287286825478 2023-01-23 04:22:53.153676: step: 228/531, loss: 0.002193450927734375 2023-01-23 04:22:54.281587: step: 232/531, loss: 0.0006574630970135331 2023-01-23 04:22:55.399099: step: 236/531, loss: 0.010582923889160156 2023-01-23 04:22:56.514750: step: 240/531, loss: 0.0018580436008051038 2023-01-23 04:22:57.623612: step: 244/531, loss: 3.604888843256049e-05 2023-01-23 04:22:58.747583: step: 248/531, loss: 1.33514404296875e-05 2023-01-23 04:22:59.872628: step: 252/531, loss: 0.0003647804260253906 2023-01-23 04:23:01.001811: step: 256/531, loss: 0.003345298580825329 2023-01-23 04:23:02.088933: step: 260/531, loss: 0.00014553070650435984 2023-01-23 04:23:03.180520: step: 264/531, loss: 0.0002353191375732422 2023-01-23 04:23:04.306381: step: 268/531, loss: 0.0002189636288676411 2023-01-23 04:23:05.416724: step: 272/531, loss: 3.0422212148550898e-05 2023-01-23 04:23:06.547283: step: 276/531, loss: 0.0053914072923362255 2023-01-23 04:23:07.676820: step: 280/531, loss: 0.00017261505126953125 2023-01-23 04:23:08.798835: step: 284/531, loss: 0.005187606904655695 2023-01-23 04:23:09.932825: step: 288/531, loss: 2.670288040462765e-06 2023-01-23 04:23:11.054035: step: 292/531, loss: 0.005283260252326727 2023-01-23 04:23:12.188616: step: 296/531, loss: 0.01437301654368639 2023-01-23 04:23:13.293971: step: 300/531, loss: 0.001302480697631836 2023-01-23 04:23:14.429898: step: 304/531, loss: 0.0005994797102175653 2023-01-23 04:23:15.555629: step: 308/531, loss: 0.00015754700871184468 2023-01-23 04:23:16.701634: step: 312/531, loss: 0.00027971266536042094 2023-01-23 04:23:17.830805: step: 316/531, loss: 0.0273303035646677 2023-01-23 04:23:18.962426: step: 320/531, loss: 0.0008176803821697831 2023-01-23 04:23:20.113080: step: 324/531, loss: 0.00035114289494231343 2023-01-23 04:23:21.250168: step: 328/531, loss: 0.009064388461411 2023-01-23 04:23:22.377208: step: 332/531, loss: 0.387355238199234 2023-01-23 04:23:23.486132: step: 336/531, loss: 0.0007592201582156122 2023-01-23 04:23:24.623072: step: 340/531, loss: 4.1484832763671875e-05 2023-01-23 04:23:25.721671: step: 344/531, loss: 1.8119813830708154e-05 2023-01-23 04:23:26.819907: step: 348/531, loss: 5.626678557746345e-06 2023-01-23 04:23:27.919836: step: 352/531, loss: 2.193450927734375e-05 2023-01-23 04:23:29.044608: step: 356/531, loss: 0.00015535354032181203 2023-01-23 04:23:30.128822: step: 360/531, loss: 6.413459777832031e-05 2023-01-23 04:23:31.245386: step: 364/531, loss: 0.005117225926369429 2023-01-23 04:23:32.358046: step: 368/531, loss: 0.0014378547202795744 2023-01-23 04:23:33.499074: step: 372/531, loss: 0.0001203536958200857 2023-01-23 04:23:34.612088: step: 376/531, loss: 0.008551979437470436 2023-01-23 04:23:35.738046: step: 380/531, loss: 1.697540210443549e-05 2023-01-23 04:23:36.902037: step: 384/531, loss: 4.234314110362902e-05 2023-01-23 04:23:38.038039: step: 388/531, loss: 0.0012197495670989156 2023-01-23 04:23:39.168257: step: 392/531, loss: 7.081032526912168e-05 2023-01-23 04:23:40.296873: step: 396/531, loss: 0.0003067970392294228 2023-01-23 04:23:41.428545: step: 400/531, loss: 0.00039272307185456157 2023-01-23 04:23:42.537481: step: 404/531, loss: 0.010199165903031826 2023-01-23 04:23:43.629449: step: 408/531, loss: 2.6702882678364404e-06 2023-01-23 04:23:44.749429: step: 412/531, loss: 0.00025196076603606343 2023-01-23 04:23:45.866915: step: 416/531, loss: 2.431869688734878e-05 2023-01-23 04:23:47.012353: step: 420/531, loss: 0.00035200119600631297 2023-01-23 04:23:48.129612: step: 424/531, loss: 0.050284769386053085 2023-01-23 04:23:49.280828: step: 428/531, loss: 1.0681153071345761e-05 2023-01-23 04:23:50.396677: step: 432/531, loss: 0.0037250518798828125 2023-01-23 04:23:51.503990: step: 436/531, loss: 0.006777572445571423 2023-01-23 04:23:52.622842: step: 440/531, loss: 0.00025234222994185984 2023-01-23 04:23:53.754699: step: 444/531, loss: 0.0021465301979333162 2023-01-23 04:23:54.865788: step: 448/531, loss: 0.00035257337731309235 2023-01-23 04:23:55.974987: step: 452/531, loss: 0.013058042153716087 2023-01-23 04:23:57.096478: step: 456/531, loss: 0.002982520963996649 2023-01-23 04:23:58.221451: step: 460/531, loss: -6.675719532722724e-07 2023-01-23 04:23:59.342284: step: 464/531, loss: 7.553100294899195e-05 2023-01-23 04:24:00.473335: step: 468/531, loss: 2.384185791015625e-05 2023-01-23 04:24:01.598215: step: 472/531, loss: 0.00028066636878065765 2023-01-23 04:24:02.756884: step: 476/531, loss: 0.00016356707783415914 2023-01-23 04:24:03.870322: step: 480/531, loss: 0.0002578735293354839 2023-01-23 04:24:05.020867: step: 484/531, loss: 0.004223346244543791 2023-01-23 04:24:06.113813: step: 488/531, loss: 0.0002828121359925717 2023-01-23 04:24:07.278069: step: 492/531, loss: 0.006344795227050781 2023-01-23 04:24:08.398720: step: 496/531, loss: 0.0006813049549236894 2023-01-23 04:24:09.530342: step: 500/531, loss: 0.00041036607581190765 2023-01-23 04:24:10.654188: step: 504/531, loss: 0.017905807122588158 2023-01-23 04:24:11.766484: step: 508/531, loss: -7.3909759521484375e-06 2023-01-23 04:24:12.903053: step: 512/531, loss: 8.19206252344884e-05 2023-01-23 04:24:14.028566: step: 516/531, loss: 0.026958562433719635 2023-01-23 04:24:15.157069: step: 520/531, loss: 0.00355281843803823 2023-01-23 04:24:16.331519: step: 524/531, loss: 6.67572021484375e-05 2023-01-23 04:24:17.452921: step: 528/531, loss: 0.0011706352233886719 2023-01-23 04:24:18.574699: step: 532/531, loss: 0.0007376670837402344 2023-01-23 04:24:19.709675: step: 536/531, loss: 0.12850303947925568 2023-01-23 04:24:20.820776: step: 540/531, loss: 5.626679012493696e-06 2023-01-23 04:24:21.956474: step: 544/531, loss: 0.0002131462242687121 2023-01-23 04:24:23.082273: step: 548/531, loss: 0.0006744861602783203 2023-01-23 04:24:24.210898: step: 552/531, loss: 0.1536209136247635 2023-01-23 04:24:25.324742: step: 556/531, loss: 7.22885160939768e-05 2023-01-23 04:24:26.455060: step: 560/531, loss: 0.02694263495504856 2023-01-23 04:24:27.584769: step: 564/531, loss: 0.04169035330414772 2023-01-23 04:24:28.726808: step: 568/531, loss: 0.00014758111501578242 2023-01-23 04:24:29.824620: step: 572/531, loss: 0.016062546521425247 2023-01-23 04:24:30.966239: step: 576/531, loss: 2.1457672119140625e-05 2023-01-23 04:24:32.090175: step: 580/531, loss: 0.0025464058853685856 2023-01-23 04:24:33.234492: step: 584/531, loss: 0.07575168460607529 2023-01-23 04:24:34.335705: step: 588/531, loss: 0.00017507077427580953 2023-01-23 04:24:35.443872: step: 592/531, loss: 0.000419425981817767 2023-01-23 04:24:36.562512: step: 596/531, loss: 0.0002468586026225239 2023-01-23 04:24:37.677801: step: 600/531, loss: 2.0694733393611386e-05 2023-01-23 04:24:38.777713: step: 604/531, loss: -4.76837158203125e-07 2023-01-23 04:24:39.879990: step: 608/531, loss: 0.12374935299158096 2023-01-23 04:24:40.989934: step: 612/531, loss: 0.003525242442265153 2023-01-23 04:24:42.123591: step: 616/531, loss: 0.0032036779448390007 2023-01-23 04:24:43.250656: step: 620/531, loss: 9.756088547874242e-05 2023-01-23 04:24:44.383332: step: 624/531, loss: 0.0003226280095987022 2023-01-23 04:24:45.496919: step: 628/531, loss: 0.24145831167697906 2023-01-23 04:24:46.642113: step: 632/531, loss: 0.00030498503474518657 2023-01-23 04:24:47.772085: step: 636/531, loss: 0.004517651163041592 2023-01-23 04:24:48.914396: step: 640/531, loss: 0.006214332301169634 2023-01-23 04:24:50.036290: step: 644/531, loss: 0.1092003807425499 2023-01-23 04:24:51.175976: step: 648/531, loss: 1.3637543816003017e-05 2023-01-23 04:24:52.315465: step: 652/531, loss: 9.34600830078125e-05 2023-01-23 04:24:53.434322: step: 656/531, loss: 0.002922630403190851 2023-01-23 04:24:54.561497: step: 660/531, loss: 0.00040616991464048624 2023-01-23 04:24:55.673040: step: 664/531, loss: 0.025318719446659088 2023-01-23 04:24:56.792129: step: 668/531, loss: 0.00018568038649391383 2023-01-23 04:24:57.910962: step: 672/531, loss: 0.00024623872013762593 2023-01-23 04:24:59.030058: step: 676/531, loss: 0.00030894280644133687 2023-01-23 04:25:00.162825: step: 680/531, loss: 0.002146816346794367 2023-01-23 04:25:01.272508: step: 684/531, loss: 7.114410254871473e-05 2023-01-23 04:25:02.417221: step: 688/531, loss: 0.024901390075683594 2023-01-23 04:25:03.523960: step: 692/531, loss: 0.03612246364355087 2023-01-23 04:25:04.703912: step: 696/531, loss: 3.528594970703125e-05 2023-01-23 04:25:05.816905: step: 700/531, loss: 0.003978920169174671 2023-01-23 04:25:06.940839: step: 704/531, loss: 0.00023670197697356343 2023-01-23 04:25:08.066508: step: 708/531, loss: 2.8705595468636602e-05 2023-01-23 04:25:09.201113: step: 712/531, loss: 0.021152114495635033 2023-01-23 04:25:10.367282: step: 716/531, loss: 0.00015449525380972773 2023-01-23 04:25:11.496028: step: 720/531, loss: 6.389617738022935e-06 2023-01-23 04:25:12.652027: step: 724/531, loss: 0.049501992762088776 2023-01-23 04:25:13.774664: step: 728/531, loss: 0.007954979315400124 2023-01-23 04:25:14.887325: step: 732/531, loss: 0.0003467559872660786 2023-01-23 04:25:16.026255: step: 736/531, loss: 0.0006144046783447266 2023-01-23 04:25:17.135533: step: 740/531, loss: 0.0006265639676712453 2023-01-23 04:25:18.278454: step: 744/531, loss: 0.0001218795805471018 2023-01-23 04:25:19.413773: step: 748/531, loss: 4.997253563487902e-05 2023-01-23 04:25:20.563957: step: 752/531, loss: 2.47955313170678e-06 2023-01-23 04:25:21.666238: step: 756/531, loss: 0.0007749557844363153 2023-01-23 04:25:22.817719: step: 760/531, loss: 0.006569290068000555 2023-01-23 04:25:23.932414: step: 764/531, loss: -2.384185791015625e-07 2023-01-23 04:25:25.074167: step: 768/531, loss: 0.028141213580965996 2023-01-23 04:25:26.199376: step: 772/531, loss: 1.3446808225126006e-05 2023-01-23 04:25:27.356176: step: 776/531, loss: 0.0006719589000567794 2023-01-23 04:25:28.486706: step: 780/531, loss: 5.340576535672881e-06 2023-01-23 04:25:29.606068: step: 784/531, loss: 0.0001621246337890625 2023-01-23 04:25:30.761244: step: 788/531, loss: 0.00015354156494140625 2023-01-23 04:25:31.911529: step: 792/531, loss: 0.053298093378543854 2023-01-23 04:25:33.051878: step: 796/531, loss: 0.01825723610818386 2023-01-23 04:25:34.237766: step: 800/531, loss: 0.003969001583755016 2023-01-23 04:25:35.395234: step: 804/531, loss: 1.71661376953125e-05 2023-01-23 04:25:36.499668: step: 808/531, loss: 5.9890749980695546e-05 2023-01-23 04:25:37.632176: step: 812/531, loss: 0.0004479408380575478 2023-01-23 04:25:38.755324: step: 816/531, loss: 0.00014057158841751516 2023-01-23 04:25:39.945344: step: 820/531, loss: 0.04956188425421715 2023-01-23 04:25:41.067377: step: 824/531, loss: 0.0006712913163937628 2023-01-23 04:25:42.207131: step: 828/531, loss: 0.004286480136215687 2023-01-23 04:25:43.336480: step: 832/531, loss: 2.4890900022001006e-05 2023-01-23 04:25:44.463397: step: 836/531, loss: 0.0023303984198719263 2023-01-23 04:25:45.597609: step: 840/531, loss: 0.0004444122314453125 2023-01-23 04:25:46.718217: step: 844/531, loss: 0.0026824951637536287 2023-01-23 04:25:47.853895: step: 848/531, loss: 0.00048246385995298624 2023-01-23 04:25:48.942901: step: 852/531, loss: 8.459091623080894e-05 2023-01-23 04:25:50.067825: step: 856/531, loss: 0.024524878710508347 2023-01-23 04:25:51.175866: step: 860/531, loss: 0.001531314803287387 2023-01-23 04:25:52.292619: step: 864/531, loss: 0.0035223006270825863 2023-01-23 04:25:53.446082: step: 868/531, loss: 0.0002191543608205393 2023-01-23 04:25:54.552629: step: 872/531, loss: 8.649825758766383e-05 2023-01-23 04:25:55.636696: step: 876/531, loss: 0.00010013581049861386 2023-01-23 04:25:56.736721: step: 880/531, loss: 0.0480194091796875 2023-01-23 04:25:57.837655: step: 884/531, loss: 0.0009717941284179688 2023-01-23 04:25:58.963855: step: 888/531, loss: 0.04506396874785423 2023-01-23 04:26:00.070425: step: 892/531, loss: 1.811981201171875e-05 2023-01-23 04:26:01.184614: step: 896/531, loss: 5.5980683100642636e-05 2023-01-23 04:26:02.309947: step: 900/531, loss: 0.0012220382923260331 2023-01-23 04:26:03.428168: step: 904/531, loss: 2.040863000729587e-05 2023-01-23 04:26:04.599427: step: 908/531, loss: 0.008162117563188076 2023-01-23 04:26:05.696737: step: 912/531, loss: 0.0025362493470311165 2023-01-23 04:26:06.843889: step: 916/531, loss: 0.005315971560776234 2023-01-23 04:26:07.939739: step: 920/531, loss: 5.164146205061115e-05 2023-01-23 04:26:09.096447: step: 924/531, loss: 0.0030103682074695826 2023-01-23 04:26:10.199553: step: 928/531, loss: 0.013195991516113281 2023-01-23 04:26:11.322754: step: 932/531, loss: 3.156661841785535e-05 2023-01-23 04:26:12.462365: step: 936/531, loss: 8.668899681651965e-05 2023-01-23 04:26:13.586058: step: 940/531, loss: 0.0025838850997388363 2023-01-23 04:26:14.686308: step: 944/531, loss: 0.0019424438942223787 2023-01-23 04:26:15.799830: step: 948/531, loss: 4.162192271905951e-05 2023-01-23 04:26:16.923868: step: 952/531, loss: 0.0014663697220385075 2023-01-23 04:26:18.056279: step: 956/531, loss: 0.0003228187561035156 2023-01-23 04:26:19.177107: step: 960/531, loss: 0.0006592751014977694 2023-01-23 04:26:20.286801: step: 964/531, loss: 0.0011466980213299394 2023-01-23 04:26:21.411367: step: 968/531, loss: 0.0023582458961755037 2023-01-23 04:26:22.525582: step: 972/531, loss: 0.0003616333124227822 2023-01-23 04:26:23.645446: step: 976/531, loss: 4.6348573960131034e-05 2023-01-23 04:26:24.733756: step: 980/531, loss: 5.4359438763640355e-06 2023-01-23 04:26:25.869659: step: 984/531, loss: 0.0003030777152162045 2023-01-23 04:26:27.025402: step: 988/531, loss: 0.00016098022751975805 2023-01-23 04:26:28.157297: step: 992/531, loss: 0.002762222196906805 2023-01-23 04:26:29.267110: step: 996/531, loss: 0.0001794815034372732 2023-01-23 04:26:30.411686: step: 1000/531, loss: 1.1539459592313506e-05 2023-01-23 04:26:31.529157: step: 1004/531, loss: 0.0001548767031636089 2023-01-23 04:26:32.660583: step: 1008/531, loss: 0.00472679128870368 2023-01-23 04:26:33.785180: step: 1012/531, loss: 3.528594970703125e-05 2023-01-23 04:26:34.936964: step: 1016/531, loss: 0.004105186555534601 2023-01-23 04:26:36.033114: step: 1020/531, loss: 6.3896181927702855e-06 2023-01-23 04:26:37.148375: step: 1024/531, loss: 0.0002396106719970703 2023-01-23 04:26:38.264540: step: 1028/531, loss: 0.0048239706084132195 2023-01-23 04:26:39.387297: step: 1032/531, loss: 0.0006738663068972528 2023-01-23 04:26:40.549906: step: 1036/531, loss: 0.05202064663171768 2023-01-23 04:26:41.682273: step: 1040/531, loss: 1.0967255548166577e-05 2023-01-23 04:26:42.814089: step: 1044/531, loss: 1.8024444216280244e-05 2023-01-23 04:26:43.924226: step: 1048/531, loss: 0.010198498144745827 2023-01-23 04:26:45.041037: step: 1052/531, loss: 0.0006591796991415322 2023-01-23 04:26:46.194701: step: 1056/531, loss: 6.389617919921875e-05 2023-01-23 04:26:47.317118: step: 1060/531, loss: 0.004205131437629461 2023-01-23 04:26:48.479976: step: 1064/531, loss: 0.0002487182500772178 2023-01-23 04:26:49.599200: step: 1068/531, loss: 0.0003993988211732358 2023-01-23 04:26:50.738161: step: 1072/531, loss: 0.0020080567337572575 2023-01-23 04:26:51.838241: step: 1076/531, loss: 2.7084352041129023e-05 2023-01-23 04:26:52.977566: step: 1080/531, loss: 0.02887124940752983 2023-01-23 04:26:54.088690: step: 1084/531, loss: 0.0010878562461584806 2023-01-23 04:26:55.213161: step: 1088/531, loss: 0.0023478507064282894 2023-01-23 04:26:56.346863: step: 1092/531, loss: 0.0006430625799112022 2023-01-23 04:26:57.489986: step: 1096/531, loss: 0.021694278344511986 2023-01-23 04:26:58.599879: step: 1100/531, loss: 1.9550323486328125e-05 2023-01-23 04:26:59.733361: step: 1104/531, loss: 0.44525790214538574 2023-01-23 04:27:00.839710: step: 1108/531, loss: 0.0010629654861986637 2023-01-23 04:27:01.968548: step: 1112/531, loss: 8.869171324477065e-06 2023-01-23 04:27:03.087952: step: 1116/531, loss: 0.008517743088304996 2023-01-23 04:27:04.254643: step: 1120/531, loss: 0.05313320457935333 2023-01-23 04:27:05.359086: step: 1124/531, loss: 0.0593930222094059 2023-01-23 04:27:06.473005: step: 1128/531, loss: 4.0531158447265625e-05 2023-01-23 04:27:07.565880: step: 1132/531, loss: 5.569458153331652e-05 2023-01-23 04:27:08.678626: step: 1136/531, loss: 0.0003452301025390625 2023-01-23 04:27:09.810194: step: 1140/531, loss: 8.788705599727109e-06 2023-01-23 04:27:10.921142: step: 1144/531, loss: 0.00179290771484375 2023-01-23 04:27:12.081022: step: 1148/531, loss: 0.025395013391971588 2023-01-23 04:27:13.218262: step: 1152/531, loss: 4.96864304295741e-05 2023-01-23 04:27:14.343851: step: 1156/531, loss: 0.0003906250058207661 2023-01-23 04:27:15.475631: step: 1160/531, loss: 0.015845583751797676 2023-01-23 04:27:16.585534: step: 1164/531, loss: 2.86102294921875e-06 2023-01-23 04:27:17.704414: step: 1168/531, loss: 0.00900735892355442 2023-01-23 04:27:18.819078: step: 1172/531, loss: 0.0360538475215435 2023-01-23 04:27:19.930672: step: 1176/531, loss: 0.007651138585060835 2023-01-23 04:27:21.050272: step: 1180/531, loss: 1.7166139514301904e-05 2023-01-23 04:27:22.175610: step: 1184/531, loss: 0.019014835357666016 2023-01-23 04:27:23.276851: step: 1188/531, loss: 2.288818359375e-05 2023-01-23 04:27:24.422694: step: 1192/531, loss: 0.00558395404368639 2023-01-23 04:27:25.548499: step: 1196/531, loss: 2.6988982426701114e-05 2023-01-23 04:27:26.648918: step: 1200/531, loss: 0.19798269867897034 2023-01-23 04:27:27.777440: step: 1204/531, loss: 0.005815219599753618 2023-01-23 04:27:28.927947: step: 1208/531, loss: 0.0716014876961708 2023-01-23 04:27:30.035486: step: 1212/531, loss: 0.050624847412109375 2023-01-23 04:27:31.162716: step: 1216/531, loss: 0.027128983289003372 2023-01-23 04:27:32.265589: step: 1220/531, loss: 0.00086297991219908 2023-01-23 04:27:33.415960: step: 1224/531, loss: 1.831054760259576e-05 2023-01-23 04:27:34.547061: step: 1228/531, loss: 0.0074364664033055305 2023-01-23 04:27:35.727761: step: 1232/531, loss: 0.234603151679039 2023-01-23 04:27:36.853561: step: 1236/531, loss: 2.5415420168428682e-05 2023-01-23 04:27:37.942696: step: 1240/531, loss: 4.6443943574558944e-05 2023-01-23 04:27:39.070208: step: 1244/531, loss: 0.01067581120878458 2023-01-23 04:27:40.200521: step: 1248/531, loss: 0.05488729476928711 2023-01-23 04:27:41.301640: step: 1252/531, loss: 0.00031147003755904734 2023-01-23 04:27:42.448553: step: 1256/531, loss: 0.0013028144603595138 2023-01-23 04:27:43.569024: step: 1260/531, loss: 8.096695091808215e-05 2023-01-23 04:27:44.754517: step: 1264/531, loss: 0.0037929534446448088 2023-01-23 04:27:45.855337: step: 1268/531, loss: 0.009043884463608265 2023-01-23 04:27:46.987080: step: 1272/531, loss: 0.00036554335383698344 2023-01-23 04:27:48.081266: step: 1276/531, loss: 0.00019073487783316523 2023-01-23 04:27:49.207432: step: 1280/531, loss: 0.004482317250221968 2023-01-23 04:27:50.296632: step: 1284/531, loss: 0.0016658783424645662 2023-01-23 04:27:51.447862: step: 1288/531, loss: 7.06672653905116e-05 2023-01-23 04:27:52.593355: step: 1292/531, loss: 0.00017938614473678172 2023-01-23 04:27:53.706592: step: 1296/531, loss: 0.001176547957584262 2023-01-23 04:27:54.821726: step: 1300/531, loss: 0.01898946985602379 2023-01-23 04:27:55.950933: step: 1304/531, loss: 0.008185530081391335 2023-01-23 04:27:57.064208: step: 1308/531, loss: 0.0017718315357342362 2023-01-23 04:27:58.185872: step: 1312/531, loss: 0.005686188116669655 2023-01-23 04:27:59.284484: step: 1316/531, loss: 0.2636191248893738 2023-01-23 04:28:00.407545: step: 1320/531, loss: 0.006782532203942537 2023-01-23 04:28:01.515576: step: 1324/531, loss: 0.017702199518680573 2023-01-23 04:28:02.649236: step: 1328/531, loss: 0.049163054674863815 2023-01-23 04:28:03.783590: step: 1332/531, loss: 5.984306335449219e-05 2023-01-23 04:28:04.881034: step: 1336/531, loss: 0.0001834869326557964 2023-01-23 04:28:06.000347: step: 1340/531, loss: 0.0005691528785973787 2023-01-23 04:28:07.121434: step: 1344/531, loss: 0.0005155563703738153 2023-01-23 04:28:08.249736: step: 1348/531, loss: 0.4969814419746399 2023-01-23 04:28:09.388849: step: 1352/531, loss: 0.0010271072387695312 2023-01-23 04:28:10.525600: step: 1356/531, loss: 0.0006879806751385331 2023-01-23 04:28:11.650352: step: 1360/531, loss: 1.0776519957289565e-05 2023-01-23 04:28:12.768830: step: 1364/531, loss: 0.005980110261589289 2023-01-23 04:28:13.871565: step: 1368/531, loss: 0.0027632713317871094 2023-01-23 04:28:14.984244: step: 1372/531, loss: 0.0001032829241012223 2023-01-23 04:28:16.145778: step: 1376/531, loss: 0.00030269622220657766 2023-01-23 04:28:17.284522: step: 1380/531, loss: 0.010082913562655449 2023-01-23 04:28:18.418944: step: 1384/531, loss: 2.3508073354605585e-05 2023-01-23 04:28:19.547336: step: 1388/531, loss: 0.0002986908075399697 2023-01-23 04:28:20.668261: step: 1392/531, loss: 6.408691115211695e-05 2023-01-23 04:28:21.781285: step: 1396/531, loss: 0.006342983338981867 2023-01-23 04:28:22.905360: step: 1400/531, loss: 0.0005537032848224044 2023-01-23 04:28:24.003973: step: 1404/531, loss: 1.7452239262638614e-05 2023-01-23 04:28:25.184350: step: 1408/531, loss: 0.0005129814380779862 2023-01-23 04:28:26.308006: step: 1412/531, loss: 0.016759777441620827 2023-01-23 04:28:27.413591: step: 1416/531, loss: 0.022570062428712845 2023-01-23 04:28:28.554655: step: 1420/531, loss: 0.0007602691766805947 2023-01-23 04:28:29.691288: step: 1424/531, loss: 0.0034173966851085424 2023-01-23 04:28:30.808249: step: 1428/531, loss: 0.0012047768104821444 2023-01-23 04:28:31.920075: step: 1432/531, loss: 0.0007106781122274697 2023-01-23 04:28:33.015473: step: 1436/531, loss: 0.00017709731764625758 2023-01-23 04:28:34.170468: step: 1440/531, loss: 0.05722656473517418 2023-01-23 04:28:35.269541: step: 1444/531, loss: 0.000150585183291696 2023-01-23 04:28:36.396662: step: 1448/531, loss: 0.002532958984375 2023-01-23 04:28:37.505790: step: 1452/531, loss: 0.0002384185791015625 2023-01-23 04:28:38.613719: step: 1456/531, loss: 0.12424660474061966 2023-01-23 04:28:39.775101: step: 1460/531, loss: 0.0006864547613076866 2023-01-23 04:28:40.920477: step: 1464/531, loss: 0.06259050220251083 2023-01-23 04:28:42.080832: step: 1468/531, loss: 0.02921314351260662 2023-01-23 04:28:43.180192: step: 1472/531, loss: 0.05714721605181694 2023-01-23 04:28:44.303035: step: 1476/531, loss: 0.0008366584661416709 2023-01-23 04:28:45.410863: step: 1480/531, loss: 0.001455593155696988 2023-01-23 04:28:46.553870: step: 1484/531, loss: 0.018951939418911934 2023-01-23 04:28:47.673922: step: 1488/531, loss: 0.0001108169526560232 2023-01-23 04:28:48.783432: step: 1492/531, loss: 0.003665161319077015 2023-01-23 04:28:49.888291: step: 1496/531, loss: 0.0005284309154376388 2023-01-23 04:28:51.018081: step: 1500/531, loss: 0.001697969390079379 2023-01-23 04:28:52.133952: step: 1504/531, loss: 7.572174217784777e-05 2023-01-23 04:28:53.254543: step: 1508/531, loss: 0.003948211669921875 2023-01-23 04:28:54.387576: step: 1512/531, loss: 0.00846786517649889 2023-01-23 04:28:55.506381: step: 1516/531, loss: 0.0001069068894139491 2023-01-23 04:28:56.584095: step: 1520/531, loss: 1.0967255548166577e-06 2023-01-23 04:28:57.727556: step: 1524/531, loss: 0.019928455352783203 2023-01-23 04:28:58.849906: step: 1528/531, loss: 0.00030608175438828766 2023-01-23 04:28:59.972114: step: 1532/531, loss: 0.0037707327865064144 2023-01-23 04:29:01.124828: step: 1536/531, loss: 0.0005949020269326866 2023-01-23 04:29:02.240583: step: 1540/531, loss: 0.0007134437328204513 2023-01-23 04:29:03.405786: step: 1544/531, loss: 0.005471706390380859 2023-01-23 04:29:04.502387: step: 1548/531, loss: 0.00018711091252043843 2023-01-23 04:29:05.634394: step: 1552/531, loss: 0.005074119661003351 2023-01-23 04:29:06.754583: step: 1556/531, loss: 0.00048246385995298624 2023-01-23 04:29:07.878266: step: 1560/531, loss: 0.0010433674324303865 2023-01-23 04:29:08.997198: step: 1564/531, loss: 8.430481102550402e-05 2023-01-23 04:29:10.113480: step: 1568/531, loss: 0.0010838985908776522 2023-01-23 04:29:11.220437: step: 1572/531, loss: 0.0006448745843954384 2023-01-23 04:29:12.355916: step: 1576/531, loss: 0.017004871740937233 2023-01-23 04:29:13.518999: step: 1580/531, loss: 0.024933815002441406 2023-01-23 04:29:14.635460: step: 1584/531, loss: 0.01393432728946209 2023-01-23 04:29:15.772075: step: 1588/531, loss: 0.0001682281435932964 2023-01-23 04:29:16.894178: step: 1592/531, loss: 0.00042490960913710296 2023-01-23 04:29:18.044731: step: 1596/531, loss: 0.022207261994481087 2023-01-23 04:29:19.173281: step: 1600/531, loss: 0.050908852368593216 2023-01-23 04:29:20.296452: step: 1604/531, loss: 0.05037698522210121 2023-01-23 04:29:21.393108: step: 1608/531, loss: 0.00016498567129019648 2023-01-23 04:29:22.531295: step: 1612/531, loss: 0.04270875081419945 2023-01-23 04:29:23.644110: step: 1616/531, loss: 0.023236369714140892 2023-01-23 04:29:24.757514: step: 1620/531, loss: 0.00011291504051769152 2023-01-23 04:29:25.858447: step: 1624/531, loss: 0.00031476019648835063 2023-01-23 04:29:26.984867: step: 1628/531, loss: 7.781982276355848e-05 2023-01-23 04:29:28.128904: step: 1632/531, loss: 0.0023525238502770662 2023-01-23 04:29:29.269859: step: 1636/531, loss: 0.011433601379394531 2023-01-23 04:29:30.396910: step: 1640/531, loss: 0.004158496856689453 2023-01-23 04:29:31.512317: step: 1644/531, loss: 0.012745380401611328 2023-01-23 04:29:32.628264: step: 1648/531, loss: 0.015035247430205345 2023-01-23 04:29:33.768448: step: 1652/531, loss: 0.0007318020216189325 2023-01-23 04:29:34.876930: step: 1656/531, loss: 0.05518350750207901 2023-01-23 04:29:35.983508: step: 1660/531, loss: 0.0008961677667684853 2023-01-23 04:29:37.101485: step: 1664/531, loss: 0.0011484622955322266 2023-01-23 04:29:38.235695: step: 1668/531, loss: 0.004088640213012695 2023-01-23 04:29:39.365465: step: 1672/531, loss: 0.4882128834724426 2023-01-23 04:29:40.480033: step: 1676/531, loss: 0.007285499945282936 2023-01-23 04:29:41.595992: step: 1680/531, loss: 0.015175247564911842 2023-01-23 04:29:42.708361: step: 1684/531, loss: 0.006510543636977673 2023-01-23 04:29:43.802553: step: 1688/531, loss: 5.154609607416205e-05 2023-01-23 04:29:44.895033: step: 1692/531, loss: 0.007799339480698109 2023-01-23 04:29:45.991907: step: 1696/531, loss: 0.003286647843196988 2023-01-23 04:29:47.103322: step: 1700/531, loss: 0.014508056454360485 2023-01-23 04:29:48.223915: step: 1704/531, loss: 0.013638973236083984 2023-01-23 04:29:49.329845: step: 1708/531, loss: 0.0020320890471339226 2023-01-23 04:29:50.439806: step: 1712/531, loss: 0.011674500070512295 2023-01-23 04:29:51.548321: step: 1716/531, loss: 0.007084751036018133 2023-01-23 04:29:52.693617: step: 1720/531, loss: 0.00011281967454124242 2023-01-23 04:29:53.837908: step: 1724/531, loss: 0.016599273309111595 2023-01-23 04:29:54.953479: step: 1728/531, loss: 0.015539360232651234 2023-01-23 04:29:56.062424: step: 1732/531, loss: 0.006419277284294367 2023-01-23 04:29:57.195382: step: 1736/531, loss: 0.0050182342529296875 2023-01-23 04:29:58.319310: step: 1740/531, loss: 0.00027952194795943797 2023-01-23 04:29:59.435745: step: 1744/531, loss: 5.779266211902723e-05 2023-01-23 04:30:00.557646: step: 1748/531, loss: 1.4638900211139116e-05 2023-01-23 04:30:01.690020: step: 1752/531, loss: 0.06650781631469727 2023-01-23 04:30:02.828087: step: 1756/531, loss: 0.004096793942153454 2023-01-23 04:30:03.963555: step: 1760/531, loss: 0.0002225875941803679 2023-01-23 04:30:05.094352: step: 1764/531, loss: 0.006667566020041704 2023-01-23 04:30:06.231499: step: 1768/531, loss: 0.058296963572502136 2023-01-23 04:30:07.348695: step: 1772/531, loss: 0.0013595580821856856 2023-01-23 04:30:08.462217: step: 1776/531, loss: 2.384185791015625e-06 2023-01-23 04:30:09.601258: step: 1780/531, loss: 0.01043548621237278 2023-01-23 04:30:10.688805: step: 1784/531, loss: 0.0002168655482819304 2023-01-23 04:30:11.878420: step: 1788/531, loss: 7.200240816018777e-06 2023-01-23 04:30:13.010644: step: 1792/531, loss: 0.006196784786880016 2023-01-23 04:30:14.133823: step: 1796/531, loss: 0.007871055975556374 2023-01-23 04:30:15.278891: step: 1800/531, loss: 0.0109748849645257 2023-01-23 04:30:16.413345: step: 1804/531, loss: 0.0007970333681441844 2023-01-23 04:30:17.505988: step: 1808/531, loss: 5.073547436040826e-05 2023-01-23 04:30:18.623163: step: 1812/531, loss: 0.042394354939460754 2023-01-23 04:30:19.734030: step: 1816/531, loss: 0.026506900787353516 2023-01-23 04:30:20.883755: step: 1820/531, loss: 0.0004114151233807206 2023-01-23 04:30:22.007823: step: 1824/531, loss: 0.12249825149774551 2023-01-23 04:30:23.121029: step: 1828/531, loss: 0.06791038066148758 2023-01-23 04:30:24.242050: step: 1832/531, loss: 7.286071922862902e-05 2023-01-23 04:30:25.353343: step: 1836/531, loss: 0.005318641662597656 2023-01-23 04:30:26.456719: step: 1840/531, loss: 0.0014286041259765625 2023-01-23 04:30:27.575616: step: 1844/531, loss: 0.03651876747608185 2023-01-23 04:30:28.710678: step: 1848/531, loss: 0.008816909976303577 2023-01-23 04:30:29.820200: step: 1852/531, loss: 0.004405403044074774 2023-01-23 04:30:30.954343: step: 1856/531, loss: 0.0003368377801962197 2023-01-23 04:30:32.075713: step: 1860/531, loss: 0.0007091521983966231 2023-01-23 04:30:33.192873: step: 1864/531, loss: 0.0005462646950036287 2023-01-23 04:30:34.329765: step: 1868/531, loss: 0.003952980041503906 2023-01-23 04:30:35.477992: step: 1872/531, loss: 0.008650732226669788 2023-01-23 04:30:36.622628: step: 1876/531, loss: 0.00034160615177825093 2023-01-23 04:30:37.741933: step: 1880/531, loss: 0.000929546426050365 2023-01-23 04:30:38.877616: step: 1884/531, loss: 0.019374657422304153 2023-01-23 04:30:39.992279: step: 1888/531, loss: 0.00424990663304925 2023-01-23 04:30:41.095739: step: 1892/531, loss: 0.0002956390380859375 2023-01-23 04:30:42.207145: step: 1896/531, loss: 0.0001506805419921875 2023-01-23 04:30:43.328009: step: 1900/531, loss: 3.24249267578125e-05 2023-01-23 04:30:44.471838: step: 1904/531, loss: 0.0462704636156559 2023-01-23 04:30:45.605747: step: 1908/531, loss: 0.17189064621925354 2023-01-23 04:30:46.729564: step: 1912/531, loss: 0.00206413259729743 2023-01-23 04:30:47.875296: step: 1916/531, loss: 0.00014228820509742945 2023-01-23 04:30:49.023062: step: 1920/531, loss: 5.874633643543348e-05 2023-01-23 04:30:50.158834: step: 1924/531, loss: 0.006545448210090399 2023-01-23 04:30:51.282070: step: 1928/531, loss: 0.0008230209350585938 2023-01-23 04:30:52.373614: step: 1932/531, loss: 0.0035373687278479338 2023-01-23 04:30:53.486060: step: 1936/531, loss: 1.4019013178767636e-05 2023-01-23 04:30:54.617221: step: 1940/531, loss: 2.8419495720299892e-05 2023-01-23 04:30:55.740550: step: 1944/531, loss: 0.00018520356388762593 2023-01-23 04:30:56.861858: step: 1948/531, loss: 6.68525681248866e-05 2023-01-23 04:30:57.988971: step: 1952/531, loss: -1.144409225162235e-06 2023-01-23 04:30:59.126143: step: 1956/531, loss: 1.5258788153005298e-06 2023-01-23 04:31:00.275300: step: 1960/531, loss: 0.0025363920722156763 2023-01-23 04:31:01.405406: step: 1964/531, loss: 0.00012540817260742188 2023-01-23 04:31:02.542206: step: 1968/531, loss: -3.337860107421875e-06 2023-01-23 04:31:03.678300: step: 1972/531, loss: 0.1348293274641037 2023-01-23 04:31:04.788457: step: 1976/531, loss: 0.1118554174900055 2023-01-23 04:31:05.924493: step: 1980/531, loss: 0.008698750287294388 2023-01-23 04:31:07.049097: step: 1984/531, loss: 0.0007928848499432206 2023-01-23 04:31:08.184295: step: 1988/531, loss: 0.0004413604619912803 2023-01-23 04:31:09.303022: step: 1992/531, loss: 0.01868290826678276 2023-01-23 04:31:10.408479: step: 1996/531, loss: -7.62939453125e-06 2023-01-23 04:31:11.563615: step: 2000/531, loss: 1.3542176020564511e-05 2023-01-23 04:31:12.708016: step: 2004/531, loss: 5.836486889165826e-05 2023-01-23 04:31:13.809815: step: 2008/531, loss: 0.0001342296600341797 2023-01-23 04:31:14.959065: step: 2012/531, loss: 0.009788322262465954 2023-01-23 04:31:16.066871: step: 2016/531, loss: 0.015025138854980469 2023-01-23 04:31:17.191251: step: 2020/531, loss: 0.0004955291515216231 2023-01-23 04:31:18.343236: step: 2024/531, loss: 0.0027963160537183285 2023-01-23 04:31:19.469397: step: 2028/531, loss: 0.00017876624769996852 2023-01-23 04:31:20.615256: step: 2032/531, loss: 0.0012434959644451737 2023-01-23 04:31:21.773274: step: 2036/531, loss: 0.00025424957857467234 2023-01-23 04:31:22.859354: step: 2040/531, loss: 0.0023969649337232113 2023-01-23 04:31:23.973278: step: 2044/531, loss: 8.56399565236643e-05 2023-01-23 04:31:25.078886: step: 2048/531, loss: 0.0001846313534770161 2023-01-23 04:31:26.177555: step: 2052/531, loss: 0.011302662082016468 2023-01-23 04:31:27.280529: step: 2056/531, loss: 1.4781951904296875e-05 2023-01-23 04:31:28.415984: step: 2060/531, loss: 0.00016460419283248484 2023-01-23 04:31:29.545513: step: 2064/531, loss: 0.004124450962990522 2023-01-23 04:31:30.670812: step: 2068/531, loss: 0.02116584964096546 2023-01-23 04:31:31.789295: step: 2072/531, loss: 0.026836587116122246 2023-01-23 04:31:32.903737: step: 2076/531, loss: 2.8324127924861386e-05 2023-01-23 04:31:34.057009: step: 2080/531, loss: 0.02808237075805664 2023-01-23 04:31:35.189514: step: 2084/531, loss: 0.003188800998032093 2023-01-23 04:31:36.333344: step: 2088/531, loss: 0.0003784179862122983 2023-01-23 04:31:37.446558: step: 2092/531, loss: 0.0013854980934411287 2023-01-23 04:31:38.589314: step: 2096/531, loss: 0.0006001472938805819 2023-01-23 04:31:39.718015: step: 2100/531, loss: 0.015095424838364124 2023-01-23 04:31:40.849489: step: 2104/531, loss: 0.0004020690976176411 2023-01-23 04:31:41.971856: step: 2108/531, loss: 3.156662205583416e-05 2023-01-23 04:31:43.081033: step: 2112/531, loss: 7.801055471645668e-05 2023-01-23 04:31:44.200643: step: 2116/531, loss: 0.04162111133337021 2023-01-23 04:31:45.308877: step: 2120/531, loss: 0.028938865289092064 2023-01-23 04:31:46.417624: step: 2124/531, loss: 5.4359438763640355e-06 ================================================== Loss: 0.014 -------------------- Dev: {'event': {'p': 0.5852674066599395, 'r': 0.7723035952063915, 'f1': 0.6659012629161883}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Test: {'event': {'p': 0.6288561936402468, 'r': 0.7901013714967203, 'f1': 0.7003171247357294}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Chinese: {'event': {'p': 0.5697674418604651, 'r': 0.9074074074074074, 'f1': 0.7}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Korean: {'event': {'p': 0.5614035087719298, 'r': 0.5079365079365079, 'f1': 0.5333333333333333}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Russian: {'event': {'p': 0.40816326530612246, 'r': 0.5555555555555556, 'f1': 0.47058823529411764}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ****************************** Epoch: 29 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 04:32:26.646758: step: 4/531, loss: 0.0025509835686534643 2023-01-23 04:32:27.754876: step: 8/531, loss: 0.00034246445284225047 2023-01-23 04:32:28.851745: step: 12/531, loss: 0.11987762153148651 2023-01-23 04:32:29.989134: step: 16/531, loss: 8.96453821042087e-06 2023-01-23 04:32:31.142797: step: 20/531, loss: 0.0030508041381835938 2023-01-23 04:32:32.239516: step: 24/531, loss: 0.0001088142380467616 2023-01-23 04:32:33.345435: step: 28/531, loss: 0.0001069068894139491 2023-01-23 04:32:34.459654: step: 32/531, loss: 0.020453929901123047 2023-01-23 04:32:35.608887: step: 36/531, loss: 0.00250587472692132 2023-01-23 04:32:36.716307: step: 40/531, loss: 0.00016345977201126516 2023-01-23 04:32:37.826599: step: 44/531, loss: 0.0005069732433184981 2023-01-23 04:32:38.946744: step: 48/531, loss: 5.2261355449445546e-05 2023-01-23 04:32:40.046773: step: 52/531, loss: 0.012812805362045765 2023-01-23 04:32:41.200201: step: 56/531, loss: 0.0005630493978969753 2023-01-23 04:32:42.336808: step: 60/531, loss: 0.005355262663215399 2023-01-23 04:32:43.442098: step: 64/531, loss: 4.8208235966740176e-05 2023-01-23 04:32:44.574061: step: 68/531, loss: 0.004857540130615234 2023-01-23 04:32:45.684825: step: 72/531, loss: 0.024040794000029564 2023-01-23 04:32:46.778263: step: 76/531, loss: 0.001294040703214705 2023-01-23 04:32:47.925283: step: 80/531, loss: 1.5163423086050898e-05 2023-01-23 04:32:49.068152: step: 84/531, loss: 0.0001426696835551411 2023-01-23 04:32:50.187954: step: 88/531, loss: 0.01425857562571764 2023-01-23 04:32:51.299528: step: 92/531, loss: 0.0014289856189861894 2023-01-23 04:32:52.424490: step: 96/531, loss: 0.028946973383426666 2023-01-23 04:32:53.557372: step: 100/531, loss: 0.0007213592762127519 2023-01-23 04:32:54.701874: step: 104/531, loss: 0.0002178192080464214 2023-01-23 04:32:55.809518: step: 108/531, loss: 3.5953522456111386e-05 2023-01-23 04:32:56.915035: step: 112/531, loss: 0.06125025823712349 2023-01-23 04:32:58.026478: step: 116/531, loss: 0.005682135000824928 2023-01-23 04:32:59.154797: step: 120/531, loss: 0.00015163421630859375 2023-01-23 04:33:00.266482: step: 124/531, loss: 0.00011920928955078125 2023-01-23 04:33:01.397735: step: 128/531, loss: 2.4223329091910273e-05 2023-01-23 04:33:02.527831: step: 132/531, loss: 0.00037784577580168843 2023-01-23 04:33:03.675084: step: 136/531, loss: 0.0016256810631603003 2023-01-23 04:33:04.804313: step: 140/531, loss: 0.0010035515297204256 2023-01-23 04:33:05.920377: step: 144/531, loss: 0.0015749931335449219 2023-01-23 04:33:07.035619: step: 148/531, loss: 5.4836273193359375e-05 2023-01-23 04:33:08.181302: step: 152/531, loss: 0.0009161948692053556 2023-01-23 04:33:09.291730: step: 156/531, loss: 8.39233416627394e-06 2023-01-23 04:33:10.449606: step: 160/531, loss: 0.06011023372411728 2023-01-23 04:33:11.558433: step: 164/531, loss: 0.0018825532170012593 2023-01-23 04:33:12.688688: step: 168/531, loss: 0.46514949202537537 2023-01-23 04:33:13.835070: step: 172/531, loss: 0.0125891687348485 2023-01-23 04:33:14.979502: step: 176/531, loss: 0.0005283832433633506 2023-01-23 04:33:16.092421: step: 180/531, loss: 0.00036940575228072703 2023-01-23 04:33:17.191146: step: 184/531, loss: 0.0002588748757261783 2023-01-23 04:33:18.302814: step: 188/531, loss: 0.0009900570148602128 2023-01-23 04:33:19.437224: step: 192/531, loss: 0.0072325230576097965 2023-01-23 04:33:20.585042: step: 196/531, loss: 0.9481627345085144 2023-01-23 04:33:21.735347: step: 200/531, loss: 0.00028543471125885844 2023-01-23 04:33:22.856280: step: 204/531, loss: 0.03043527714908123 2023-01-23 04:33:23.977685: step: 208/531, loss: 0.02886362187564373 2023-01-23 04:33:25.119311: step: 212/531, loss: 0.007921266369521618 2023-01-23 04:33:26.236403: step: 216/531, loss: 0.002102756407111883 2023-01-23 04:33:27.365681: step: 220/531, loss: 7.127523713279516e-05 2023-01-23 04:33:28.478184: step: 224/531, loss: 0.007027149200439453 2023-01-23 04:33:29.588423: step: 228/531, loss: 0.00030188559321686625 2023-01-23 04:33:30.704624: step: 232/531, loss: 0.002290153643116355 2023-01-23 04:33:31.840880: step: 236/531, loss: 0.00013484954251907766 2023-01-23 04:33:32.957435: step: 240/531, loss: 0.0022546767722815275 2023-01-23 04:33:34.092871: step: 244/531, loss: 0.0007330894586630166 2023-01-23 04:33:35.225934: step: 248/531, loss: 0.00046391485375352204 2023-01-23 04:33:36.390257: step: 252/531, loss: 0.06503858417272568 2023-01-23 04:33:37.513620: step: 256/531, loss: 0.008624649606645107 2023-01-23 04:33:38.611627: step: 260/531, loss: 5.7220458984375e-06 2023-01-23 04:33:39.718559: step: 264/531, loss: 0.031436823308467865 2023-01-23 04:33:40.809309: step: 268/531, loss: 0.025232218205928802 2023-01-23 04:33:41.954338: step: 272/531, loss: 0.08242025226354599 2023-01-23 04:33:43.071781: step: 276/531, loss: 0.0024717331398278475 2023-01-23 04:33:44.194898: step: 280/531, loss: 0.010301684960722923 2023-01-23 04:33:45.335160: step: 284/531, loss: 2.4318694613612024e-06 2023-01-23 04:33:46.486031: step: 288/531, loss: 0.001957368804141879 2023-01-23 04:33:47.602827: step: 292/531, loss: 0.0001146316499216482 2023-01-23 04:33:48.726546: step: 296/531, loss: 0.008628464303910732 2023-01-23 04:33:49.846829: step: 300/531, loss: 0.0004521369992289692 2023-01-23 04:33:50.980220: step: 304/531, loss: 0.002352142473682761 2023-01-23 04:33:52.073451: step: 308/531, loss: 1.2874603271484375e-05 2023-01-23 04:33:53.180766: step: 312/531, loss: 2.57492069977161e-06 2023-01-23 04:33:54.301388: step: 316/531, loss: 1.1444091114753974e-06 2023-01-23 04:33:55.402655: step: 320/531, loss: 0.00024433137150481343 2023-01-23 04:33:56.555926: step: 324/531, loss: 0.0005514145013876259 2023-01-23 04:33:57.679455: step: 328/531, loss: 0.0002881050168070942 2023-01-23 04:33:58.793904: step: 332/531, loss: 0.0010843276977539062 2023-01-23 04:33:59.917369: step: 336/531, loss: 0.005403232295066118 2023-01-23 04:34:01.057390: step: 340/531, loss: 0.00910263042896986 2023-01-23 04:34:02.170336: step: 344/531, loss: -3.43322744811303e-06 2023-01-23 04:34:03.274076: step: 348/531, loss: -3.43322744811303e-06 2023-01-23 04:34:04.381328: step: 352/531, loss: 3.185272362316027e-05 2023-01-23 04:34:05.508931: step: 356/531, loss: 1.6307831174344756e-05 2023-01-23 04:34:06.629334: step: 360/531, loss: 0.0008256912115029991 2023-01-23 04:34:07.741839: step: 364/531, loss: 0.0001316070556640625 2023-01-23 04:34:08.874135: step: 368/531, loss: 0.00012140274338889867 2023-01-23 04:34:10.020129: step: 372/531, loss: 8.96453821042087e-06 2023-01-23 04:34:11.124716: step: 376/531, loss: 0.00683670025318861 2023-01-23 04:34:12.205163: step: 380/531, loss: 3.24249276673072e-06 2023-01-23 04:34:13.314533: step: 384/531, loss: 6.408691842807457e-05 2023-01-23 04:34:14.432630: step: 388/531, loss: 0.047516822814941406 2023-01-23 04:34:15.564298: step: 392/531, loss: 0.038408663123846054 2023-01-23 04:34:16.676517: step: 396/531, loss: -2.2411345526052173e-06 2023-01-23 04:34:17.797072: step: 400/531, loss: 0.0006032944074831903 2023-01-23 04:34:18.911049: step: 404/531, loss: 5.464554124046117e-05 2023-01-23 04:34:20.047990: step: 408/531, loss: 0.0002157211274607107 2023-01-23 04:34:21.161296: step: 412/531, loss: 0.030631449073553085 2023-01-23 04:34:22.299206: step: 416/531, loss: 4.138946678722277e-05 2023-01-23 04:34:23.464333: step: 420/531, loss: 0.04975862428545952 2023-01-23 04:34:24.606418: step: 424/531, loss: 0.00068836216814816 2023-01-23 04:34:25.753993: step: 428/531, loss: 0.02055676095187664 2023-01-23 04:34:26.919817: step: 432/531, loss: 0.00014319419278763235 2023-01-23 04:34:28.064444: step: 436/531, loss: 0.00010337829735362902 2023-01-23 04:34:29.172344: step: 440/531, loss: 4.6062468754826114e-05 2023-01-23 04:34:30.293590: step: 444/531, loss: 9.441375368623994e-06 2023-01-23 04:34:31.421183: step: 448/531, loss: 0.947634220123291 2023-01-23 04:34:32.520818: step: 452/531, loss: 0.0023738860618323088 2023-01-23 04:34:33.622667: step: 456/531, loss: 0.0002551078796386719 2023-01-23 04:34:34.745503: step: 460/531, loss: 0.008489036932587624 2023-01-23 04:34:35.874158: step: 464/531, loss: 0.0013393402332440019 2023-01-23 04:34:36.983887: step: 468/531, loss: 0.011681747622787952 2023-01-23 04:34:38.113240: step: 472/531, loss: 0.005531406961381435 2023-01-23 04:34:39.242580: step: 476/531, loss: 0.051253318786621094 2023-01-23 04:34:40.355338: step: 480/531, loss: 3.6716461181640625e-05 2023-01-23 04:34:41.471874: step: 484/531, loss: 0.0033349990844726562 2023-01-23 04:34:42.614936: step: 488/531, loss: 0.05388984829187393 2023-01-23 04:34:43.743412: step: 492/531, loss: 0.002191257430240512 2023-01-23 04:34:44.884416: step: 496/531, loss: 1.220703143189894e-05 2023-01-23 04:34:46.010076: step: 500/531, loss: 0.00561566324904561 2023-01-23 04:34:47.168957: step: 504/531, loss: 0.01273345947265625 2023-01-23 04:34:48.341164: step: 508/531, loss: 0.054681967943906784 2023-01-23 04:34:49.462377: step: 512/531, loss: 2.841949390131049e-05 2023-01-23 04:34:50.592059: step: 516/531, loss: 0.0009076118585653603 2023-01-23 04:34:51.727298: step: 520/531, loss: 0.0005132675287313759 2023-01-23 04:34:52.853654: step: 524/531, loss: 0.007507037837058306 2023-01-23 04:34:53.971247: step: 528/531, loss: 0.00024452211800962687 2023-01-23 04:34:55.111691: step: 532/531, loss: 0.015096664428710938 2023-01-23 04:34:56.220128: step: 536/531, loss: 2.2315980459097773e-05 2023-01-23 04:34:57.326048: step: 540/531, loss: 5.073547436040826e-05 2023-01-23 04:34:58.441180: step: 544/531, loss: 0.0015371324261650443 2023-01-23 04:34:59.611543: step: 548/531, loss: 0.022781943902373314 2023-01-23 04:35:00.742611: step: 552/531, loss: 0.0002980232238769531 2023-01-23 04:35:01.884925: step: 556/531, loss: 0.001135158585384488 2023-01-23 04:35:03.019684: step: 560/531, loss: 0.00031719208345748484 2023-01-23 04:35:04.138731: step: 564/531, loss: 1.6975403923424892e-05 2023-01-23 04:35:05.294077: step: 568/531, loss: 0.001083850977011025 2023-01-23 04:35:06.422363: step: 572/531, loss: 0.029671192169189453 2023-01-23 04:35:07.533076: step: 576/531, loss: 0.0007109642610885203 2023-01-23 04:35:08.666796: step: 580/531, loss: 0.0014524459838867188 2023-01-23 04:35:09.775211: step: 584/531, loss: 0.02809906005859375 2023-01-23 04:35:10.967955: step: 588/531, loss: 4.806518700206652e-05 2023-01-23 04:35:12.119543: step: 592/531, loss: 0.000721836113370955 2023-01-23 04:35:13.268820: step: 596/531, loss: 0.005446815863251686 2023-01-23 04:35:14.392695: step: 600/531, loss: 5.91278076171875e-05 2023-01-23 04:35:15.496881: step: 604/531, loss: 0.01587248034775257 2023-01-23 04:35:16.647311: step: 608/531, loss: 0.004073906224220991 2023-01-23 04:35:17.804963: step: 612/531, loss: 7.009506225585938e-05 2023-01-23 04:35:18.950377: step: 616/531, loss: 0.00017499923706054688 2023-01-23 04:35:20.054495: step: 620/531, loss: 0.019237900152802467 2023-01-23 04:35:21.186409: step: 624/531, loss: 0.006781387608498335 2023-01-23 04:35:22.307982: step: 628/531, loss: 8.430481102550402e-05 2023-01-23 04:35:23.441539: step: 632/531, loss: 0.001216030097566545 2023-01-23 04:35:24.566386: step: 636/531, loss: 8.106231689453125e-06 2023-01-23 04:35:25.782022: step: 640/531, loss: 0.0002494812069926411 2023-01-23 04:35:26.890273: step: 644/531, loss: 0.0004675865056924522 2023-01-23 04:35:28.018256: step: 648/531, loss: 0.0058078765869140625 2023-01-23 04:35:29.137652: step: 652/531, loss: 0.05218668282032013 2023-01-23 04:35:30.259027: step: 656/531, loss: 0.0011812209850177169 2023-01-23 04:35:31.406532: step: 660/531, loss: 0.004022025968879461 2023-01-23 04:35:32.516080: step: 664/531, loss: 9.5367431640625e-06 2023-01-23 04:35:33.686334: step: 668/531, loss: 0.012993049807846546 2023-01-23 04:35:34.804884: step: 672/531, loss: 0.004771995823830366 2023-01-23 04:35:35.955698: step: 676/531, loss: 0.001386451767757535 2023-01-23 04:35:37.067409: step: 680/531, loss: 3.170967102050781e-05 2023-01-23 04:35:38.171265: step: 684/531, loss: 0.004673290532082319 2023-01-23 04:35:39.292847: step: 688/531, loss: 0.02231273613870144 2023-01-23 04:35:40.405474: step: 692/531, loss: 0.06851554661989212 2023-01-23 04:35:41.547926: step: 696/531, loss: 3.490447852527723e-05 2023-01-23 04:35:42.657440: step: 700/531, loss: 6.198883056640625e-05 2023-01-23 04:35:43.783387: step: 704/531, loss: 5.6457516620866954e-05 2023-01-23 04:35:44.895741: step: 708/531, loss: 0.002029895782470703 2023-01-23 04:35:46.014469: step: 712/531, loss: 0.0063069346360862255 2023-01-23 04:35:47.127462: step: 716/531, loss: 1.602172778802924e-05 2023-01-23 04:35:48.254491: step: 720/531, loss: 0.0016890049446374178 2023-01-23 04:35:49.404656: step: 724/531, loss: 1.0395049685030244e-05 2023-01-23 04:35:50.550300: step: 728/531, loss: 0.013375855050981045 2023-01-23 04:35:51.696359: step: 732/531, loss: 0.058811575174331665 2023-01-23 04:35:52.827583: step: 736/531, loss: 0.00027751922607421875 2023-01-23 04:35:53.970372: step: 740/531, loss: 0.03550739586353302 2023-01-23 04:35:55.113899: step: 744/531, loss: 0.11045493930578232 2023-01-23 04:35:56.258106: step: 748/531, loss: 0.008289719000458717 2023-01-23 04:35:57.400280: step: 752/531, loss: 9.422302537132055e-05 2023-01-23 04:35:58.515344: step: 756/531, loss: 0.0001981735258596018 2023-01-23 04:35:59.634910: step: 760/531, loss: 0.026142168790102005 2023-01-23 04:36:00.767244: step: 764/531, loss: 0.0065392497926950455 2023-01-23 04:36:01.904422: step: 768/531, loss: 0.0001352310209767893 2023-01-23 04:36:03.044125: step: 772/531, loss: 0.003180217929184437 2023-01-23 04:36:04.148312: step: 776/531, loss: 0.0003345966397318989 2023-01-23 04:36:05.261160: step: 780/531, loss: 0.00020141602726653218 2023-01-23 04:36:06.365249: step: 784/531, loss: 0.00010433197894599289 2023-01-23 04:36:07.494335: step: 788/531, loss: -3.6239625842426904e-06 2023-01-23 04:36:08.579485: step: 792/531, loss: 0.01801004447042942 2023-01-23 04:36:09.692900: step: 796/531, loss: 4.6348573960131034e-05 2023-01-23 04:36:10.790498: step: 800/531, loss: 1.3351441339182202e-06 2023-01-23 04:36:11.961835: step: 804/531, loss: 0.013704968616366386 2023-01-23 04:36:13.074861: step: 808/531, loss: 2.250671423098538e-05 2023-01-23 04:36:14.201886: step: 812/531, loss: 0.0023981095291674137 2023-01-23 04:36:15.313906: step: 816/531, loss: 0.0024518491700291634 2023-01-23 04:36:16.452475: step: 820/531, loss: 0.003085517790168524 2023-01-23 04:36:17.589025: step: 824/531, loss: 3.5762786865234375e-05 2023-01-23 04:36:18.712589: step: 828/531, loss: 0.027862071990966797 2023-01-23 04:36:19.860059: step: 832/531, loss: 0.010223960503935814 2023-01-23 04:36:20.982542: step: 836/531, loss: 0.016177557408809662 2023-01-23 04:36:22.110870: step: 840/531, loss: 0.012003231793642044 2023-01-23 04:36:23.234898: step: 844/531, loss: 0.02688327059149742 2023-01-23 04:36:24.387961: step: 848/531, loss: 0.003919505979865789 2023-01-23 04:36:25.540398: step: 852/531, loss: 0.06739606708288193 2023-01-23 04:36:26.669349: step: 856/531, loss: 0.005322265438735485 2023-01-23 04:36:27.793475: step: 860/531, loss: 0.06640568375587463 2023-01-23 04:36:28.888968: step: 864/531, loss: 0.0015245437389239669 2023-01-23 04:36:30.027839: step: 868/531, loss: 0.0010734081733971834 2023-01-23 04:36:31.146518: step: 872/531, loss: 0.029058076441287994 2023-01-23 04:36:32.305708: step: 876/531, loss: 0.0002967834589071572 2023-01-23 04:36:33.437820: step: 880/531, loss: 5.7744979130802676e-05 2023-01-23 04:36:34.545476: step: 884/531, loss: 0.0025772093795239925 2023-01-23 04:36:35.665498: step: 888/531, loss: 2.288818359375e-05 2023-01-23 04:36:36.794849: step: 892/531, loss: 2.231597864010837e-05 2023-01-23 04:36:37.909751: step: 896/531, loss: 4.706382969743572e-05 2023-01-23 04:36:39.041656: step: 900/531, loss: 0.0006538391462527215 2023-01-23 04:36:40.139159: step: 904/531, loss: 0.0008965493179857731 2023-01-23 04:36:41.285342: step: 908/531, loss: 0.00011024475679732859 2023-01-23 04:36:42.382747: step: 912/531, loss: 7.438660304615041e-06 2023-01-23 04:36:43.500058: step: 916/531, loss: 9.765624417923391e-05 2023-01-23 04:36:44.625413: step: 920/531, loss: -1.0967255548166577e-06 2023-01-23 04:36:45.718382: step: 924/531, loss: -0.0 2023-01-23 04:36:46.812244: step: 928/531, loss: 9.393692380399443e-06 2023-01-23 04:36:47.907289: step: 932/531, loss: 2.3365021206700476e-06 2023-01-23 04:36:49.023935: step: 936/531, loss: 3.7765505112474784e-05 2023-01-23 04:36:50.202027: step: 940/531, loss: 0.0010574341285973787 2023-01-23 04:36:51.304659: step: 944/531, loss: 0.0022607804276049137 2023-01-23 04:36:52.460812: step: 948/531, loss: 0.0010807036887854338 2023-01-23 04:36:53.601186: step: 952/531, loss: 6.904602196300402e-05 2023-01-23 04:36:54.710051: step: 956/531, loss: 0.00017280578322242945 2023-01-23 04:36:55.831768: step: 960/531, loss: 0.00015873908705543727 2023-01-23 04:36:56.961658: step: 964/531, loss: 0.03656425699591637 2023-01-23 04:36:58.080645: step: 968/531, loss: 0.0006323814741335809 2023-01-23 04:36:59.206460: step: 972/531, loss: 0.0001966476411325857 2023-01-23 04:37:00.310469: step: 976/531, loss: 1.4638901120633818e-05 2023-01-23 04:37:01.438745: step: 980/531, loss: 0.020313262939453125 2023-01-23 04:37:02.552244: step: 984/531, loss: 0.006087875459343195 2023-01-23 04:37:03.697599: step: 988/531, loss: 0.0005690574762411416 2023-01-23 04:37:04.830493: step: 992/531, loss: 0.056046582758426666 2023-01-23 04:37:05.928198: step: 996/531, loss: 3.414153979974799e-05 2023-01-23 04:37:07.047767: step: 1000/531, loss: 0.0016624450217932463 2023-01-23 04:37:08.173677: step: 1004/531, loss: 0.0012004852760583162 2023-01-23 04:37:09.295180: step: 1008/531, loss: 0.004714012145996094 2023-01-23 04:37:10.444946: step: 1012/531, loss: 0.024550437927246094 2023-01-23 04:37:11.548376: step: 1016/531, loss: 0.012376213446259499 2023-01-23 04:37:12.694976: step: 1020/531, loss: 0.008895112201571465 2023-01-23 04:37:13.824059: step: 1024/531, loss: 0.009144734591245651 2023-01-23 04:37:14.943137: step: 1028/531, loss: 0.00043668749276548624 2023-01-23 04:37:16.077927: step: 1032/531, loss: 3.938675217796117e-05 2023-01-23 04:37:17.248627: step: 1036/531, loss: 0.00045261383638717234 2023-01-23 04:37:18.391357: step: 1040/531, loss: 0.000240325927734375 2023-01-23 04:37:19.523819: step: 1044/531, loss: 1.888275073724799e-05 2023-01-23 04:37:20.621485: step: 1048/531, loss: -4.0054324017546605e-06 2023-01-23 04:37:21.755608: step: 1052/531, loss: 0.037900160998106 2023-01-23 04:37:22.873231: step: 1056/531, loss: 0.013981628231704235 2023-01-23 04:37:23.976510: step: 1060/531, loss: 1.0395049685030244e-05 2023-01-23 04:37:25.089908: step: 1064/531, loss: 2.765655517578125e-05 2023-01-23 04:37:26.222604: step: 1068/531, loss: 0.00029139520484022796 2023-01-23 04:37:27.330994: step: 1072/531, loss: 0.0021213532891124487 2023-01-23 04:37:28.434599: step: 1076/531, loss: 0.04845290258526802 2023-01-23 04:37:29.579907: step: 1080/531, loss: -2.1457672119140625e-06 2023-01-23 04:37:30.686247: step: 1084/531, loss: 0.008490849286317825 2023-01-23 04:37:31.792987: step: 1088/531, loss: 0.002239799592643976 2023-01-23 04:37:32.920149: step: 1092/531, loss: 0.008220577612519264 2023-01-23 04:37:34.014691: step: 1096/531, loss: 0.0003144264337606728 2023-01-23 04:37:35.148898: step: 1100/531, loss: 6.170273263705894e-05 2023-01-23 04:37:36.258151: step: 1104/531, loss: 2.9277802241267636e-05 2023-01-23 04:37:37.386538: step: 1108/531, loss: 7.724762326688506e-06 2023-01-23 04:37:38.518780: step: 1112/531, loss: 0.0011811256408691406 2023-01-23 04:37:39.645790: step: 1116/531, loss: 0.0074310302734375 2023-01-23 04:37:40.750063: step: 1120/531, loss: 0.024226855486631393 2023-01-23 04:37:41.888769: step: 1124/531, loss: 0.0081291189417243 2023-01-23 04:37:43.001480: step: 1128/531, loss: 0.002590179443359375 2023-01-23 04:37:44.127932: step: 1132/531, loss: 1.296997106692288e-05 2023-01-23 04:37:45.237292: step: 1136/531, loss: 9.651183790992945e-05 2023-01-23 04:37:46.372049: step: 1140/531, loss: 3.700256274896674e-05 2023-01-23 04:37:47.495075: step: 1144/531, loss: 0.4072877764701843 2023-01-23 04:37:48.614396: step: 1148/531, loss: 1.4019012269272935e-05 2023-01-23 04:37:49.714599: step: 1152/531, loss: 0.05686759948730469 2023-01-23 04:37:50.805593: step: 1156/531, loss: 0.01558542251586914 2023-01-23 04:37:51.927993: step: 1160/531, loss: 0.000504183757584542 2023-01-23 04:37:53.024031: step: 1164/531, loss: 9.222031076205894e-05 2023-01-23 04:37:54.136211: step: 1168/531, loss: 0.000812530517578125 2023-01-23 04:37:55.287956: step: 1172/531, loss: 0.009466457180678844 2023-01-23 04:37:56.428472: step: 1176/531, loss: 0.0001279830903513357 2023-01-23 04:37:57.552164: step: 1180/531, loss: 0.00021228790865279734 2023-01-23 04:37:58.680969: step: 1184/531, loss: 1.583099401614163e-05 2023-01-23 04:37:59.838664: step: 1188/531, loss: 0.023228121921420097 2023-01-23 04:38:00.954919: step: 1192/531, loss: 0.0003137111780233681 2023-01-23 04:38:02.103788: step: 1196/531, loss: 9.97066599666141e-05 2023-01-23 04:38:03.227365: step: 1200/531, loss: 0.00044965746928937733 2023-01-23 04:38:04.331792: step: 1204/531, loss: 0.005221414379775524 2023-01-23 04:38:05.445510: step: 1208/531, loss: 1.773834264895413e-05 2023-01-23 04:38:06.574893: step: 1212/531, loss: 0.07778330147266388 2023-01-23 04:38:07.685791: step: 1216/531, loss: 4.2724612285383046e-05 2023-01-23 04:38:08.800210: step: 1220/531, loss: 0.0025962828658521175 2023-01-23 04:38:09.912853: step: 1224/531, loss: 0.0004519462527241558 2023-01-23 04:38:11.060768: step: 1228/531, loss: -8.630751835880801e-06 2023-01-23 04:38:12.201941: step: 1232/531, loss: 0.0004171848122496158 2023-01-23 04:38:13.291787: step: 1236/531, loss: 1.8501283193472773e-05 2023-01-23 04:38:14.435800: step: 1240/531, loss: 0.033056020736694336 2023-01-23 04:38:15.568607: step: 1244/531, loss: 0.0008514404762536287 2023-01-23 04:38:16.683964: step: 1248/531, loss: 1.0681153071345761e-05 2023-01-23 04:38:17.830675: step: 1252/531, loss: 0.01489028986543417 2023-01-23 04:38:18.931531: step: 1256/531, loss: 4.4631960918195546e-05 2023-01-23 04:38:20.045372: step: 1260/531, loss: 8.916855222196318e-06 2023-01-23 04:38:21.141014: step: 1264/531, loss: 0.00166835798881948 2023-01-23 04:38:22.264046: step: 1268/531, loss: 0.005706119816750288 2023-01-23 04:38:23.399312: step: 1272/531, loss: 0.0023047449067234993 2023-01-23 04:38:24.544663: step: 1276/531, loss: 0.0018450261559337378 2023-01-23 04:38:25.674977: step: 1280/531, loss: 0.00107917794957757 2023-01-23 04:38:26.811250: step: 1284/531, loss: 0.002374029252678156 2023-01-23 04:38:27.944368: step: 1288/531, loss: 0.06363458931446075 2023-01-23 04:38:29.048654: step: 1292/531, loss: 0.0006917953724041581 2023-01-23 04:38:30.180386: step: 1296/531, loss: 0.0015007973415777087 2023-01-23 04:38:31.291351: step: 1300/531, loss: 0.0024959563743323088 2023-01-23 04:38:32.390254: step: 1304/531, loss: 0.00024662018404342234 2023-01-23 04:38:33.534737: step: 1308/531, loss: 0.05018739774823189 2023-01-23 04:38:34.655131: step: 1312/531, loss: 3.6478043057286413e-06 2023-01-23 04:38:35.786343: step: 1316/531, loss: 0.003646993776783347 2023-01-23 04:38:36.894552: step: 1320/531, loss: 1.3351441339182202e-05 2023-01-23 04:38:38.006152: step: 1324/531, loss: 0.0019901276100426912 2023-01-23 04:38:39.121567: step: 1328/531, loss: 0.003984689712524414 2023-01-23 04:38:40.250441: step: 1332/531, loss: 0.029755594208836555 2023-01-23 04:38:41.332267: step: 1336/531, loss: 0.00015778541273903102 2023-01-23 04:38:42.470513: step: 1340/531, loss: 7.190704491222277e-05 2023-01-23 04:38:43.579084: step: 1344/531, loss: 0.00046091078547760844 2023-01-23 04:38:44.729125: step: 1348/531, loss: 0.015927555039525032 2023-01-23 04:38:45.821043: step: 1352/531, loss: 0.0021800994873046875 2023-01-23 04:38:46.911168: step: 1356/531, loss: 3.490447852527723e-05 2023-01-23 04:38:48.083673: step: 1360/531, loss: 0.01369552593678236 2023-01-23 04:38:49.226973: step: 1364/531, loss: 0.028188277035951614 2023-01-23 04:38:50.335277: step: 1368/531, loss: 0.0005002975813113153 2023-01-23 04:38:51.462939: step: 1372/531, loss: 0.00038375856820493937 2023-01-23 04:38:52.564062: step: 1376/531, loss: 0.00012722014798782766 2023-01-23 04:38:53.676958: step: 1380/531, loss: 4.1484832763671875e-05 2023-01-23 04:38:54.779605: step: 1384/531, loss: 0.005601120181381702 2023-01-23 04:38:55.895649: step: 1388/531, loss: -9.5367431640625e-07 2023-01-23 04:38:57.008452: step: 1392/531, loss: 1.602172778802924e-05 2023-01-23 04:38:58.120609: step: 1396/531, loss: 9.5367431640625e-07 2023-01-23 04:38:59.240557: step: 1400/531, loss: 0.0012981415493413806 2023-01-23 04:39:00.359944: step: 1404/531, loss: 0.0013186454307287931 2023-01-23 04:39:01.504788: step: 1408/531, loss: 0.0022861480247229338 2023-01-23 04:39:02.616559: step: 1412/531, loss: 1.182556115963962e-05 2023-01-23 04:39:03.734912: step: 1416/531, loss: 0.008926964364945889 2023-01-23 04:39:04.844325: step: 1420/531, loss: 0.18557921051979065 2023-01-23 04:39:05.946756: step: 1424/531, loss: 0.002209282014518976 2023-01-23 04:39:07.073020: step: 1428/531, loss: 2.7942656743107364e-05 2023-01-23 04:39:08.198377: step: 1432/531, loss: 0.0011861324310302734 2023-01-23 04:39:09.323582: step: 1436/531, loss: 1.3351441339182202e-06 2023-01-23 04:39:10.417684: step: 1440/531, loss: 1.2397767022775952e-05 2023-01-23 04:39:11.536829: step: 1444/531, loss: 0.0011178969871252775 2023-01-23 04:39:12.635697: step: 1448/531, loss: 4.673003786592744e-06 2023-01-23 04:39:13.770135: step: 1452/531, loss: 4.301071021473035e-05 2023-01-23 04:39:14.897989: step: 1456/531, loss: 0.00025653839111328125 2023-01-23 04:39:16.015691: step: 1460/531, loss: 0.0005153656238690019 2023-01-23 04:39:17.146209: step: 1464/531, loss: 0.003677845001220703 2023-01-23 04:39:18.257596: step: 1468/531, loss: 0.019048118963837624 2023-01-23 04:39:19.386172: step: 1472/531, loss: 0.003568267682567239 2023-01-23 04:39:20.496820: step: 1476/531, loss: 0.0010761261219158769 2023-01-23 04:39:21.602926: step: 1480/531, loss: 0.000274848920525983 2023-01-23 04:39:22.738433: step: 1484/531, loss: 0.00606040982529521 2023-01-23 04:39:23.843899: step: 1488/531, loss: 0.11348118633031845 2023-01-23 04:39:24.979843: step: 1492/531, loss: 0.02407226711511612 2023-01-23 04:39:26.083500: step: 1496/531, loss: 2.47955313170678e-06 2023-01-23 04:39:27.208507: step: 1500/531, loss: 0.0001659393310546875 2023-01-23 04:39:28.363988: step: 1504/531, loss: 0.0012492657406255603 2023-01-23 04:39:29.491542: step: 1508/531, loss: 0.0015710830921307206 2023-01-23 04:39:30.618611: step: 1512/531, loss: 3.271103196311742e-05 2023-01-23 04:39:31.748694: step: 1516/531, loss: -8.01086389401462e-06 2023-01-23 04:39:32.902455: step: 1520/531, loss: 0.05632324516773224 2023-01-23 04:39:34.029756: step: 1524/531, loss: 0.00045714378939010203 2023-01-23 04:39:35.169470: step: 1528/531, loss: 0.00028333664522506297 2023-01-23 04:39:36.287659: step: 1532/531, loss: 0.00015163421630859375 2023-01-23 04:39:37.389158: step: 1536/531, loss: 0.0019630431197583675 2023-01-23 04:39:38.566502: step: 1540/531, loss: 0.0010982513194903731 2023-01-23 04:39:39.676821: step: 1544/531, loss: 0.03984341025352478 2023-01-23 04:39:40.812248: step: 1548/531, loss: 2.0122528439969756e-05 2023-01-23 04:39:41.954023: step: 1552/531, loss: 3.166198803228326e-05 2023-01-23 04:39:43.106261: step: 1556/531, loss: 0.06506137549877167 2023-01-23 04:39:44.202852: step: 1560/531, loss: 1.7070769899873994e-05 2023-01-23 04:39:45.302898: step: 1564/531, loss: 7.44819626561366e-05 2023-01-23 04:39:46.440025: step: 1568/531, loss: 0.00012493133544921875 2023-01-23 04:39:47.577945: step: 1572/531, loss: 0.012578964233398438 2023-01-23 04:39:48.721309: step: 1576/531, loss: 0.0027603148482739925 2023-01-23 04:39:49.802991: step: 1580/531, loss: 6.017685154802166e-05 2023-01-23 04:39:50.921451: step: 1584/531, loss: 9.536747711536009e-07 2023-01-23 04:39:52.054150: step: 1588/531, loss: 0.0011640548473224044 2023-01-23 04:39:53.179118: step: 1592/531, loss: 0.00033283233642578125 2023-01-23 04:39:54.304680: step: 1596/531, loss: 0.003079319139942527 2023-01-23 04:39:55.437965: step: 1600/531, loss: 0.000200176247744821 2023-01-23 04:39:56.565849: step: 1604/531, loss: 3.108978125965223e-05 2023-01-23 04:39:57.709911: step: 1608/531, loss: 0.012112236581742764 2023-01-23 04:39:58.838514: step: 1612/531, loss: 0.2612442076206207 2023-01-23 04:39:59.954199: step: 1616/531, loss: 0.039405059069395065 2023-01-23 04:40:01.112308: step: 1620/531, loss: 0.005214118864387274 2023-01-23 04:40:02.233014: step: 1624/531, loss: 0.033624839037656784 2023-01-23 04:40:03.364866: step: 1628/531, loss: 0.006922101601958275 2023-01-23 04:40:04.485384: step: 1632/531, loss: 0.0015802383422851562 2023-01-23 04:40:05.607028: step: 1636/531, loss: 0.010342169553041458 2023-01-23 04:40:06.716589: step: 1640/531, loss: 6.8664553509734105e-06 2023-01-23 04:40:07.813671: step: 1644/531, loss: 0.007073927205055952 2023-01-23 04:40:08.966678: step: 1648/531, loss: 0.0008792877197265625 2023-01-23 04:40:10.102917: step: 1652/531, loss: 0.0024515152908861637 2023-01-23 04:40:11.228548: step: 1656/531, loss: 0.0013290405040606856 2023-01-23 04:40:12.376124: step: 1660/531, loss: 5.1212311518611386e-05 2023-01-23 04:40:13.515455: step: 1664/531, loss: 0.00011134147644042969 2023-01-23 04:40:14.636174: step: 1668/531, loss: 4.301071021473035e-05 2023-01-23 04:40:15.782360: step: 1672/531, loss: 0.0014890669845044613 2023-01-23 04:40:16.911378: step: 1676/531, loss: 0.0007377624278888106 2023-01-23 04:40:18.051384: step: 1680/531, loss: 0.03756523132324219 2023-01-23 04:40:19.164195: step: 1684/531, loss: 0.008022690191864967 2023-01-23 04:40:20.291004: step: 1688/531, loss: 0.011565589345991611 2023-01-23 04:40:21.491925: step: 1692/531, loss: 3.242493221478071e-06 2023-01-23 04:40:22.618454: step: 1696/531, loss: 5.187988426769152e-05 2023-01-23 04:40:23.758601: step: 1700/531, loss: 0.0014257431030273438 2023-01-23 04:40:24.889660: step: 1704/531, loss: 0.00040473940316587687 2023-01-23 04:40:26.033415: step: 1708/531, loss: 0.20126645267009735 2023-01-23 04:40:27.132873: step: 1712/531, loss: -1.5258789289873675e-06 2023-01-23 04:40:28.243094: step: 1716/531, loss: 5.14984139954322e-06 2023-01-23 04:40:29.367446: step: 1720/531, loss: 0.0004879951593466103 2023-01-23 04:40:30.459151: step: 1724/531, loss: -5.14984139954322e-06 2023-01-23 04:40:31.582003: step: 1728/531, loss: 5.626678557746345e-06 2023-01-23 04:40:32.679937: step: 1732/531, loss: 0.0013959885109215975 2023-01-23 04:40:33.815810: step: 1736/531, loss: 0.0009515762212686241 2023-01-23 04:40:34.963122: step: 1740/531, loss: 0.038869474083185196 2023-01-23 04:40:36.131039: step: 1744/531, loss: 7.548332359874621e-05 2023-01-23 04:40:37.274380: step: 1748/531, loss: 1.3160706657799892e-05 2023-01-23 04:40:38.396203: step: 1752/531, loss: 0.011229324154555798 2023-01-23 04:40:39.534008: step: 1756/531, loss: 0.007871055975556374 2023-01-23 04:40:40.633179: step: 1760/531, loss: 7.62939453125e-06 2023-01-23 04:40:41.710169: step: 1764/531, loss: 0.00031294822110794485 2023-01-23 04:40:42.806184: step: 1768/531, loss: 0.06326790153980255 2023-01-23 04:40:43.932314: step: 1772/531, loss: 0.008076286874711514 2023-01-23 04:40:45.079893: step: 1776/531, loss: 1.0204315003647935e-05 2023-01-23 04:40:46.185414: step: 1780/531, loss: 8.77380352903856e-06 2023-01-23 04:40:47.303920: step: 1784/531, loss: 0.01799907721579075 2023-01-23 04:40:48.478744: step: 1788/531, loss: 1.5163422176556196e-05 2023-01-23 04:40:49.586909: step: 1792/531, loss: 0.00021448134793899953 2023-01-23 04:40:50.712819: step: 1796/531, loss: 9.059906005859375e-06 2023-01-23 04:40:51.840452: step: 1800/531, loss: 0.00017852782912086695 2023-01-23 04:40:52.929908: step: 1804/531, loss: 9.336472430732101e-05 2023-01-23 04:40:54.070051: step: 1808/531, loss: 0.005179357249289751 2023-01-23 04:40:55.204119: step: 1812/531, loss: 0.0002711295965127647 2023-01-23 04:40:56.318239: step: 1816/531, loss: 0.004403496161103249 2023-01-23 04:40:57.405220: step: 1820/531, loss: 0.0013646126026287675 2023-01-23 04:40:58.544665: step: 1824/531, loss: 6.0367583500919864e-05 2023-01-23 04:40:59.631848: step: 1828/531, loss: 0.0002537727414164692 2023-01-23 04:41:00.775538: step: 1832/531, loss: 0.06229753792285919 2023-01-23 04:41:01.879349: step: 1836/531, loss: 0.013969684019684792 2023-01-23 04:41:02.995278: step: 1840/531, loss: 0.029348278418183327 2023-01-23 04:41:04.101466: step: 1844/531, loss: 0.0003563881036825478 2023-01-23 04:41:05.208828: step: 1848/531, loss: 0.0006028652423992753 2023-01-23 04:41:06.322725: step: 1852/531, loss: 1.430511474609375e-05 2023-01-23 04:41:07.425446: step: 1856/531, loss: 9.155272891803179e-06 2023-01-23 04:41:08.540924: step: 1860/531, loss: 0.000637912773527205 2023-01-23 04:41:09.687547: step: 1864/531, loss: 0.0011831284500658512 2023-01-23 04:41:10.802472: step: 1868/531, loss: 3.4713742934400216e-05 2023-01-23 04:41:11.933397: step: 1872/531, loss: -6.67572021484375e-06 2023-01-23 04:41:13.086991: step: 1876/531, loss: -1.6403197150793858e-05 2023-01-23 04:41:14.187343: step: 1880/531, loss: 0.0006703853723593056 2023-01-23 04:41:15.297696: step: 1884/531, loss: 0.003070879029110074 2023-01-23 04:41:16.427937: step: 1888/531, loss: 0.0059950826689600945 2023-01-23 04:41:17.541989: step: 1892/531, loss: 0.008964157663285732 2023-01-23 04:41:18.666609: step: 1896/531, loss: 0.02835979498922825 2023-01-23 04:41:19.822146: step: 1900/531, loss: 2.002716064453125e-05 2023-01-23 04:41:20.923363: step: 1904/531, loss: 0.00601086625829339 2023-01-23 04:41:22.056854: step: 1908/531, loss: 0.015177535824477673 2023-01-23 04:41:23.189391: step: 1912/531, loss: 3.585815284168348e-05 2023-01-23 04:41:24.341382: step: 1916/531, loss: 0.00032367705716751516 2023-01-23 04:41:25.452565: step: 1920/531, loss: 5.044937279308215e-05 2023-01-23 04:41:26.563846: step: 1924/531, loss: 2.0027162008773303e-06 2023-01-23 04:41:27.680517: step: 1928/531, loss: 0.03934779018163681 2023-01-23 04:41:28.819018: step: 1932/531, loss: 5.8460234868107364e-05 2023-01-23 04:41:29.947296: step: 1936/531, loss: 3.814697265625e-06 2023-01-23 04:41:31.072046: step: 1940/531, loss: 0.056699562817811966 2023-01-23 04:41:32.200962: step: 1944/531, loss: 0.01460113562643528 2023-01-23 04:41:33.332271: step: 1948/531, loss: 1.029968279908644e-05 2023-01-23 04:41:34.455580: step: 1952/531, loss: 0.15948066115379333 2023-01-23 04:41:35.579019: step: 1956/531, loss: 0.0003173828299622983 2023-01-23 04:41:36.694251: step: 1960/531, loss: 0.0003345012664794922 2023-01-23 04:41:37.815671: step: 1964/531, loss: 0.0008962631109170616 2023-01-23 04:41:38.972365: step: 1968/531, loss: 0.00035457609919831157 2023-01-23 04:41:40.107910: step: 1972/531, loss: 0.004614639561623335 2023-01-23 04:41:41.207629: step: 1976/531, loss: 0.0005940437549725175 2023-01-23 04:41:42.337779: step: 1980/531, loss: 0.0007096290937624872 2023-01-23 04:41:43.453629: step: 1984/531, loss: 0.00011162758164573461 2023-01-23 04:41:44.599720: step: 1988/531, loss: 0.0014090538024902344 2023-01-23 04:41:45.715194: step: 1992/531, loss: 0.00277366628870368 2023-01-23 04:41:46.858553: step: 1996/531, loss: 0.0002563476446084678 2023-01-23 04:41:48.006493: step: 2000/531, loss: 0.04197826609015465 2023-01-23 04:41:49.089255: step: 2004/531, loss: 4.7302248276537284e-05 2023-01-23 04:41:50.256823: step: 2008/531, loss: 6.732940528308973e-05 2023-01-23 04:41:51.403258: step: 2012/531, loss: 0.0011829376453533769 2023-01-23 04:41:52.558329: step: 2016/531, loss: 0.004150581546127796 2023-01-23 04:41:53.687697: step: 2020/531, loss: 0.012096690945327282 2023-01-23 04:41:54.802170: step: 2024/531, loss: 0.064447782933712 2023-01-23 04:41:55.925430: step: 2028/531, loss: 5.6743621826171875e-05 2023-01-23 04:41:57.019373: step: 2032/531, loss: 0.00031003952608443797 2023-01-23 04:41:58.130915: step: 2036/531, loss: 0.006014919839799404 2023-01-23 04:41:59.248710: step: 2040/531, loss: 0.0021823407150804996 2023-01-23 04:42:00.353525: step: 2044/531, loss: 0.013766050338745117 2023-01-23 04:42:01.486175: step: 2048/531, loss: 0.0007156371721066535 2023-01-23 04:42:02.612014: step: 2052/531, loss: 0.0020761489868164062 2023-01-23 04:42:03.755294: step: 2056/531, loss: 0.007338905241340399 2023-01-23 04:42:04.902985: step: 2060/531, loss: 0.0005844116094522178 2023-01-23 04:42:06.028602: step: 2064/531, loss: 0.0003313064808025956 2023-01-23 04:42:07.151516: step: 2068/531, loss: 0.010087205097079277 2023-01-23 04:42:08.260047: step: 2072/531, loss: 0.0001329422084381804 2023-01-23 04:42:09.405650: step: 2076/531, loss: 0.019666101783514023 2023-01-23 04:42:10.504749: step: 2080/531, loss: 0.0005884170532226562 2023-01-23 04:42:11.626574: step: 2084/531, loss: 0.15691107511520386 2023-01-23 04:42:12.758274: step: 2088/531, loss: 0.0007205962901934981 2023-01-23 04:42:13.910982: step: 2092/531, loss: 0.010035133920609951 2023-01-23 04:42:15.018548: step: 2096/531, loss: 0.0005013942718505859 2023-01-23 04:42:16.150410: step: 2100/531, loss: -1.010894811770413e-05 2023-01-23 04:42:17.290685: step: 2104/531, loss: 0.008501911535859108 2023-01-23 04:42:18.416749: step: 2108/531, loss: 0.02015857584774494 2023-01-23 04:42:19.562646: step: 2112/531, loss: 1.8548966181697324e-05 2023-01-23 04:42:20.665599: step: 2116/531, loss: 0.0002402305690338835 2023-01-23 04:42:21.787301: step: 2120/531, loss: 0.02184896543622017 2023-01-23 04:42:22.904362: step: 2124/531, loss: 0.0030816078651696444 ================================================== Loss: 0.015 -------------------- Dev: {'event': {'p': 0.5923309788092835, 'r': 0.7816245006657789, 'f1': 0.6739380022962113}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Test: {'event': {'p': 0.6377159309021113, 'r': 0.7924865831842576, 'f1': 0.706726934325977}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Chinese: {'event': {'p': 0.5882352941176471, 'r': 0.9259259259259259, 'f1': 0.7194244604316546}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Russian: {'event': {'p': 0.4418604651162791, 'r': 0.5277777777777778, 'f1': 0.4810126582278481}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6472819216182049, 'r': 0.681757656458056, 'f1': 0.6640726329442282}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6192226890756303, 'r': 0.7030411449016101, 'f1': 0.6584752862328959}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.7272727272727273, 'r': 0.7407407407407407, 'f1': 0.7339449541284404}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066597294484911, 'r': 0.7762982689747004, 'f1': 0.6810747663551402}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Eng Test for Korean: {'event': {'p': 0.6443575964826576, 'r': 0.7865235539654144, 'f1': 0.708378088077336}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Korean: {'event': {'p': 0.7755102040816326, 'r': 0.6031746031746031, 'f1': 0.6785714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} -------------------- Eng Dev for Russian: {'event': {'p': 0.5520361990950227, 'r': 0.8122503328894807, 'f1': 0.6573275862068966}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Eng Test for Russian: {'event': {'p': 0.591710758377425, 'r': 0.8002385211687537, 'f1': 0.6803548795944233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'event': {'p': 0.48148148148148145, 'r': 0.7222222222222222, 'f1': 0.5777777777777777}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8}