Command that produces this log: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trigger_label_ffn.layers.0.weight: torch.Size([450, 1024]) >>> trigger_label_ffn.layers.0.bias: torch.Size([450]) >>> trigger_label_ffn.layers.1.weight: torch.Size([233, 450]) >>> trigger_label_ffn.layers.1.bias: torch.Size([233]) >>> trigger_crf.transition: torch.Size([235, 235]) n_trainable_params: 560511990, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-22 23:23:11.551232: step: 4/527, loss: 23.298704147338867 2023-01-22 23:23:12.651287: step: 8/527, loss: 11.17903995513916 2023-01-22 23:23:13.756580: step: 12/527, loss: 26.089847564697266 2023-01-22 23:23:14.853868: step: 16/527, loss: 3.1199731826782227 2023-01-22 23:23:15.953008: step: 20/527, loss: 3.0875887870788574 2023-01-22 23:23:17.035423: step: 24/527, loss: 16.458778381347656 2023-01-22 23:23:18.140291: step: 28/527, loss: 22.77322769165039 2023-01-22 23:23:19.261660: step: 32/527, loss: 14.06472110748291 2023-01-22 23:23:20.407949: step: 36/527, loss: 11.991277694702148 2023-01-22 23:23:21.534802: step: 40/527, loss: 9.615082740783691 2023-01-22 23:23:22.665623: step: 44/527, loss: 13.315023422241211 2023-01-22 23:23:23.776660: step: 48/527, loss: 11.216556549072266 2023-01-22 23:23:24.911869: step: 52/527, loss: 10.861495018005371 2023-01-22 23:23:26.007928: step: 56/527, loss: 19.935651779174805 2023-01-22 23:23:27.098296: step: 60/527, loss: 2.597614288330078 2023-01-22 23:23:28.224414: step: 64/527, loss: 16.626096725463867 2023-01-22 23:23:29.331544: step: 68/527, loss: 12.195844650268555 2023-01-22 23:23:30.440749: step: 72/527, loss: 3.4090874195098877 2023-01-22 23:23:31.557852: step: 76/527, loss: 14.483343124389648 2023-01-22 23:23:32.724887: step: 80/527, loss: 24.72446632385254 2023-01-22 23:23:33.822453: step: 84/527, loss: 5.178360939025879 2023-01-22 23:23:34.934267: step: 88/527, loss: 2.646289348602295 2023-01-22 23:23:36.056808: step: 92/527, loss: 11.701351165771484 2023-01-22 23:23:37.186029: step: 96/527, loss: 10.688973426818848 2023-01-22 23:23:38.286756: step: 100/527, loss: 12.138690948486328 2023-01-22 23:23:39.400165: step: 104/527, loss: 2.0844249725341797 2023-01-22 23:23:40.512584: step: 108/527, loss: 3.36696195602417 2023-01-22 23:23:41.623569: step: 112/527, loss: 29.702808380126953 2023-01-22 23:23:42.751452: step: 116/527, loss: 8.852483749389648 2023-01-22 23:23:43.849792: step: 120/527, loss: 3.291964292526245 2023-01-22 23:23:44.970638: step: 124/527, loss: 16.746883392333984 2023-01-22 23:23:46.128201: step: 128/527, loss: 7.291083812713623 2023-01-22 23:23:47.262066: step: 132/527, loss: 18.865188598632812 2023-01-22 23:23:48.389291: step: 136/527, loss: 3.309422492980957 2023-01-22 23:23:49.493767: step: 140/527, loss: 4.419771194458008 2023-01-22 23:23:50.609235: step: 144/527, loss: 10.253215789794922 2023-01-22 23:23:51.706492: step: 148/527, loss: 19.14675521850586 2023-01-22 23:23:52.798996: step: 152/527, loss: 2.9764788150787354 2023-01-22 23:23:53.905671: step: 156/527, loss: 9.114055633544922 2023-01-22 23:23:55.032888: step: 160/527, loss: 4.554574966430664 2023-01-22 23:23:56.197999: step: 164/527, loss: 4.2900590896606445 2023-01-22 23:23:57.344947: step: 168/527, loss: 4.220833778381348 2023-01-22 23:23:58.450294: step: 172/527, loss: 3.238072395324707 2023-01-22 23:23:59.582866: step: 176/527, loss: 17.893260955810547 2023-01-22 23:24:00.692120: step: 180/527, loss: 4.618285179138184 2023-01-22 23:24:01.811766: step: 184/527, loss: 4.224296569824219 2023-01-22 23:24:02.933705: step: 188/527, loss: 13.275412559509277 2023-01-22 23:24:04.026643: step: 192/527, loss: 4.548651695251465 2023-01-22 23:24:05.147539: step: 196/527, loss: 4.975196838378906 2023-01-22 23:24:06.246465: step: 200/527, loss: 2.763195514678955 2023-01-22 23:24:07.347076: step: 204/527, loss: 9.431214332580566 2023-01-22 23:24:08.466277: step: 208/527, loss: 3.18403959274292 2023-01-22 23:24:09.580357: step: 212/527, loss: 8.645074844360352 2023-01-22 23:24:10.705540: step: 216/527, loss: 13.355669021606445 2023-01-22 23:24:11.814453: step: 220/527, loss: 3.613096237182617 2023-01-22 23:24:12.917544: step: 224/527, loss: 13.06393051147461 2023-01-22 23:24:14.034551: step: 228/527, loss: 13.4402437210083 2023-01-22 23:24:15.145829: step: 232/527, loss: 3.1571521759033203 2023-01-22 23:24:16.267341: step: 236/527, loss: 15.730257034301758 2023-01-22 23:24:17.385857: step: 240/527, loss: 14.734528541564941 2023-01-22 23:24:18.504382: step: 244/527, loss: 29.4234676361084 2023-01-22 23:24:19.617505: step: 248/527, loss: 13.96733570098877 2023-01-22 23:24:20.721330: step: 252/527, loss: 11.352165222167969 2023-01-22 23:24:21.821837: step: 256/527, loss: 13.285268783569336 2023-01-22 23:24:22.952739: step: 260/527, loss: 32.28551483154297 2023-01-22 23:24:24.082397: step: 264/527, loss: 17.00605583190918 2023-01-22 23:24:25.211024: step: 268/527, loss: 2.4271886348724365 2023-01-22 23:24:26.323119: step: 272/527, loss: 3.566790819168091 2023-01-22 23:24:27.492850: step: 276/527, loss: 3.7898523807525635 2023-01-22 23:24:28.578379: step: 280/527, loss: 2.877229928970337 2023-01-22 23:24:29.702264: step: 284/527, loss: 3.207869529724121 2023-01-22 23:24:30.822186: step: 288/527, loss: 3.6515989303588867 2023-01-22 23:24:31.972560: step: 292/527, loss: 16.385099411010742 2023-01-22 23:24:33.094766: step: 296/527, loss: 14.307119369506836 2023-01-22 23:24:34.198182: step: 300/527, loss: 11.828729629516602 2023-01-22 23:24:35.312037: step: 304/527, loss: 3.2087841033935547 2023-01-22 23:24:36.435687: step: 308/527, loss: 8.430937767028809 2023-01-22 23:24:37.585690: step: 312/527, loss: 4.066773414611816 2023-01-22 23:24:38.712953: step: 316/527, loss: 7.552035808563232 2023-01-22 23:24:39.834330: step: 320/527, loss: 14.486663818359375 2023-01-22 23:24:40.927848: step: 324/527, loss: 2.7562456130981445 2023-01-22 23:24:42.057769: step: 328/527, loss: 20.86489486694336 2023-01-22 23:24:43.176554: step: 332/527, loss: 9.661310195922852 2023-01-22 23:24:44.284751: step: 336/527, loss: 2.8045647144317627 2023-01-22 23:24:45.406441: step: 340/527, loss: 15.216832160949707 2023-01-22 23:24:46.527501: step: 344/527, loss: 3.346928119659424 2023-01-22 23:24:47.685701: step: 348/527, loss: 15.470113754272461 2023-01-22 23:24:48.785703: step: 352/527, loss: 19.84587287902832 2023-01-22 23:24:49.896553: step: 356/527, loss: 15.20010757446289 2023-01-22 23:24:51.001670: step: 360/527, loss: 1.4692747592926025 2023-01-22 23:24:52.079408: step: 364/527, loss: 2.4089255332946777 2023-01-22 23:24:53.202528: step: 368/527, loss: 5.787039756774902 2023-01-22 23:24:54.309865: step: 372/527, loss: 11.902318000793457 2023-01-22 23:24:55.426472: step: 376/527, loss: 15.947282791137695 2023-01-22 23:24:56.539484: step: 380/527, loss: 9.210901260375977 2023-01-22 23:24:57.645811: step: 384/527, loss: 11.299958229064941 2023-01-22 23:24:58.749666: step: 388/527, loss: 2.2086942195892334 2023-01-22 23:24:59.877312: step: 392/527, loss: 8.672484397888184 2023-01-22 23:25:01.019321: step: 396/527, loss: 13.195979118347168 2023-01-22 23:25:02.169882: step: 400/527, loss: 11.198301315307617 2023-01-22 23:25:03.283142: step: 404/527, loss: 10.764354705810547 2023-01-22 23:25:04.367833: step: 408/527, loss: 1.770259141921997 2023-01-22 23:25:05.499835: step: 412/527, loss: 6.66694450378418 2023-01-22 23:25:06.590346: step: 416/527, loss: 11.2218017578125 2023-01-22 23:25:07.715273: step: 420/527, loss: 11.64199161529541 2023-01-22 23:25:08.810562: step: 424/527, loss: 5.863048553466797 2023-01-22 23:25:09.926523: step: 428/527, loss: 1.362597942352295 2023-01-22 23:25:11.057357: step: 432/527, loss: 25.305295944213867 2023-01-22 23:25:12.202621: step: 436/527, loss: 19.804800033569336 2023-01-22 23:25:13.317941: step: 440/527, loss: 8.378047943115234 2023-01-22 23:25:14.438455: step: 444/527, loss: 2.0445876121520996 2023-01-22 23:25:15.564892: step: 448/527, loss: 10.909842491149902 2023-01-22 23:25:16.691822: step: 452/527, loss: 2.1260976791381836 2023-01-22 23:25:17.800202: step: 456/527, loss: 9.138895034790039 2023-01-22 23:25:18.932531: step: 460/527, loss: 1.4602832794189453 2023-01-22 23:25:20.020539: step: 464/527, loss: 1.927833080291748 2023-01-22 23:25:21.141339: step: 468/527, loss: 2.6051862239837646 2023-01-22 23:25:22.239997: step: 472/527, loss: 10.984026908874512 2023-01-22 23:25:23.355955: step: 476/527, loss: 5.466901779174805 2023-01-22 23:25:24.454784: step: 480/527, loss: 1.729323387145996 2023-01-22 23:25:25.606735: step: 484/527, loss: 3.0027432441711426 2023-01-22 23:25:26.742394: step: 488/527, loss: 5.657947540283203 2023-01-22 23:25:27.861334: step: 492/527, loss: 2.444866180419922 2023-01-22 23:25:28.981614: step: 496/527, loss: 1.8976857662200928 2023-01-22 23:25:30.109565: step: 500/527, loss: 1.5381652116775513 2023-01-22 23:25:31.241243: step: 504/527, loss: 4.749081134796143 2023-01-22 23:25:32.350867: step: 508/527, loss: 1.6343910694122314 2023-01-22 23:25:33.448238: step: 512/527, loss: 1.5177936553955078 2023-01-22 23:25:34.552172: step: 516/527, loss: 6.4152984619140625 2023-01-22 23:25:35.671624: step: 520/527, loss: 1.229395866394043 2023-01-22 23:25:36.769264: step: 524/527, loss: 7.334926128387451 2023-01-22 23:25:37.871284: step: 528/527, loss: 5.558004379272461 2023-01-22 23:25:38.989001: step: 532/527, loss: 4.138823986053467 2023-01-22 23:25:40.090596: step: 536/527, loss: 1.235190749168396 2023-01-22 23:25:41.224507: step: 540/527, loss: 2.811471462249756 2023-01-22 23:25:42.351310: step: 544/527, loss: 0.8452047109603882 2023-01-22 23:25:43.451803: step: 548/527, loss: 1.1159751415252686 2023-01-22 23:25:44.551945: step: 552/527, loss: 3.3278636932373047 2023-01-22 23:25:45.691692: step: 556/527, loss: 1.3206007480621338 2023-01-22 23:25:46.799450: step: 560/527, loss: 2.820141553878784 2023-01-22 23:25:47.915544: step: 564/527, loss: 3.703164577484131 2023-01-22 23:25:49.044434: step: 568/527, loss: 2.5737457275390625 2023-01-22 23:25:50.193884: step: 572/527, loss: 0.7740475535392761 2023-01-22 23:25:51.296186: step: 576/527, loss: 0.551067054271698 2023-01-22 23:25:52.428585: step: 580/527, loss: 0.9118250012397766 2023-01-22 23:25:53.548354: step: 584/527, loss: 2.628262758255005 2023-01-22 23:25:54.664575: step: 588/527, loss: 1.581235408782959 2023-01-22 23:25:55.768546: step: 592/527, loss: 0.5293468236923218 2023-01-22 23:25:56.874419: step: 596/527, loss: 3.2675068378448486 2023-01-22 23:25:57.987343: step: 600/527, loss: 0.9840301871299744 2023-01-22 23:25:59.116744: step: 604/527, loss: 0.8499903678894043 2023-01-22 23:26:00.223892: step: 608/527, loss: 0.7708979845046997 2023-01-22 23:26:01.350569: step: 612/527, loss: 0.5774157047271729 2023-01-22 23:26:02.495518: step: 616/527, loss: 0.7160772085189819 2023-01-22 23:26:03.610457: step: 620/527, loss: 0.78236323595047 2023-01-22 23:26:04.701721: step: 624/527, loss: 4.907248020172119 2023-01-22 23:26:05.815787: step: 628/527, loss: 1.2145988941192627 2023-01-22 23:26:06.945475: step: 632/527, loss: 2.2446177005767822 2023-01-22 23:26:08.042666: step: 636/527, loss: 1.3998390436172485 2023-01-22 23:26:09.171924: step: 640/527, loss: 6.518995761871338 2023-01-22 23:26:10.256956: step: 644/527, loss: 1.2100446224212646 2023-01-22 23:26:11.377556: step: 648/527, loss: 7.992973327636719 2023-01-22 23:26:12.499361: step: 652/527, loss: 2.0180697441101074 2023-01-22 23:26:13.594112: step: 656/527, loss: 1.200575590133667 2023-01-22 23:26:14.700470: step: 660/527, loss: 1.2557106018066406 2023-01-22 23:26:15.796039: step: 664/527, loss: 0.6658799648284912 2023-01-22 23:26:16.959404: step: 668/527, loss: 2.404552698135376 2023-01-22 23:26:18.098666: step: 672/527, loss: 2.729940176010132 2023-01-22 23:26:19.184115: step: 676/527, loss: 2.278646469116211 2023-01-22 23:26:20.299945: step: 680/527, loss: 0.37135210633277893 2023-01-22 23:26:21.407127: step: 684/527, loss: 0.28588616847991943 2023-01-22 23:26:22.534358: step: 688/527, loss: 2.0488429069519043 2023-01-22 23:26:23.645519: step: 692/527, loss: 0.6801798939704895 2023-01-22 23:26:24.775938: step: 696/527, loss: 1.479017734527588 2023-01-22 23:26:25.866810: step: 700/527, loss: 0.4229392409324646 2023-01-22 23:26:27.011542: step: 704/527, loss: 1.5955603122711182 2023-01-22 23:26:28.152950: step: 708/527, loss: 1.8345609903335571 2023-01-22 23:26:29.276776: step: 712/527, loss: 0.5905696153640747 2023-01-22 23:26:30.364422: step: 716/527, loss: 0.48341989517211914 2023-01-22 23:26:31.463806: step: 720/527, loss: 1.385216236114502 2023-01-22 23:26:32.584401: step: 724/527, loss: 5.307806491851807 2023-01-22 23:26:33.695738: step: 728/527, loss: 0.6275202631950378 2023-01-22 23:26:34.811465: step: 732/527, loss: 0.4168476462364197 2023-01-22 23:26:35.921255: step: 736/527, loss: 1.195639967918396 2023-01-22 23:26:37.061274: step: 740/527, loss: 0.5715931057929993 2023-01-22 23:26:38.174466: step: 744/527, loss: 0.7279413938522339 2023-01-22 23:26:39.298224: step: 748/527, loss: 0.7316750884056091 2023-01-22 23:26:40.449664: step: 752/527, loss: 1.8104383945465088 2023-01-22 23:26:41.585314: step: 756/527, loss: 0.35374611616134644 2023-01-22 23:26:42.723102: step: 760/527, loss: 1.613642930984497 2023-01-22 23:26:43.810217: step: 764/527, loss: 3.2861971855163574 2023-01-22 23:26:44.891083: step: 768/527, loss: 0.9485872983932495 2023-01-22 23:26:45.973025: step: 772/527, loss: 0.6092826128005981 2023-01-22 23:26:47.071424: step: 776/527, loss: 2.5584630966186523 2023-01-22 23:26:48.190869: step: 780/527, loss: 0.3839254379272461 2023-01-22 23:26:49.337622: step: 784/527, loss: 0.7904701232910156 2023-01-22 23:26:50.434632: step: 788/527, loss: 1.8957479000091553 2023-01-22 23:26:51.535472: step: 792/527, loss: 0.2699732184410095 2023-01-22 23:26:52.661507: step: 796/527, loss: 1.9948726892471313 2023-01-22 23:26:53.771532: step: 800/527, loss: 1.1398056745529175 2023-01-22 23:26:54.903416: step: 804/527, loss: 3.541609525680542 2023-01-22 23:26:56.029303: step: 808/527, loss: 0.8285200595855713 2023-01-22 23:26:57.145899: step: 812/527, loss: 0.18635237216949463 2023-01-22 23:26:58.281923: step: 816/527, loss: 0.7432762384414673 2023-01-22 23:26:59.365476: step: 820/527, loss: 1.7212629318237305 2023-01-22 23:27:00.481600: step: 824/527, loss: 0.6409740447998047 2023-01-22 23:27:01.593877: step: 828/527, loss: 1.890131950378418 2023-01-22 23:27:02.713173: step: 832/527, loss: 2.3066790103912354 2023-01-22 23:27:03.822457: step: 836/527, loss: 0.32247740030288696 2023-01-22 23:27:04.954336: step: 840/527, loss: 1.4502936601638794 2023-01-22 23:27:06.086060: step: 844/527, loss: 1.6325743198394775 2023-01-22 23:27:07.208230: step: 848/527, loss: 0.6449156403541565 2023-01-22 23:27:08.350116: step: 852/527, loss: 3.262441396713257 2023-01-22 23:27:09.474491: step: 856/527, loss: 0.27129870653152466 2023-01-22 23:27:10.597682: step: 860/527, loss: 0.4194567799568176 2023-01-22 23:27:11.714305: step: 864/527, loss: 0.4426535665988922 2023-01-22 23:27:12.820033: step: 868/527, loss: 1.0641406774520874 2023-01-22 23:27:13.930881: step: 872/527, loss: 0.5724228024482727 2023-01-22 23:27:15.039066: step: 876/527, loss: 2.0590264797210693 2023-01-22 23:27:16.168834: step: 880/527, loss: 0.6681488752365112 2023-01-22 23:27:17.261250: step: 884/527, loss: 2.3275246620178223 2023-01-22 23:27:18.364812: step: 888/527, loss: 1.1875947713851929 2023-01-22 23:27:19.452477: step: 892/527, loss: 2.179564952850342 2023-01-22 23:27:20.586612: step: 896/527, loss: 2.9457297325134277 2023-01-22 23:27:21.691163: step: 900/527, loss: 4.995299339294434 2023-01-22 23:27:22.825318: step: 904/527, loss: 0.3346892297267914 2023-01-22 23:27:23.945776: step: 908/527, loss: 0.2718140184879303 2023-01-22 23:27:25.102288: step: 912/527, loss: 0.5803699493408203 2023-01-22 23:27:26.194985: step: 916/527, loss: 0.294408917427063 2023-01-22 23:27:27.293084: step: 920/527, loss: 1.2341583967208862 2023-01-22 23:27:28.404562: step: 924/527, loss: 0.4182208180427551 2023-01-22 23:27:29.516053: step: 928/527, loss: 0.4166804254055023 2023-01-22 23:27:30.619642: step: 932/527, loss: 1.071181058883667 2023-01-22 23:27:31.735708: step: 936/527, loss: 7.8886399269104 2023-01-22 23:27:32.862982: step: 940/527, loss: 3.144742488861084 2023-01-22 23:27:33.973882: step: 944/527, loss: 1.3239195346832275 2023-01-22 23:27:35.078626: step: 948/527, loss: 0.9189945459365845 2023-01-22 23:27:36.191217: step: 952/527, loss: 1.1120198965072632 2023-01-22 23:27:37.315081: step: 956/527, loss: 1.6622108221054077 2023-01-22 23:27:38.403563: step: 960/527, loss: 0.6418648362159729 2023-01-22 23:27:39.531372: step: 964/527, loss: 2.4725942611694336 2023-01-22 23:27:40.644519: step: 968/527, loss: 2.957956314086914 2023-01-22 23:27:41.736277: step: 972/527, loss: 0.12524865567684174 2023-01-22 23:27:42.869483: step: 976/527, loss: 4.013078212738037 2023-01-22 23:27:43.995482: step: 980/527, loss: 0.19023752212524414 2023-01-22 23:27:45.132618: step: 984/527, loss: 1.1993707418441772 2023-01-22 23:27:46.217034: step: 988/527, loss: 0.4049256443977356 2023-01-22 23:27:47.319112: step: 992/527, loss: 0.6819015741348267 2023-01-22 23:27:48.485131: step: 996/527, loss: 0.4340020716190338 2023-01-22 23:27:49.593964: step: 1000/527, loss: 0.7013236880302429 2023-01-22 23:27:50.685483: step: 1004/527, loss: 2.070523977279663 2023-01-22 23:27:51.802943: step: 1008/527, loss: 3.735775947570801 2023-01-22 23:27:52.930400: step: 1012/527, loss: 0.4360175132751465 2023-01-22 23:27:54.059138: step: 1016/527, loss: 0.5247083902359009 2023-01-22 23:27:55.218795: step: 1020/527, loss: 3.2997775077819824 2023-01-22 23:27:56.368025: step: 1024/527, loss: 2.4600210189819336 2023-01-22 23:27:57.475000: step: 1028/527, loss: 0.5743644833564758 2023-01-22 23:27:58.579951: step: 1032/527, loss: 1.6084976196289062 2023-01-22 23:27:59.695974: step: 1036/527, loss: 1.3321189880371094 2023-01-22 23:28:00.828223: step: 1040/527, loss: 0.19133087992668152 2023-01-22 23:28:01.936949: step: 1044/527, loss: 0.4211674928665161 2023-01-22 23:28:03.060447: step: 1048/527, loss: 0.6423962712287903 2023-01-22 23:28:04.205845: step: 1052/527, loss: 1.8461812734603882 2023-01-22 23:28:05.333863: step: 1056/527, loss: 2.1372907161712646 2023-01-22 23:28:06.433322: step: 1060/527, loss: 1.204171895980835 2023-01-22 23:28:07.536223: step: 1064/527, loss: 0.3970535397529602 2023-01-22 23:28:08.646355: step: 1068/527, loss: 0.43122321367263794 2023-01-22 23:28:09.767291: step: 1072/527, loss: 2.170353889465332 2023-01-22 23:28:10.870822: step: 1076/527, loss: 1.472381830215454 2023-01-22 23:28:11.970545: step: 1080/527, loss: 0.9341005086898804 2023-01-22 23:28:13.067086: step: 1084/527, loss: 1.4656577110290527 2023-01-22 23:28:14.201198: step: 1088/527, loss: 0.5932026505470276 2023-01-22 23:28:15.307321: step: 1092/527, loss: 3.2769429683685303 2023-01-22 23:28:16.421839: step: 1096/527, loss: 0.6678022146224976 2023-01-22 23:28:17.518054: step: 1100/527, loss: 1.446798324584961 2023-01-22 23:28:18.630219: step: 1104/527, loss: 3.290282964706421 2023-01-22 23:28:19.734307: step: 1108/527, loss: 1.2525103092193604 2023-01-22 23:28:20.839743: step: 1112/527, loss: 1.8739287853240967 2023-01-22 23:28:21.947482: step: 1116/527, loss: 1.6859760284423828 2023-01-22 23:28:23.115685: step: 1120/527, loss: 0.8996938467025757 2023-01-22 23:28:24.191663: step: 1124/527, loss: 1.9738459587097168 2023-01-22 23:28:25.298697: step: 1128/527, loss: 3.137787103652954 2023-01-22 23:28:26.427834: step: 1132/527, loss: 0.5248814821243286 2023-01-22 23:28:27.530421: step: 1136/527, loss: 2.0746726989746094 2023-01-22 23:28:28.625918: step: 1140/527, loss: 1.0960570573806763 2023-01-22 23:28:29.727185: step: 1144/527, loss: 0.31056922674179077 2023-01-22 23:28:30.843517: step: 1148/527, loss: 2.9502995014190674 2023-01-22 23:28:31.961449: step: 1152/527, loss: 0.5570597052574158 2023-01-22 23:28:33.070473: step: 1156/527, loss: 0.8082716464996338 2023-01-22 23:28:34.181289: step: 1160/527, loss: 0.723256528377533 2023-01-22 23:28:35.277166: step: 1164/527, loss: 0.4372190833091736 2023-01-22 23:28:36.353987: step: 1168/527, loss: 0.2185339480638504 2023-01-22 23:28:37.460837: step: 1172/527, loss: 0.6127884984016418 2023-01-22 23:28:38.576786: step: 1176/527, loss: 0.5128602981567383 2023-01-22 23:28:39.741255: step: 1180/527, loss: 0.7026681900024414 2023-01-22 23:28:40.855653: step: 1184/527, loss: 0.5505791306495667 2023-01-22 23:28:41.969199: step: 1188/527, loss: 0.39161816239356995 2023-01-22 23:28:43.067122: step: 1192/527, loss: 0.4343474507331848 2023-01-22 23:28:44.187607: step: 1196/527, loss: 0.5897807478904724 2023-01-22 23:28:45.295893: step: 1200/527, loss: 0.3465515971183777 2023-01-22 23:28:46.445493: step: 1204/527, loss: 0.594846785068512 2023-01-22 23:28:47.555343: step: 1208/527, loss: 2.2052197456359863 2023-01-22 23:28:48.648985: step: 1212/527, loss: 0.4247792959213257 2023-01-22 23:28:49.754515: step: 1216/527, loss: 0.24631667137145996 2023-01-22 23:28:50.880226: step: 1220/527, loss: 2.401458978652954 2023-01-22 23:28:51.987925: step: 1224/527, loss: 2.119553327560425 2023-01-22 23:28:53.072278: step: 1228/527, loss: 0.43200409412384033 2023-01-22 23:28:54.176509: step: 1232/527, loss: 1.5831866264343262 2023-01-22 23:28:55.308686: step: 1236/527, loss: 0.5252863168716431 2023-01-22 23:28:56.409030: step: 1240/527, loss: 0.8064610362052917 2023-01-22 23:28:57.529644: step: 1244/527, loss: 2.0962066650390625 2023-01-22 23:28:58.663620: step: 1248/527, loss: 0.515377402305603 2023-01-22 23:28:59.759391: step: 1252/527, loss: 0.27525532245635986 2023-01-22 23:29:00.894529: step: 1256/527, loss: 0.5191051959991455 2023-01-22 23:29:01.987263: step: 1260/527, loss: 0.2484794557094574 2023-01-22 23:29:03.098614: step: 1264/527, loss: 1.324589729309082 2023-01-22 23:29:04.205952: step: 1268/527, loss: 0.7637085318565369 2023-01-22 23:29:05.304777: step: 1272/527, loss: 0.6067211627960205 2023-01-22 23:29:06.441440: step: 1276/527, loss: 0.7518082857131958 2023-01-22 23:29:07.594993: step: 1280/527, loss: 0.5272313356399536 2023-01-22 23:29:08.680791: step: 1284/527, loss: 3.1354522705078125 2023-01-22 23:29:09.799615: step: 1288/527, loss: 0.8052797317504883 2023-01-22 23:29:10.938590: step: 1292/527, loss: 1.3554528951644897 2023-01-22 23:29:12.062701: step: 1296/527, loss: 0.8909964561462402 2023-01-22 23:29:13.191219: step: 1300/527, loss: 0.5566272735595703 2023-01-22 23:29:14.312665: step: 1304/527, loss: 0.4766858220100403 2023-01-22 23:29:15.425865: step: 1308/527, loss: 1.9354594945907593 2023-01-22 23:29:16.557561: step: 1312/527, loss: 0.29067105054855347 2023-01-22 23:29:17.669017: step: 1316/527, loss: 0.27712929248809814 2023-01-22 23:29:18.791933: step: 1320/527, loss: 0.4867759943008423 2023-01-22 23:29:19.919668: step: 1324/527, loss: 0.4455300569534302 2023-01-22 23:29:21.018655: step: 1328/527, loss: 0.2211351990699768 2023-01-22 23:29:22.118814: step: 1332/527, loss: 0.28282785415649414 2023-01-22 23:29:23.216901: step: 1336/527, loss: 2.0443952083587646 2023-01-22 23:29:24.323582: step: 1340/527, loss: 0.44707414507865906 2023-01-22 23:29:25.458077: step: 1344/527, loss: 1.613107681274414 2023-01-22 23:29:26.572026: step: 1348/527, loss: 1.103171944618225 2023-01-22 23:29:27.673132: step: 1352/527, loss: 1.4725905656814575 2023-01-22 23:29:28.782841: step: 1356/527, loss: 1.538714051246643 2023-01-22 23:29:29.902573: step: 1360/527, loss: 2.90914249420166 2023-01-22 23:29:30.994356: step: 1364/527, loss: 0.20819544792175293 2023-01-22 23:29:32.108377: step: 1368/527, loss: 4.111978530883789 2023-01-22 23:29:33.240603: step: 1372/527, loss: 2.559821844100952 2023-01-22 23:29:34.336476: step: 1376/527, loss: 0.5388041734695435 2023-01-22 23:29:35.452053: step: 1380/527, loss: 0.3268166780471802 2023-01-22 23:29:36.562467: step: 1384/527, loss: 0.28429141640663147 2023-01-22 23:29:37.684407: step: 1388/527, loss: 0.26779043674468994 2023-01-22 23:29:38.798057: step: 1392/527, loss: 0.1634858250617981 2023-01-22 23:29:39.897767: step: 1396/527, loss: 0.23047037422657013 2023-01-22 23:29:41.012907: step: 1400/527, loss: 1.6838372945785522 2023-01-22 23:29:42.140558: step: 1404/527, loss: 0.8953489661216736 2023-01-22 23:29:43.241927: step: 1408/527, loss: 0.424577534198761 2023-01-22 23:29:44.347928: step: 1412/527, loss: 1.6298184394836426 2023-01-22 23:29:45.474378: step: 1416/527, loss: 0.4104093909263611 2023-01-22 23:29:46.577718: step: 1420/527, loss: 0.95198655128479 2023-01-22 23:29:47.722382: step: 1424/527, loss: 0.5748883485794067 2023-01-22 23:29:48.821840: step: 1428/527, loss: 0.3222489356994629 2023-01-22 23:29:49.983447: step: 1432/527, loss: 2.584757089614868 2023-01-22 23:29:51.094681: step: 1436/527, loss: 0.9204427003860474 2023-01-22 23:29:52.220312: step: 1440/527, loss: 0.5373786687850952 2023-01-22 23:29:53.297390: step: 1444/527, loss: 0.37053626775741577 2023-01-22 23:29:54.408473: step: 1448/527, loss: 0.9937222003936768 2023-01-22 23:29:55.539392: step: 1452/527, loss: 0.3383520245552063 2023-01-22 23:29:56.656014: step: 1456/527, loss: 0.6849720478057861 2023-01-22 23:29:57.745578: step: 1460/527, loss: 0.2560383081436157 2023-01-22 23:29:58.857343: step: 1464/527, loss: 1.824838638305664 2023-01-22 23:29:59.952827: step: 1468/527, loss: 0.3990805745124817 2023-01-22 23:30:01.079377: step: 1472/527, loss: 1.3486689329147339 2023-01-22 23:30:02.206194: step: 1476/527, loss: 1.3528306484222412 2023-01-22 23:30:03.351715: step: 1480/527, loss: 1.2125530242919922 2023-01-22 23:30:04.453156: step: 1484/527, loss: 0.8566564321517944 2023-01-22 23:30:05.583783: step: 1488/527, loss: 0.4572031497955322 2023-01-22 23:30:06.686603: step: 1492/527, loss: 0.18106165528297424 2023-01-22 23:30:07.820784: step: 1496/527, loss: 1.1860369443893433 2023-01-22 23:30:08.906770: step: 1500/527, loss: 0.6649714708328247 2023-01-22 23:30:10.009790: step: 1504/527, loss: 0.19908590614795685 2023-01-22 23:30:11.153624: step: 1508/527, loss: 1.3781462907791138 2023-01-22 23:30:12.269920: step: 1512/527, loss: 0.8104506731033325 2023-01-22 23:30:13.372541: step: 1516/527, loss: 0.824360728263855 2023-01-22 23:30:14.505701: step: 1520/527, loss: 0.29937633872032166 2023-01-22 23:30:15.641823: step: 1524/527, loss: 4.915238380432129 2023-01-22 23:30:16.781870: step: 1528/527, loss: 2.220987319946289 2023-01-22 23:30:17.876025: step: 1532/527, loss: 0.7077765464782715 2023-01-22 23:30:18.975352: step: 1536/527, loss: 0.37568509578704834 2023-01-22 23:30:20.089640: step: 1540/527, loss: 2.2647054195404053 2023-01-22 23:30:21.170011: step: 1544/527, loss: 1.9965778589248657 2023-01-22 23:30:22.306552: step: 1548/527, loss: 1.3086352348327637 2023-01-22 23:30:23.410638: step: 1552/527, loss: 0.6749702095985413 2023-01-22 23:30:24.543309: step: 1556/527, loss: 0.4000672399997711 2023-01-22 23:30:25.660576: step: 1560/527, loss: 1.5065810680389404 2023-01-22 23:30:26.795993: step: 1564/527, loss: 2.4180479049682617 2023-01-22 23:30:27.902888: step: 1568/527, loss: 0.3390617370605469 2023-01-22 23:30:29.009775: step: 1572/527, loss: 1.172560214996338 2023-01-22 23:30:30.144969: step: 1576/527, loss: 0.6292140483856201 2023-01-22 23:30:31.277537: step: 1580/527, loss: 0.8719101548194885 2023-01-22 23:30:32.429927: step: 1584/527, loss: 1.8703562021255493 2023-01-22 23:30:33.561621: step: 1588/527, loss: 0.7215129733085632 2023-01-22 23:30:34.682512: step: 1592/527, loss: 3.0191850662231445 2023-01-22 23:30:35.791193: step: 1596/527, loss: 1.450263500213623 2023-01-22 23:30:36.929076: step: 1600/527, loss: 2.0740275382995605 2023-01-22 23:30:38.035745: step: 1604/527, loss: 2.142699718475342 2023-01-22 23:30:39.142703: step: 1608/527, loss: 0.16845369338989258 2023-01-22 23:30:40.260656: step: 1612/527, loss: 0.32914966344833374 2023-01-22 23:30:41.397198: step: 1616/527, loss: 0.7662199139595032 2023-01-22 23:30:42.527039: step: 1620/527, loss: 1.9744610786437988 2023-01-22 23:30:43.623264: step: 1624/527, loss: 0.4635851979255676 2023-01-22 23:30:44.753048: step: 1628/527, loss: 0.6281766891479492 2023-01-22 23:30:45.859115: step: 1632/527, loss: 1.1585177183151245 2023-01-22 23:30:47.009686: step: 1636/527, loss: 2.6801319122314453 2023-01-22 23:30:48.117286: step: 1640/527, loss: 0.7020303010940552 2023-01-22 23:30:49.248207: step: 1644/527, loss: 0.3365822434425354 2023-01-22 23:30:50.356560: step: 1648/527, loss: 1.0705496072769165 2023-01-22 23:30:51.473499: step: 1652/527, loss: 1.5442209243774414 2023-01-22 23:30:52.563323: step: 1656/527, loss: 4.564550876617432 2023-01-22 23:30:53.700077: step: 1660/527, loss: 0.31720733642578125 2023-01-22 23:30:54.795332: step: 1664/527, loss: 1.8574330806732178 2023-01-22 23:30:55.906683: step: 1668/527, loss: 0.9798682928085327 2023-01-22 23:30:57.024508: step: 1672/527, loss: 0.19750958681106567 2023-01-22 23:30:58.130692: step: 1676/527, loss: 0.18295665085315704 2023-01-22 23:30:59.283040: step: 1680/527, loss: 0.5710514783859253 2023-01-22 23:31:00.396036: step: 1684/527, loss: 0.40340834856033325 2023-01-22 23:31:01.485851: step: 1688/527, loss: 0.45031118392944336 2023-01-22 23:31:02.631069: step: 1692/527, loss: 0.49437159299850464 2023-01-22 23:31:03.761478: step: 1696/527, loss: 0.44882121682167053 2023-01-22 23:31:04.889979: step: 1700/527, loss: 2.6795060634613037 2023-01-22 23:31:06.004562: step: 1704/527, loss: 0.688677191734314 2023-01-22 23:31:07.116930: step: 1708/527, loss: 1.096032977104187 2023-01-22 23:31:08.197701: step: 1712/527, loss: 0.5273263454437256 2023-01-22 23:31:09.333588: step: 1716/527, loss: 1.9204206466674805 2023-01-22 23:31:10.434452: step: 1720/527, loss: 0.2613970637321472 2023-01-22 23:31:11.560140: step: 1724/527, loss: 2.618271827697754 2023-01-22 23:31:12.691742: step: 1728/527, loss: 0.5051380395889282 2023-01-22 23:31:13.793530: step: 1732/527, loss: 1.3047374486923218 2023-01-22 23:31:14.898963: step: 1736/527, loss: 0.473741739988327 2023-01-22 23:31:16.012787: step: 1740/527, loss: 1.3489004373550415 2023-01-22 23:31:17.121260: step: 1744/527, loss: 0.42201292514801025 2023-01-22 23:31:18.252590: step: 1748/527, loss: 2.2122833728790283 2023-01-22 23:31:19.399588: step: 1752/527, loss: 1.8622092008590698 2023-01-22 23:31:20.498378: step: 1756/527, loss: 1.1798655986785889 2023-01-22 23:31:21.606528: step: 1760/527, loss: 0.4417538642883301 2023-01-22 23:31:22.720717: step: 1764/527, loss: 1.4459447860717773 2023-01-22 23:31:23.832398: step: 1768/527, loss: 0.43311917781829834 2023-01-22 23:31:24.925190: step: 1772/527, loss: 0.28755927085876465 2023-01-22 23:31:26.006420: step: 1776/527, loss: 4.705626487731934 2023-01-22 23:31:27.130151: step: 1780/527, loss: 6.888480186462402 2023-01-22 23:31:28.223666: step: 1784/527, loss: 3.0172150135040283 2023-01-22 23:31:29.382242: step: 1788/527, loss: 0.27122220396995544 2023-01-22 23:31:30.493079: step: 1792/527, loss: 1.080742597579956 2023-01-22 23:31:31.601982: step: 1796/527, loss: 0.5888214111328125 2023-01-22 23:31:32.716814: step: 1800/527, loss: 0.33384573459625244 2023-01-22 23:31:33.857337: step: 1804/527, loss: 0.5154205560684204 2023-01-22 23:31:34.944267: step: 1808/527, loss: 2.7377469539642334 2023-01-22 23:31:36.030282: step: 1812/527, loss: 0.7977047562599182 2023-01-22 23:31:37.159648: step: 1816/527, loss: 0.5075576305389404 2023-01-22 23:31:38.285727: step: 1820/527, loss: 0.4341048002243042 2023-01-22 23:31:39.402351: step: 1824/527, loss: 0.9525284767150879 2023-01-22 23:31:40.548166: step: 1828/527, loss: 0.7073334455490112 2023-01-22 23:31:41.661593: step: 1832/527, loss: 2.572289228439331 2023-01-22 23:31:42.777082: step: 1836/527, loss: 0.3693413734436035 2023-01-22 23:31:43.886423: step: 1840/527, loss: 2.424553871154785 2023-01-22 23:31:45.003974: step: 1844/527, loss: 1.1136678457260132 2023-01-22 23:31:46.129144: step: 1848/527, loss: 1.049425721168518 2023-01-22 23:31:47.247799: step: 1852/527, loss: 8.466513633728027 2023-01-22 23:31:48.350214: step: 1856/527, loss: 0.2021808624267578 2023-01-22 23:31:49.463664: step: 1860/527, loss: 0.2533864974975586 2023-01-22 23:31:50.561974: step: 1864/527, loss: 0.267726331949234 2023-01-22 23:31:51.662972: step: 1868/527, loss: 0.19612160325050354 2023-01-22 23:31:52.769903: step: 1872/527, loss: 1.1699615716934204 2023-01-22 23:31:53.879963: step: 1876/527, loss: 3.9304897785186768 2023-01-22 23:31:55.029595: step: 1880/527, loss: 0.5027409791946411 2023-01-22 23:31:56.137315: step: 1884/527, loss: 2.0073020458221436 2023-01-22 23:31:57.272842: step: 1888/527, loss: 0.40575096011161804 2023-01-22 23:31:58.396133: step: 1892/527, loss: 1.5256905555725098 2023-01-22 23:31:59.522647: step: 1896/527, loss: 4.968748092651367 2023-01-22 23:32:00.603545: step: 1900/527, loss: 0.299780011177063 2023-01-22 23:32:01.714573: step: 1904/527, loss: 0.5224254131317139 2023-01-22 23:32:02.810173: step: 1908/527, loss: 0.21581010520458221 2023-01-22 23:32:03.884103: step: 1912/527, loss: 1.128339171409607 2023-01-22 23:32:05.019004: step: 1916/527, loss: 2.557401418685913 2023-01-22 23:32:06.121285: step: 1920/527, loss: 1.1100330352783203 2023-01-22 23:32:07.225684: step: 1924/527, loss: 0.498715341091156 2023-01-22 23:32:08.337734: step: 1928/527, loss: 7.218281269073486 2023-01-22 23:32:09.458972: step: 1932/527, loss: 0.997623085975647 2023-01-22 23:32:10.529935: step: 1936/527, loss: 0.27027639746665955 2023-01-22 23:32:11.632887: step: 1940/527, loss: 0.5404546856880188 2023-01-22 23:32:12.743929: step: 1944/527, loss: 0.8608279228210449 2023-01-22 23:32:13.858043: step: 1948/527, loss: 0.4378185272216797 2023-01-22 23:32:14.984530: step: 1952/527, loss: 1.277090311050415 2023-01-22 23:32:16.082525: step: 1956/527, loss: 0.1199650838971138 2023-01-22 23:32:17.182221: step: 1960/527, loss: 0.8804100155830383 2023-01-22 23:32:18.341820: step: 1964/527, loss: 0.2628374397754669 2023-01-22 23:32:19.471536: step: 1968/527, loss: 7.704134941101074 2023-01-22 23:32:20.565405: step: 1972/527, loss: 0.7970272898674011 2023-01-22 23:32:21.683225: step: 1976/527, loss: 0.7692806124687195 2023-01-22 23:32:22.810388: step: 1980/527, loss: 2.452371120452881 2023-01-22 23:32:23.947064: step: 1984/527, loss: 0.6181381344795227 2023-01-22 23:32:25.074841: step: 1988/527, loss: 0.2897105813026428 2023-01-22 23:32:26.217159: step: 1992/527, loss: 3.9894003868103027 2023-01-22 23:32:27.331545: step: 1996/527, loss: 0.5279205441474915 2023-01-22 23:32:28.427549: step: 2000/527, loss: 0.34472066164016724 2023-01-22 23:32:29.520088: step: 2004/527, loss: 0.19982147216796875 2023-01-22 23:32:30.675692: step: 2008/527, loss: 0.5787714719772339 2023-01-22 23:32:31.805343: step: 2012/527, loss: 0.1962549388408661 2023-01-22 23:32:32.933211: step: 2016/527, loss: 1.4819895029067993 2023-01-22 23:32:34.032154: step: 2020/527, loss: 0.90080326795578 2023-01-22 23:32:35.147024: step: 2024/527, loss: 0.816665530204773 2023-01-22 23:32:36.246116: step: 2028/527, loss: 0.7738326787948608 2023-01-22 23:32:37.383863: step: 2032/527, loss: 2.7024121284484863 2023-01-22 23:32:38.512492: step: 2036/527, loss: 0.4749559462070465 2023-01-22 23:32:39.740516: step: 2040/527, loss: 0.30739325284957886 2023-01-22 23:32:40.837136: step: 2044/527, loss: 0.36025017499923706 2023-01-22 23:32:41.934843: step: 2048/527, loss: 0.5957506895065308 2023-01-22 23:32:43.040631: step: 2052/527, loss: 0.33476340770721436 2023-01-22 23:32:44.125455: step: 2056/527, loss: 1.6934397220611572 2023-01-22 23:32:45.258586: step: 2060/527, loss: 0.4859869182109833 2023-01-22 23:32:46.397669: step: 2064/527, loss: 1.5259751081466675 2023-01-22 23:32:47.525822: step: 2068/527, loss: 0.7743636965751648 2023-01-22 23:32:48.657044: step: 2072/527, loss: 0.3628057539463043 2023-01-22 23:32:49.762813: step: 2076/527, loss: 1.2008018493652344 2023-01-22 23:32:50.880595: step: 2080/527, loss: 0.3144242465496063 2023-01-22 23:32:51.993760: step: 2084/527, loss: 0.2734830379486084 2023-01-22 23:32:53.094928: step: 2088/527, loss: 0.40227755904197693 2023-01-22 23:32:54.227408: step: 2092/527, loss: 0.571160614490509 2023-01-22 23:32:55.349700: step: 2096/527, loss: 1.4052577018737793 2023-01-22 23:32:56.462843: step: 2100/527, loss: 3.8038036823272705 2023-01-22 23:32:57.569624: step: 2104/527, loss: 0.20042724907398224 2023-01-22 23:32:58.690672: step: 2108/527, loss: 0.31341809034347534 ================================================== Loss: 3.319 -------------------- Dev: {'event': {'p': 0.627208480565371, 'r': 0.47270306258322237, 'f1': 0.5391040242976463}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test: {'event': {'p': 0.5309396485867074, 'r': 0.39714285714285713, 'f1': 0.4543968617195162}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'event': {'p': 0.5769230769230769, 'r': 0.2777777777777778, 'f1': 0.375}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'event': {'p': 0.7647058823529411, 'r': 0.20634920634920634, 'f1': 0.32499999999999996}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'event': {'p': 0.5, 'r': 0.08333333333333333, 'f1': 0.14285714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.627208480565371, 'r': 0.47270306258322237, 'f1': 0.5391040242976463}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Eng Test for Chinese: {'event': {'p': 0.5309396485867074, 'r': 0.39714285714285713, 'f1': 0.4543968617195162}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'event': {'p': 0.5769230769230769, 'r': 0.2777777777777778, 'f1': 0.375}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Eng Dev for Korean: {'event': {'p': 0.627208480565371, 'r': 0.47270306258322237, 'f1': 0.5391040242976463}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Eng Test for Korean: {'event': {'p': 0.5309396485867074, 'r': 0.39714285714285713, 'f1': 0.4543968617195162}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'event': {'p': 0.7647058823529411, 'r': 0.20634920634920634, 'f1': 0.32499999999999996}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Eng Dev for Russian: {'event': {'p': 0.627208480565371, 'r': 0.47270306258322237, 'f1': 0.5391040242976463}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Eng Test for Russian: {'event': {'p': 0.5309396485867074, 'r': 0.39714285714285713, 'f1': 0.4543968617195162}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'event': {'p': 0.5, 'r': 0.08333333333333333, 'f1': 0.14285714285714285}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-22 23:34:01.582296: step: 4/527, loss: 0.48955804109573364 2023-01-22 23:34:02.723889: step: 8/527, loss: 1.466174840927124 2023-01-22 23:34:03.856050: step: 12/527, loss: 0.35630694031715393 2023-01-22 23:34:04.973506: step: 16/527, loss: 0.16813354194164276 2023-01-22 23:34:06.092657: step: 20/527, loss: 0.5058125853538513 2023-01-22 23:34:07.223183: step: 24/527, loss: 0.996319055557251 2023-01-22 23:34:08.342130: step: 28/527, loss: 1.1384345293045044 2023-01-22 23:34:09.441642: step: 32/527, loss: 1.8335384130477905 2023-01-22 23:34:10.562434: step: 36/527, loss: 0.3281337022781372 2023-01-22 23:34:11.670869: step: 40/527, loss: 0.4102742373943329 2023-01-22 23:34:12.793706: step: 44/527, loss: 0.9760766625404358 2023-01-22 23:34:13.893974: step: 48/527, loss: 0.4607202112674713 2023-01-22 23:34:15.014339: step: 52/527, loss: 0.21661405265331268 2023-01-22 23:34:16.100840: step: 56/527, loss: 0.7160504460334778 2023-01-22 23:34:17.200289: step: 60/527, loss: 0.27713119983673096 2023-01-22 23:34:18.293852: step: 64/527, loss: 0.9000151753425598 2023-01-22 23:34:19.452071: step: 68/527, loss: 1.534056544303894 2023-01-22 23:34:20.584435: step: 72/527, loss: 0.6578210592269897 2023-01-22 23:34:21.663961: step: 76/527, loss: 1.5781265497207642 2023-01-22 23:34:22.793185: step: 80/527, loss: 0.3551687002182007 2023-01-22 23:34:23.903010: step: 84/527, loss: 6.695432662963867 2023-01-22 23:34:24.998691: step: 88/527, loss: 0.21213141083717346 2023-01-22 23:34:26.124085: step: 92/527, loss: 0.30454015731811523 2023-01-22 23:34:27.246035: step: 96/527, loss: 0.21898995339870453 2023-01-22 23:34:28.343850: step: 100/527, loss: 0.6064965724945068 2023-01-22 23:34:29.447446: step: 104/527, loss: 0.32294169068336487 2023-01-22 23:34:30.591931: step: 108/527, loss: 7.444394111633301 2023-01-22 23:34:31.728503: step: 112/527, loss: 1.1095454692840576 2023-01-22 23:34:32.843300: step: 116/527, loss: 0.29709187150001526 2023-01-22 23:34:33.976699: step: 120/527, loss: 0.13045310974121094 2023-01-22 23:34:35.083094: step: 124/527, loss: 0.16961747407913208 2023-01-22 23:34:36.169549: step: 128/527, loss: 0.17125794291496277 2023-01-22 23:34:37.290704: step: 132/527, loss: 5.192537784576416 2023-01-22 23:34:38.385956: step: 136/527, loss: 1.659091591835022 2023-01-22 23:34:39.476886: step: 140/527, loss: 0.2833220660686493 2023-01-22 23:34:40.584704: step: 144/527, loss: 0.6889618039131165 2023-01-22 23:34:41.687976: step: 148/527, loss: 0.7711418867111206 2023-01-22 23:34:42.842580: step: 152/527, loss: 2.9161343574523926 2023-01-22 23:34:43.981721: step: 156/527, loss: 0.4966874122619629 2023-01-22 23:34:45.105213: step: 160/527, loss: 0.3882254958152771 2023-01-22 23:34:46.194254: step: 164/527, loss: 0.8941822052001953 2023-01-22 23:34:47.326086: step: 168/527, loss: 0.547783613204956 2023-01-22 23:34:48.447598: step: 172/527, loss: 0.16548863053321838 2023-01-22 23:34:49.550460: step: 176/527, loss: 0.9105170965194702 2023-01-22 23:34:50.670505: step: 180/527, loss: 1.0770020484924316 2023-01-22 23:34:51.776969: step: 184/527, loss: 0.17274294793605804 2023-01-22 23:34:52.878578: step: 188/527, loss: 0.5551362633705139 2023-01-22 23:34:53.989260: step: 192/527, loss: 2.2001235485076904 2023-01-22 23:34:55.132035: step: 196/527, loss: 2.36144757270813 2023-01-22 23:34:56.262426: step: 200/527, loss: 1.6640422344207764 2023-01-22 23:34:57.368032: step: 204/527, loss: 0.40534576773643494 2023-01-22 23:34:58.470394: step: 208/527, loss: 0.8676781058311462 2023-01-22 23:34:59.579621: step: 212/527, loss: 0.8855735659599304 2023-01-22 23:35:00.692197: step: 216/527, loss: 0.34090396761894226 2023-01-22 23:35:01.801260: step: 220/527, loss: 0.10328011959791183 2023-01-22 23:35:02.920219: step: 224/527, loss: 0.34760522842407227 2023-01-22 23:35:04.030009: step: 228/527, loss: 0.2622261941432953 2023-01-22 23:35:05.132730: step: 232/527, loss: 0.7437978982925415 2023-01-22 23:35:06.258976: step: 236/527, loss: 1.2518001794815063 2023-01-22 23:35:07.354947: step: 240/527, loss: 0.5707436800003052 2023-01-22 23:35:08.472021: step: 244/527, loss: 0.7825153470039368 2023-01-22 23:35:09.624688: step: 248/527, loss: 1.836161732673645 2023-01-22 23:35:10.735568: step: 252/527, loss: 0.3499748110771179 2023-01-22 23:35:11.885537: step: 256/527, loss: 2.0855844020843506 2023-01-22 23:35:12.986469: step: 260/527, loss: 0.386616051197052 2023-01-22 23:35:14.137684: step: 264/527, loss: 1.3517508506774902 2023-01-22 23:35:15.227985: step: 268/527, loss: 0.2443222999572754 2023-01-22 23:35:16.343242: step: 272/527, loss: 1.2600563764572144 2023-01-22 23:35:17.472231: step: 276/527, loss: 0.7719128131866455 2023-01-22 23:35:18.585731: step: 280/527, loss: 3.9093616008758545 2023-01-22 23:35:19.714800: step: 284/527, loss: 0.8474264740943909 2023-01-22 23:35:20.836116: step: 288/527, loss: 0.904846727848053 2023-01-22 23:35:21.928713: step: 292/527, loss: 0.543824315071106 2023-01-22 23:35:23.045045: step: 296/527, loss: 1.0935778617858887 2023-01-22 23:35:24.161740: step: 300/527, loss: 0.9467129707336426 2023-01-22 23:35:25.257002: step: 304/527, loss: 0.3917180895805359 2023-01-22 23:35:26.353194: step: 308/527, loss: 0.32930245995521545 2023-01-22 23:35:27.474879: step: 312/527, loss: 0.18159084022045135 2023-01-22 23:35:28.592180: step: 316/527, loss: 0.9713813066482544 2023-01-22 23:35:29.693155: step: 320/527, loss: 0.2717186212539673 2023-01-22 23:35:30.825324: step: 324/527, loss: 0.5812457799911499 2023-01-22 23:35:31.960261: step: 328/527, loss: 1.818238377571106 2023-01-22 23:35:33.068605: step: 332/527, loss: 0.4060227572917938 2023-01-22 23:35:34.154539: step: 336/527, loss: 6.988563537597656 2023-01-22 23:35:35.288019: step: 340/527, loss: 1.8308956623077393 2023-01-22 23:35:36.374909: step: 344/527, loss: 0.9451266527175903 2023-01-22 23:35:37.535043: step: 348/527, loss: 0.4262138307094574 2023-01-22 23:35:38.651742: step: 352/527, loss: 0.1653548777103424 2023-01-22 23:35:39.783222: step: 356/527, loss: 0.6653217077255249 2023-01-22 23:35:40.881567: step: 360/527, loss: 0.24937179684638977 2023-01-22 23:35:42.004086: step: 364/527, loss: 1.1947906017303467 2023-01-22 23:35:43.091160: step: 368/527, loss: 0.7476380467414856 2023-01-22 23:35:44.192143: step: 372/527, loss: 0.7044621706008911 2023-01-22 23:35:45.322768: step: 376/527, loss: 1.4395205974578857 2023-01-22 23:35:46.475130: step: 380/527, loss: 0.46668338775634766 2023-01-22 23:35:47.609421: step: 384/527, loss: 0.6788533926010132 2023-01-22 23:35:48.712501: step: 388/527, loss: 6.986363410949707 2023-01-22 23:35:49.806088: step: 392/527, loss: 0.22851315140724182 2023-01-22 23:35:50.908584: step: 396/527, loss: 2.8738744258880615 2023-01-22 23:35:52.015342: step: 400/527, loss: 0.20713792741298676 2023-01-22 23:35:53.148476: step: 404/527, loss: 1.1463865041732788 2023-01-22 23:35:54.244515: step: 408/527, loss: 7.201968669891357 2023-01-22 23:35:55.357943: step: 412/527, loss: 0.15522675216197968 2023-01-22 23:35:56.471263: step: 416/527, loss: 0.13088180124759674 2023-01-22 23:35:57.600826: step: 420/527, loss: 0.33661580085754395 2023-01-22 23:35:58.733510: step: 424/527, loss: 0.4904075562953949 2023-01-22 23:35:59.868670: step: 428/527, loss: 0.3506813049316406 2023-01-22 23:36:00.985321: step: 432/527, loss: 2.774151563644409 2023-01-22 23:36:02.119999: step: 436/527, loss: 0.7703391313552856 2023-01-22 23:36:03.209538: step: 440/527, loss: 0.500515341758728 2023-01-22 23:36:04.330214: step: 444/527, loss: 0.2721587121486664 2023-01-22 23:36:05.465227: step: 448/527, loss: 0.3008522689342499 2023-01-22 23:36:06.574652: step: 452/527, loss: 1.2819223403930664 2023-01-22 23:36:07.674976: step: 456/527, loss: 0.49417054653167725 2023-01-22 23:36:08.819202: step: 460/527, loss: 0.08008956909179688 2023-01-22 23:36:09.910547: step: 464/527, loss: 0.36218443512916565 2023-01-22 23:36:11.014822: step: 468/527, loss: 0.31525689363479614 2023-01-22 23:36:12.137257: step: 472/527, loss: 0.9883606433868408 2023-01-22 23:36:13.258048: step: 476/527, loss: 0.23636884987354279 2023-01-22 23:36:14.377829: step: 480/527, loss: 1.193852186203003 2023-01-22 23:36:15.473000: step: 484/527, loss: 1.4867358207702637 2023-01-22 23:36:16.600721: step: 488/527, loss: 0.5293081998825073 2023-01-22 23:36:17.696144: step: 492/527, loss: 0.2099650800228119 2023-01-22 23:36:18.819811: step: 496/527, loss: 0.6615556478500366 2023-01-22 23:36:19.927045: step: 500/527, loss: 0.23160554468631744 2023-01-22 23:36:21.015495: step: 504/527, loss: 0.366739958524704 2023-01-22 23:36:22.121210: step: 508/527, loss: 1.613642930984497 2023-01-22 23:36:23.226654: step: 512/527, loss: 0.23827491700649261 2023-01-22 23:36:24.327048: step: 516/527, loss: 0.2559822201728821 2023-01-22 23:36:25.438092: step: 520/527, loss: 0.10874795913696289 2023-01-22 23:36:26.589102: step: 524/527, loss: 0.2618190050125122 2023-01-22 23:36:27.698210: step: 528/527, loss: 1.1079684495925903 2023-01-22 23:36:28.828525: step: 532/527, loss: 1.0028618574142456 2023-01-22 23:36:29.936186: step: 536/527, loss: 0.12403498589992523 2023-01-22 23:36:31.065638: step: 540/527, loss: 0.10908882319927216 2023-01-22 23:36:32.169285: step: 544/527, loss: 0.20107778906822205 2023-01-22 23:36:33.294790: step: 548/527, loss: 0.3245050609111786 2023-01-22 23:36:34.417604: step: 552/527, loss: 1.3465570211410522 2023-01-22 23:36:35.506248: step: 556/527, loss: 0.47787612676620483 2023-01-22 23:36:36.639744: step: 560/527, loss: 0.10955943912267685 2023-01-22 23:36:37.782859: step: 564/527, loss: 0.30516186356544495 2023-01-22 23:36:38.897871: step: 568/527, loss: 0.356654554605484 2023-01-22 23:36:40.024618: step: 572/527, loss: 0.4576318860054016 2023-01-22 23:36:41.138671: step: 576/527, loss: 1.3618965148925781 2023-01-22 23:36:42.244655: step: 580/527, loss: 0.8448148965835571 2023-01-22 23:36:43.341743: step: 584/527, loss: 0.19629907608032227 2023-01-22 23:36:44.453445: step: 588/527, loss: 0.2771604657173157 2023-01-22 23:36:45.575720: step: 592/527, loss: 0.9000728130340576 2023-01-22 23:36:46.694784: step: 596/527, loss: 1.2041209936141968 2023-01-22 23:36:47.822193: step: 600/527, loss: 0.525337815284729 2023-01-22 23:36:48.934270: step: 604/527, loss: 1.5502173900604248 2023-01-22 23:36:50.053678: step: 608/527, loss: 0.8513447642326355 2023-01-22 23:36:51.186182: step: 612/527, loss: 3.1387481689453125 2023-01-22 23:36:52.306199: step: 616/527, loss: 0.32541051506996155 2023-01-22 23:36:53.418392: step: 620/527, loss: 0.18478699028491974 2023-01-22 23:36:54.534024: step: 624/527, loss: 0.32146158814430237 2023-01-22 23:36:55.638521: step: 628/527, loss: 0.2090333104133606 2023-01-22 23:36:56.738953: step: 632/527, loss: 0.2918381690979004 2023-01-22 23:36:57.838041: step: 636/527, loss: 1.7401584386825562 2023-01-22 23:36:58.937802: step: 640/527, loss: 0.4173397123813629 2023-01-22 23:37:00.063829: step: 644/527, loss: 1.3212857246398926 2023-01-22 23:37:01.184592: step: 648/527, loss: 0.3152189254760742 2023-01-22 23:37:02.323300: step: 652/527, loss: 0.07118840515613556 2023-01-22 23:37:03.426437: step: 656/527, loss: 0.8042302131652832 2023-01-22 23:37:04.539909: step: 660/527, loss: 0.7121111154556274 2023-01-22 23:37:05.675245: step: 664/527, loss: 0.4650789499282837 2023-01-22 23:37:06.792596: step: 668/527, loss: 0.1480245590209961 2023-01-22 23:37:07.891156: step: 672/527, loss: 0.28826895356178284 2023-01-22 23:37:08.996049: step: 676/527, loss: 0.772638201713562 2023-01-22 23:37:10.092025: step: 680/527, loss: 0.13559189438819885 2023-01-22 23:37:11.213290: step: 684/527, loss: 1.9147491455078125 2023-01-22 23:37:12.338648: step: 688/527, loss: 0.3616272807121277 2023-01-22 23:37:13.483563: step: 692/527, loss: 0.7064182758331299 2023-01-22 23:37:14.604746: step: 696/527, loss: 2.249508857727051 2023-01-22 23:37:15.717520: step: 700/527, loss: 0.8944169282913208 2023-01-22 23:37:16.835996: step: 704/527, loss: 0.8163971304893494 2023-01-22 23:37:17.935051: step: 708/527, loss: 0.18458214402198792 2023-01-22 23:37:19.026517: step: 712/527, loss: 0.15241804718971252 2023-01-22 23:37:20.140066: step: 716/527, loss: 0.19965657591819763 2023-01-22 23:37:21.284165: step: 720/527, loss: 1.0484287738800049 2023-01-22 23:37:22.401710: step: 724/527, loss: 1.5262222290039062 2023-01-22 23:37:23.493714: step: 728/527, loss: 0.10742178559303284 2023-01-22 23:37:24.598700: step: 732/527, loss: 1.6643099784851074 2023-01-22 23:37:25.708793: step: 736/527, loss: 0.2840823233127594 2023-01-22 23:37:26.832642: step: 740/527, loss: 0.30755615234375 2023-01-22 23:37:27.973349: step: 744/527, loss: 0.18293671309947968 2023-01-22 23:37:29.080625: step: 748/527, loss: 1.617738127708435 2023-01-22 23:37:30.224434: step: 752/527, loss: 0.2676420211791992 2023-01-22 23:37:31.325919: step: 756/527, loss: 0.35020875930786133 2023-01-22 23:37:32.449679: step: 760/527, loss: 0.13954877853393555 2023-01-22 23:37:33.602229: step: 764/527, loss: 0.5549153089523315 2023-01-22 23:37:34.761463: step: 768/527, loss: 1.6945971250534058 2023-01-22 23:37:35.882074: step: 772/527, loss: 0.21222396194934845 2023-01-22 23:37:37.011269: step: 776/527, loss: 0.35026460886001587 2023-01-22 23:37:38.142012: step: 780/527, loss: 1.1629325151443481 2023-01-22 23:37:39.261924: step: 784/527, loss: 0.18467026948928833 2023-01-22 23:37:40.372936: step: 788/527, loss: 0.2201048880815506 2023-01-22 23:37:41.514855: step: 792/527, loss: 0.7045050859451294 2023-01-22 23:37:42.628569: step: 796/527, loss: 0.17034488916397095 2023-01-22 23:37:43.732964: step: 800/527, loss: 0.61002117395401 2023-01-22 23:37:44.829660: step: 804/527, loss: 0.57471764087677 2023-01-22 23:37:45.948132: step: 808/527, loss: 0.3278518617153168 2023-01-22 23:37:47.074723: step: 812/527, loss: 0.17721137404441833 2023-01-22 23:37:48.165157: step: 816/527, loss: 1.3893016576766968 2023-01-22 23:37:49.300898: step: 820/527, loss: 0.16541129350662231 2023-01-22 23:37:50.413773: step: 824/527, loss: 1.5428470373153687 2023-01-22 23:37:51.537968: step: 828/527, loss: 0.255013108253479 2023-01-22 23:37:52.652174: step: 832/527, loss: 0.6745571494102478 2023-01-22 23:37:53.747755: step: 836/527, loss: 0.5526704788208008 2023-01-22 23:37:54.845525: step: 840/527, loss: 0.34180185198783875 2023-01-22 23:37:55.950408: step: 844/527, loss: 1.331214189529419 2023-01-22 23:37:57.072681: step: 848/527, loss: 0.17377133667469025 2023-01-22 23:37:58.171081: step: 852/527, loss: 1.3320116996765137 2023-01-22 23:37:59.271115: step: 856/527, loss: 1.1521515846252441 2023-01-22 23:38:00.385385: step: 860/527, loss: 0.8331040143966675 2023-01-22 23:38:01.483773: step: 864/527, loss: 0.290000855922699 2023-01-22 23:38:02.548892: step: 868/527, loss: 0.22493115067481995 2023-01-22 23:38:03.699897: step: 872/527, loss: 0.8376018404960632 2023-01-22 23:38:04.802564: step: 876/527, loss: 0.061017513275146484 2023-01-22 23:38:05.944900: step: 880/527, loss: 0.27890872955322266 2023-01-22 23:38:07.082619: step: 884/527, loss: 0.6691693663597107 2023-01-22 23:38:08.175466: step: 888/527, loss: 0.5690058469772339 2023-01-22 23:38:09.335833: step: 892/527, loss: 0.3967205286026001 2023-01-22 23:38:10.459759: step: 896/527, loss: 0.19949685037136078 2023-01-22 23:38:11.565971: step: 900/527, loss: 0.7724200487136841 2023-01-22 23:38:12.667192: step: 904/527, loss: 0.27631789445877075 2023-01-22 23:38:13.799654: step: 908/527, loss: 0.3881289064884186 2023-01-22 23:38:14.928852: step: 912/527, loss: 0.8545264005661011 2023-01-22 23:38:16.034941: step: 916/527, loss: 0.9950926303863525 2023-01-22 23:38:17.140893: step: 920/527, loss: 0.9637256860733032 2023-01-22 23:38:18.268252: step: 924/527, loss: 0.204999178647995 2023-01-22 23:38:19.398514: step: 928/527, loss: 0.80538409948349 2023-01-22 23:38:20.528845: step: 932/527, loss: 0.16891475021839142 2023-01-22 23:38:21.663603: step: 936/527, loss: 0.49927815794944763 2023-01-22 23:38:22.771645: step: 940/527, loss: 0.5177841186523438 2023-01-22 23:38:23.896893: step: 944/527, loss: 0.10658760368824005 2023-01-22 23:38:25.040929: step: 948/527, loss: 0.8161695599555969 2023-01-22 23:38:26.164603: step: 952/527, loss: 1.055686354637146 2023-01-22 23:38:27.278896: step: 956/527, loss: 0.31629544496536255 2023-01-22 23:38:28.393919: step: 960/527, loss: 1.5012067556381226 2023-01-22 23:38:29.507549: step: 964/527, loss: 0.21044759452342987 2023-01-22 23:38:30.648833: step: 968/527, loss: 2.1814136505126953 2023-01-22 23:38:31.736895: step: 972/527, loss: 0.4474642276763916 2023-01-22 23:38:32.862421: step: 976/527, loss: 0.19904372096061707 2023-01-22 23:38:33.966135: step: 980/527, loss: 1.011764645576477 2023-01-22 23:38:35.062211: step: 984/527, loss: 0.2791154384613037 2023-01-22 23:38:36.156969: step: 988/527, loss: 0.5204005241394043 2023-01-22 23:38:37.308993: step: 992/527, loss: 0.264761745929718 2023-01-22 23:38:38.409478: step: 996/527, loss: 0.23842459917068481 2023-01-22 23:38:39.491430: step: 1000/527, loss: 0.09732875972986221 2023-01-22 23:38:40.596279: step: 1004/527, loss: 0.5582643151283264 2023-01-22 23:38:41.748532: step: 1008/527, loss: 0.24666127562522888 2023-01-22 23:38:42.853252: step: 1012/527, loss: 0.3967497944831848 2023-01-22 23:38:44.023193: step: 1016/527, loss: 0.3107507526874542 2023-01-22 23:38:45.166312: step: 1020/527, loss: 0.2532840669155121 2023-01-22 23:38:46.310880: step: 1024/527, loss: 0.2240493893623352 2023-01-22 23:38:47.418963: step: 1028/527, loss: 2.051966428756714 2023-01-22 23:38:48.542057: step: 1032/527, loss: 0.33020439743995667 2023-01-22 23:38:49.654996: step: 1036/527, loss: 0.287565141916275 2023-01-22 23:38:50.755479: step: 1040/527, loss: 1.6228686571121216 2023-01-22 23:38:51.860368: step: 1044/527, loss: 0.4113144874572754 2023-01-22 23:38:52.990830: step: 1048/527, loss: 0.3564572334289551 2023-01-22 23:38:54.087050: step: 1052/527, loss: 0.2971695065498352 2023-01-22 23:38:55.203602: step: 1056/527, loss: 0.258919894695282 2023-01-22 23:38:56.314866: step: 1060/527, loss: 0.7856267690658569 2023-01-22 23:38:57.442678: step: 1064/527, loss: 0.7656046748161316 2023-01-22 23:38:58.562873: step: 1068/527, loss: 1.8671091794967651 2023-01-22 23:38:59.675098: step: 1072/527, loss: 0.38578784465789795 2023-01-22 23:39:00.818146: step: 1076/527, loss: 0.4394359290599823 2023-01-22 23:39:01.931852: step: 1080/527, loss: 0.18547268211841583 2023-01-22 23:39:03.055746: step: 1084/527, loss: 0.2080131620168686 2023-01-22 23:39:04.156928: step: 1088/527, loss: 1.4777467250823975 2023-01-22 23:39:05.293968: step: 1092/527, loss: 0.21778731048107147 2023-01-22 23:39:06.413247: step: 1096/527, loss: 7.022336006164551 2023-01-22 23:39:07.512088: step: 1100/527, loss: 0.20288430154323578 2023-01-22 23:39:08.618338: step: 1104/527, loss: 0.48537150025367737 2023-01-22 23:39:09.765732: step: 1108/527, loss: 0.196641206741333 2023-01-22 23:39:10.876180: step: 1112/527, loss: 0.24698826670646667 2023-01-22 23:39:12.004198: step: 1116/527, loss: 0.48211997747421265 2023-01-22 23:39:13.135698: step: 1120/527, loss: 0.1801077425479889 2023-01-22 23:39:14.236589: step: 1124/527, loss: 0.10775332152843475 2023-01-22 23:39:15.316562: step: 1128/527, loss: 0.381283700466156 2023-01-22 23:39:16.418647: step: 1132/527, loss: 0.09606008231639862 2023-01-22 23:39:17.530134: step: 1136/527, loss: 0.1600854992866516 2023-01-22 23:39:18.642710: step: 1140/527, loss: 0.3644355833530426 2023-01-22 23:39:19.771144: step: 1144/527, loss: 0.9671630859375 2023-01-22 23:39:20.884008: step: 1148/527, loss: 1.2280585765838623 2023-01-22 23:39:21.993069: step: 1152/527, loss: 1.2624545097351074 2023-01-22 23:39:23.108517: step: 1156/527, loss: 1.665624737739563 2023-01-22 23:39:24.282110: step: 1160/527, loss: 0.26979005336761475 2023-01-22 23:39:25.418607: step: 1164/527, loss: 0.29928797483444214 2023-01-22 23:39:26.555660: step: 1168/527, loss: 0.3420730531215668 2023-01-22 23:39:27.643711: step: 1172/527, loss: 2.4461779594421387 2023-01-22 23:39:28.820962: step: 1176/527, loss: 0.31473052501678467 2023-01-22 23:39:29.921249: step: 1180/527, loss: 0.8288987874984741 2023-01-22 23:39:31.072761: step: 1184/527, loss: 1.4570404291152954 2023-01-22 23:39:32.201451: step: 1188/527, loss: 0.6560747027397156 2023-01-22 23:39:33.294323: step: 1192/527, loss: 0.1135459914803505 2023-01-22 23:39:34.397401: step: 1196/527, loss: 0.6497849822044373 2023-01-22 23:39:35.522365: step: 1200/527, loss: 0.4643517732620239 2023-01-22 23:39:36.637877: step: 1204/527, loss: 0.23417240381240845 2023-01-22 23:39:37.748026: step: 1208/527, loss: 0.11558208614587784 2023-01-22 23:39:38.851966: step: 1212/527, loss: 0.23825012147426605 2023-01-22 23:39:39.949719: step: 1216/527, loss: 0.41295814514160156 2023-01-22 23:39:41.039790: step: 1220/527, loss: 0.20897871255874634 2023-01-22 23:39:42.145881: step: 1224/527, loss: 0.9823281168937683 2023-01-22 23:39:43.248238: step: 1228/527, loss: 0.094927117228508 2023-01-22 23:39:44.385683: step: 1232/527, loss: 1.93697988986969 2023-01-22 23:39:45.551333: step: 1236/527, loss: 0.4876123070716858 2023-01-22 23:39:46.665580: step: 1240/527, loss: 0.15807271003723145 2023-01-22 23:39:47.767232: step: 1244/527, loss: 0.3072170317173004 2023-01-22 23:39:48.853870: step: 1248/527, loss: 0.22928762435913086 2023-01-22 23:39:50.009054: step: 1252/527, loss: 0.24584990739822388 2023-01-22 23:39:51.128370: step: 1256/527, loss: 0.41490334272384644 2023-01-22 23:39:52.242463: step: 1260/527, loss: 0.2963305711746216 2023-01-22 23:39:53.323881: step: 1264/527, loss: 0.10053520649671555 2023-01-22 23:39:54.452529: step: 1268/527, loss: 0.7227115631103516 2023-01-22 23:39:55.547863: step: 1272/527, loss: 1.0870440006256104 2023-01-22 23:39:56.666887: step: 1276/527, loss: 0.9287055730819702 2023-01-22 23:39:57.783719: step: 1280/527, loss: 0.39011627435684204 2023-01-22 23:39:58.909228: step: 1284/527, loss: 1.5694022178649902 2023-01-22 23:40:00.036935: step: 1288/527, loss: 0.2660086750984192 2023-01-22 23:40:01.136114: step: 1292/527, loss: 0.20604261755943298 2023-01-22 23:40:02.246509: step: 1296/527, loss: 1.3050010204315186 2023-01-22 23:40:03.348907: step: 1300/527, loss: 1.0639495849609375 2023-01-22 23:40:04.474183: step: 1304/527, loss: 0.9227062463760376 2023-01-22 23:40:05.616360: step: 1308/527, loss: 0.43316563963890076 2023-01-22 23:40:06.732556: step: 1312/527, loss: 0.47301241755485535 2023-01-22 23:40:07.853465: step: 1316/527, loss: 1.5292580127716064 2023-01-22 23:40:08.971553: step: 1320/527, loss: 1.5884895324707031 2023-01-22 23:40:10.077051: step: 1324/527, loss: 0.6982278227806091 2023-01-22 23:40:11.175020: step: 1328/527, loss: 0.6975236535072327 2023-01-22 23:40:12.291203: step: 1332/527, loss: 0.1938372701406479 2023-01-22 23:40:13.397926: step: 1336/527, loss: 0.06430401653051376 2023-01-22 23:40:14.550022: step: 1340/527, loss: 0.2321162223815918 2023-01-22 23:40:15.668918: step: 1344/527, loss: 2.779973030090332 2023-01-22 23:40:16.824582: step: 1348/527, loss: 0.5846744775772095 2023-01-22 23:40:17.950395: step: 1352/527, loss: 0.14413009583950043 2023-01-22 23:40:19.060508: step: 1356/527, loss: 0.3913077116012573 2023-01-22 23:40:20.167329: step: 1360/527, loss: 0.3192369341850281 2023-01-22 23:40:21.279346: step: 1364/527, loss: 0.6409803628921509 2023-01-22 23:40:22.396475: step: 1368/527, loss: 0.04847393184900284 2023-01-22 23:40:23.520661: step: 1372/527, loss: 0.08990946412086487 2023-01-22 23:40:24.640687: step: 1376/527, loss: 1.3807991743087769 2023-01-22 23:40:25.781465: step: 1380/527, loss: 1.8782823085784912 2023-01-22 23:40:26.901452: step: 1384/527, loss: 0.07490682601928711 2023-01-22 23:40:28.007112: step: 1388/527, loss: 0.18190395832061768 2023-01-22 23:40:29.097541: step: 1392/527, loss: 0.7041284441947937 2023-01-22 23:40:30.203817: step: 1396/527, loss: 0.17479224503040314 2023-01-22 23:40:31.331680: step: 1400/527, loss: 0.9497172832489014 2023-01-22 23:40:32.430425: step: 1404/527, loss: 0.4697246551513672 2023-01-22 23:40:33.515221: step: 1408/527, loss: 0.6437335014343262 2023-01-22 23:40:34.637045: step: 1412/527, loss: 0.5565390586853027 2023-01-22 23:40:35.753197: step: 1416/527, loss: 0.4132024645805359 2023-01-22 23:40:36.902889: step: 1420/527, loss: 0.4425075650215149 2023-01-22 23:40:38.015929: step: 1424/527, loss: 0.20741325616836548 2023-01-22 23:40:39.162363: step: 1428/527, loss: 0.40257158875465393 2023-01-22 23:40:40.275349: step: 1432/527, loss: 0.30314359068870544 2023-01-22 23:40:41.370827: step: 1436/527, loss: 0.2702926993370056 2023-01-22 23:40:42.474242: step: 1440/527, loss: 0.17117223143577576 2023-01-22 23:40:43.596712: step: 1444/527, loss: 1.8015871047973633 2023-01-22 23:40:44.709087: step: 1448/527, loss: 1.0656838417053223 2023-01-22 23:40:45.833004: step: 1452/527, loss: 0.5567184686660767 2023-01-22 23:40:46.975523: step: 1456/527, loss: 0.2849445044994354 2023-01-22 23:40:48.090006: step: 1460/527, loss: 0.26066744327545166 2023-01-22 23:40:49.239784: step: 1464/527, loss: 0.4099099040031433 2023-01-22 23:40:50.339638: step: 1468/527, loss: 0.21477475762367249 2023-01-22 23:40:51.489533: step: 1472/527, loss: 1.5466792583465576 2023-01-22 23:40:52.606320: step: 1476/527, loss: 0.2308189421892166 2023-01-22 23:40:53.722340: step: 1480/527, loss: 0.3078714907169342 2023-01-22 23:40:54.846283: step: 1484/527, loss: 0.2572745680809021 2023-01-22 23:40:55.950841: step: 1488/527, loss: 1.0283666849136353 2023-01-22 23:40:57.047040: step: 1492/527, loss: 0.2598365843296051 2023-01-22 23:40:58.178774: step: 1496/527, loss: 1.9921785593032837 2023-01-22 23:40:59.283217: step: 1500/527, loss: 0.22850003838539124 2023-01-22 23:41:00.393032: step: 1504/527, loss: 0.9154081344604492 2023-01-22 23:41:01.497603: step: 1508/527, loss: 0.2039547860622406 2023-01-22 23:41:02.630463: step: 1512/527, loss: 1.2271795272827148 2023-01-22 23:41:03.723895: step: 1516/527, loss: 0.13336606323719025 2023-01-22 23:41:04.849884: step: 1520/527, loss: 0.4729459285736084 2023-01-22 23:41:05.943507: step: 1524/527, loss: 0.1932951956987381 2023-01-22 23:41:07.081170: step: 1528/527, loss: 1.945611834526062 2023-01-22 23:41:08.191927: step: 1532/527, loss: 1.7316627502441406 2023-01-22 23:41:09.322744: step: 1536/527, loss: 2.6612277030944824 2023-01-22 23:41:10.424398: step: 1540/527, loss: 0.18422779440879822 2023-01-22 23:41:11.519897: step: 1544/527, loss: 0.09578094631433487 2023-01-22 23:41:12.607860: step: 1548/527, loss: 0.46580085158348083 2023-01-22 23:41:13.712156: step: 1552/527, loss: 0.787284255027771 2023-01-22 23:41:14.804559: step: 1556/527, loss: 0.12551012635231018 2023-01-22 23:41:15.932005: step: 1560/527, loss: 1.0120360851287842 2023-01-22 23:41:17.040969: step: 1564/527, loss: 0.13845491409301758 2023-01-22 23:41:18.174282: step: 1568/527, loss: 0.10843883454799652 2023-01-22 23:41:19.303976: step: 1572/527, loss: 3.7761125564575195 2023-01-22 23:41:20.406608: step: 1576/527, loss: 0.20947308838367462 2023-01-22 23:41:21.530321: step: 1580/527, loss: 0.12904532253742218 2023-01-22 23:41:22.693192: step: 1584/527, loss: 2.431868314743042 2023-01-22 23:41:23.803734: step: 1588/527, loss: 0.2086331844329834 2023-01-22 23:41:24.915950: step: 1592/527, loss: 0.3782004415988922 2023-01-22 23:41:26.009669: step: 1596/527, loss: 0.9793174862861633 2023-01-22 23:41:27.105377: step: 1600/527, loss: 0.07106056809425354 2023-01-22 23:41:28.195369: step: 1604/527, loss: 0.6578100919723511 2023-01-22 23:41:29.290978: step: 1608/527, loss: 0.2724035382270813 2023-01-22 23:41:30.392593: step: 1612/527, loss: 0.7021909356117249 2023-01-22 23:41:31.526214: step: 1616/527, loss: 0.3221290707588196 2023-01-22 23:41:32.662927: step: 1620/527, loss: 0.22217103838920593 2023-01-22 23:41:33.747370: step: 1624/527, loss: 6.8734965324401855 2023-01-22 23:41:34.873702: step: 1628/527, loss: 1.6036103963851929 2023-01-22 23:41:35.999412: step: 1632/527, loss: 0.7212901711463928 2023-01-22 23:41:37.123614: step: 1636/527, loss: 0.36735811829566956 2023-01-22 23:41:38.237633: step: 1640/527, loss: 0.33795303106307983 2023-01-22 23:41:39.335113: step: 1644/527, loss: 0.3518761396408081 2023-01-22 23:41:40.449879: step: 1648/527, loss: 1.5078743696212769 2023-01-22 23:41:41.540534: step: 1652/527, loss: 0.1540747582912445 2023-01-22 23:41:42.628736: step: 1656/527, loss: 0.16613636910915375 2023-01-22 23:41:43.709537: step: 1660/527, loss: 0.07519713044166565 2023-01-22 23:41:44.813387: step: 1664/527, loss: 0.7167521715164185 2023-01-22 23:41:45.941355: step: 1668/527, loss: 0.18565607070922852 2023-01-22 23:41:47.052349: step: 1672/527, loss: 0.3964042663574219 2023-01-22 23:41:48.158850: step: 1676/527, loss: 0.05642149597406387 2023-01-22 23:41:49.297924: step: 1680/527, loss: 0.15735659003257751 2023-01-22 23:41:50.394988: step: 1684/527, loss: 1.0823335647583008 2023-01-22 23:41:51.524624: step: 1688/527, loss: 0.9088883399963379 2023-01-22 23:41:52.624264: step: 1692/527, loss: 0.40829670429229736 2023-01-22 23:41:53.733994: step: 1696/527, loss: 0.1751539409160614 2023-01-22 23:41:54.840458: step: 1700/527, loss: 0.26682233810424805 2023-01-22 23:41:55.953832: step: 1704/527, loss: 0.24486637115478516 2023-01-22 23:41:57.056968: step: 1708/527, loss: 0.23768892884254456 2023-01-22 23:41:58.182027: step: 1712/527, loss: 0.21505403518676758 2023-01-22 23:41:59.297354: step: 1716/527, loss: 0.21891427040100098 2023-01-22 23:42:00.438613: step: 1720/527, loss: 0.14747437834739685 2023-01-22 23:42:01.551246: step: 1724/527, loss: 0.20381461083889008 2023-01-22 23:42:02.646062: step: 1728/527, loss: 9.230738639831543 2023-01-22 23:42:03.755475: step: 1732/527, loss: 1.4502363204956055 2023-01-22 23:42:04.875616: step: 1736/527, loss: 1.0933117866516113 2023-01-22 23:42:06.012931: step: 1740/527, loss: 0.761847972869873 2023-01-22 23:42:07.142199: step: 1744/527, loss: 0.5283024311065674 2023-01-22 23:42:08.271994: step: 1748/527, loss: 0.9965077042579651 2023-01-22 23:42:09.400922: step: 1752/527, loss: 1.0281659364700317 2023-01-22 23:42:10.512556: step: 1756/527, loss: 0.1795811653137207 2023-01-22 23:42:11.639229: step: 1760/527, loss: 0.2241329699754715 2023-01-22 23:42:12.772377: step: 1764/527, loss: 0.9095956683158875 2023-01-22 23:42:13.886167: step: 1768/527, loss: 0.28363296389579773 2023-01-22 23:42:14.994758: step: 1772/527, loss: 0.3083708882331848 2023-01-22 23:42:16.118358: step: 1776/527, loss: 0.65910404920578 2023-01-22 23:42:17.233649: step: 1780/527, loss: 0.09091377258300781 2023-01-22 23:42:18.354548: step: 1784/527, loss: 1.3716351985931396 2023-01-22 23:42:19.490056: step: 1788/527, loss: 0.1879730224609375 2023-01-22 23:42:20.592412: step: 1792/527, loss: 1.139765977859497 2023-01-22 23:42:21.683615: step: 1796/527, loss: 2.39186954498291 2023-01-22 23:42:22.793340: step: 1800/527, loss: 1.9225071668624878 2023-01-22 23:42:23.906508: step: 1804/527, loss: 0.6676848530769348 2023-01-22 23:42:25.027921: step: 1808/527, loss: 1.0369346141815186 2023-01-22 23:42:26.182998: step: 1812/527, loss: 0.203252911567688 2023-01-22 23:42:27.292832: step: 1816/527, loss: 0.30394524335861206 2023-01-22 23:42:28.420463: step: 1820/527, loss: 0.748797595500946 2023-01-22 23:42:29.537751: step: 1824/527, loss: 1.1922361850738525 2023-01-22 23:42:30.658658: step: 1828/527, loss: 0.7607652544975281 2023-01-22 23:42:31.751254: step: 1832/527, loss: 0.14432699978351593 2023-01-22 23:42:32.866880: step: 1836/527, loss: 0.2402600347995758 2023-01-22 23:42:34.012200: step: 1840/527, loss: 0.6234188079833984 2023-01-22 23:42:35.131509: step: 1844/527, loss: 1.4823126792907715 2023-01-22 23:42:36.263632: step: 1848/527, loss: 0.20493902266025543 2023-01-22 23:42:37.367956: step: 1852/527, loss: 0.44853726029396057 2023-01-22 23:42:38.507299: step: 1856/527, loss: 2.113954544067383 2023-01-22 23:42:39.618166: step: 1860/527, loss: 0.29280200600624084 2023-01-22 23:42:40.724825: step: 1864/527, loss: 0.4451705813407898 2023-01-22 23:42:41.867306: step: 1868/527, loss: 1.202926516532898 2023-01-22 23:42:42.990264: step: 1872/527, loss: 0.15281638503074646 2023-01-22 23:42:44.093148: step: 1876/527, loss: 0.5236794352531433 2023-01-22 23:42:45.200376: step: 1880/527, loss: 0.7365829348564148 2023-01-22 23:42:46.300734: step: 1884/527, loss: 0.5049744844436646 2023-01-22 23:42:47.425347: step: 1888/527, loss: 0.3946446478366852 2023-01-22 23:42:48.530046: step: 1892/527, loss: 0.1823606938123703 2023-01-22 23:42:49.638149: step: 1896/527, loss: 0.2072470635175705 2023-01-22 23:42:50.783150: step: 1900/527, loss: 0.4567932188510895 2023-01-22 23:42:51.923644: step: 1904/527, loss: 0.6553575396537781 2023-01-22 23:42:53.027559: step: 1908/527, loss: 0.2890511751174927 2023-01-22 23:42:54.122923: step: 1912/527, loss: 0.11705876141786575 2023-01-22 23:42:55.218739: step: 1916/527, loss: 2.7663867473602295 2023-01-22 23:42:56.371577: step: 1920/527, loss: 0.18343916535377502 2023-01-22 23:42:57.492176: step: 1924/527, loss: 0.17275866866111755 2023-01-22 23:42:58.620173: step: 1928/527, loss: 0.16366301476955414 2023-01-22 23:42:59.714876: step: 1932/527, loss: 0.03639573976397514 2023-01-22 23:43:00.819952: step: 1936/527, loss: 0.16920800507068634 2023-01-22 23:43:01.938028: step: 1940/527, loss: 0.317962646484375 2023-01-22 23:43:03.049010: step: 1944/527, loss: 0.7147844433784485 2023-01-22 23:43:04.176888: step: 1948/527, loss: 0.9657028913497925 2023-01-22 23:43:05.302955: step: 1952/527, loss: 1.3206175565719604 2023-01-22 23:43:06.438671: step: 1956/527, loss: 0.8681854009628296 2023-01-22 23:43:07.561836: step: 1960/527, loss: 0.5280121564865112 2023-01-22 23:43:08.690584: step: 1964/527, loss: 0.18877273797988892 2023-01-22 23:43:09.836423: step: 1968/527, loss: 0.2010248750448227 2023-01-22 23:43:11.001571: step: 1972/527, loss: 0.033550359308719635 2023-01-22 23:43:12.081433: step: 1976/527, loss: 0.4398748278617859 2023-01-22 23:43:13.185623: step: 1980/527, loss: 0.18755817413330078 2023-01-22 23:43:14.315526: step: 1984/527, loss: 1.0861334800720215 2023-01-22 23:43:15.403246: step: 1988/527, loss: 0.5326269268989563 2023-01-22 23:43:16.505115: step: 1992/527, loss: 0.2384411245584488 2023-01-22 23:43:17.612363: step: 1996/527, loss: 0.1842707097530365 2023-01-22 23:43:18.723152: step: 2000/527, loss: 0.2859558165073395 2023-01-22 23:43:19.843480: step: 2004/527, loss: 0.043778322637081146 2023-01-22 23:43:20.962216: step: 2008/527, loss: 1.393704891204834 2023-01-22 23:43:22.071753: step: 2012/527, loss: 0.7679749131202698 2023-01-22 23:43:23.176295: step: 2016/527, loss: 1.2648415565490723 2023-01-22 23:43:24.301641: step: 2020/527, loss: 0.46009406447410583 2023-01-22 23:43:25.409366: step: 2024/527, loss: 0.26336565613746643 2023-01-22 23:43:26.514984: step: 2028/527, loss: 0.08755011856555939 2023-01-22 23:43:27.621963: step: 2032/527, loss: 0.22198128700256348 2023-01-22 23:43:28.726624: step: 2036/527, loss: 0.9999496340751648 2023-01-22 23:43:29.828914: step: 2040/527, loss: 0.9257692098617554 2023-01-22 23:43:30.973386: step: 2044/527, loss: 0.2057945281267166 2023-01-22 23:43:32.054994: step: 2048/527, loss: 0.38532111048698425 2023-01-22 23:43:33.165487: step: 2052/527, loss: 0.2405363917350769 2023-01-22 23:43:34.307203: step: 2056/527, loss: 1.3372652530670166 2023-01-22 23:43:35.406445: step: 2060/527, loss: 0.4042108356952667 2023-01-22 23:43:36.513359: step: 2064/527, loss: 6.940031051635742 2023-01-22 23:43:37.640929: step: 2068/527, loss: 1.7936713695526123 2023-01-22 23:43:38.751693: step: 2072/527, loss: 0.18203188478946686 2023-01-22 23:43:39.855692: step: 2076/527, loss: 0.10747461766004562 2023-01-22 23:43:40.968367: step: 2080/527, loss: 0.3761855363845825 2023-01-22 23:43:42.161962: step: 2084/527, loss: 0.17500925064086914 2023-01-22 23:43:43.245795: step: 2088/527, loss: 0.04080705717206001 2023-01-22 23:43:44.380358: step: 2092/527, loss: 0.8781456351280212 2023-01-22 23:43:45.481411: step: 2096/527, loss: 0.2540857195854187 2023-01-22 23:43:46.608143: step: 2100/527, loss: 0.2488357573747635 2023-01-22 23:43:47.750889: step: 2104/527, loss: 0.09587278962135315 2023-01-22 23:43:48.860828: step: 2108/527, loss: 0.09662125259637833 ================================================== Loss: 0.774 -------------------- Dev: {'event': {'p': 0.5560344827586207, 'r': 0.6870838881491345, 'f1': 0.6146515783204287}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test: {'event': {'p': 0.5831313620940378, 'r': 0.6874285714285714, 'f1': 0.6309992132179386}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Chinese: {'event': {'p': 0.559322033898305, 'r': 0.6111111111111112, 'f1': 0.5840707964601771}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Korean: {'event': {'p': 0.631578947368421, 'r': 0.38095238095238093, 'f1': 0.4752475247524752}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Russian: {'event': {'p': 0.5526315789473685, 'r': 0.5833333333333334, 'f1': 0.5675675675675677}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.5560344827586207, 'r': 0.6870838881491345, 'f1': 0.6146515783204287}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Eng Test for Chinese: {'event': {'p': 0.5831313620940378, 'r': 0.6874285714285714, 'f1': 0.6309992132179386}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'event': {'p': 0.559322033898305, 'r': 0.6111111111111112, 'f1': 0.5840707964601771}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} -------------------- Eng Dev for Korean: {'event': {'p': 0.5560344827586207, 'r': 0.6870838881491345, 'f1': 0.6146515783204287}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Eng Test for Korean: {'event': {'p': 0.5831313620940378, 'r': 0.6874285714285714, 'f1': 0.6309992132179386}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'event': {'p': 0.631578947368421, 'r': 0.38095238095238093, 'f1': 0.4752475247524752}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} -------------------- Eng Dev for Russian: {'event': {'p': 0.5560344827586207, 'r': 0.6870838881491345, 'f1': 0.6146515783204287}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Eng Test for Russian: {'event': {'p': 0.5831313620940378, 'r': 0.6874285714285714, 'f1': 0.6309992132179386}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'event': {'p': 0.5526315789473685, 'r': 0.5833333333333334, 'f1': 0.5675675675675677}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-22 23:44:52.240094: step: 4/527, loss: 0.508809506893158 2023-01-22 23:44:53.412567: step: 8/527, loss: 0.08051471412181854 2023-01-22 23:44:54.539698: step: 12/527, loss: 1.1197643280029297 2023-01-22 23:44:55.637842: step: 16/527, loss: 0.7132484912872314 2023-01-22 23:44:56.734687: step: 20/527, loss: 0.6464167237281799 2023-01-22 23:44:57.818613: step: 24/527, loss: 0.09567365050315857 2023-01-22 23:44:58.934145: step: 28/527, loss: 0.3732467591762543 2023-01-22 23:45:00.047318: step: 32/527, loss: 1.211378812789917 2023-01-22 23:45:01.157266: step: 36/527, loss: 0.6955430507659912 2023-01-22 23:45:02.294076: step: 40/527, loss: 0.6174247860908508 2023-01-22 23:45:03.405752: step: 44/527, loss: 1.0429612398147583 2023-01-22 23:45:04.506728: step: 48/527, loss: 0.38752707839012146 2023-01-22 23:45:05.624297: step: 52/527, loss: 0.8731134533882141 2023-01-22 23:45:06.703531: step: 56/527, loss: 7.186557292938232 2023-01-22 23:45:07.775138: step: 60/527, loss: 0.11953286826610565 2023-01-22 23:45:08.904341: step: 64/527, loss: 0.9726343154907227 2023-01-22 23:45:10.032164: step: 68/527, loss: 0.17540550231933594 2023-01-22 23:45:11.152896: step: 72/527, loss: 0.4749521017074585 2023-01-22 23:45:12.250543: step: 76/527, loss: 0.16490773856639862 2023-01-22 23:45:13.386672: step: 80/527, loss: 0.49111253023147583 2023-01-22 23:45:14.448622: step: 84/527, loss: 0.7758349180221558 2023-01-22 23:45:15.577870: step: 88/527, loss: 0.11393478512763977 2023-01-22 23:45:16.702214: step: 92/527, loss: 0.8853338360786438 2023-01-22 23:45:17.793444: step: 96/527, loss: 0.3115384578704834 2023-01-22 23:45:18.911707: step: 100/527, loss: 0.209774449467659 2023-01-22 23:45:20.081485: step: 104/527, loss: 0.3178957998752594 2023-01-22 23:45:21.215196: step: 108/527, loss: 0.5851931571960449 2023-01-22 23:45:22.329272: step: 112/527, loss: 0.2537763714790344 2023-01-22 23:45:23.472841: step: 116/527, loss: 2.578620672225952 2023-01-22 23:45:24.577609: step: 120/527, loss: 0.49000898003578186 2023-01-22 23:45:25.675415: step: 124/527, loss: 2.6648151874542236 2023-01-22 23:45:26.800527: step: 128/527, loss: 0.1741112768650055 2023-01-22 23:45:27.900519: step: 132/527, loss: 0.22406668961048126 2023-01-22 23:45:29.021023: step: 136/527, loss: 0.23551559448242188 2023-01-22 23:45:30.145019: step: 140/527, loss: 0.8233947157859802 2023-01-22 23:45:31.293453: step: 144/527, loss: 0.17566613852977753 2023-01-22 23:45:32.431405: step: 148/527, loss: 0.29378029704093933 2023-01-22 23:45:33.538593: step: 152/527, loss: 0.48547208309173584 2023-01-22 23:45:34.630574: step: 156/527, loss: 0.24952569603919983 2023-01-22 23:45:35.745768: step: 160/527, loss: 0.20701494812965393 2023-01-22 23:45:36.859983: step: 164/527, loss: 0.7263386845588684 2023-01-22 23:45:37.976481: step: 168/527, loss: 1.0169198513031006 2023-01-22 23:45:39.101003: step: 172/527, loss: 0.10271382331848145 2023-01-22 23:45:40.203954: step: 176/527, loss: 0.1205935925245285 2023-01-22 23:45:41.331313: step: 180/527, loss: 0.24087849259376526 2023-01-22 23:45:42.441758: step: 184/527, loss: 0.06403245776891708 2023-01-22 23:45:43.570880: step: 188/527, loss: 2.921477794647217 2023-01-22 23:45:44.678234: step: 192/527, loss: 1.823606252670288 2023-01-22 23:45:45.786541: step: 196/527, loss: 0.04609017074108124 2023-01-22 23:45:46.914312: step: 200/527, loss: 0.4537319540977478 2023-01-22 23:45:48.007729: step: 204/527, loss: 0.6913636922836304 2023-01-22 23:45:49.135599: step: 208/527, loss: 0.19006015360355377 2023-01-22 23:45:50.314906: step: 212/527, loss: 1.1852829456329346 2023-01-22 23:45:51.425835: step: 216/527, loss: 0.43352508544921875 2023-01-22 23:45:52.529137: step: 220/527, loss: 0.20796769857406616 2023-01-22 23:45:53.623949: step: 224/527, loss: 0.6497514247894287 2023-01-22 23:45:54.724777: step: 228/527, loss: 0.285788357257843 2023-01-22 23:45:55.840699: step: 232/527, loss: 0.06697969883680344 2023-01-22 23:45:56.975732: step: 236/527, loss: 0.6509712934494019 2023-01-22 23:45:58.090040: step: 240/527, loss: 0.2568542957305908 2023-01-22 23:45:59.191082: step: 244/527, loss: 0.16602811217308044 2023-01-22 23:46:00.302146: step: 248/527, loss: 0.4349152445793152 2023-01-22 23:46:01.380250: step: 252/527, loss: 0.048705484718084335 2023-01-22 23:46:02.499483: step: 256/527, loss: 0.17351874709129333 2023-01-22 23:46:03.581939: step: 260/527, loss: 0.15449538826942444 2023-01-22 23:46:04.715714: step: 264/527, loss: 0.7872732281684875 2023-01-22 23:46:05.824424: step: 268/527, loss: 0.08169607818126678 2023-01-22 23:46:07.000392: step: 272/527, loss: 0.18699485063552856 2023-01-22 23:46:08.160201: step: 276/527, loss: 0.3095025420188904 2023-01-22 23:46:09.287316: step: 280/527, loss: 2.523277521133423 2023-01-22 23:46:10.415620: step: 284/527, loss: 0.8526454567909241 2023-01-22 23:46:11.550640: step: 288/527, loss: 0.18119679391384125 2023-01-22 23:46:12.634847: step: 292/527, loss: 0.1618306189775467 2023-01-22 23:46:13.782519: step: 296/527, loss: 0.20381546020507812 2023-01-22 23:46:14.869606: step: 300/527, loss: 0.13776426017284393 2023-01-22 23:46:16.004705: step: 304/527, loss: 0.4403475821018219 2023-01-22 23:46:17.169252: step: 308/527, loss: 1.9599671363830566 2023-01-22 23:46:18.300472: step: 312/527, loss: 0.8410625457763672 2023-01-22 23:46:19.458667: step: 316/527, loss: 0.11824560165405273 2023-01-22 23:46:20.592160: step: 320/527, loss: 0.10421428829431534 2023-01-22 23:46:21.721923: step: 324/527, loss: 1.369499921798706 2023-01-22 23:46:22.823186: step: 328/527, loss: 0.7528577446937561 2023-01-22 23:46:23.974693: step: 332/527, loss: 0.7158708572387695 2023-01-22 23:46:25.092891: step: 336/527, loss: 0.6861649751663208 2023-01-22 23:46:26.204626: step: 340/527, loss: 0.2016763687133789 2023-01-22 23:46:27.334194: step: 344/527, loss: 1.667033314704895 2023-01-22 23:46:28.444207: step: 348/527, loss: 0.31558865308761597 2023-01-22 23:46:29.557094: step: 352/527, loss: 2.111006736755371 2023-01-22 23:46:30.671630: step: 356/527, loss: 0.19294653832912445 2023-01-22 23:46:31.753950: step: 360/527, loss: 0.5113725662231445 2023-01-22 23:46:32.879223: step: 364/527, loss: 0.12709054350852966 2023-01-22 23:46:34.009833: step: 368/527, loss: 0.4004729390144348 2023-01-22 23:46:35.112998: step: 372/527, loss: 0.3806811273097992 2023-01-22 23:46:36.227393: step: 376/527, loss: 0.05758824571967125 2023-01-22 23:46:37.365818: step: 380/527, loss: 0.19645214080810547 2023-01-22 23:46:38.512411: step: 384/527, loss: 0.2338651716709137 2023-01-22 23:46:39.660927: step: 388/527, loss: 0.14209969341754913 2023-01-22 23:46:40.783209: step: 392/527, loss: 0.9109208583831787 2023-01-22 23:46:41.885829: step: 396/527, loss: 0.11826296150684357 2023-01-22 23:46:43.000915: step: 400/527, loss: 0.3750064969062805 2023-01-22 23:46:44.113436: step: 404/527, loss: 1.0944753885269165 2023-01-22 23:46:45.207950: step: 408/527, loss: 0.13722772896289825 2023-01-22 23:46:46.319172: step: 412/527, loss: 0.0429803840816021 2023-01-22 23:46:47.458812: step: 416/527, loss: 1.160137414932251 2023-01-22 23:46:48.581842: step: 420/527, loss: 0.11524347960948944 2023-01-22 23:46:49.737200: step: 424/527, loss: 0.3231050670146942 2023-01-22 23:46:50.853332: step: 428/527, loss: 0.9715282320976257 2023-01-22 23:46:51.950323: step: 432/527, loss: 0.449584424495697 2023-01-22 23:46:53.050243: step: 436/527, loss: 0.7992695569992065 2023-01-22 23:46:54.133935: step: 440/527, loss: 0.40562957525253296 2023-01-22 23:46:55.264771: step: 444/527, loss: 0.2608977258205414 2023-01-22 23:46:56.379020: step: 448/527, loss: 0.271353155374527 2023-01-22 23:46:57.510003: step: 452/527, loss: 0.1906244456768036 2023-01-22 23:46:58.642785: step: 456/527, loss: 0.1685314178466797 2023-01-22 23:46:59.764907: step: 460/527, loss: 0.6777358055114746 2023-01-22 23:47:00.878390: step: 464/527, loss: 0.12882956862449646 2023-01-22 23:47:02.011555: step: 468/527, loss: 0.1659383773803711 2023-01-22 23:47:03.125215: step: 472/527, loss: 0.49204179644584656 2023-01-22 23:47:04.236438: step: 476/527, loss: 0.08797939121723175 2023-01-22 23:47:05.353215: step: 480/527, loss: 0.22639484703540802 2023-01-22 23:47:06.468925: step: 484/527, loss: 1.5357418060302734 2023-01-22 23:47:07.588243: step: 488/527, loss: 0.3083256781101227 2023-01-22 23:47:08.687076: step: 492/527, loss: 0.11426444351673126 2023-01-22 23:47:09.791264: step: 496/527, loss: 0.20675823092460632 2023-01-22 23:47:10.880281: step: 500/527, loss: 0.25360843539237976 2023-01-22 23:47:11.990001: step: 504/527, loss: 0.1589408814907074 2023-01-22 23:47:13.091247: step: 508/527, loss: 0.07644256949424744 2023-01-22 23:47:14.210899: step: 512/527, loss: 0.46519842743873596 2023-01-22 23:47:15.364736: step: 516/527, loss: 0.7580075263977051 2023-01-22 23:47:16.486127: step: 520/527, loss: 0.7077133059501648 2023-01-22 23:47:17.615777: step: 524/527, loss: 0.599079966545105 2023-01-22 23:47:18.726427: step: 528/527, loss: 0.5845640897750854 2023-01-22 23:47:19.834679: step: 532/527, loss: 0.4947095811367035 2023-01-22 23:47:20.961598: step: 536/527, loss: 0.41240742802619934 2023-01-22 23:47:22.051682: step: 540/527, loss: 0.149766743183136 2023-01-22 23:47:23.163074: step: 544/527, loss: 0.7612636685371399 2023-01-22 23:47:24.295166: step: 548/527, loss: 1.1650491952896118 2023-01-22 23:47:25.418430: step: 552/527, loss: 0.19652405381202698 2023-01-22 23:47:26.527545: step: 556/527, loss: 0.8278763294219971 2023-01-22 23:47:27.688757: step: 560/527, loss: 0.1411007046699524 2023-01-22 23:47:28.776587: step: 564/527, loss: 0.17499347031116486 2023-01-22 23:47:29.885727: step: 568/527, loss: 0.1611528843641281 2023-01-22 23:47:30.973893: step: 572/527, loss: 0.1049351692199707 2023-01-22 23:47:32.125307: step: 576/527, loss: 0.6969813704490662 2023-01-22 23:47:33.281757: step: 580/527, loss: 0.8075006604194641 2023-01-22 23:47:34.363829: step: 584/527, loss: 0.1691059172153473 2023-01-22 23:47:35.510328: step: 588/527, loss: 0.5877780914306641 2023-01-22 23:47:36.630744: step: 592/527, loss: 1.3590736389160156 2023-01-22 23:47:37.783955: step: 596/527, loss: 0.04550343006849289 2023-01-22 23:47:38.900860: step: 600/527, loss: 0.0836394801735878 2023-01-22 23:47:39.993962: step: 604/527, loss: 0.0873233824968338 2023-01-22 23:47:41.111470: step: 608/527, loss: 0.2814594507217407 2023-01-22 23:47:42.245148: step: 612/527, loss: 0.15087956190109253 2023-01-22 23:47:43.344065: step: 616/527, loss: 0.7622390389442444 2023-01-22 23:47:44.439658: step: 620/527, loss: 0.700497567653656 2023-01-22 23:47:45.533266: step: 624/527, loss: 0.3470827043056488 2023-01-22 23:47:46.678594: step: 628/527, loss: 0.08805885165929794 2023-01-22 23:47:47.767623: step: 632/527, loss: 0.582568347454071 2023-01-22 23:47:48.895040: step: 636/527, loss: 0.20462360978126526 2023-01-22 23:47:50.005552: step: 640/527, loss: 0.07517413794994354 2023-01-22 23:47:51.086114: step: 644/527, loss: 0.13510771095752716 2023-01-22 23:47:52.202751: step: 648/527, loss: 0.14372387528419495 2023-01-22 23:47:53.316284: step: 652/527, loss: 0.08327741920948029 2023-01-22 23:47:54.468410: step: 656/527, loss: 0.7339638471603394 2023-01-22 23:47:55.601850: step: 660/527, loss: 0.22133898735046387 2023-01-22 23:47:56.700404: step: 664/527, loss: 0.04356861114501953 2023-01-22 23:47:57.815598: step: 668/527, loss: 0.11088243126869202 2023-01-22 23:47:58.952181: step: 672/527, loss: 0.43668168783187866 2023-01-22 23:48:00.066541: step: 676/527, loss: 0.6186572909355164 2023-01-22 23:48:01.197511: step: 680/527, loss: 0.12462206184864044 2023-01-22 23:48:02.320055: step: 684/527, loss: 0.7752035856246948 2023-01-22 23:48:03.418332: step: 688/527, loss: 0.13928189873695374 2023-01-22 23:48:04.515353: step: 692/527, loss: 0.913519561290741 2023-01-22 23:48:05.644499: step: 696/527, loss: 0.869016706943512 2023-01-22 23:48:06.732129: step: 700/527, loss: 0.4711214005947113 2023-01-22 23:48:07.843340: step: 704/527, loss: 0.3347194194793701 2023-01-22 23:48:08.965758: step: 708/527, loss: 0.1446743905544281 2023-01-22 23:48:10.079704: step: 712/527, loss: 0.057020239531993866 2023-01-22 23:48:11.184412: step: 716/527, loss: 0.19327889382839203 2023-01-22 23:48:12.308550: step: 720/527, loss: 0.14151154458522797 2023-01-22 23:48:13.445515: step: 724/527, loss: 1.6256033182144165 2023-01-22 23:48:14.572025: step: 728/527, loss: 0.6168744564056396 2023-01-22 23:48:15.724222: step: 732/527, loss: 0.7165476083755493 2023-01-22 23:48:16.820674: step: 736/527, loss: 0.7513967752456665 2023-01-22 23:48:17.912285: step: 740/527, loss: 0.09074673801660538 2023-01-22 23:48:19.027165: step: 744/527, loss: 0.3354890048503876 2023-01-22 23:48:20.148097: step: 748/527, loss: 1.184076189994812 2023-01-22 23:48:21.267300: step: 752/527, loss: 0.028474999591708183 2023-01-22 23:48:22.384483: step: 756/527, loss: 0.21302320063114166 2023-01-22 23:48:23.522224: step: 760/527, loss: 0.05036201700568199 2023-01-22 23:48:24.660018: step: 764/527, loss: 0.18559235334396362 2023-01-22 23:48:25.772819: step: 768/527, loss: 0.32539892196655273 2023-01-22 23:48:26.893076: step: 772/527, loss: 0.7735402584075928 2023-01-22 23:48:27.981086: step: 776/527, loss: 0.05064937844872475 2023-01-22 23:48:29.082111: step: 780/527, loss: 0.40441039204597473 2023-01-22 23:48:30.183065: step: 784/527, loss: 0.17137542366981506 2023-01-22 23:48:31.308327: step: 788/527, loss: 0.3164135813713074 2023-01-22 23:48:32.421697: step: 792/527, loss: 0.7469159960746765 2023-01-22 23:48:33.550747: step: 796/527, loss: 0.2222343385219574 2023-01-22 23:48:34.654319: step: 800/527, loss: 0.13113151490688324 2023-01-22 23:48:35.771880: step: 804/527, loss: 0.2250564694404602 2023-01-22 23:48:36.889279: step: 808/527, loss: 0.19738531112670898 2023-01-22 23:48:38.009011: step: 812/527, loss: 0.5884044170379639 2023-01-22 23:48:39.128804: step: 816/527, loss: 0.15230293571949005 2023-01-22 23:48:40.250048: step: 820/527, loss: 3.5776891708374023 2023-01-22 23:48:41.395191: step: 824/527, loss: 0.09833803027868271 2023-01-22 23:48:42.500977: step: 828/527, loss: 0.14841929078102112 2023-01-22 23:48:43.603848: step: 832/527, loss: 8.15155029296875 2023-01-22 23:48:44.710990: step: 836/527, loss: 0.18123993277549744 2023-01-22 23:48:45.837511: step: 840/527, loss: 0.08970227092504501 2023-01-22 23:48:46.939732: step: 844/527, loss: 0.2677004337310791 2023-01-22 23:48:48.048298: step: 848/527, loss: 0.16472646594047546 2023-01-22 23:48:49.181210: step: 852/527, loss: 0.36883941292762756 2023-01-22 23:48:50.301439: step: 856/527, loss: 0.924812376499176 2023-01-22 23:48:51.431748: step: 860/527, loss: 0.21590128540992737 2023-01-22 23:48:52.556613: step: 864/527, loss: 0.18724651634693146 2023-01-22 23:48:53.670325: step: 868/527, loss: 0.12695884704589844 2023-01-22 23:48:54.805921: step: 872/527, loss: 0.8031549453735352 2023-01-22 23:48:55.921918: step: 876/527, loss: 0.13297969102859497 2023-01-22 23:48:57.030200: step: 880/527, loss: 1.044066309928894 2023-01-22 23:48:58.136527: step: 884/527, loss: 0.18827533721923828 2023-01-22 23:48:59.251304: step: 888/527, loss: 1.5006786584854126 2023-01-22 23:49:00.381154: step: 892/527, loss: 0.287637323141098 2023-01-22 23:49:01.501654: step: 896/527, loss: 0.080182746052742 2023-01-22 23:49:02.614284: step: 900/527, loss: 0.43495798110961914 2023-01-22 23:49:03.775874: step: 904/527, loss: 0.38494405150413513 2023-01-22 23:49:04.857002: step: 908/527, loss: 0.2986149191856384 2023-01-22 23:49:05.960332: step: 912/527, loss: 0.09951372444629669 2023-01-22 23:49:07.063400: step: 916/527, loss: 0.07324619591236115 2023-01-22 23:49:08.225950: step: 920/527, loss: 0.15875226259231567 2023-01-22 23:49:09.358886: step: 924/527, loss: 0.4924079179763794 2023-01-22 23:49:10.464022: step: 928/527, loss: 0.1666729897260666 2023-01-22 23:49:11.569911: step: 932/527, loss: 0.1510227918624878 2023-01-22 23:49:12.706245: step: 936/527, loss: 0.1508079469203949 2023-01-22 23:49:13.817414: step: 940/527, loss: 0.0981116071343422 2023-01-22 23:49:14.932438: step: 944/527, loss: 0.473650723695755 2023-01-22 23:49:16.050154: step: 948/527, loss: 0.4679538607597351 2023-01-22 23:49:17.193060: step: 952/527, loss: 1.9771320819854736 2023-01-22 23:49:18.284821: step: 956/527, loss: 0.5962545275688171 2023-01-22 23:49:19.372767: step: 960/527, loss: 0.08491936326026917 2023-01-22 23:49:20.507264: step: 964/527, loss: 0.4256337285041809 2023-01-22 23:49:21.614357: step: 968/527, loss: 0.974960207939148 2023-01-22 23:49:22.697295: step: 972/527, loss: 0.14189091324806213 2023-01-22 23:49:23.821504: step: 976/527, loss: 0.21337108314037323 2023-01-22 23:49:24.914202: step: 980/527, loss: 0.21850091218948364 2023-01-22 23:49:26.024413: step: 984/527, loss: 0.021103763952851295 2023-01-22 23:49:27.151635: step: 988/527, loss: 0.06029815599322319 2023-01-22 23:49:28.285441: step: 992/527, loss: 0.38073769211769104 2023-01-22 23:49:29.419078: step: 996/527, loss: 0.09785423427820206 2023-01-22 23:49:30.521579: step: 1000/527, loss: 0.1756284236907959 2023-01-22 23:49:31.724167: step: 1004/527, loss: 0.13993793725967407 2023-01-22 23:49:32.830605: step: 1008/527, loss: 0.11952514946460724 2023-01-22 23:49:33.932394: step: 1012/527, loss: 0.2876017689704895 2023-01-22 23:49:35.031815: step: 1016/527, loss: 0.1903502494096756 2023-01-22 23:49:36.158353: step: 1020/527, loss: 0.7590410709381104 2023-01-22 23:49:37.276303: step: 1024/527, loss: 0.21371503174304962 2023-01-22 23:49:38.420007: step: 1028/527, loss: 0.13532547652721405 2023-01-22 23:49:39.531802: step: 1032/527, loss: 0.17921629548072815 2023-01-22 23:49:40.656170: step: 1036/527, loss: 0.10391278564929962 2023-01-22 23:49:41.747644: step: 1040/527, loss: 0.5931336283683777 2023-01-22 23:49:42.864402: step: 1044/527, loss: 0.44791144132614136 2023-01-22 23:49:43.979422: step: 1048/527, loss: 0.2544996738433838 2023-01-22 23:49:45.074334: step: 1052/527, loss: 0.6879607439041138 2023-01-22 23:49:46.177417: step: 1056/527, loss: 1.048452615737915 2023-01-22 23:49:47.313191: step: 1060/527, loss: 0.1454302817583084 2023-01-22 23:49:48.444286: step: 1064/527, loss: 0.16948223114013672 2023-01-22 23:49:49.576371: step: 1068/527, loss: 6.81207799911499 2023-01-22 23:49:50.703964: step: 1072/527, loss: 0.09495621174573898 2023-01-22 23:49:51.799863: step: 1076/527, loss: 0.36988967657089233 2023-01-22 23:49:52.921741: step: 1080/527, loss: 1.2426155805587769 2023-01-22 23:49:54.041497: step: 1084/527, loss: 0.7794598937034607 2023-01-22 23:49:55.201170: step: 1088/527, loss: 0.11607380211353302 2023-01-22 23:49:56.311627: step: 1092/527, loss: 0.5163612365722656 2023-01-22 23:49:57.408176: step: 1096/527, loss: 0.7128528952598572 2023-01-22 23:49:58.549788: step: 1100/527, loss: 0.2510445713996887 2023-01-22 23:49:59.662978: step: 1104/527, loss: 6.766900062561035 2023-01-22 23:50:00.780353: step: 1108/527, loss: 2.0661354064941406 2023-01-22 23:50:01.905921: step: 1112/527, loss: 0.20085038244724274 2023-01-22 23:50:03.028680: step: 1116/527, loss: 0.15568295121192932 2023-01-22 23:50:04.159205: step: 1120/527, loss: 0.5789952278137207 2023-01-22 23:50:05.278488: step: 1124/527, loss: 0.1073971763253212 2023-01-22 23:50:06.382534: step: 1128/527, loss: 0.139032781124115 2023-01-22 23:50:07.543111: step: 1132/527, loss: 0.5800192356109619 2023-01-22 23:50:08.664065: step: 1136/527, loss: 0.07965927571058273 2023-01-22 23:50:09.754961: step: 1140/527, loss: 0.2582598626613617 2023-01-22 23:50:10.865544: step: 1144/527, loss: 0.06700535118579865 2023-01-22 23:50:11.985239: step: 1148/527, loss: 0.041037701070308685 2023-01-22 23:50:13.085937: step: 1152/527, loss: 0.04176654666662216 2023-01-22 23:50:14.196790: step: 1156/527, loss: 0.08579878509044647 2023-01-22 23:50:15.277180: step: 1160/527, loss: 0.14223642647266388 2023-01-22 23:50:16.396015: step: 1164/527, loss: 0.3107718825340271 2023-01-22 23:50:17.535635: step: 1168/527, loss: 1.2067625522613525 2023-01-22 23:50:18.669815: step: 1172/527, loss: 0.264103502035141 2023-01-22 23:50:19.804503: step: 1176/527, loss: 0.12725920975208282 2023-01-22 23:50:20.904387: step: 1180/527, loss: 0.5885448455810547 2023-01-22 23:50:22.042036: step: 1184/527, loss: 0.21629352867603302 2023-01-22 23:50:23.131155: step: 1188/527, loss: 0.08324048668146133 2023-01-22 23:50:24.250738: step: 1192/527, loss: 0.4931715726852417 2023-01-22 23:50:25.339462: step: 1196/527, loss: 0.14759770035743713 2023-01-22 23:50:26.435922: step: 1200/527, loss: 0.40107476711273193 2023-01-22 23:50:27.528446: step: 1204/527, loss: 0.13036805391311646 2023-01-22 23:50:28.650095: step: 1208/527, loss: 2.3990983963012695 2023-01-22 23:50:29.740488: step: 1212/527, loss: 0.20966807007789612 2023-01-22 23:50:30.859968: step: 1216/527, loss: 0.03937487676739693 2023-01-22 23:50:31.963147: step: 1220/527, loss: 0.11510948836803436 2023-01-22 23:50:33.072445: step: 1224/527, loss: 0.08761286735534668 2023-01-22 23:50:34.176655: step: 1228/527, loss: 0.19578705728054047 2023-01-22 23:50:35.302613: step: 1232/527, loss: 0.7736690640449524 2023-01-22 23:50:36.434036: step: 1236/527, loss: 0.4297598898410797 2023-01-22 23:50:37.558595: step: 1240/527, loss: 0.5477831959724426 2023-01-22 23:50:38.651077: step: 1244/527, loss: 0.7490943074226379 2023-01-22 23:50:39.749264: step: 1248/527, loss: 0.11891823261976242 2023-01-22 23:50:40.871752: step: 1252/527, loss: 0.06427083164453506 2023-01-22 23:50:42.007975: step: 1256/527, loss: 0.14949139952659607 2023-01-22 23:50:43.114768: step: 1260/527, loss: 0.7002460360527039 2023-01-22 23:50:44.215473: step: 1264/527, loss: 0.0852302610874176 2023-01-22 23:50:45.324239: step: 1268/527, loss: 2.069143295288086 2023-01-22 23:50:46.425870: step: 1272/527, loss: 0.1096169501543045 2023-01-22 23:50:47.526565: step: 1276/527, loss: 1.391528606414795 2023-01-22 23:50:48.616938: step: 1280/527, loss: 0.09534025192260742 2023-01-22 23:50:49.727871: step: 1284/527, loss: 0.2504711151123047 2023-01-22 23:50:50.836790: step: 1288/527, loss: 2.4685471057891846 2023-01-22 23:50:51.944142: step: 1292/527, loss: 0.7661879658699036 2023-01-22 23:50:53.055788: step: 1296/527, loss: 0.034363701939582825 2023-01-22 23:50:54.174323: step: 1300/527, loss: 0.6315479278564453 2023-01-22 23:50:55.282204: step: 1304/527, loss: 0.3477337062358856 2023-01-22 23:50:56.399683: step: 1308/527, loss: 0.19355988502502441 2023-01-22 23:50:57.476042: step: 1312/527, loss: 2.4004874229431152 2023-01-22 23:50:58.610975: step: 1316/527, loss: 0.25543269515037537 2023-01-22 23:50:59.679764: step: 1320/527, loss: 0.1204003393650055 2023-01-22 23:51:00.800942: step: 1324/527, loss: 0.2172868251800537 2023-01-22 23:51:01.933243: step: 1328/527, loss: 1.6003550291061401 2023-01-22 23:51:03.056587: step: 1332/527, loss: 1.1160590648651123 2023-01-22 23:51:04.169464: step: 1336/527, loss: 0.143450066447258 2023-01-22 23:51:05.299964: step: 1340/527, loss: 0.22737398743629456 2023-01-22 23:51:06.477115: step: 1344/527, loss: 0.09410958737134933 2023-01-22 23:51:07.621476: step: 1348/527, loss: 0.4932255744934082 2023-01-22 23:51:08.714923: step: 1352/527, loss: 0.10659746825695038 2023-01-22 23:51:09.850603: step: 1356/527, loss: 6.355819225311279 2023-01-22 23:51:10.972793: step: 1360/527, loss: 0.15165939927101135 2023-01-22 23:51:12.091574: step: 1364/527, loss: 0.3142017424106598 2023-01-22 23:51:13.224737: step: 1368/527, loss: 0.9376929998397827 2023-01-22 23:51:14.354144: step: 1372/527, loss: 0.995358943939209 2023-01-22 23:51:15.466041: step: 1376/527, loss: 1.19259512424469 2023-01-22 23:51:16.558739: step: 1380/527, loss: 0.08177123218774796 2023-01-22 23:51:17.706876: step: 1384/527, loss: 0.11256589740514755 2023-01-22 23:51:18.804544: step: 1388/527, loss: 0.08342233300209045 2023-01-22 23:51:19.938808: step: 1392/527, loss: 0.12773257493972778 2023-01-22 23:51:21.057308: step: 1396/527, loss: 0.12763291597366333 2023-01-22 23:51:22.170947: step: 1400/527, loss: 0.10100296139717102 2023-01-22 23:51:23.318881: step: 1404/527, loss: 6.4847636222839355 2023-01-22 23:51:24.442041: step: 1408/527, loss: 0.30751386284828186 2023-01-22 23:51:25.538659: step: 1412/527, loss: 0.1453719139099121 2023-01-22 23:51:26.694716: step: 1416/527, loss: 0.7211806178092957 2023-01-22 23:51:27.805951: step: 1420/527, loss: 0.1758289933204651 2023-01-22 23:51:28.900978: step: 1424/527, loss: 0.24334917962551117 2023-01-22 23:51:30.044605: step: 1428/527, loss: 0.11822004616260529 2023-01-22 23:51:31.130937: step: 1432/527, loss: 0.6254950165748596 2023-01-22 23:51:32.236589: step: 1436/527, loss: 0.7044002413749695 2023-01-22 23:51:33.352952: step: 1440/527, loss: 0.26617228984832764 2023-01-22 23:51:34.508487: step: 1444/527, loss: 0.11087627708911896 2023-01-22 23:51:35.639375: step: 1448/527, loss: 0.2744206488132477 2023-01-22 23:51:36.762814: step: 1452/527, loss: 1.1198296546936035 2023-01-22 23:51:37.891505: step: 1456/527, loss: 0.549225926399231 2023-01-22 23:51:39.019035: step: 1460/527, loss: 0.11290311813354492 2023-01-22 23:51:40.138512: step: 1464/527, loss: 0.2970294952392578 2023-01-22 23:51:41.218839: step: 1468/527, loss: 0.18934154510498047 2023-01-22 23:51:42.352155: step: 1472/527, loss: 0.05210290104150772 2023-01-22 23:51:43.465516: step: 1476/527, loss: 0.10870710015296936 2023-01-22 23:51:44.594693: step: 1480/527, loss: 0.30841749906539917 2023-01-22 23:51:45.698626: step: 1484/527, loss: 0.1739308387041092 2023-01-22 23:51:46.806982: step: 1488/527, loss: 0.8741557598114014 2023-01-22 23:51:47.921852: step: 1492/527, loss: 0.4375793933868408 2023-01-22 23:51:49.038261: step: 1496/527, loss: 0.2901790142059326 2023-01-22 23:51:50.152208: step: 1500/527, loss: 1.1248713731765747 2023-01-22 23:51:51.251235: step: 1504/527, loss: 1.3819787502288818 2023-01-22 23:51:52.344699: step: 1508/527, loss: 0.281766802072525 2023-01-22 23:51:53.463271: step: 1512/527, loss: 0.06937137246131897 2023-01-22 23:51:54.627743: step: 1516/527, loss: 0.3909940719604492 2023-01-22 23:51:55.714951: step: 1520/527, loss: 0.2660723924636841 2023-01-22 23:51:56.829011: step: 1524/527, loss: 1.8916209936141968 2023-01-22 23:51:57.948070: step: 1528/527, loss: 0.7295657396316528 2023-01-22 23:51:59.059436: step: 1532/527, loss: 1.5496559143066406 2023-01-22 23:52:00.181064: step: 1536/527, loss: 0.0745358020067215 2023-01-22 23:52:01.287581: step: 1540/527, loss: 0.28694668412208557 2023-01-22 23:52:02.403453: step: 1544/527, loss: 0.7614359855651855 2023-01-22 23:52:03.478943: step: 1548/527, loss: 0.17527265846729279 2023-01-22 23:52:04.613348: step: 1552/527, loss: 0.2575010359287262 2023-01-22 23:52:05.714330: step: 1556/527, loss: 0.08287763595581055 2023-01-22 23:52:06.841150: step: 1560/527, loss: 0.861185610294342 2023-01-22 23:52:07.947299: step: 1564/527, loss: 1.2788594961166382 2023-01-22 23:52:09.077826: step: 1568/527, loss: 0.7603168487548828 2023-01-22 23:52:10.204833: step: 1572/527, loss: 0.5276376008987427 2023-01-22 23:52:11.307007: step: 1576/527, loss: 0.17102785408496857 2023-01-22 23:52:12.454788: step: 1580/527, loss: 0.3335081934928894 2023-01-22 23:52:13.595656: step: 1584/527, loss: 0.09093207865953445 2023-01-22 23:52:14.733224: step: 1588/527, loss: 0.2155836969614029 2023-01-22 23:52:15.844393: step: 1592/527, loss: 0.5206926465034485 2023-01-22 23:52:16.938358: step: 1596/527, loss: 1.7206834554672241 2023-01-22 23:52:18.069494: step: 1600/527, loss: 0.7576001286506653 2023-01-22 23:52:19.186539: step: 1604/527, loss: 0.18618564307689667 2023-01-22 23:52:20.289195: step: 1608/527, loss: 3.081526279449463 2023-01-22 23:52:21.419064: step: 1612/527, loss: 0.19366303086280823 2023-01-22 23:52:22.522969: step: 1616/527, loss: 0.4724538028240204 2023-01-22 23:52:23.643031: step: 1620/527, loss: 0.3779865503311157 2023-01-22 23:52:24.747084: step: 1624/527, loss: 1.2595628499984741 2023-01-22 23:52:25.861762: step: 1628/527, loss: 0.07382285594940186 2023-01-22 23:52:26.976801: step: 1632/527, loss: 0.12430582195520401 2023-01-22 23:52:28.104965: step: 1636/527, loss: 0.24232524633407593 2023-01-22 23:52:29.193611: step: 1640/527, loss: 0.3426475524902344 2023-01-22 23:52:30.333097: step: 1644/527, loss: 0.2910544276237488 2023-01-22 23:52:31.436338: step: 1648/527, loss: 0.3327888548374176 2023-01-22 23:52:32.558150: step: 1652/527, loss: 0.11315460503101349 2023-01-22 23:52:33.666356: step: 1656/527, loss: 0.24544401466846466 2023-01-22 23:52:34.782414: step: 1660/527, loss: 0.10720377415418625 2023-01-22 23:52:35.890440: step: 1664/527, loss: 0.953731894493103 2023-01-22 23:52:37.012589: step: 1668/527, loss: 1.661987066268921 2023-01-22 23:52:38.115195: step: 1672/527, loss: 0.3664335310459137 2023-01-22 23:52:39.274969: step: 1676/527, loss: 0.47709712386131287 2023-01-22 23:52:40.447520: step: 1680/527, loss: 0.01964864693582058 2023-01-22 23:52:41.555780: step: 1684/527, loss: 0.09250341355800629 2023-01-22 23:52:42.689077: step: 1688/527, loss: 0.04162721708416939 2023-01-22 23:52:43.793230: step: 1692/527, loss: 0.21190519630908966 2023-01-22 23:52:44.895986: step: 1696/527, loss: 1.5235652923583984 2023-01-22 23:52:46.024522: step: 1700/527, loss: 0.0755079984664917 2023-01-22 23:52:47.145954: step: 1704/527, loss: 0.39317160844802856 2023-01-22 23:52:48.272517: step: 1708/527, loss: 0.18369001150131226 2023-01-22 23:52:49.378651: step: 1712/527, loss: 0.17624130845069885 2023-01-22 23:52:50.508549: step: 1716/527, loss: 0.24089355766773224 2023-01-22 23:52:51.661046: step: 1720/527, loss: 0.1675625890493393 2023-01-22 23:52:52.777066: step: 1724/527, loss: 1.108909010887146 2023-01-22 23:52:53.877839: step: 1728/527, loss: 0.08311252295970917 2023-01-22 23:52:55.005676: step: 1732/527, loss: 0.8410710096359253 2023-01-22 23:52:56.102275: step: 1736/527, loss: 0.45169803500175476 2023-01-22 23:52:57.253233: step: 1740/527, loss: 0.3264934718608856 2023-01-22 23:52:58.358872: step: 1744/527, loss: 0.05677900090813637 2023-01-22 23:52:59.457939: step: 1748/527, loss: 0.3532448410987854 2023-01-22 23:53:00.576615: step: 1752/527, loss: 0.13659420609474182 2023-01-22 23:53:01.718050: step: 1756/527, loss: 0.13202890753746033 2023-01-22 23:53:02.866668: step: 1760/527, loss: 0.2342444658279419 2023-01-22 23:53:03.994552: step: 1764/527, loss: 0.11645559966564178 2023-01-22 23:53:05.108041: step: 1768/527, loss: 0.0801548957824707 2023-01-22 23:53:06.234750: step: 1772/527, loss: 0.16870741546154022 2023-01-22 23:53:07.356464: step: 1776/527, loss: 0.3093426823616028 2023-01-22 23:53:08.456515: step: 1780/527, loss: 0.4707120954990387 2023-01-22 23:53:09.571221: step: 1784/527, loss: 0.1456715613603592 2023-01-22 23:53:10.713824: step: 1788/527, loss: 0.7960838079452515 2023-01-22 23:53:11.855715: step: 1792/527, loss: 0.6862504482269287 2023-01-22 23:53:12.953793: step: 1796/527, loss: 0.7375777959823608 2023-01-22 23:53:14.032455: step: 1800/527, loss: 0.03491075336933136 2023-01-22 23:53:15.140098: step: 1804/527, loss: 0.03256731107831001 2023-01-22 23:53:16.259495: step: 1808/527, loss: 0.6108474135398865 2023-01-22 23:53:17.347707: step: 1812/527, loss: 0.11911864578723907 2023-01-22 23:53:18.446594: step: 1816/527, loss: 0.049486566334962845 2023-01-22 23:53:19.568617: step: 1820/527, loss: 0.1568562537431717 2023-01-22 23:53:20.680764: step: 1824/527, loss: 0.19569067656993866 2023-01-22 23:53:21.805537: step: 1828/527, loss: 0.13616882264614105 2023-01-22 23:53:22.944733: step: 1832/527, loss: 0.337595134973526 2023-01-22 23:53:24.049028: step: 1836/527, loss: 0.4788685739040375 2023-01-22 23:53:25.166998: step: 1840/527, loss: 0.48046931624412537 2023-01-22 23:53:26.255350: step: 1844/527, loss: 0.0921226516366005 2023-01-22 23:53:27.368153: step: 1848/527, loss: 0.2914409637451172 2023-01-22 23:53:28.479999: step: 1852/527, loss: 0.38010454177856445 2023-01-22 23:53:29.585648: step: 1856/527, loss: 0.6869344115257263 2023-01-22 23:53:30.729413: step: 1860/527, loss: 0.2785852551460266 2023-01-22 23:53:31.840687: step: 1864/527, loss: 0.10527582466602325 2023-01-22 23:53:32.910529: step: 1868/527, loss: 0.5615676045417786 2023-01-22 23:53:34.018493: step: 1872/527, loss: 0.22778892517089844 2023-01-22 23:53:35.171217: step: 1876/527, loss: 0.7978864908218384 2023-01-22 23:53:36.294982: step: 1880/527, loss: 0.2650766372680664 2023-01-22 23:53:37.403392: step: 1884/527, loss: 0.37344110012054443 2023-01-22 23:53:38.496206: step: 1888/527, loss: 0.07013054937124252 2023-01-22 23:53:39.599408: step: 1892/527, loss: 0.35323771834373474 2023-01-22 23:53:40.686202: step: 1896/527, loss: 0.07066917419433594 2023-01-22 23:53:41.807627: step: 1900/527, loss: 0.3653683066368103 2023-01-22 23:53:42.924795: step: 1904/527, loss: 0.6925190091133118 2023-01-22 23:53:44.045282: step: 1908/527, loss: 0.49712473154067993 2023-01-22 23:53:45.126262: step: 1912/527, loss: 0.22238358855247498 2023-01-22 23:53:46.232060: step: 1916/527, loss: 0.225530207157135 2023-01-22 23:53:47.332430: step: 1920/527, loss: 0.046700358390808105 2023-01-22 23:53:48.465292: step: 1924/527, loss: 0.1559397280216217 2023-01-22 23:53:49.601072: step: 1928/527, loss: 0.53546541929245 2023-01-22 23:53:50.715734: step: 1932/527, loss: 1.5677920579910278 2023-01-22 23:53:51.830331: step: 1936/527, loss: 0.20982904732227325 2023-01-22 23:53:52.968719: step: 1940/527, loss: 0.18935346603393555 2023-01-22 23:53:54.099539: step: 1944/527, loss: 0.9220942854881287 2023-01-22 23:53:55.242441: step: 1948/527, loss: 0.1754719614982605 2023-01-22 23:53:56.363615: step: 1952/527, loss: 0.16006864607334137 2023-01-22 23:53:57.492071: step: 1956/527, loss: 0.16700410842895508 2023-01-22 23:53:58.627835: step: 1960/527, loss: 1.0917448997497559 2023-01-22 23:53:59.764337: step: 1964/527, loss: 0.0658954605460167 2023-01-22 23:54:00.904923: step: 1968/527, loss: 1.2004772424697876 2023-01-22 23:54:02.038412: step: 1972/527, loss: 1.4372750520706177 2023-01-22 23:54:03.148855: step: 1976/527, loss: 0.9758621454238892 2023-01-22 23:54:04.237620: step: 1980/527, loss: 0.03387107700109482 2023-01-22 23:54:05.362985: step: 1984/527, loss: 3.595634937286377 2023-01-22 23:54:06.492860: step: 1988/527, loss: 0.9712764620780945 2023-01-22 23:54:07.603352: step: 1992/527, loss: 0.42004719376564026 2023-01-22 23:54:08.711663: step: 1996/527, loss: 0.11016550660133362 2023-01-22 23:54:09.854186: step: 2000/527, loss: 0.23884287476539612 2023-01-22 23:54:10.975790: step: 2004/527, loss: 1.3455641269683838 2023-01-22 23:54:12.110601: step: 2008/527, loss: 0.2710852026939392 2023-01-22 23:54:13.242738: step: 2012/527, loss: 6.032918930053711 2023-01-22 23:54:14.354198: step: 2016/527, loss: 0.17980480194091797 2023-01-22 23:54:15.486472: step: 2020/527, loss: 0.7440292239189148 2023-01-22 23:54:16.589269: step: 2024/527, loss: 0.14048504829406738 2023-01-22 23:54:17.707814: step: 2028/527, loss: 1.5821094512939453 2023-01-22 23:54:18.798379: step: 2032/527, loss: 1.537947416305542 2023-01-22 23:54:19.903151: step: 2036/527, loss: 0.11527901142835617 2023-01-22 23:54:21.018124: step: 2040/527, loss: 1.6896952390670776 2023-01-22 23:54:22.133205: step: 2044/527, loss: 0.2969578802585602 2023-01-22 23:54:23.252117: step: 2048/527, loss: 0.41979628801345825 2023-01-22 23:54:24.353664: step: 2052/527, loss: 0.8157370686531067 2023-01-22 23:54:25.475618: step: 2056/527, loss: 0.7642280459403992 2023-01-22 23:54:26.570641: step: 2060/527, loss: 0.4924098551273346 2023-01-22 23:54:27.688093: step: 2064/527, loss: 0.2628178298473358 2023-01-22 23:54:28.805923: step: 2068/527, loss: 0.12557315826416016 2023-01-22 23:54:29.899140: step: 2072/527, loss: 0.1145901009440422 2023-01-22 23:54:31.034074: step: 2076/527, loss: 0.3862895965576172 2023-01-22 23:54:32.155491: step: 2080/527, loss: 2.2748115062713623 2023-01-22 23:54:33.267366: step: 2084/527, loss: 0.8024913668632507 2023-01-22 23:54:34.369407: step: 2088/527, loss: 0.4189472198486328 2023-01-22 23:54:35.476236: step: 2092/527, loss: 0.2964984178543091 2023-01-22 23:54:36.620525: step: 2096/527, loss: 0.9785525798797607 2023-01-22 23:54:37.704024: step: 2100/527, loss: 0.6473172307014465 2023-01-22 23:54:38.799006: step: 2104/527, loss: 0.2812657952308655 2023-01-22 23:54:39.920471: step: 2108/527, loss: 0.06238508224487305 ================================================== Loss: 0.562 -------------------- Dev: {'event': {'p': 0.623059866962306, 'r': 0.748335552596538, 'f1': 0.6799758015728978}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Test: {'event': {'p': 0.6303294573643411, 'r': 0.7434285714285714, 'f1': 0.6822233875196644}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Chinese: {'event': {'p': 0.6, 'r': 0.8333333333333334, 'f1': 0.6976744186046512}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Korean: {'event': {'p': 0.725, 'r': 0.4603174603174603, 'f1': 0.5631067961165048}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Russian: {'event': {'p': 0.45161290322580644, 'r': 0.3888888888888889, 'f1': 0.417910447761194}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.623059866962306, 'r': 0.748335552596538, 'f1': 0.6799758015728978}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Eng Test for Chinese: {'event': {'p': 0.6303294573643411, 'r': 0.7434285714285714, 'f1': 0.6822233875196644}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Chinese: {'event': {'p': 0.6, 'r': 0.8333333333333334, 'f1': 0.6976744186046512}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Eng Dev for Korean: {'event': {'p': 0.623059866962306, 'r': 0.748335552596538, 'f1': 0.6799758015728978}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Eng Test for Korean: {'event': {'p': 0.6303294573643411, 'r': 0.7434285714285714, 'f1': 0.6822233875196644}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Korean: {'event': {'p': 0.725, 'r': 0.4603174603174603, 'f1': 0.5631067961165048}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Eng Dev for Russian: {'event': {'p': 0.5560344827586207, 'r': 0.6870838881491345, 'f1': 0.6146515783204287}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Eng Test for Russian: {'event': {'p': 0.5831313620940378, 'r': 0.6874285714285714, 'f1': 0.6309992132179386}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'event': {'p': 0.5526315789473685, 'r': 0.5833333333333334, 'f1': 0.5675675675675677}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ****************************** Epoch: 3 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-22 23:55:36.166876: step: 4/527, loss: 0.29523342847824097 2023-01-22 23:55:37.270636: step: 8/527, loss: 0.24055567383766174 2023-01-22 23:55:38.406587: step: 12/527, loss: 0.30832982063293457 2023-01-22 23:55:39.557613: step: 16/527, loss: 0.311708927154541 2023-01-22 23:55:40.669462: step: 20/527, loss: 0.44299525022506714 2023-01-22 23:55:41.757744: step: 24/527, loss: 0.08590765297412872 2023-01-22 23:55:42.889634: step: 28/527, loss: 0.23331400752067566 2023-01-22 23:55:44.013843: step: 32/527, loss: 0.5599204301834106 2023-01-22 23:55:45.104700: step: 36/527, loss: 0.06011553108692169 2023-01-22 23:55:46.266891: step: 40/527, loss: 0.28026342391967773 2023-01-22 23:55:47.407720: step: 44/527, loss: 0.09607162326574326 2023-01-22 23:55:48.527155: step: 48/527, loss: 0.06594524532556534 2023-01-22 23:55:49.627867: step: 52/527, loss: 0.19180460274219513 2023-01-22 23:55:50.737428: step: 56/527, loss: 0.7360360622406006 2023-01-22 23:55:51.853169: step: 60/527, loss: 0.09769344329833984 2023-01-22 23:55:52.969994: step: 64/527, loss: 0.1952771246433258 2023-01-22 23:55:54.090029: step: 68/527, loss: 0.28659144043922424 2023-01-22 23:55:55.230046: step: 72/527, loss: 0.22840900719165802 2023-01-22 23:55:56.342658: step: 76/527, loss: 0.037955570966005325 2023-01-22 23:55:57.461248: step: 80/527, loss: 0.7833908796310425 2023-01-22 23:55:58.598868: step: 84/527, loss: 0.12808018922805786 2023-01-22 23:55:59.710250: step: 88/527, loss: 0.12343281507492065 2023-01-22 23:56:00.831278: step: 92/527, loss: 0.7955486178398132 2023-01-22 23:56:01.948927: step: 96/527, loss: 0.12088718265295029 2023-01-22 23:56:03.054512: step: 100/527, loss: 0.31135714054107666 2023-01-22 23:56:04.194628: step: 104/527, loss: 0.21968041360378265 2023-01-22 23:56:05.303508: step: 108/527, loss: 0.16879454255104065 2023-01-22 23:56:06.408499: step: 112/527, loss: 0.7538687586784363 2023-01-22 23:56:07.540240: step: 116/527, loss: 0.24970674514770508 2023-01-22 23:56:08.655788: step: 120/527, loss: 0.12112836539745331 2023-01-22 23:56:09.779389: step: 124/527, loss: 0.16490879654884338 2023-01-22 23:56:10.885600: step: 128/527, loss: 0.23180007934570312 2023-01-22 23:56:11.988464: step: 132/527, loss: 0.21754293143749237 2023-01-22 23:56:13.105840: step: 136/527, loss: 0.1884976327419281 2023-01-22 23:56:14.229332: step: 140/527, loss: 0.3693264126777649 2023-01-22 23:56:15.342409: step: 144/527, loss: 0.2415207326412201 2023-01-22 23:56:16.450043: step: 148/527, loss: 0.7395491003990173 2023-01-22 23:56:17.588159: step: 152/527, loss: 0.20872434973716736 2023-01-22 23:56:18.675001: step: 156/527, loss: 0.03007795847952366 2023-01-22 23:56:19.804372: step: 160/527, loss: 0.072475865483284 2023-01-22 23:56:20.934730: step: 164/527, loss: 0.11533994972705841 2023-01-22 23:56:22.021897: step: 168/527, loss: 0.11754465103149414 2023-01-22 23:56:23.163127: step: 172/527, loss: 0.09689471870660782 2023-01-22 23:56:24.251319: step: 176/527, loss: 0.21636077761650085 2023-01-22 23:56:25.402712: step: 180/527, loss: 0.1440087854862213 2023-01-22 23:56:26.517050: step: 184/527, loss: 0.3535681366920471 2023-01-22 23:56:27.682168: step: 188/527, loss: 0.1323660910129547 2023-01-22 23:56:28.788745: step: 192/527, loss: 0.13328666985034943 2023-01-22 23:56:29.909827: step: 196/527, loss: 0.4558882713317871 2023-01-22 23:56:31.033553: step: 200/527, loss: 0.24560528993606567 2023-01-22 23:56:32.151848: step: 204/527, loss: 0.3231223225593567 2023-01-22 23:56:33.243863: step: 208/527, loss: 0.9360366463661194 2023-01-22 23:56:34.365470: step: 212/527, loss: 0.7182988524436951 2023-01-22 23:56:35.515806: step: 216/527, loss: 1.230064868927002 2023-01-22 23:56:36.655477: step: 220/527, loss: 0.14730653166770935 2023-01-22 23:56:37.727076: step: 224/527, loss: 0.2925889194011688 2023-01-22 23:56:38.835189: step: 228/527, loss: 0.3287316560745239 2023-01-22 23:56:39.970379: step: 232/527, loss: 0.16178306937217712 2023-01-22 23:56:41.092110: step: 236/527, loss: 0.35347530245780945 2023-01-22 23:56:42.238688: step: 240/527, loss: 0.05575313791632652 2023-01-22 23:56:43.333551: step: 244/527, loss: 0.23297587037086487 2023-01-22 23:56:44.457307: step: 248/527, loss: 0.09823159873485565 2023-01-22 23:56:45.583019: step: 252/527, loss: 0.8506431579589844 2023-01-22 23:56:46.679673: step: 256/527, loss: 0.8213114738464355 2023-01-22 23:56:47.801750: step: 260/527, loss: 0.2058243751525879 2023-01-22 23:56:48.905216: step: 264/527, loss: 0.055585384368896484 2023-01-22 23:56:50.003801: step: 268/527, loss: 2.41886043548584 2023-01-22 23:56:51.129429: step: 272/527, loss: 0.16041022539138794 2023-01-22 23:56:52.258063: step: 276/527, loss: 0.18471652269363403 2023-01-22 23:56:53.359220: step: 280/527, loss: 0.5527294278144836 2023-01-22 23:56:54.494735: step: 284/527, loss: 0.09649810940027237 2023-01-22 23:56:55.612760: step: 288/527, loss: 0.20128989219665527 2023-01-22 23:56:56.735028: step: 292/527, loss: 1.1198081970214844 2023-01-22 23:56:57.851313: step: 296/527, loss: 1.2830760478973389 2023-01-22 23:56:58.939964: step: 300/527, loss: 0.3204280138015747 2023-01-22 23:57:00.069390: step: 304/527, loss: 0.16644175350666046 2023-01-22 23:57:01.186981: step: 308/527, loss: 0.8513381481170654 2023-01-22 23:57:02.289027: step: 312/527, loss: 0.30492687225341797 2023-01-22 23:57:03.383941: step: 316/527, loss: 0.1365855187177658 2023-01-22 23:57:04.497822: step: 320/527, loss: 0.47999104857444763 2023-01-22 23:57:05.621820: step: 324/527, loss: 0.09173393249511719 2023-01-22 23:57:06.780000: step: 328/527, loss: 0.07055292278528214 2023-01-22 23:57:07.892596: step: 332/527, loss: 0.20234471559524536 2023-01-22 23:57:09.052577: step: 336/527, loss: 0.17705698311328888 2023-01-22 23:57:10.157824: step: 340/527, loss: 0.09585509449243546 2023-01-22 23:57:11.244402: step: 344/527, loss: 0.4019201397895813 2023-01-22 23:57:12.343591: step: 348/527, loss: 0.038372136652469635 2023-01-22 23:57:13.443700: step: 352/527, loss: 0.04337067902088165 2023-01-22 23:57:14.567446: step: 356/527, loss: 0.9830796718597412 2023-01-22 23:57:15.693713: step: 360/527, loss: 0.6228225827217102 2023-01-22 23:57:16.839179: step: 364/527, loss: 0.16078686714172363 2023-01-22 23:57:17.945000: step: 368/527, loss: 0.19050121307373047 2023-01-22 23:57:19.027793: step: 372/527, loss: 0.08886180073022842 2023-01-22 23:57:20.143945: step: 376/527, loss: 6.747163772583008 2023-01-22 23:57:21.288242: step: 380/527, loss: 0.7045419216156006 2023-01-22 23:57:22.368814: step: 384/527, loss: 0.10610118508338928 2023-01-22 23:57:23.459118: step: 388/527, loss: 0.23508206009864807 2023-01-22 23:57:24.571596: step: 392/527, loss: 0.2466500848531723 2023-01-22 23:57:25.695924: step: 396/527, loss: 0.05903768539428711 2023-01-22 23:57:26.811390: step: 400/527, loss: 0.12119865417480469 2023-01-22 23:57:27.903143: step: 404/527, loss: 0.39661216735839844 2023-01-22 23:57:29.018361: step: 408/527, loss: 0.29129326343536377 2023-01-22 23:57:30.139061: step: 412/527, loss: 1.1994248628616333 2023-01-22 23:57:31.253073: step: 416/527, loss: 0.08095411956310272 2023-01-22 23:57:32.393457: step: 420/527, loss: 0.531620979309082 2023-01-22 23:57:33.536683: step: 424/527, loss: 0.0606144443154335 2023-01-22 23:57:34.641080: step: 428/527, loss: 0.14632532000541687 2023-01-22 23:57:35.753703: step: 432/527, loss: 0.15788884460926056 2023-01-22 23:57:36.902649: step: 436/527, loss: 0.17314834892749786 2023-01-22 23:57:38.072877: step: 440/527, loss: 0.2165517807006836 2023-01-22 23:57:39.199273: step: 444/527, loss: 0.3165510296821594 2023-01-22 23:57:40.344661: step: 448/527, loss: 0.23098278045654297 2023-01-22 23:57:41.451906: step: 452/527, loss: 0.800719141960144 2023-01-22 23:57:42.587626: step: 456/527, loss: 0.5590640306472778 2023-01-22 23:57:43.733785: step: 460/527, loss: 0.13624043762683868 2023-01-22 23:57:44.859175: step: 464/527, loss: 0.25043773651123047 2023-01-22 23:57:45.972409: step: 468/527, loss: 0.23645392060279846 2023-01-22 23:57:47.089485: step: 472/527, loss: 0.4604577124118805 2023-01-22 23:57:48.213551: step: 476/527, loss: 0.083251953125 2023-01-22 23:57:49.331056: step: 480/527, loss: 0.7196712493896484 2023-01-22 23:57:50.433915: step: 484/527, loss: 0.07926731556653976 2023-01-22 23:57:51.531859: step: 488/527, loss: 0.15770551562309265 2023-01-22 23:57:52.684811: step: 492/527, loss: 0.4499916136264801 2023-01-22 23:57:53.799474: step: 496/527, loss: 6.143533706665039 2023-01-22 23:57:54.905794: step: 500/527, loss: 0.14929285645484924 2023-01-22 23:57:56.050216: step: 504/527, loss: 0.3751159906387329 2023-01-22 23:57:57.161435: step: 508/527, loss: 0.19858865439891815 2023-01-22 23:57:58.252713: step: 512/527, loss: 0.13074883818626404 2023-01-22 23:57:59.358883: step: 516/527, loss: 0.18720608949661255 2023-01-22 23:58:00.487750: step: 520/527, loss: 0.08892136067152023 2023-01-22 23:58:01.588325: step: 524/527, loss: 0.09759245067834854 2023-01-22 23:58:02.694762: step: 528/527, loss: 0.1223384365439415 2023-01-22 23:58:03.796495: step: 532/527, loss: 0.7035112977027893 2023-01-22 23:58:04.959118: step: 536/527, loss: 0.8711899518966675 2023-01-22 23:58:06.110151: step: 540/527, loss: 0.2516135275363922 2023-01-22 23:58:07.256011: step: 544/527, loss: 0.06325416266918182 2023-01-22 23:58:08.390517: step: 548/527, loss: 0.26714763045310974 2023-01-22 23:58:09.519394: step: 552/527, loss: 0.5036737322807312 2023-01-22 23:58:10.635421: step: 556/527, loss: 0.5743014216423035 2023-01-22 23:58:11.762237: step: 560/527, loss: 0.3543386459350586 2023-01-22 23:58:12.862882: step: 564/527, loss: 0.10639238357543945 2023-01-22 23:58:13.956769: step: 568/527, loss: 0.7924562692642212 2023-01-22 23:58:15.103463: step: 572/527, loss: 0.3038768768310547 2023-01-22 23:58:16.240011: step: 576/527, loss: 0.034146498888731 2023-01-22 23:58:17.340968: step: 580/527, loss: 1.4056048393249512 2023-01-22 23:58:18.463365: step: 584/527, loss: 1.034292459487915 2023-01-22 23:58:19.608998: step: 588/527, loss: 0.15713483095169067 2023-01-22 23:58:20.710479: step: 592/527, loss: 0.085680291056633 2023-01-22 23:58:21.819771: step: 596/527, loss: 0.6299510598182678 2023-01-22 23:58:22.950711: step: 600/527, loss: 0.1197303831577301 2023-01-22 23:58:24.045803: step: 604/527, loss: 0.6524761319160461 2023-01-22 23:58:25.143454: step: 608/527, loss: 0.4268460273742676 2023-01-22 23:58:26.250573: step: 612/527, loss: 0.2464122772216797 2023-01-22 23:58:27.359281: step: 616/527, loss: 0.11744027584791183 2023-01-22 23:58:28.445423: step: 620/527, loss: 0.9237141609191895 2023-01-22 23:58:29.562865: step: 624/527, loss: 0.08192434906959534 2023-01-22 23:58:30.672721: step: 628/527, loss: 0.14642827212810516 2023-01-22 23:58:31.808354: step: 632/527, loss: 0.13101109862327576 2023-01-22 23:58:32.931477: step: 636/527, loss: 0.3748794496059418 2023-01-22 23:58:34.048109: step: 640/527, loss: 1.3404457569122314 2023-01-22 23:58:35.158742: step: 644/527, loss: 0.20574846863746643 2023-01-22 23:58:36.287328: step: 648/527, loss: 0.5100795030593872 2023-01-22 23:58:37.393339: step: 652/527, loss: 1.888468861579895 2023-01-22 23:58:38.480650: step: 656/527, loss: 0.12523789703845978 2023-01-22 23:58:39.595080: step: 660/527, loss: 0.1742839813232422 2023-01-22 23:58:40.696314: step: 664/527, loss: 0.08021669834852219 2023-01-22 23:58:41.813440: step: 668/527, loss: 0.12213466316461563 2023-01-22 23:58:42.917854: step: 672/527, loss: 0.009029579348862171 2023-01-22 23:58:44.056873: step: 676/527, loss: 0.9232625365257263 2023-01-22 23:58:45.179783: step: 680/527, loss: 0.6095352172851562 2023-01-22 23:58:46.284791: step: 684/527, loss: 0.10016965866088867 2023-01-22 23:58:47.387286: step: 688/527, loss: 0.4289236068725586 2023-01-22 23:58:48.501987: step: 692/527, loss: 0.7149953246116638 2023-01-22 23:58:49.617203: step: 696/527, loss: 0.05515170097351074 2023-01-22 23:58:50.739931: step: 700/527, loss: 0.18643493950366974 2023-01-22 23:58:51.830584: step: 704/527, loss: 0.6594498157501221 2023-01-22 23:58:52.904482: step: 708/527, loss: 0.0828583687543869 2023-01-22 23:58:54.055473: step: 712/527, loss: 0.09805183857679367 2023-01-22 23:58:55.164661: step: 716/527, loss: 0.6854934692382812 2023-01-22 23:58:56.276132: step: 720/527, loss: 0.07641954720020294 2023-01-22 23:58:57.359483: step: 724/527, loss: 0.10716669261455536 2023-01-22 23:58:58.473164: step: 728/527, loss: 1.2215745449066162 2023-01-22 23:58:59.580284: step: 732/527, loss: 0.4225223660469055 2023-01-22 23:59:00.708457: step: 736/527, loss: 0.23105916380882263 2023-01-22 23:59:01.819754: step: 740/527, loss: 0.1052466481924057 2023-01-22 23:59:02.942337: step: 744/527, loss: 0.3296758532524109 2023-01-22 23:59:04.082100: step: 748/527, loss: 0.7393607497215271 2023-01-22 23:59:05.176160: step: 752/527, loss: 0.10656967759132385 2023-01-22 23:59:06.281843: step: 756/527, loss: 0.1621202826499939 2023-01-22 23:59:07.414196: step: 760/527, loss: 0.13115721940994263 2023-01-22 23:59:08.527856: step: 764/527, loss: 0.34071341156959534 2023-01-22 23:59:09.640950: step: 768/527, loss: 0.12973089516162872 2023-01-22 23:59:10.752900: step: 772/527, loss: 0.16026464104652405 2023-01-22 23:59:11.847495: step: 776/527, loss: 0.7662369608879089 2023-01-22 23:59:13.010565: step: 780/527, loss: 0.12174063175916672 2023-01-22 23:59:14.133570: step: 784/527, loss: 0.07869014889001846 2023-01-22 23:59:15.241479: step: 788/527, loss: 0.7192503213882446 2023-01-22 23:59:16.371937: step: 792/527, loss: 0.2659946382045746 2023-01-22 23:59:17.505742: step: 796/527, loss: 0.16486701369285583 2023-01-22 23:59:18.627349: step: 800/527, loss: 0.33068400621414185 2023-01-22 23:59:19.778939: step: 804/527, loss: 0.15061970055103302 2023-01-22 23:59:20.904841: step: 808/527, loss: 0.2260732799768448 2023-01-22 23:59:22.011207: step: 812/527, loss: 0.1231963187456131 2023-01-22 23:59:23.112131: step: 816/527, loss: 0.056687548756599426 2023-01-22 23:59:24.227122: step: 820/527, loss: 0.3718331456184387 2023-01-22 23:59:25.347688: step: 824/527, loss: 0.6463532447814941 2023-01-22 23:59:26.457499: step: 828/527, loss: 0.11277799308300018 2023-01-22 23:59:27.558930: step: 832/527, loss: 0.25255244970321655 2023-01-22 23:59:28.649806: step: 836/527, loss: 0.2428019940853119 2023-01-22 23:59:29.771784: step: 840/527, loss: 0.6127815246582031 2023-01-22 23:59:30.897521: step: 844/527, loss: 0.805828332901001 2023-01-22 23:59:32.022042: step: 848/527, loss: 0.10094957798719406 2023-01-22 23:59:33.170654: step: 852/527, loss: 0.13343198597431183 2023-01-22 23:59:34.308581: step: 856/527, loss: 1.540419578552246 2023-01-22 23:59:35.433466: step: 860/527, loss: 0.07517042756080627 2023-01-22 23:59:36.606035: step: 864/527, loss: 0.26802197098731995 2023-01-22 23:59:37.692489: step: 868/527, loss: 0.12183710932731628 2023-01-22 23:59:38.787273: step: 872/527, loss: 0.09167900681495667 2023-01-22 23:59:39.922129: step: 876/527, loss: 0.26292967796325684 2023-01-22 23:59:41.032870: step: 880/527, loss: 0.08565893024206161 2023-01-22 23:59:42.150419: step: 884/527, loss: 0.191510871052742 2023-01-22 23:59:43.251647: step: 888/527, loss: 0.21745187044143677 2023-01-22 23:59:44.358130: step: 892/527, loss: 0.578854501247406 2023-01-22 23:59:45.482422: step: 896/527, loss: 0.11720170825719833 2023-01-22 23:59:46.628361: step: 900/527, loss: 0.7298274636268616 2023-01-22 23:59:47.723068: step: 904/527, loss: 0.5172414183616638 2023-01-22 23:59:48.840511: step: 908/527, loss: 0.17615079879760742 2023-01-22 23:59:49.975568: step: 912/527, loss: 0.6806604862213135 2023-01-22 23:59:51.077987: step: 916/527, loss: 0.28286781907081604 2023-01-22 23:59:52.180840: step: 920/527, loss: 0.7868010401725769 2023-01-22 23:59:53.266350: step: 924/527, loss: 0.3200637102127075 2023-01-22 23:59:54.393908: step: 928/527, loss: 0.2528351843357086 2023-01-22 23:59:55.533705: step: 932/527, loss: 0.09375253319740295 2023-01-22 23:59:56.677559: step: 936/527, loss: 1.6057020425796509 2023-01-22 23:59:57.760915: step: 940/527, loss: 0.1982731819152832 2023-01-22 23:59:58.853534: step: 944/527, loss: 0.34476107358932495 2023-01-23 00:00:00.000203: step: 948/527, loss: 0.30681857466697693 2023-01-23 00:00:01.125151: step: 952/527, loss: 0.395455539226532 2023-01-23 00:00:02.254016: step: 956/527, loss: 0.13369092345237732 2023-01-23 00:00:03.334155: step: 960/527, loss: 0.059206388890743256 2023-01-23 00:00:04.439143: step: 964/527, loss: 0.7263615727424622 2023-01-23 00:00:05.560901: step: 968/527, loss: 0.05437064170837402 2023-01-23 00:00:06.669200: step: 972/527, loss: 0.5808128118515015 2023-01-23 00:00:07.767870: step: 976/527, loss: 0.2736876606941223 2023-01-23 00:00:08.872528: step: 980/527, loss: 0.09005871415138245 2023-01-23 00:00:09.981383: step: 984/527, loss: 0.8632826209068298 2023-01-23 00:00:11.109804: step: 988/527, loss: 1.0252221822738647 2023-01-23 00:00:12.210829: step: 992/527, loss: 0.17467424273490906 2023-01-23 00:00:13.320614: step: 996/527, loss: 0.05576139688491821 2023-01-23 00:00:14.436469: step: 1000/527, loss: 0.2604485750198364 2023-01-23 00:00:15.554547: step: 1004/527, loss: 1.2448192834854126 2023-01-23 00:00:16.662054: step: 1008/527, loss: 0.7224701642990112 2023-01-23 00:00:17.818961: step: 1012/527, loss: 0.05256319046020508 2023-01-23 00:00:18.907837: step: 1016/527, loss: 0.40065836906433105 2023-01-23 00:00:20.059710: step: 1020/527, loss: 0.8348200917243958 2023-01-23 00:00:21.179647: step: 1024/527, loss: 0.3438807725906372 2023-01-23 00:00:22.259847: step: 1028/527, loss: 0.7351655960083008 2023-01-23 00:00:23.409713: step: 1032/527, loss: 6.24078369140625 2023-01-23 00:00:24.521139: step: 1036/527, loss: 1.2371599674224854 2023-01-23 00:00:25.611926: step: 1040/527, loss: 0.20953096449375153 2023-01-23 00:00:26.712578: step: 1044/527, loss: 0.03357229381799698 2023-01-23 00:00:27.853501: step: 1048/527, loss: 0.2951011657714844 2023-01-23 00:00:28.994445: step: 1052/527, loss: 0.12666818499565125 2023-01-23 00:00:30.104124: step: 1056/527, loss: 0.7795705199241638 2023-01-23 00:00:31.221953: step: 1060/527, loss: 0.26206228137016296 2023-01-23 00:00:32.337509: step: 1064/527, loss: 0.34461164474487305 2023-01-23 00:00:33.474190: step: 1068/527, loss: 0.13036489486694336 2023-01-23 00:00:34.582322: step: 1072/527, loss: 0.07648658752441406 2023-01-23 00:00:35.678590: step: 1076/527, loss: 0.08582830429077148 2023-01-23 00:00:36.841501: step: 1080/527, loss: 0.35444217920303345 2023-01-23 00:00:38.003322: step: 1084/527, loss: 0.14558488130569458 2023-01-23 00:00:39.131066: step: 1088/527, loss: 0.8985475301742554 2023-01-23 00:00:40.253581: step: 1092/527, loss: 0.23319482803344727 2023-01-23 00:00:41.411342: step: 1096/527, loss: 0.09455452114343643 2023-01-23 00:00:42.498885: step: 1100/527, loss: 0.7094722986221313 2023-01-23 00:00:43.595440: step: 1104/527, loss: 0.17710772156715393 2023-01-23 00:00:44.731759: step: 1108/527, loss: 0.13092699646949768 2023-01-23 00:00:45.842741: step: 1112/527, loss: 0.5701183676719666 2023-01-23 00:00:46.949908: step: 1116/527, loss: 0.2542566955089569 2023-01-23 00:00:48.070976: step: 1120/527, loss: 6.269968509674072 2023-01-23 00:00:49.173437: step: 1124/527, loss: 0.10636921226978302 2023-01-23 00:00:50.278163: step: 1128/527, loss: 0.6528869271278381 2023-01-23 00:00:51.406351: step: 1132/527, loss: 0.23219867050647736 2023-01-23 00:00:52.549035: step: 1136/527, loss: 0.19824066758155823 2023-01-23 00:00:53.666163: step: 1140/527, loss: 0.24031782150268555 2023-01-23 00:00:54.760805: step: 1144/527, loss: 0.06825733184814453 2023-01-23 00:00:55.870170: step: 1148/527, loss: 0.6413553357124329 2023-01-23 00:00:56.999878: step: 1152/527, loss: 0.07785339653491974 2023-01-23 00:00:58.099057: step: 1156/527, loss: 0.3656767010688782 2023-01-23 00:00:59.217360: step: 1160/527, loss: 0.0957103744149208 2023-01-23 00:01:00.340543: step: 1164/527, loss: 0.04970159754157066 2023-01-23 00:01:01.468533: step: 1168/527, loss: 0.20569229125976562 2023-01-23 00:01:02.564609: step: 1172/527, loss: 0.08003134280443192 2023-01-23 00:01:03.680410: step: 1176/527, loss: 0.07006935775279999 2023-01-23 00:01:04.808967: step: 1180/527, loss: 0.20141926407814026 2023-01-23 00:01:05.924415: step: 1184/527, loss: 0.1382962167263031 2023-01-23 00:01:07.022580: step: 1188/527, loss: 0.023036815226078033 2023-01-23 00:01:08.119270: step: 1192/527, loss: 0.12034359574317932 2023-01-23 00:01:09.232553: step: 1196/527, loss: 0.7818715572357178 2023-01-23 00:01:10.345567: step: 1200/527, loss: 0.10831747204065323 2023-01-23 00:01:11.489525: step: 1204/527, loss: 0.11144642531871796 2023-01-23 00:01:12.627568: step: 1208/527, loss: 0.14342650771141052 2023-01-23 00:01:13.743213: step: 1212/527, loss: 6.843223571777344 2023-01-23 00:01:14.861020: step: 1216/527, loss: 0.058576010167598724 2023-01-23 00:01:15.979516: step: 1220/527, loss: 0.4922071695327759 2023-01-23 00:01:17.064726: step: 1224/527, loss: 0.11455860733985901 2023-01-23 00:01:18.165612: step: 1228/527, loss: 0.4115406274795532 2023-01-23 00:01:19.303967: step: 1232/527, loss: 2.1886959075927734 2023-01-23 00:01:20.410051: step: 1236/527, loss: 1.1516563892364502 2023-01-23 00:01:21.489362: step: 1240/527, loss: 0.22236990928649902 2023-01-23 00:01:22.598732: step: 1244/527, loss: 0.18879419565200806 2023-01-23 00:01:23.722199: step: 1248/527, loss: 0.6179380416870117 2023-01-23 00:01:24.836512: step: 1252/527, loss: 0.7509297728538513 2023-01-23 00:01:25.971872: step: 1256/527, loss: 0.08869829773902893 2023-01-23 00:01:27.109180: step: 1260/527, loss: 6.988436698913574 2023-01-23 00:01:28.211186: step: 1264/527, loss: 0.06967001408338547 2023-01-23 00:01:29.336385: step: 1268/527, loss: 0.19424334168434143 2023-01-23 00:01:30.440351: step: 1272/527, loss: 0.15633173286914825 2023-01-23 00:01:31.523890: step: 1276/527, loss: 0.07303605228662491 2023-01-23 00:01:32.612281: step: 1280/527, loss: 0.17554417252540588 2023-01-23 00:01:33.735146: step: 1284/527, loss: 0.44688090682029724 2023-01-23 00:01:34.844319: step: 1288/527, loss: 0.09542637318372726 2023-01-23 00:01:35.960081: step: 1292/527, loss: 0.1234808936715126 2023-01-23 00:01:37.049691: step: 1296/527, loss: 0.20519113540649414 2023-01-23 00:01:38.178110: step: 1300/527, loss: 0.16131410002708435 2023-01-23 00:01:39.282862: step: 1304/527, loss: 0.16981634497642517 2023-01-23 00:01:40.440348: step: 1308/527, loss: 0.27565404772758484 2023-01-23 00:01:41.596328: step: 1312/527, loss: 0.15511161088943481 2023-01-23 00:01:42.704623: step: 1316/527, loss: 0.3203316926956177 2023-01-23 00:01:43.797523: step: 1320/527, loss: 0.3474600613117218 2023-01-23 00:01:44.888438: step: 1324/527, loss: 0.20824985206127167 2023-01-23 00:01:46.021236: step: 1328/527, loss: 0.021573161706328392 2023-01-23 00:01:47.104591: step: 1332/527, loss: 0.14113540947437286 2023-01-23 00:01:48.187192: step: 1336/527, loss: 0.15560971200466156 2023-01-23 00:01:49.304665: step: 1340/527, loss: 0.7649518251419067 2023-01-23 00:01:50.409300: step: 1344/527, loss: 0.05924525111913681 2023-01-23 00:01:51.538155: step: 1348/527, loss: 0.2367693930864334 2023-01-23 00:01:52.646267: step: 1352/527, loss: 0.16110849380493164 2023-01-23 00:01:53.749142: step: 1356/527, loss: 0.7183638215065002 2023-01-23 00:01:54.863006: step: 1360/527, loss: 1.31695556640625 2023-01-23 00:01:55.952465: step: 1364/527, loss: 0.3987087309360504 2023-01-23 00:01:57.061780: step: 1368/527, loss: 0.16208620369434357 2023-01-23 00:01:58.188655: step: 1372/527, loss: 0.8116208910942078 2023-01-23 00:01:59.334529: step: 1376/527, loss: 0.17888890206813812 2023-01-23 00:02:00.449834: step: 1380/527, loss: 0.3769391179084778 2023-01-23 00:02:01.554157: step: 1384/527, loss: 0.0617159865796566 2023-01-23 00:02:02.663503: step: 1388/527, loss: 0.7561659812927246 2023-01-23 00:02:03.779361: step: 1392/527, loss: 1.257905125617981 2023-01-23 00:02:04.903027: step: 1396/527, loss: 0.6649717092514038 2023-01-23 00:02:05.993955: step: 1400/527, loss: 0.09816465526819229 2023-01-23 00:02:07.154368: step: 1404/527, loss: 0.21050962805747986 2023-01-23 00:02:08.274830: step: 1408/527, loss: 0.22895203530788422 2023-01-23 00:02:09.395477: step: 1412/527, loss: 0.16873522102832794 2023-01-23 00:02:10.511418: step: 1416/527, loss: 0.1545039266347885 2023-01-23 00:02:11.658581: step: 1420/527, loss: 0.3784274458885193 2023-01-23 00:02:12.766100: step: 1424/527, loss: 0.1585555374622345 2023-01-23 00:02:13.869531: step: 1428/527, loss: 0.12912093102931976 2023-01-23 00:02:14.981851: step: 1432/527, loss: 0.1897331327199936 2023-01-23 00:02:16.126708: step: 1436/527, loss: 0.1998147964477539 2023-01-23 00:02:17.266338: step: 1440/527, loss: 1.7084550857543945 2023-01-23 00:02:18.374690: step: 1444/527, loss: 1.9001847505569458 2023-01-23 00:02:19.478292: step: 1448/527, loss: 0.5831559300422668 2023-01-23 00:02:20.582710: step: 1452/527, loss: 0.16930732131004333 2023-01-23 00:02:21.726989: step: 1456/527, loss: 0.11557817459106445 2023-01-23 00:02:22.855320: step: 1460/527, loss: 0.036350131034851074 2023-01-23 00:02:23.966797: step: 1464/527, loss: 0.06388416886329651 2023-01-23 00:02:25.071152: step: 1468/527, loss: 0.3397369086742401 2023-01-23 00:02:26.197663: step: 1472/527, loss: 1.7740471363067627 2023-01-23 00:02:27.307552: step: 1476/527, loss: 0.15655669569969177 2023-01-23 00:02:28.419826: step: 1480/527, loss: 0.4824514389038086 2023-01-23 00:02:29.591648: step: 1484/527, loss: 0.05881619453430176 2023-01-23 00:02:30.733716: step: 1488/527, loss: 0.22328053414821625 2023-01-23 00:02:31.819677: step: 1492/527, loss: 0.10837321728467941 2023-01-23 00:02:32.916492: step: 1496/527, loss: 0.13701844215393066 2023-01-23 00:02:33.986906: step: 1500/527, loss: 0.13396620750427246 2023-01-23 00:02:35.105620: step: 1504/527, loss: 0.4210335612297058 2023-01-23 00:02:36.242457: step: 1508/527, loss: 0.2519668638706207 2023-01-23 00:02:37.378614: step: 1512/527, loss: 0.37600135803222656 2023-01-23 00:02:38.493415: step: 1516/527, loss: 0.048534706234931946 2023-01-23 00:02:39.601180: step: 1520/527, loss: 0.0802985206246376 2023-01-23 00:02:40.687075: step: 1524/527, loss: 0.3425601124763489 2023-01-23 00:02:41.799775: step: 1528/527, loss: 0.2651395797729492 2023-01-23 00:02:42.904870: step: 1532/527, loss: 0.3466445505619049 2023-01-23 00:02:43.999881: step: 1536/527, loss: 0.7408485412597656 2023-01-23 00:02:45.125423: step: 1540/527, loss: 0.15022669732570648 2023-01-23 00:02:46.226523: step: 1544/527, loss: 0.20141810178756714 2023-01-23 00:02:47.331386: step: 1548/527, loss: 0.6393409967422485 2023-01-23 00:02:48.436142: step: 1552/527, loss: 0.1797538697719574 2023-01-23 00:02:49.581085: step: 1556/527, loss: 0.2448950856924057 2023-01-23 00:02:50.668561: step: 1560/527, loss: 0.6922838091850281 2023-01-23 00:02:51.780823: step: 1564/527, loss: 1.196817398071289 2023-01-23 00:02:52.880626: step: 1568/527, loss: 1.151154637336731 2023-01-23 00:02:53.993706: step: 1572/527, loss: 0.20969842374324799 2023-01-23 00:02:55.082564: step: 1576/527, loss: 0.13624997437000275 2023-01-23 00:02:56.205476: step: 1580/527, loss: 0.26747751235961914 2023-01-23 00:02:57.335295: step: 1584/527, loss: 0.4422232210636139 2023-01-23 00:02:58.418452: step: 1588/527, loss: 0.21091842651367188 2023-01-23 00:02:59.549468: step: 1592/527, loss: 0.11456441879272461 2023-01-23 00:03:00.660858: step: 1596/527, loss: 0.23135709762573242 2023-01-23 00:03:01.770896: step: 1600/527, loss: 0.07480859756469727 2023-01-23 00:03:02.891694: step: 1604/527, loss: 0.5939818024635315 2023-01-23 00:03:04.022481: step: 1608/527, loss: 0.08332765102386475 2023-01-23 00:03:05.122108: step: 1612/527, loss: 0.729077935218811 2023-01-23 00:03:06.234063: step: 1616/527, loss: 0.3042232394218445 2023-01-23 00:03:07.340417: step: 1620/527, loss: 0.17822542786598206 2023-01-23 00:03:08.433622: step: 1624/527, loss: 0.07442331314086914 2023-01-23 00:03:09.559540: step: 1628/527, loss: 0.2780519127845764 2023-01-23 00:03:10.673936: step: 1632/527, loss: 0.1747821867465973 2023-01-23 00:03:11.808828: step: 1636/527, loss: 0.08693154156208038 2023-01-23 00:03:12.957946: step: 1640/527, loss: 0.1609300673007965 2023-01-23 00:03:14.058880: step: 1644/527, loss: 0.15866021811962128 2023-01-23 00:03:15.183266: step: 1648/527, loss: 0.13303261995315552 2023-01-23 00:03:16.275992: step: 1652/527, loss: 0.1694045066833496 2023-01-23 00:03:17.415232: step: 1656/527, loss: 0.7706464529037476 2023-01-23 00:03:18.537493: step: 1660/527, loss: 0.7723090052604675 2023-01-23 00:03:19.649978: step: 1664/527, loss: 0.14559917151927948 2023-01-23 00:03:20.763538: step: 1668/527, loss: 1.1145273447036743 2023-01-23 00:03:21.901602: step: 1672/527, loss: 0.8525142669677734 2023-01-23 00:03:23.018321: step: 1676/527, loss: 0.15494604408740997 2023-01-23 00:03:24.135725: step: 1680/527, loss: 0.20186564326286316 2023-01-23 00:03:25.240005: step: 1684/527, loss: 0.034897495061159134 2023-01-23 00:03:26.356976: step: 1688/527, loss: 0.7521612644195557 2023-01-23 00:03:27.452829: step: 1692/527, loss: 1.5351735353469849 2023-01-23 00:03:28.551003: step: 1696/527, loss: 0.1274941861629486 2023-01-23 00:03:29.688337: step: 1700/527, loss: 0.3413775861263275 2023-01-23 00:03:30.790659: step: 1704/527, loss: 0.24382968246936798 2023-01-23 00:03:31.921913: step: 1708/527, loss: 0.7066400051116943 2023-01-23 00:03:33.066072: step: 1712/527, loss: 0.1125233918428421 2023-01-23 00:03:34.171072: step: 1716/527, loss: 0.6647725701332092 2023-01-23 00:03:35.294562: step: 1720/527, loss: 0.35052013397216797 2023-01-23 00:03:36.422570: step: 1724/527, loss: 0.9961753487586975 2023-01-23 00:03:37.520712: step: 1728/527, loss: 0.639833390712738 2023-01-23 00:03:38.626451: step: 1732/527, loss: 0.7160775661468506 2023-01-23 00:03:39.767734: step: 1736/527, loss: 0.4022991359233856 2023-01-23 00:03:40.880361: step: 1740/527, loss: 0.07399425655603409 2023-01-23 00:03:42.003495: step: 1744/527, loss: 0.15703420341014862 2023-01-23 00:03:43.122106: step: 1748/527, loss: 0.23415395617485046 2023-01-23 00:03:44.224152: step: 1752/527, loss: 0.7301901578903198 2023-01-23 00:03:45.355081: step: 1756/527, loss: 0.09709759056568146 2023-01-23 00:03:46.474939: step: 1760/527, loss: 1.3265905380249023 2023-01-23 00:03:47.592881: step: 1764/527, loss: 0.12964686751365662 2023-01-23 00:03:48.724550: step: 1768/527, loss: 0.056077100336551666 2023-01-23 00:03:49.825295: step: 1772/527, loss: 0.05269885063171387 2023-01-23 00:03:50.945597: step: 1776/527, loss: 0.05644979700446129 2023-01-23 00:03:52.058403: step: 1780/527, loss: 1.9284570217132568 2023-01-23 00:03:53.184590: step: 1784/527, loss: 0.45022791624069214 2023-01-23 00:03:54.280363: step: 1788/527, loss: 0.41081351041793823 2023-01-23 00:03:55.404867: step: 1792/527, loss: 0.6521373987197876 2023-01-23 00:03:56.536489: step: 1796/527, loss: 1.4138745069503784 2023-01-23 00:03:57.649879: step: 1800/527, loss: 0.2815832197666168 2023-01-23 00:03:58.797573: step: 1804/527, loss: 0.18337178230285645 2023-01-23 00:03:59.924182: step: 1808/527, loss: 0.19891025125980377 2023-01-23 00:04:01.051333: step: 1812/527, loss: 1.1391477584838867 2023-01-23 00:04:02.191722: step: 1816/527, loss: 0.0474453940987587 2023-01-23 00:04:03.300213: step: 1820/527, loss: 0.6373782753944397 2023-01-23 00:04:04.386352: step: 1824/527, loss: 0.7021394371986389 2023-01-23 00:04:05.470500: step: 1828/527, loss: 0.03942599147558212 2023-01-23 00:04:06.553158: step: 1832/527, loss: 0.9061006307601929 2023-01-23 00:04:07.668120: step: 1836/527, loss: 0.6624301075935364 2023-01-23 00:04:08.780700: step: 1840/527, loss: 0.22355647385120392 2023-01-23 00:04:09.896260: step: 1844/527, loss: 0.05921673774719238 2023-01-23 00:04:11.010746: step: 1848/527, loss: 0.8650643229484558 2023-01-23 00:04:12.124470: step: 1852/527, loss: 0.8127278089523315 2023-01-23 00:04:13.216977: step: 1856/527, loss: 0.3343259394168854 2023-01-23 00:04:14.351767: step: 1860/527, loss: 0.08522005379199982 2023-01-23 00:04:15.454527: step: 1864/527, loss: 0.15020093321800232 2023-01-23 00:04:16.557859: step: 1868/527, loss: 0.09190855175256729 2023-01-23 00:04:17.696376: step: 1872/527, loss: 0.3572346270084381 2023-01-23 00:04:18.832604: step: 1876/527, loss: 0.12306909263134003 2023-01-23 00:04:19.998717: step: 1880/527, loss: 0.7557978630065918 2023-01-23 00:04:21.128668: step: 1884/527, loss: 0.14303618669509888 2023-01-23 00:04:22.250227: step: 1888/527, loss: 0.21837492287158966 2023-01-23 00:04:23.380349: step: 1892/527, loss: 0.6044619083404541 2023-01-23 00:04:24.467570: step: 1896/527, loss: 0.0841360092163086 2023-01-23 00:04:25.591085: step: 1900/527, loss: 0.16436046361923218 2023-01-23 00:04:26.709943: step: 1904/527, loss: 0.08870497345924377 2023-01-23 00:04:27.835383: step: 1908/527, loss: 0.13764634728431702 2023-01-23 00:04:28.945740: step: 1912/527, loss: 0.09183311462402344 2023-01-23 00:04:30.065500: step: 1916/527, loss: 0.23198318481445312 2023-01-23 00:04:31.173666: step: 1920/527, loss: 0.06905551254749298 2023-01-23 00:04:32.293432: step: 1924/527, loss: 0.5190895199775696 2023-01-23 00:04:33.435425: step: 1928/527, loss: 0.14674490690231323 2023-01-23 00:04:34.548640: step: 1932/527, loss: 0.2626269459724426 2023-01-23 00:04:35.660724: step: 1936/527, loss: 0.23745432496070862 2023-01-23 00:04:36.779004: step: 1940/527, loss: 0.7011728882789612 2023-01-23 00:04:37.897603: step: 1944/527, loss: 0.2089247703552246 2023-01-23 00:04:39.012873: step: 1948/527, loss: 0.05228424072265625 2023-01-23 00:04:40.118286: step: 1952/527, loss: 0.22551411390304565 2023-01-23 00:04:41.230788: step: 1956/527, loss: 0.1240018904209137 2023-01-23 00:04:42.356241: step: 1960/527, loss: 0.08612652122974396 2023-01-23 00:04:43.475650: step: 1964/527, loss: 2.1947362422943115 2023-01-23 00:04:44.615804: step: 1968/527, loss: 0.4895205497741699 2023-01-23 00:04:45.748327: step: 1972/527, loss: 0.2622237205505371 2023-01-23 00:04:46.844193: step: 1976/527, loss: 0.14788514375686646 2023-01-23 00:04:47.941087: step: 1980/527, loss: 1.4838156700134277 2023-01-23 00:04:49.052223: step: 1984/527, loss: 1.0007070302963257 2023-01-23 00:04:50.176444: step: 1988/527, loss: 3.2435779571533203 2023-01-23 00:04:51.283183: step: 1992/527, loss: 0.8127609491348267 2023-01-23 00:04:52.433158: step: 1996/527, loss: 0.12645292282104492 2023-01-23 00:04:53.553100: step: 2000/527, loss: 0.821395218372345 2023-01-23 00:04:54.690246: step: 2004/527, loss: 0.23000317811965942 2023-01-23 00:04:55.831357: step: 2008/527, loss: 1.283876657485962 2023-01-23 00:04:56.934593: step: 2012/527, loss: 0.11036530137062073 2023-01-23 00:04:58.078558: step: 2016/527, loss: 0.24143162369728088 2023-01-23 00:04:59.173557: step: 2020/527, loss: 0.08629532158374786 2023-01-23 00:05:00.289334: step: 2024/527, loss: 0.2083907574415207 2023-01-23 00:05:01.409234: step: 2028/527, loss: 0.8790580034255981 2023-01-23 00:05:02.497404: step: 2032/527, loss: 0.06619081646203995 2023-01-23 00:05:03.577311: step: 2036/527, loss: 0.37637922167778015 2023-01-23 00:05:04.664695: step: 2040/527, loss: 0.2209973782300949 2023-01-23 00:05:05.774462: step: 2044/527, loss: 0.5480155944824219 2023-01-23 00:05:06.906101: step: 2048/527, loss: 0.1440381109714508 2023-01-23 00:05:07.996127: step: 2052/527, loss: 0.1772206425666809 2023-01-23 00:05:09.121231: step: 2056/527, loss: 0.1490059792995453 2023-01-23 00:05:10.241510: step: 2060/527, loss: 2.06956148147583 2023-01-23 00:05:11.373014: step: 2064/527, loss: 0.1219981238245964 2023-01-23 00:05:12.484531: step: 2068/527, loss: 0.5840121507644653 2023-01-23 00:05:13.599272: step: 2072/527, loss: 0.7571640014648438 2023-01-23 00:05:14.685141: step: 2076/527, loss: 0.6306487321853638 2023-01-23 00:05:15.779461: step: 2080/527, loss: 0.13957777619361877 2023-01-23 00:05:16.901244: step: 2084/527, loss: 0.08821021020412445 2023-01-23 00:05:18.017322: step: 2088/527, loss: 0.1441861242055893 2023-01-23 00:05:19.130224: step: 2092/527, loss: 0.18021021783351898 2023-01-23 00:05:20.250321: step: 2096/527, loss: 0.09722509235143661 2023-01-23 00:05:21.374248: step: 2100/527, loss: 0.07278265804052353 2023-01-23 00:05:22.485549: step: 2104/527, loss: 1.0719960927963257 2023-01-23 00:05:23.624912: step: 2108/527, loss: 0.19550323486328125 ================================================== Loss: 0.449 -------------------- Dev: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Test: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Chinese: {'event': {'p': 0.5897435897435898, 'r': 0.8518518518518519, 'f1': 0.6969696969696971}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Korean: {'event': {'p': 0.7297297297297297, 'r': 0.42857142857142855, 'f1': 0.54}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.5897435897435898, 'r': 0.8518518518518519, 'f1': 0.6969696969696971}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.623059866962306, 'r': 0.748335552596538, 'f1': 0.6799758015728978}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Eng Test for Korean: {'event': {'p': 0.6303294573643411, 'r': 0.7434285714285714, 'f1': 0.6822233875196644}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Korean: {'event': {'p': 0.725, 'r': 0.4603174603174603, 'f1': 0.5631067961165048}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:06:20.443548: step: 4/527, loss: 0.10690689086914062 2023-01-23 00:06:21.602038: step: 8/527, loss: 0.26229238510131836 2023-01-23 00:06:22.722288: step: 12/527, loss: 0.18392285704612732 2023-01-23 00:06:23.860462: step: 16/527, loss: 0.1851082742214203 2023-01-23 00:06:24.994947: step: 20/527, loss: 0.9686121940612793 2023-01-23 00:06:26.131320: step: 24/527, loss: 0.21167385578155518 2023-01-23 00:06:27.263548: step: 28/527, loss: 0.6079787611961365 2023-01-23 00:06:28.387273: step: 32/527, loss: 0.1102290153503418 2023-01-23 00:06:29.508142: step: 36/527, loss: 0.10351839661598206 2023-01-23 00:06:30.629330: step: 40/527, loss: 0.07550635933876038 2023-01-23 00:06:31.741919: step: 44/527, loss: 0.40787068009376526 2023-01-23 00:06:32.841008: step: 48/527, loss: 0.01969430409371853 2023-01-23 00:06:33.999317: step: 52/527, loss: 0.10179881751537323 2023-01-23 00:06:35.132487: step: 56/527, loss: 1.273101568222046 2023-01-23 00:06:36.243423: step: 60/527, loss: 0.14134541153907776 2023-01-23 00:06:37.352596: step: 64/527, loss: 0.33622679114341736 2023-01-23 00:06:38.496060: step: 68/527, loss: 0.48188328742980957 2023-01-23 00:06:39.642232: step: 72/527, loss: 1.5705112218856812 2023-01-23 00:06:40.732725: step: 76/527, loss: 0.11758680641651154 2023-01-23 00:06:41.861104: step: 80/527, loss: 0.09144806861877441 2023-01-23 00:06:42.971834: step: 84/527, loss: 0.4159158766269684 2023-01-23 00:06:44.068769: step: 88/527, loss: 0.1799517273902893 2023-01-23 00:06:45.179932: step: 92/527, loss: 0.30521997809410095 2023-01-23 00:06:46.293965: step: 96/527, loss: 0.0161711685359478 2023-01-23 00:06:47.420129: step: 100/527, loss: 0.21460795402526855 2023-01-23 00:06:48.521295: step: 104/527, loss: 0.6638267040252686 2023-01-23 00:06:49.624981: step: 108/527, loss: 0.612761378288269 2023-01-23 00:06:50.737149: step: 112/527, loss: 0.18181133270263672 2023-01-23 00:06:51.825663: step: 116/527, loss: 0.2613412141799927 2023-01-23 00:06:52.950679: step: 120/527, loss: 0.2105141133069992 2023-01-23 00:06:54.076675: step: 124/527, loss: 0.13384270668029785 2023-01-23 00:06:55.194849: step: 128/527, loss: 0.22524939477443695 2023-01-23 00:06:56.337688: step: 132/527, loss: 0.2948891818523407 2023-01-23 00:06:57.432547: step: 136/527, loss: 0.27858057618141174 2023-01-23 00:06:58.516280: step: 140/527, loss: 0.30813902616500854 2023-01-23 00:06:59.625235: step: 144/527, loss: 0.9296542406082153 2023-01-23 00:07:00.775232: step: 148/527, loss: 0.1598132699728012 2023-01-23 00:07:01.909378: step: 152/527, loss: 0.08925390243530273 2023-01-23 00:07:02.994231: step: 156/527, loss: 0.11261416226625443 2023-01-23 00:07:04.120604: step: 160/527, loss: 0.21356457471847534 2023-01-23 00:07:05.237038: step: 164/527, loss: 0.10367031395435333 2023-01-23 00:07:06.352398: step: 168/527, loss: 0.1113584041595459 2023-01-23 00:07:07.459294: step: 172/527, loss: 5.876473426818848 2023-01-23 00:07:08.566797: step: 176/527, loss: 0.14457111060619354 2023-01-23 00:07:09.683734: step: 180/527, loss: 0.2923074960708618 2023-01-23 00:07:10.849934: step: 184/527, loss: 0.12595386803150177 2023-01-23 00:07:11.964866: step: 188/527, loss: 0.432263046503067 2023-01-23 00:07:13.095665: step: 192/527, loss: 0.04789486154913902 2023-01-23 00:07:14.210078: step: 196/527, loss: 0.27775058150291443 2023-01-23 00:07:15.311900: step: 200/527, loss: 0.1511821299791336 2023-01-23 00:07:16.415163: step: 204/527, loss: 0.07255931198596954 2023-01-23 00:07:17.548816: step: 208/527, loss: 0.5041448473930359 2023-01-23 00:07:18.681600: step: 212/527, loss: 0.6822780966758728 2023-01-23 00:07:19.782360: step: 216/527, loss: 0.23523148894309998 2023-01-23 00:07:20.911912: step: 220/527, loss: 6.788954257965088 2023-01-23 00:07:22.048269: step: 224/527, loss: 0.18758010864257812 2023-01-23 00:07:23.208396: step: 228/527, loss: 0.21922169625759125 2023-01-23 00:07:24.337682: step: 232/527, loss: 0.15649166703224182 2023-01-23 00:07:25.441308: step: 236/527, loss: 0.09689555317163467 2023-01-23 00:07:26.569520: step: 240/527, loss: 0.12026543915271759 2023-01-23 00:07:27.678523: step: 244/527, loss: 0.12343807518482208 2023-01-23 00:07:28.783315: step: 248/527, loss: 0.41481637954711914 2023-01-23 00:07:29.917454: step: 252/527, loss: 0.22074469923973083 2023-01-23 00:07:31.090625: step: 256/527, loss: 0.3606138229370117 2023-01-23 00:07:32.248908: step: 260/527, loss: 0.3048554062843323 2023-01-23 00:07:33.339098: step: 264/527, loss: 0.3297635018825531 2023-01-23 00:07:34.765383: step: 268/527, loss: 0.07887134701013565 2023-01-23 00:07:35.898546: step: 272/527, loss: 0.1862729787826538 2023-01-23 00:07:37.008383: step: 276/527, loss: 0.07785310596227646 2023-01-23 00:07:38.112077: step: 280/527, loss: 0.16687121987342834 2023-01-23 00:07:39.245110: step: 284/527, loss: 0.7596178650856018 2023-01-23 00:07:40.369847: step: 288/527, loss: 0.12910166382789612 2023-01-23 00:07:41.496747: step: 292/527, loss: 0.21187114715576172 2023-01-23 00:07:42.638100: step: 296/527, loss: 0.5705634355545044 2023-01-23 00:07:43.754531: step: 300/527, loss: 0.7281942367553711 2023-01-23 00:07:44.866300: step: 304/527, loss: 0.20015564560890198 2023-01-23 00:07:45.990120: step: 308/527, loss: 6.149100303649902 2023-01-23 00:07:47.109446: step: 312/527, loss: 0.4192318022251129 2023-01-23 00:07:48.249514: step: 316/527, loss: 0.18486247956752777 2023-01-23 00:07:49.369953: step: 320/527, loss: 0.1175541952252388 2023-01-23 00:07:50.485494: step: 324/527, loss: 0.16281580924987793 2023-01-23 00:07:51.603062: step: 328/527, loss: 0.3077685832977295 2023-01-23 00:07:52.718518: step: 332/527, loss: 0.0775519385933876 2023-01-23 00:07:53.826569: step: 336/527, loss: 0.17245282232761383 2023-01-23 00:07:54.931182: step: 340/527, loss: 0.08445768058300018 2023-01-23 00:07:56.071451: step: 344/527, loss: 0.10412216186523438 2023-01-23 00:07:57.176056: step: 348/527, loss: 0.1898246705532074 2023-01-23 00:07:58.296953: step: 352/527, loss: 0.14269724488258362 2023-01-23 00:07:59.449878: step: 356/527, loss: 0.2525274455547333 2023-01-23 00:08:00.575041: step: 360/527, loss: 0.16955356299877167 2023-01-23 00:08:01.678080: step: 364/527, loss: 0.0355035774409771 2023-01-23 00:08:02.834152: step: 368/527, loss: 0.08516263961791992 2023-01-23 00:08:03.932548: step: 372/527, loss: 0.10598832368850708 2023-01-23 00:08:05.039063: step: 376/527, loss: 0.2322016805410385 2023-01-23 00:08:06.163813: step: 380/527, loss: 1.0321524143218994 2023-01-23 00:08:07.271528: step: 384/527, loss: 1.0343544483184814 2023-01-23 00:08:08.397882: step: 388/527, loss: 0.11659832298755646 2023-01-23 00:08:09.541576: step: 392/527, loss: 0.7954214811325073 2023-01-23 00:08:10.658433: step: 396/527, loss: 0.3000028133392334 2023-01-23 00:08:11.796240: step: 400/527, loss: 0.4770169258117676 2023-01-23 00:08:12.905288: step: 404/527, loss: 0.05647563934326172 2023-01-23 00:08:14.034463: step: 408/527, loss: 0.18130803108215332 2023-01-23 00:08:15.127379: step: 412/527, loss: 0.08880696445703506 2023-01-23 00:08:16.267422: step: 416/527, loss: 0.05320234224200249 2023-01-23 00:08:17.369860: step: 420/527, loss: 0.11156439781188965 2023-01-23 00:08:18.488903: step: 424/527, loss: 1.1430052518844604 2023-01-23 00:08:19.607270: step: 428/527, loss: 0.8291699290275574 2023-01-23 00:08:20.725913: step: 432/527, loss: 0.3138306140899658 2023-01-23 00:08:21.798875: step: 436/527, loss: 0.11644463241100311 2023-01-23 00:08:22.928326: step: 440/527, loss: 0.012163449078798294 2023-01-23 00:08:24.054896: step: 444/527, loss: 0.09956265240907669 2023-01-23 00:08:25.180277: step: 448/527, loss: 0.482940673828125 2023-01-23 00:08:26.290705: step: 452/527, loss: 0.020507145673036575 2023-01-23 00:08:27.614650: step: 456/527, loss: 0.23200541734695435 2023-01-23 00:08:29.056269: step: 460/527, loss: 0.6038606762886047 2023-01-23 00:08:30.165370: step: 464/527, loss: 0.06216559186577797 2023-01-23 00:08:31.311940: step: 468/527, loss: 0.2516061067581177 2023-01-23 00:08:32.407428: step: 472/527, loss: 0.7467668056488037 2023-01-23 00:08:33.516598: step: 476/527, loss: 0.04760603979229927 2023-01-23 00:08:34.627505: step: 480/527, loss: 0.2646981477737427 2023-01-23 00:08:35.794275: step: 484/527, loss: 0.1797521561384201 2023-01-23 00:08:36.934301: step: 488/527, loss: 0.04645397514104843 2023-01-23 00:08:38.023318: step: 492/527, loss: 0.060700275003910065 2023-01-23 00:08:39.137998: step: 496/527, loss: 0.22435550391674042 2023-01-23 00:08:40.247624: step: 500/527, loss: 0.06895676255226135 2023-01-23 00:08:41.377920: step: 504/527, loss: 0.2748515009880066 2023-01-23 00:08:42.469310: step: 508/527, loss: 0.21761950850486755 2023-01-23 00:08:43.577871: step: 512/527, loss: 0.2205769568681717 2023-01-23 00:08:44.677140: step: 516/527, loss: 0.14005526900291443 2023-01-23 00:08:45.754816: step: 520/527, loss: 0.2663953900337219 2023-01-23 00:08:46.893642: step: 524/527, loss: 0.6959317922592163 2023-01-23 00:08:48.006847: step: 528/527, loss: 0.07471341639757156 2023-01-23 00:08:49.140454: step: 532/527, loss: 0.4083843231201172 2023-01-23 00:08:50.249389: step: 536/527, loss: 0.9699186086654663 2023-01-23 00:08:51.361327: step: 540/527, loss: 0.2076442837715149 2023-01-23 00:08:52.478878: step: 544/527, loss: 0.7029988765716553 2023-01-23 00:08:53.584874: step: 548/527, loss: 0.08358597755432129 2023-01-23 00:08:54.718254: step: 552/527, loss: 0.42553555965423584 2023-01-23 00:08:55.829111: step: 556/527, loss: 0.22079545259475708 2023-01-23 00:08:56.940115: step: 560/527, loss: 0.06804303824901581 2023-01-23 00:08:58.092944: step: 564/527, loss: 0.24682527780532837 2023-01-23 00:08:59.230924: step: 568/527, loss: 0.16522203385829926 2023-01-23 00:09:00.362934: step: 572/527, loss: 0.35852736234664917 2023-01-23 00:09:01.526361: step: 576/527, loss: 1.2968554496765137 2023-01-23 00:09:02.640697: step: 580/527, loss: 0.7931557297706604 2023-01-23 00:09:03.725248: step: 584/527, loss: 0.16539031267166138 2023-01-23 00:09:04.883904: step: 588/527, loss: 0.4904360771179199 2023-01-23 00:09:06.024785: step: 592/527, loss: 0.03752746805548668 2023-01-23 00:09:07.160136: step: 596/527, loss: 0.6924483180046082 2023-01-23 00:09:08.294907: step: 600/527, loss: 0.6220345497131348 2023-01-23 00:09:09.397868: step: 604/527, loss: 0.8555251955986023 2023-01-23 00:09:10.517571: step: 608/527, loss: 0.4294985830783844 2023-01-23 00:09:11.624641: step: 612/527, loss: 0.016052579507231712 2023-01-23 00:09:12.731117: step: 616/527, loss: 0.1309412121772766 2023-01-23 00:09:13.858276: step: 620/527, loss: 0.045116521418094635 2023-01-23 00:09:14.966597: step: 624/527, loss: 0.2054397612810135 2023-01-23 00:09:16.093920: step: 628/527, loss: 0.15325704216957092 2023-01-23 00:09:17.228054: step: 632/527, loss: 0.20916244387626648 2023-01-23 00:09:18.359576: step: 636/527, loss: 0.023050928488373756 2023-01-23 00:09:19.487141: step: 640/527, loss: 0.5019675493240356 2023-01-23 00:09:20.603045: step: 644/527, loss: 0.07485733181238174 2023-01-23 00:09:21.696472: step: 648/527, loss: 0.2559559941291809 2023-01-23 00:09:22.796741: step: 652/527, loss: 0.6436958909034729 2023-01-23 00:09:23.909309: step: 656/527, loss: 0.6133899688720703 2023-01-23 00:09:24.994906: step: 660/527, loss: 0.903583824634552 2023-01-23 00:09:26.127800: step: 664/527, loss: 0.06538905948400497 2023-01-23 00:09:27.222767: step: 668/527, loss: 0.0531894713640213 2023-01-23 00:09:28.352556: step: 672/527, loss: 0.15315064787864685 2023-01-23 00:09:29.490711: step: 676/527, loss: 0.4527393579483032 2023-01-23 00:09:30.575400: step: 680/527, loss: 0.06289754062891006 2023-01-23 00:09:31.704858: step: 684/527, loss: 0.2575264573097229 2023-01-23 00:09:32.820644: step: 688/527, loss: 0.013828087598085403 2023-01-23 00:09:33.918983: step: 692/527, loss: 0.17731815576553345 2023-01-23 00:09:35.010053: step: 696/527, loss: 0.4480947256088257 2023-01-23 00:09:36.125126: step: 700/527, loss: 1.1558541059494019 2023-01-23 00:09:37.263232: step: 704/527, loss: 0.03804426267743111 2023-01-23 00:09:38.406162: step: 708/527, loss: 0.16213403642177582 2023-01-23 00:09:39.563196: step: 712/527, loss: 1.238487958908081 2023-01-23 00:09:40.720107: step: 716/527, loss: 0.18230657279491425 2023-01-23 00:09:41.863800: step: 720/527, loss: 0.5329722166061401 2023-01-23 00:09:42.934503: step: 724/527, loss: 0.14703664183616638 2023-01-23 00:09:44.041312: step: 728/527, loss: 0.09924321621656418 2023-01-23 00:09:45.147445: step: 732/527, loss: 0.20120000839233398 2023-01-23 00:09:46.229977: step: 736/527, loss: 0.3734327554702759 2023-01-23 00:09:47.348460: step: 740/527, loss: 0.21190989017486572 2023-01-23 00:09:48.431824: step: 744/527, loss: 0.5763669013977051 2023-01-23 00:09:49.557372: step: 748/527, loss: 0.1756018102169037 2023-01-23 00:09:50.692255: step: 752/527, loss: 0.8739427328109741 2023-01-23 00:09:51.812922: step: 756/527, loss: 0.6947381496429443 2023-01-23 00:09:52.906220: step: 760/527, loss: 0.13500681519508362 2023-01-23 00:09:54.018020: step: 764/527, loss: 0.03397350385785103 2023-01-23 00:09:55.107213: step: 768/527, loss: 0.050150156021118164 2023-01-23 00:09:56.240164: step: 772/527, loss: 0.07563390582799911 2023-01-23 00:09:57.341087: step: 776/527, loss: 0.243687242269516 2023-01-23 00:09:58.445440: step: 780/527, loss: 0.05087833106517792 2023-01-23 00:09:59.566867: step: 784/527, loss: 0.07746048271656036 2023-01-23 00:10:00.686696: step: 788/527, loss: 0.1341322511434555 2023-01-23 00:10:01.805341: step: 792/527, loss: 0.06158266216516495 2023-01-23 00:10:02.904004: step: 796/527, loss: 0.06087656319141388 2023-01-23 00:10:04.026725: step: 800/527, loss: 0.2464689314365387 2023-01-23 00:10:05.144542: step: 804/527, loss: 0.25684911012649536 2023-01-23 00:10:06.233003: step: 808/527, loss: 0.13031105697155 2023-01-23 00:10:07.349369: step: 812/527, loss: 0.17095699906349182 2023-01-23 00:10:08.456240: step: 816/527, loss: 0.12209143489599228 2023-01-23 00:10:09.568766: step: 820/527, loss: 0.2230725884437561 2023-01-23 00:10:10.732744: step: 824/527, loss: 0.5473877787590027 2023-01-23 00:10:11.853073: step: 828/527, loss: 0.11638985574245453 2023-01-23 00:10:12.970860: step: 832/527, loss: 0.6785234808921814 2023-01-23 00:10:14.073633: step: 836/527, loss: 0.2687937021255493 2023-01-23 00:10:15.189488: step: 840/527, loss: 0.29614678025245667 2023-01-23 00:10:16.287899: step: 844/527, loss: 0.2697829306125641 2023-01-23 00:10:17.410640: step: 848/527, loss: 0.03577737882733345 2023-01-23 00:10:18.530468: step: 852/527, loss: 0.6415659189224243 2023-01-23 00:10:19.618626: step: 856/527, loss: 0.6541442275047302 2023-01-23 00:10:20.730011: step: 860/527, loss: 0.19686970114707947 2023-01-23 00:10:21.808406: step: 864/527, loss: 0.6205440163612366 2023-01-23 00:10:22.924007: step: 868/527, loss: 0.724902868270874 2023-01-23 00:10:24.046711: step: 872/527, loss: 0.43067342042922974 2023-01-23 00:10:25.139401: step: 876/527, loss: 0.0367342010140419 2023-01-23 00:10:26.272676: step: 880/527, loss: 0.2426941990852356 2023-01-23 00:10:27.396886: step: 884/527, loss: 0.44762009382247925 2023-01-23 00:10:28.526347: step: 888/527, loss: 0.1117599755525589 2023-01-23 00:10:29.652616: step: 892/527, loss: 0.043029118329286575 2023-01-23 00:10:30.776041: step: 896/527, loss: 0.09100999683141708 2023-01-23 00:10:31.871741: step: 900/527, loss: 0.105923131108284 2023-01-23 00:10:32.976722: step: 904/527, loss: 0.12159473448991776 2023-01-23 00:10:34.077074: step: 908/527, loss: 0.06646624207496643 2023-01-23 00:10:35.162168: step: 912/527, loss: 0.15469437837600708 2023-01-23 00:10:36.297064: step: 916/527, loss: 0.3962244391441345 2023-01-23 00:10:37.451299: step: 920/527, loss: 0.15693531930446625 2023-01-23 00:10:38.543170: step: 924/527, loss: 0.3318410813808441 2023-01-23 00:10:39.669223: step: 928/527, loss: 0.1302165985107422 2023-01-23 00:10:40.765282: step: 932/527, loss: 0.09787073731422424 2023-01-23 00:10:41.880899: step: 936/527, loss: 5.116474628448486 2023-01-23 00:10:43.021348: step: 940/527, loss: 0.10111570358276367 2023-01-23 00:10:44.137684: step: 944/527, loss: 0.7220234870910645 2023-01-23 00:10:45.255751: step: 948/527, loss: 0.3562307357788086 2023-01-23 00:10:46.371627: step: 952/527, loss: 0.441702663898468 2023-01-23 00:10:47.471487: step: 956/527, loss: 0.09431767463684082 2023-01-23 00:10:48.589327: step: 960/527, loss: 0.01877765730023384 2023-01-23 00:10:49.709889: step: 964/527, loss: 0.24384915828704834 2023-01-23 00:10:50.834718: step: 968/527, loss: 0.11317439377307892 2023-01-23 00:10:51.972671: step: 972/527, loss: 0.1555279642343521 2023-01-23 00:10:53.076562: step: 976/527, loss: 0.1521296501159668 2023-01-23 00:10:54.208614: step: 980/527, loss: 0.26489123702049255 2023-01-23 00:10:55.315298: step: 984/527, loss: 0.02657175064086914 2023-01-23 00:10:56.441043: step: 988/527, loss: 0.9635842442512512 2023-01-23 00:10:57.515052: step: 992/527, loss: 0.05721640586853027 2023-01-23 00:10:58.623662: step: 996/527, loss: 0.5187878608703613 2023-01-23 00:10:59.719078: step: 1000/527, loss: 0.17131757736206055 2023-01-23 00:11:00.875373: step: 1004/527, loss: 1.59785795211792 2023-01-23 00:11:01.952491: step: 1008/527, loss: 0.030515337362885475 2023-01-23 00:11:03.110142: step: 1012/527, loss: 0.6989672183990479 2023-01-23 00:11:04.241276: step: 1016/527, loss: 0.09899745136499405 2023-01-23 00:11:05.330500: step: 1020/527, loss: 0.601532518863678 2023-01-23 00:11:06.475992: step: 1024/527, loss: 1.5280327796936035 2023-01-23 00:11:07.570783: step: 1028/527, loss: 0.25149011611938477 2023-01-23 00:11:08.675342: step: 1032/527, loss: 0.03057580068707466 2023-01-23 00:11:09.803556: step: 1036/527, loss: 0.18764066696166992 2023-01-23 00:11:10.933469: step: 1040/527, loss: 0.14220742881298065 2023-01-23 00:11:12.041260: step: 1044/527, loss: 0.4160740375518799 2023-01-23 00:11:13.166100: step: 1048/527, loss: 0.14959710836410522 2023-01-23 00:11:14.265704: step: 1052/527, loss: 0.08783617615699768 2023-01-23 00:11:15.385331: step: 1056/527, loss: 0.20530100166797638 2023-01-23 00:11:16.504614: step: 1060/527, loss: 1.0951528549194336 2023-01-23 00:11:17.608422: step: 1064/527, loss: 0.6230490803718567 2023-01-23 00:11:18.727805: step: 1068/527, loss: 0.047351837158203125 2023-01-23 00:11:19.839028: step: 1072/527, loss: 0.30140554904937744 2023-01-23 00:11:20.982689: step: 1076/527, loss: 0.17081165313720703 2023-01-23 00:11:22.097195: step: 1080/527, loss: 0.11976084858179092 2023-01-23 00:11:23.268423: step: 1084/527, loss: 0.015096187591552734 2023-01-23 00:11:24.387384: step: 1088/527, loss: 0.25514861941337585 2023-01-23 00:11:25.503403: step: 1092/527, loss: 0.08299532532691956 2023-01-23 00:11:26.607796: step: 1096/527, loss: 0.05821748077869415 2023-01-23 00:11:27.705540: step: 1100/527, loss: 0.07984709739685059 2023-01-23 00:11:28.836331: step: 1104/527, loss: 0.03862800449132919 2023-01-23 00:11:29.947647: step: 1108/527, loss: 0.17441534996032715 2023-01-23 00:11:31.025260: step: 1112/527, loss: 0.04745130613446236 2023-01-23 00:11:32.142943: step: 1116/527, loss: 0.5034268498420715 2023-01-23 00:11:33.260138: step: 1120/527, loss: 0.025683045387268066 2023-01-23 00:11:34.372999: step: 1124/527, loss: 0.6742849349975586 2023-01-23 00:11:35.490094: step: 1128/527, loss: 0.06588998436927795 2023-01-23 00:11:36.611653: step: 1132/527, loss: 0.1916070133447647 2023-01-23 00:11:37.720072: step: 1136/527, loss: 0.7642965316772461 2023-01-23 00:11:38.839257: step: 1140/527, loss: 0.12646256387233734 2023-01-23 00:11:39.978762: step: 1144/527, loss: 0.12535008788108826 2023-01-23 00:11:41.100304: step: 1148/527, loss: 0.6683266758918762 2023-01-23 00:11:42.188922: step: 1152/527, loss: 0.19703570008277893 2023-01-23 00:11:43.356454: step: 1156/527, loss: 0.20910978317260742 2023-01-23 00:11:44.457697: step: 1160/527, loss: 0.21245956420898438 2023-01-23 00:11:45.564314: step: 1164/527, loss: 0.06803891807794571 2023-01-23 00:11:46.681556: step: 1168/527, loss: 0.6156831979751587 2023-01-23 00:11:47.791531: step: 1172/527, loss: 0.02805934101343155 2023-01-23 00:11:48.903803: step: 1176/527, loss: 0.4477941393852234 2023-01-23 00:11:50.025295: step: 1180/527, loss: 0.20378351211547852 2023-01-23 00:11:51.154342: step: 1184/527, loss: 1.4286248683929443 2023-01-23 00:11:52.272017: step: 1188/527, loss: 0.1440032571554184 2023-01-23 00:11:53.392804: step: 1192/527, loss: 0.06988263130187988 2023-01-23 00:11:54.501251: step: 1196/527, loss: 0.10316705703735352 2023-01-23 00:11:55.631701: step: 1200/527, loss: 0.39985477924346924 2023-01-23 00:11:56.726739: step: 1204/527, loss: 0.19708210229873657 2023-01-23 00:11:57.864156: step: 1208/527, loss: 0.07931695878505707 2023-01-23 00:11:58.992344: step: 1212/527, loss: 0.19265621900558472 2023-01-23 00:12:00.112996: step: 1216/527, loss: 0.19477620720863342 2023-01-23 00:12:01.226168: step: 1220/527, loss: 0.14170321822166443 2023-01-23 00:12:02.367148: step: 1224/527, loss: 0.23198643326759338 2023-01-23 00:12:03.453924: step: 1228/527, loss: 0.08336324989795685 2023-01-23 00:12:04.565673: step: 1232/527, loss: 0.6265767216682434 2023-01-23 00:12:05.672154: step: 1236/527, loss: 0.06663751602172852 2023-01-23 00:12:06.817928: step: 1240/527, loss: 0.1593008041381836 2023-01-23 00:12:07.947145: step: 1244/527, loss: 0.5316864848136902 2023-01-23 00:12:09.133779: step: 1248/527, loss: 0.5278263092041016 2023-01-23 00:12:10.253124: step: 1252/527, loss: 0.04160184785723686 2023-01-23 00:12:11.381337: step: 1256/527, loss: 0.036322880536317825 2023-01-23 00:12:12.544932: step: 1260/527, loss: 0.6000651121139526 2023-01-23 00:12:13.652204: step: 1264/527, loss: 0.31250160932540894 2023-01-23 00:12:14.753977: step: 1268/527, loss: 0.17543601989746094 2023-01-23 00:12:15.855032: step: 1272/527, loss: 0.34024086594581604 2023-01-23 00:12:16.977696: step: 1276/527, loss: 0.2732410430908203 2023-01-23 00:12:18.120971: step: 1280/527, loss: 0.7535012364387512 2023-01-23 00:12:19.252567: step: 1284/527, loss: 0.13631367683410645 2023-01-23 00:12:20.354097: step: 1288/527, loss: 0.6097455620765686 2023-01-23 00:12:21.498768: step: 1292/527, loss: 0.6293331980705261 2023-01-23 00:12:22.620774: step: 1296/527, loss: 0.7128167152404785 2023-01-23 00:12:23.731615: step: 1300/527, loss: 0.13039374351501465 2023-01-23 00:12:24.858566: step: 1304/527, loss: 0.171274334192276 2023-01-23 00:12:26.000028: step: 1308/527, loss: 0.6098413467407227 2023-01-23 00:12:27.112596: step: 1312/527, loss: 0.6271671652793884 2023-01-23 00:12:28.233345: step: 1316/527, loss: 0.17098703980445862 2023-01-23 00:12:29.331496: step: 1320/527, loss: 0.04796471819281578 2023-01-23 00:12:30.442610: step: 1324/527, loss: 0.7401866912841797 2023-01-23 00:12:31.555023: step: 1328/527, loss: 0.3096674680709839 2023-01-23 00:12:32.650455: step: 1332/527, loss: 0.11526908725500107 2023-01-23 00:12:33.767784: step: 1336/527, loss: 5.885909080505371 2023-01-23 00:12:34.883822: step: 1340/527, loss: 0.10175705701112747 2023-01-23 00:12:35.988592: step: 1344/527, loss: 0.14329329133033752 2023-01-23 00:12:37.099489: step: 1348/527, loss: 0.12524500489234924 2023-01-23 00:12:38.202892: step: 1352/527, loss: 0.5256365537643433 2023-01-23 00:12:39.308038: step: 1356/527, loss: 0.14834603667259216 2023-01-23 00:12:40.440823: step: 1360/527, loss: 0.3145448565483093 2023-01-23 00:12:41.556764: step: 1364/527, loss: 0.3525451719760895 2023-01-23 00:12:42.657907: step: 1368/527, loss: 0.10944939404726028 2023-01-23 00:12:43.793938: step: 1372/527, loss: 0.019724082201719284 2023-01-23 00:12:44.932179: step: 1376/527, loss: 0.16537638008594513 2023-01-23 00:12:46.056918: step: 1380/527, loss: 0.5396057963371277 2023-01-23 00:12:47.138025: step: 1384/527, loss: 0.09598135948181152 2023-01-23 00:12:48.247413: step: 1388/527, loss: 0.9520591497421265 2023-01-23 00:12:49.337170: step: 1392/527, loss: 0.07424316555261612 2023-01-23 00:12:50.459677: step: 1396/527, loss: 0.07592492550611496 2023-01-23 00:12:51.553783: step: 1400/527, loss: 0.15157172083854675 2023-01-23 00:12:52.679852: step: 1404/527, loss: 0.09792271256446838 2023-01-23 00:12:53.798490: step: 1408/527, loss: 0.0961262658238411 2023-01-23 00:12:54.947896: step: 1412/527, loss: 1.803153395652771 2023-01-23 00:12:56.056733: step: 1416/527, loss: 0.1287805587053299 2023-01-23 00:12:57.188327: step: 1420/527, loss: 0.1581663191318512 2023-01-23 00:12:58.329165: step: 1424/527, loss: 0.21145859360694885 2023-01-23 00:12:59.431806: step: 1428/527, loss: 0.6952801942825317 2023-01-23 00:13:00.539600: step: 1432/527, loss: 0.08553238213062286 2023-01-23 00:13:01.683457: step: 1436/527, loss: 0.11029711365699768 2023-01-23 00:13:02.798778: step: 1440/527, loss: 0.14753007888793945 2023-01-23 00:13:03.894073: step: 1444/527, loss: 0.21220636367797852 2023-01-23 00:13:05.025955: step: 1448/527, loss: 0.14068298041820526 2023-01-23 00:13:06.151082: step: 1452/527, loss: 0.6115983128547668 2023-01-23 00:13:07.282211: step: 1456/527, loss: 0.4948551058769226 2023-01-23 00:13:08.408746: step: 1460/527, loss: 0.04553709179162979 2023-01-23 00:13:09.529430: step: 1464/527, loss: 0.04666939005255699 2023-01-23 00:13:10.648502: step: 1468/527, loss: 0.2741253972053528 2023-01-23 00:13:11.758901: step: 1472/527, loss: 0.09618870913982391 2023-01-23 00:13:12.868476: step: 1476/527, loss: 0.5667693018913269 2023-01-23 00:13:13.997637: step: 1480/527, loss: 0.38979265093803406 2023-01-23 00:13:15.111665: step: 1484/527, loss: 0.1593582183122635 2023-01-23 00:13:16.230722: step: 1488/527, loss: 0.11751909554004669 2023-01-23 00:13:17.358616: step: 1492/527, loss: 0.1928863227367401 2023-01-23 00:13:18.483951: step: 1496/527, loss: 0.8210095763206482 2023-01-23 00:13:19.588585: step: 1500/527, loss: 0.29117992520332336 2023-01-23 00:13:20.705581: step: 1504/527, loss: 0.08087539672851562 2023-01-23 00:13:21.797053: step: 1508/527, loss: 0.19064950942993164 2023-01-23 00:13:22.900207: step: 1512/527, loss: 0.15087157487869263 2023-01-23 00:13:24.033935: step: 1516/527, loss: 0.321188747882843 2023-01-23 00:13:25.148459: step: 1520/527, loss: 0.10306596755981445 2023-01-23 00:13:26.247455: step: 1524/527, loss: 0.20539608597755432 2023-01-23 00:13:27.354303: step: 1528/527, loss: 0.5874325633049011 2023-01-23 00:13:28.459225: step: 1532/527, loss: 0.08216361701488495 2023-01-23 00:13:29.568385: step: 1536/527, loss: 0.809807538986206 2023-01-23 00:13:30.697871: step: 1540/527, loss: 0.4458211064338684 2023-01-23 00:13:31.821126: step: 1544/527, loss: 0.17472851276397705 2023-01-23 00:13:32.926737: step: 1548/527, loss: 0.5652610659599304 2023-01-23 00:13:34.046465: step: 1552/527, loss: 0.027675582095980644 2023-01-23 00:13:35.201134: step: 1556/527, loss: 0.11164265125989914 2023-01-23 00:13:36.312512: step: 1560/527, loss: 0.3571251332759857 2023-01-23 00:13:37.445130: step: 1564/527, loss: 0.45429757237434387 2023-01-23 00:13:38.541689: step: 1568/527, loss: 1.0791453123092651 2023-01-23 00:13:39.662383: step: 1572/527, loss: 0.20986071228981018 2023-01-23 00:13:40.777437: step: 1576/527, loss: 0.33028706908226013 2023-01-23 00:13:41.883817: step: 1580/527, loss: 0.6444014310836792 2023-01-23 00:13:42.994488: step: 1584/527, loss: 0.05755796283483505 2023-01-23 00:13:44.131849: step: 1588/527, loss: 0.106346994638443 2023-01-23 00:13:45.239262: step: 1592/527, loss: 0.7262479662895203 2023-01-23 00:13:46.386982: step: 1596/527, loss: 0.5133785009384155 2023-01-23 00:13:47.499371: step: 1600/527, loss: 0.02716188319027424 2023-01-23 00:13:48.605800: step: 1604/527, loss: 1.0088305473327637 2023-01-23 00:13:49.728587: step: 1608/527, loss: 0.07211685180664062 2023-01-23 00:13:50.835449: step: 1612/527, loss: 0.689362645149231 2023-01-23 00:13:51.964821: step: 1616/527, loss: 0.7506176233291626 2023-01-23 00:13:53.100476: step: 1620/527, loss: 0.062085725367069244 2023-01-23 00:13:54.212019: step: 1624/527, loss: 0.4119747281074524 2023-01-23 00:13:55.330882: step: 1628/527, loss: 0.5336955189704895 2023-01-23 00:13:56.430996: step: 1632/527, loss: 0.06760063022375107 2023-01-23 00:13:57.529897: step: 1636/527, loss: 0.09099073708057404 2023-01-23 00:13:58.666065: step: 1640/527, loss: 0.5419539213180542 2023-01-23 00:13:59.787307: step: 1644/527, loss: 1.2363033294677734 2023-01-23 00:14:00.898718: step: 1648/527, loss: 0.13272853195667267 2023-01-23 00:14:02.018537: step: 1652/527, loss: 0.14073458313941956 2023-01-23 00:14:03.148640: step: 1656/527, loss: 0.1024385392665863 2023-01-23 00:14:04.293346: step: 1660/527, loss: 0.19618120789527893 2023-01-23 00:14:05.394479: step: 1664/527, loss: 0.3330685496330261 2023-01-23 00:14:06.513886: step: 1668/527, loss: 0.061608217656612396 2023-01-23 00:14:07.643549: step: 1672/527, loss: 0.5016913414001465 2023-01-23 00:14:08.759581: step: 1676/527, loss: 0.2912317216396332 2023-01-23 00:14:09.862727: step: 1680/527, loss: 0.054673001170158386 2023-01-23 00:14:10.964249: step: 1684/527, loss: 0.22520524263381958 2023-01-23 00:14:12.064536: step: 1688/527, loss: 0.24312329292297363 2023-01-23 00:14:13.180423: step: 1692/527, loss: 0.2241046130657196 2023-01-23 00:14:14.343614: step: 1696/527, loss: 0.2024269998073578 2023-01-23 00:14:15.478290: step: 1700/527, loss: 0.507845401763916 2023-01-23 00:14:16.579818: step: 1704/527, loss: 0.1972704976797104 2023-01-23 00:14:17.676505: step: 1708/527, loss: 0.9078596830368042 2023-01-23 00:14:18.790018: step: 1712/527, loss: 0.10781069099903107 2023-01-23 00:14:19.918872: step: 1716/527, loss: 0.3044893741607666 2023-01-23 00:14:21.035468: step: 1720/527, loss: 0.24299149215221405 2023-01-23 00:14:22.189785: step: 1724/527, loss: 1.3196039199829102 2023-01-23 00:14:23.304870: step: 1728/527, loss: 0.1933065503835678 2023-01-23 00:14:24.456393: step: 1732/527, loss: 0.2124234139919281 2023-01-23 00:14:25.553336: step: 1736/527, loss: 0.0949990302324295 2023-01-23 00:14:26.640362: step: 1740/527, loss: 1.154528260231018 2023-01-23 00:14:27.781860: step: 1744/527, loss: 3.1624863147735596 2023-01-23 00:14:28.941557: step: 1748/527, loss: 0.21432171761989594 2023-01-23 00:14:30.053072: step: 1752/527, loss: 1.3081138134002686 2023-01-23 00:14:31.183799: step: 1756/527, loss: 0.2940703332424164 2023-01-23 00:14:32.344525: step: 1760/527, loss: 0.513171374797821 2023-01-23 00:14:33.440961: step: 1764/527, loss: 0.029730796813964844 2023-01-23 00:14:34.563253: step: 1768/527, loss: 0.17151489853858948 2023-01-23 00:14:35.714578: step: 1772/527, loss: 0.1870718002319336 2023-01-23 00:14:36.817686: step: 1776/527, loss: 1.1802641153335571 2023-01-23 00:14:37.942919: step: 1780/527, loss: 0.036284636706113815 2023-01-23 00:14:39.063489: step: 1784/527, loss: 0.1877608299255371 2023-01-23 00:14:40.157658: step: 1788/527, loss: 0.16261368989944458 2023-01-23 00:14:41.244647: step: 1792/527, loss: 0.22908459603786469 2023-01-23 00:14:42.356252: step: 1796/527, loss: 0.23925809562206268 2023-01-23 00:14:43.482088: step: 1800/527, loss: 0.8778618574142456 2023-01-23 00:14:44.584930: step: 1804/527, loss: 0.5116405487060547 2023-01-23 00:14:45.720588: step: 1808/527, loss: 1.1821987628936768 2023-01-23 00:14:46.837830: step: 1812/527, loss: 0.6773964166641235 2023-01-23 00:14:47.933138: step: 1816/527, loss: 0.19796791672706604 2023-01-23 00:14:49.058569: step: 1820/527, loss: 0.5183804035186768 2023-01-23 00:14:50.169386: step: 1824/527, loss: 0.07474000006914139 2023-01-23 00:14:51.278227: step: 1828/527, loss: 0.06196761131286621 2023-01-23 00:14:52.383846: step: 1832/527, loss: 0.1263589859008789 2023-01-23 00:14:53.508311: step: 1836/527, loss: 0.1731864959001541 2023-01-23 00:14:54.593010: step: 1840/527, loss: 0.10313358157873154 2023-01-23 00:14:55.711252: step: 1844/527, loss: 0.6231164336204529 2023-01-23 00:14:56.810334: step: 1848/527, loss: 0.15423031151294708 2023-01-23 00:14:57.959695: step: 1852/527, loss: 0.07791309058666229 2023-01-23 00:14:59.072959: step: 1856/527, loss: 0.0723499283194542 2023-01-23 00:15:00.193410: step: 1860/527, loss: 0.010405349545180798 2023-01-23 00:15:01.312366: step: 1864/527, loss: 0.5426110029220581 2023-01-23 00:15:02.453740: step: 1868/527, loss: 2.9066708087921143 2023-01-23 00:15:03.603910: step: 1872/527, loss: 0.08476868271827698 2023-01-23 00:15:04.736498: step: 1876/527, loss: 0.16761358082294464 2023-01-23 00:15:05.825337: step: 1880/527, loss: 0.02445092238485813 2023-01-23 00:15:06.946648: step: 1884/527, loss: 0.07886552810668945 2023-01-23 00:15:08.060697: step: 1888/527, loss: 0.6830229759216309 2023-01-23 00:15:09.162826: step: 1892/527, loss: 0.21814775466918945 2023-01-23 00:15:10.266031: step: 1896/527, loss: 0.073696568608284 2023-01-23 00:15:11.387524: step: 1900/527, loss: 0.05319714546203613 2023-01-23 00:15:12.542114: step: 1904/527, loss: 0.04640341177582741 2023-01-23 00:15:13.650914: step: 1908/527, loss: 0.16652211546897888 2023-01-23 00:15:14.758458: step: 1912/527, loss: 0.1321442574262619 2023-01-23 00:15:15.847336: step: 1916/527, loss: 0.10971913486719131 2023-01-23 00:15:16.987680: step: 1920/527, loss: 0.14570856094360352 2023-01-23 00:15:18.096585: step: 1924/527, loss: 0.3235597610473633 2023-01-23 00:15:19.244221: step: 1928/527, loss: 0.17856568098068237 2023-01-23 00:15:20.371424: step: 1932/527, loss: 0.30888310074806213 2023-01-23 00:15:21.460752: step: 1936/527, loss: 0.7898846864700317 2023-01-23 00:15:22.575853: step: 1940/527, loss: 1.4510753154754639 2023-01-23 00:15:23.652210: step: 1944/527, loss: 0.09784574061632156 2023-01-23 00:15:24.771669: step: 1948/527, loss: 0.33908191323280334 2023-01-23 00:15:25.900548: step: 1952/527, loss: 0.14806756377220154 2023-01-23 00:15:27.035534: step: 1956/527, loss: 0.160518079996109 2023-01-23 00:15:28.154777: step: 1960/527, loss: 0.15122422575950623 2023-01-23 00:15:29.276166: step: 1964/527, loss: 0.25120019912719727 2023-01-23 00:15:30.416820: step: 1968/527, loss: 4.559635639190674 2023-01-23 00:15:31.530944: step: 1972/527, loss: 0.1597447395324707 2023-01-23 00:15:32.685067: step: 1976/527, loss: 0.5630172491073608 2023-01-23 00:15:33.813795: step: 1980/527, loss: 0.0700603500008583 2023-01-23 00:15:34.959803: step: 1984/527, loss: 0.23006907105445862 2023-01-23 00:15:36.058530: step: 1988/527, loss: 0.36249151825904846 2023-01-23 00:15:37.153699: step: 1992/527, loss: 0.05795612558722496 2023-01-23 00:15:38.268107: step: 1996/527, loss: 0.05022997781634331 2023-01-23 00:15:39.403996: step: 2000/527, loss: 0.1965283900499344 2023-01-23 00:15:40.512664: step: 2004/527, loss: 0.08582153916358948 2023-01-23 00:15:41.613579: step: 2008/527, loss: 0.08638457953929901 2023-01-23 00:15:42.723997: step: 2012/527, loss: 0.5372883677482605 2023-01-23 00:15:43.835840: step: 2016/527, loss: 0.07991671562194824 2023-01-23 00:15:44.931498: step: 2020/527, loss: 0.09569688141345978 2023-01-23 00:15:46.046805: step: 2024/527, loss: 0.3317447006702423 2023-01-23 00:15:47.143803: step: 2028/527, loss: 0.13375358283519745 2023-01-23 00:15:48.281053: step: 2032/527, loss: 0.3296220004558563 2023-01-23 00:15:49.377453: step: 2036/527, loss: 0.14134007692337036 2023-01-23 00:15:50.487429: step: 2040/527, loss: 0.8119940757751465 2023-01-23 00:15:51.627649: step: 2044/527, loss: 0.19644784927368164 2023-01-23 00:15:52.743541: step: 2048/527, loss: 0.09602288901805878 2023-01-23 00:15:53.843611: step: 2052/527, loss: 0.3183956742286682 2023-01-23 00:15:54.942203: step: 2056/527, loss: 0.202362060546875 2023-01-23 00:15:56.076723: step: 2060/527, loss: 0.24712371826171875 2023-01-23 00:15:57.182879: step: 2064/527, loss: 0.051843930035829544 2023-01-23 00:15:58.312052: step: 2068/527, loss: 0.106096550822258 2023-01-23 00:15:59.445628: step: 2072/527, loss: 0.4491764307022095 2023-01-23 00:16:00.553484: step: 2076/527, loss: 0.18740062415599823 2023-01-23 00:16:01.671350: step: 2080/527, loss: 0.5244362354278564 2023-01-23 00:16:02.789671: step: 2084/527, loss: 0.20706963539123535 2023-01-23 00:16:03.897656: step: 2088/527, loss: 0.09140148013830185 2023-01-23 00:16:05.002456: step: 2092/527, loss: 0.1178104430437088 2023-01-23 00:16:06.077606: step: 2096/527, loss: 0.13795657455921173 2023-01-23 00:16:07.194762: step: 2100/527, loss: 0.3109671473503113 2023-01-23 00:16:08.315265: step: 2104/527, loss: 0.10626889020204544 2023-01-23 00:16:09.427514: step: 2108/527, loss: 0.2091844081878662 ================================================== Loss: 0.377 -------------------- Dev: {'event': {'p': 0.5508637236084453, 'r': 0.7643142476697736, 'f1': 0.6402677077523703}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Test: {'event': {'p': 0.6072245084590764, 'r': 0.7588571428571429, 'f1': 0.6746253492506985}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Chinese: {'event': {'p': 0.5783132530120482, 'r': 0.8888888888888888, 'f1': 0.7007299270072992}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Korean: {'event': {'p': 0.6078431372549019, 'r': 0.49206349206349204, 'f1': 0.543859649122807}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Russian: {'event': {'p': 0.45, 'r': 0.5, 'f1': 0.4736842105263158}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Chinese: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Chinese: {'event': {'p': 0.5897435897435898, 'r': 0.8518518518518519, 'f1': 0.6969696969696971}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Eng Dev for Korean: {'event': {'p': 0.623059866962306, 'r': 0.748335552596538, 'f1': 0.6799758015728978}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Eng Test for Korean: {'event': {'p': 0.6303294573643411, 'r': 0.7434285714285714, 'f1': 0.6822233875196644}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Korean: {'event': {'p': 0.725, 'r': 0.4603174603174603, 'f1': 0.5631067961165048}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 5 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:16:51.312652: step: 4/527, loss: 0.022249173372983932 2023-01-23 00:16:52.451483: step: 8/527, loss: 0.052853774279356 2023-01-23 00:16:53.586941: step: 12/527, loss: 0.7081000804901123 2023-01-23 00:16:54.701474: step: 16/527, loss: 0.74346524477005 2023-01-23 00:16:55.812312: step: 20/527, loss: 0.14211177825927734 2023-01-23 00:16:56.922712: step: 24/527, loss: 0.2531282305717468 2023-01-23 00:16:58.057965: step: 28/527, loss: 0.30989572405815125 2023-01-23 00:16:59.156099: step: 32/527, loss: 0.5147432088851929 2023-01-23 00:17:00.262584: step: 36/527, loss: 0.3979882299900055 2023-01-23 00:17:01.386399: step: 40/527, loss: 0.4900001287460327 2023-01-23 00:17:02.531635: step: 44/527, loss: 0.3972986340522766 2023-01-23 00:17:03.665900: step: 48/527, loss: 0.18098661303520203 2023-01-23 00:17:04.772221: step: 52/527, loss: 0.2125433385372162 2023-01-23 00:17:05.881219: step: 56/527, loss: 0.1582840532064438 2023-01-23 00:17:07.044742: step: 60/527, loss: 0.08227815479040146 2023-01-23 00:17:08.165584: step: 64/527, loss: 0.2074366956949234 2023-01-23 00:17:09.260353: step: 68/527, loss: 0.8821884393692017 2023-01-23 00:17:10.407072: step: 72/527, loss: 0.4126943051815033 2023-01-23 00:17:11.527905: step: 76/527, loss: 0.1910543441772461 2023-01-23 00:17:12.667325: step: 80/527, loss: 0.07960166782140732 2023-01-23 00:17:13.810917: step: 84/527, loss: 1.8199048042297363 2023-01-23 00:17:14.929055: step: 88/527, loss: 0.19938337802886963 2023-01-23 00:17:16.063836: step: 92/527, loss: 0.27888283133506775 2023-01-23 00:17:17.185987: step: 96/527, loss: 0.024596024304628372 2023-01-23 00:17:18.310475: step: 100/527, loss: 0.6486958265304565 2023-01-23 00:17:19.455161: step: 104/527, loss: 0.5802132487297058 2023-01-23 00:17:20.567639: step: 108/527, loss: 0.06379161030054092 2023-01-23 00:17:21.665105: step: 112/527, loss: 0.12056417763233185 2023-01-23 00:17:22.799913: step: 116/527, loss: 0.03934536129236221 2023-01-23 00:17:23.894632: step: 120/527, loss: 0.12554463744163513 2023-01-23 00:17:25.015539: step: 124/527, loss: 0.36919790506362915 2023-01-23 00:17:26.130542: step: 128/527, loss: 0.2116708904504776 2023-01-23 00:17:27.247063: step: 132/527, loss: 0.07913322746753693 2023-01-23 00:17:28.348952: step: 136/527, loss: 0.03686084970831871 2023-01-23 00:17:29.480776: step: 140/527, loss: 0.06286859512329102 2023-01-23 00:17:30.640213: step: 144/527, loss: 0.16066904366016388 2023-01-23 00:17:31.733275: step: 148/527, loss: 0.013411570340394974 2023-01-23 00:17:32.849849: step: 152/527, loss: 0.05042882263660431 2023-01-23 00:17:33.988547: step: 156/527, loss: 0.12395496666431427 2023-01-23 00:17:35.097349: step: 160/527, loss: 0.04236145317554474 2023-01-23 00:17:36.226756: step: 164/527, loss: 0.2898007929325104 2023-01-23 00:17:37.380658: step: 168/527, loss: 0.12594422698020935 2023-01-23 00:17:38.475082: step: 172/527, loss: 0.06793558597564697 2023-01-23 00:17:39.582831: step: 176/527, loss: 0.15639400482177734 2023-01-23 00:17:40.681149: step: 180/527, loss: 0.0663963332772255 2023-01-23 00:17:41.800140: step: 184/527, loss: 0.13980217278003693 2023-01-23 00:17:42.928486: step: 188/527, loss: 0.07688732445240021 2023-01-23 00:17:44.043460: step: 192/527, loss: 0.33318862318992615 2023-01-23 00:17:45.180177: step: 196/527, loss: 0.5603718161582947 2023-01-23 00:17:46.299707: step: 200/527, loss: 0.11913757771253586 2023-01-23 00:17:47.409913: step: 204/527, loss: 0.22479209303855896 2023-01-23 00:17:48.528655: step: 208/527, loss: 0.04867896810173988 2023-01-23 00:17:49.654907: step: 212/527, loss: 0.2195838838815689 2023-01-23 00:17:50.781625: step: 216/527, loss: 0.29979240894317627 2023-01-23 00:17:51.900081: step: 220/527, loss: 0.07239756733179092 2023-01-23 00:17:53.027749: step: 224/527, loss: 0.13301487267017365 2023-01-23 00:17:54.165902: step: 228/527, loss: 0.05738077312707901 2023-01-23 00:17:55.281897: step: 232/527, loss: 0.17286130785942078 2023-01-23 00:17:56.418286: step: 236/527, loss: 0.0887751579284668 2023-01-23 00:17:57.527181: step: 240/527, loss: 0.15568368136882782 2023-01-23 00:17:58.686800: step: 244/527, loss: 0.13179701566696167 2023-01-23 00:17:59.791641: step: 248/527, loss: 0.253068208694458 2023-01-23 00:18:00.900899: step: 252/527, loss: 0.01938309706747532 2023-01-23 00:18:01.994674: step: 256/527, loss: 0.3003976047039032 2023-01-23 00:18:03.125379: step: 260/527, loss: 0.07840452343225479 2023-01-23 00:18:04.231724: step: 264/527, loss: 0.22693189978599548 2023-01-23 00:18:05.359989: step: 268/527, loss: 0.13255755603313446 2023-01-23 00:18:06.467964: step: 272/527, loss: 0.08591623604297638 2023-01-23 00:18:07.607837: step: 276/527, loss: 0.1224118247628212 2023-01-23 00:18:08.704756: step: 280/527, loss: 0.2676970362663269 2023-01-23 00:18:09.831603: step: 284/527, loss: 0.8498929142951965 2023-01-23 00:18:10.938332: step: 288/527, loss: 0.17844171822071075 2023-01-23 00:18:12.049346: step: 292/527, loss: 0.17654944956302643 2023-01-23 00:18:13.199572: step: 296/527, loss: 0.27493420243263245 2023-01-23 00:18:14.358337: step: 300/527, loss: 0.5626609921455383 2023-01-23 00:18:15.461206: step: 304/527, loss: 0.10025587677955627 2023-01-23 00:18:16.597515: step: 308/527, loss: 0.2725190222263336 2023-01-23 00:18:17.720360: step: 312/527, loss: 0.13540850579738617 2023-01-23 00:18:18.867204: step: 316/527, loss: 0.3991667926311493 2023-01-23 00:18:19.981960: step: 320/527, loss: 0.2279730886220932 2023-01-23 00:18:21.104099: step: 324/527, loss: 0.10485020279884338 2023-01-23 00:18:22.225774: step: 328/527, loss: 0.2993044853210449 2023-01-23 00:18:23.340799: step: 332/527, loss: 0.44559991359710693 2023-01-23 00:18:24.427734: step: 336/527, loss: 0.8305598497390747 2023-01-23 00:18:25.553541: step: 340/527, loss: 0.024629592895507812 2023-01-23 00:18:26.646378: step: 344/527, loss: 0.09807997196912766 2023-01-23 00:18:27.765898: step: 348/527, loss: 0.10104046016931534 2023-01-23 00:18:28.871465: step: 352/527, loss: 0.47202205657958984 2023-01-23 00:18:29.997958: step: 356/527, loss: 0.08598623424768448 2023-01-23 00:18:31.140250: step: 360/527, loss: 0.05252885818481445 2023-01-23 00:18:32.283109: step: 364/527, loss: 0.14439114928245544 2023-01-23 00:18:33.419800: step: 368/527, loss: 0.23421978950500488 2023-01-23 00:18:34.563030: step: 372/527, loss: 0.02374706231057644 2023-01-23 00:18:35.682093: step: 376/527, loss: 0.38174647092819214 2023-01-23 00:18:36.790878: step: 380/527, loss: 0.12252788245677948 2023-01-23 00:18:37.910334: step: 384/527, loss: 0.0832308754324913 2023-01-23 00:18:39.022461: step: 388/527, loss: 0.4216980040073395 2023-01-23 00:18:40.160509: step: 392/527, loss: 0.10022832453250885 2023-01-23 00:18:41.262438: step: 396/527, loss: 0.4777892231941223 2023-01-23 00:18:42.374453: step: 400/527, loss: 0.14322252571582794 2023-01-23 00:18:43.491792: step: 404/527, loss: 0.36082419753074646 2023-01-23 00:18:44.612184: step: 408/527, loss: 0.11528797447681427 2023-01-23 00:18:45.710212: step: 412/527, loss: 0.09323558956384659 2023-01-23 00:18:46.828824: step: 416/527, loss: 0.2644132673740387 2023-01-23 00:18:47.937906: step: 420/527, loss: 0.14598503708839417 2023-01-23 00:18:49.027613: step: 424/527, loss: 0.05398502200841904 2023-01-23 00:18:50.119509: step: 428/527, loss: 0.02928008884191513 2023-01-23 00:18:51.233956: step: 432/527, loss: 0.1828937530517578 2023-01-23 00:18:52.368942: step: 436/527, loss: 0.06944485008716583 2023-01-23 00:18:53.498768: step: 440/527, loss: 0.06689062714576721 2023-01-23 00:18:54.616698: step: 444/527, loss: 0.3215312957763672 2023-01-23 00:18:55.738905: step: 448/527, loss: 1.0646305084228516 2023-01-23 00:18:56.853701: step: 452/527, loss: 0.09793760627508163 2023-01-23 00:18:57.961171: step: 456/527, loss: 0.037320468574762344 2023-01-23 00:18:59.087336: step: 460/527, loss: 0.03612460941076279 2023-01-23 00:19:00.218268: step: 464/527, loss: 0.4685177803039551 2023-01-23 00:19:01.350152: step: 468/527, loss: 0.21176129579544067 2023-01-23 00:19:02.464422: step: 472/527, loss: 0.15110769867897034 2023-01-23 00:19:03.564977: step: 476/527, loss: 0.03734846040606499 2023-01-23 00:19:04.687700: step: 480/527, loss: 0.11501837521791458 2023-01-23 00:19:05.775358: step: 484/527, loss: 0.12085433304309845 2023-01-23 00:19:06.885553: step: 488/527, loss: 0.184031680226326 2023-01-23 00:19:07.983954: step: 492/527, loss: 0.22017021477222443 2023-01-23 00:19:09.102359: step: 496/527, loss: 0.0701756477355957 2023-01-23 00:19:10.215807: step: 500/527, loss: 0.34339240193367004 2023-01-23 00:19:11.338249: step: 504/527, loss: 0.12254400551319122 2023-01-23 00:19:12.468871: step: 508/527, loss: 0.1262405514717102 2023-01-23 00:19:13.588585: step: 512/527, loss: 0.2515473961830139 2023-01-23 00:19:14.717400: step: 516/527, loss: 0.15226033329963684 2023-01-23 00:19:15.841757: step: 520/527, loss: 0.29322385787963867 2023-01-23 00:19:16.947449: step: 524/527, loss: 0.04788990318775177 2023-01-23 00:19:18.084664: step: 528/527, loss: 0.11253318190574646 2023-01-23 00:19:19.201589: step: 532/527, loss: 0.3688744306564331 2023-01-23 00:19:20.332595: step: 536/527, loss: 0.007495689671486616 2023-01-23 00:19:21.439651: step: 540/527, loss: 1.9881023168563843 2023-01-23 00:19:22.591062: step: 544/527, loss: 0.25205785036087036 2023-01-23 00:19:23.708496: step: 548/527, loss: 0.5413957834243774 2023-01-23 00:19:24.812152: step: 552/527, loss: 0.03450331836938858 2023-01-23 00:19:25.943104: step: 556/527, loss: 0.05203437805175781 2023-01-23 00:19:27.082827: step: 560/527, loss: 0.15648341178894043 2023-01-23 00:19:28.184456: step: 564/527, loss: 0.2538606822490692 2023-01-23 00:19:29.285545: step: 568/527, loss: 0.5688015222549438 2023-01-23 00:19:30.398006: step: 572/527, loss: 0.05319194868206978 2023-01-23 00:19:31.522083: step: 576/527, loss: 0.08528414368629456 2023-01-23 00:19:32.643704: step: 580/527, loss: 0.018718242645263672 2023-01-23 00:19:33.755920: step: 584/527, loss: 0.18431530892848969 2023-01-23 00:19:34.855794: step: 588/527, loss: 0.23473891615867615 2023-01-23 00:19:35.965918: step: 592/527, loss: 0.31978341937065125 2023-01-23 00:19:37.082256: step: 596/527, loss: 0.23661017417907715 2023-01-23 00:19:38.219766: step: 600/527, loss: 0.06694173812866211 2023-01-23 00:19:39.360020: step: 604/527, loss: 0.09180283546447754 2023-01-23 00:19:40.465301: step: 608/527, loss: 0.27530938386917114 2023-01-23 00:19:41.575604: step: 612/527, loss: 0.084027960896492 2023-01-23 00:19:42.674725: step: 616/527, loss: 0.49331170320510864 2023-01-23 00:19:43.792640: step: 620/527, loss: 0.0631282776594162 2023-01-23 00:19:44.914844: step: 624/527, loss: 0.33109167218208313 2023-01-23 00:19:46.077859: step: 628/527, loss: 0.09209757298231125 2023-01-23 00:19:47.185642: step: 632/527, loss: 0.06318364292383194 2023-01-23 00:19:48.303005: step: 636/527, loss: 0.07507705688476562 2023-01-23 00:19:49.408470: step: 640/527, loss: 0.07749853283166885 2023-01-23 00:19:50.510460: step: 644/527, loss: 0.26230868697166443 2023-01-23 00:19:51.620674: step: 648/527, loss: 0.016105175018310547 2023-01-23 00:19:52.729346: step: 652/527, loss: 0.15562590956687927 2023-01-23 00:19:53.831051: step: 656/527, loss: 0.11723342537879944 2023-01-23 00:19:54.995731: step: 660/527, loss: 0.09571543335914612 2023-01-23 00:19:56.115274: step: 664/527, loss: 0.39289337396621704 2023-01-23 00:19:57.257122: step: 668/527, loss: 0.21759510040283203 2023-01-23 00:19:58.395365: step: 672/527, loss: 0.32942602038383484 2023-01-23 00:19:59.515118: step: 676/527, loss: 0.3686848282814026 2023-01-23 00:20:00.633100: step: 680/527, loss: 0.6266034245491028 2023-01-23 00:20:01.757058: step: 684/527, loss: 0.5773298144340515 2023-01-23 00:20:02.869938: step: 688/527, loss: 0.17440158128738403 2023-01-23 00:20:03.984742: step: 692/527, loss: 0.4018300771713257 2023-01-23 00:20:05.108885: step: 696/527, loss: 0.7968790531158447 2023-01-23 00:20:06.266782: step: 700/527, loss: 1.1410709619522095 2023-01-23 00:20:07.396450: step: 704/527, loss: 0.7502236366271973 2023-01-23 00:20:08.516322: step: 708/527, loss: 0.4125533699989319 2023-01-23 00:20:09.635914: step: 712/527, loss: 0.19168511033058167 2023-01-23 00:20:10.774927: step: 716/527, loss: 0.3345426619052887 2023-01-23 00:20:11.885765: step: 720/527, loss: 0.4023723006248474 2023-01-23 00:20:13.016923: step: 724/527, loss: 0.4025490880012512 2023-01-23 00:20:14.136016: step: 728/527, loss: 0.07412385940551758 2023-01-23 00:20:15.251723: step: 732/527, loss: 1.1686017513275146 2023-01-23 00:20:16.375864: step: 736/527, loss: 0.10230188816785812 2023-01-23 00:20:17.470942: step: 740/527, loss: 0.11895523220300674 2023-01-23 00:20:18.566458: step: 744/527, loss: 0.11851787567138672 2023-01-23 00:20:19.720956: step: 748/527, loss: 0.09862575680017471 2023-01-23 00:20:20.829335: step: 752/527, loss: 0.14316025376319885 2023-01-23 00:20:21.952400: step: 756/527, loss: 0.026618385687470436 2023-01-23 00:20:23.044269: step: 760/527, loss: 0.10191135108470917 2023-01-23 00:20:24.140329: step: 764/527, loss: 0.06429853290319443 2023-01-23 00:20:25.231570: step: 768/527, loss: 0.10434617847204208 2023-01-23 00:20:26.347225: step: 772/527, loss: 0.4852098524570465 2023-01-23 00:20:27.482932: step: 776/527, loss: 0.12042541801929474 2023-01-23 00:20:28.584249: step: 780/527, loss: 0.3627198338508606 2023-01-23 00:20:29.687666: step: 784/527, loss: 0.06677771359682083 2023-01-23 00:20:30.809389: step: 788/527, loss: 0.07106852531433105 2023-01-23 00:20:31.901523: step: 792/527, loss: 0.24752560257911682 2023-01-23 00:20:33.053980: step: 796/527, loss: 0.31318798661231995 2023-01-23 00:20:34.195644: step: 800/527, loss: 0.8733689785003662 2023-01-23 00:20:35.332836: step: 804/527, loss: 0.0827760249376297 2023-01-23 00:20:36.449439: step: 808/527, loss: 0.19814014434814453 2023-01-23 00:20:37.599355: step: 812/527, loss: 0.7050227522850037 2023-01-23 00:20:38.714402: step: 816/527, loss: 0.0867125540971756 2023-01-23 00:20:39.832682: step: 820/527, loss: 0.06167206913232803 2023-01-23 00:20:40.964622: step: 824/527, loss: 0.10035638511180878 2023-01-23 00:20:42.082893: step: 828/527, loss: 0.3124014139175415 2023-01-23 00:20:43.185857: step: 832/527, loss: 0.09035825729370117 2023-01-23 00:20:44.290623: step: 836/527, loss: 0.19395585358142853 2023-01-23 00:20:45.429234: step: 840/527, loss: 0.26939234137535095 2023-01-23 00:20:46.527308: step: 844/527, loss: 0.22080013155937195 2023-01-23 00:20:47.627602: step: 848/527, loss: 0.1174074113368988 2023-01-23 00:20:48.756875: step: 852/527, loss: 0.20611357688903809 2023-01-23 00:20:49.872405: step: 856/527, loss: 0.2556767463684082 2023-01-23 00:20:50.990466: step: 860/527, loss: 0.2072371393442154 2023-01-23 00:20:52.083620: step: 864/527, loss: 0.14181672036647797 2023-01-23 00:20:53.200690: step: 868/527, loss: 0.01876373216509819 2023-01-23 00:20:54.308304: step: 872/527, loss: 0.06636762619018555 2023-01-23 00:20:55.421501: step: 876/527, loss: 0.3427914083003998 2023-01-23 00:20:56.549163: step: 880/527, loss: 0.3641493618488312 2023-01-23 00:20:57.651578: step: 884/527, loss: 0.10763339698314667 2023-01-23 00:20:58.745527: step: 888/527, loss: 0.024660492315888405 2023-01-23 00:20:59.860805: step: 892/527, loss: 0.0815097838640213 2023-01-23 00:21:00.986820: step: 896/527, loss: 0.9242773056030273 2023-01-23 00:21:02.075686: step: 900/527, loss: 0.17459973692893982 2023-01-23 00:21:03.193480: step: 904/527, loss: 0.15603943169116974 2023-01-23 00:21:04.289527: step: 908/527, loss: 0.10006008297204971 2023-01-23 00:21:05.375996: step: 912/527, loss: 0.30490848422050476 2023-01-23 00:21:06.520396: step: 916/527, loss: 0.6718155145645142 2023-01-23 00:21:07.658688: step: 920/527, loss: 0.2529800832271576 2023-01-23 00:21:08.768324: step: 924/527, loss: 0.14264421164989471 2023-01-23 00:21:09.869617: step: 928/527, loss: 0.14165306091308594 2023-01-23 00:21:11.003068: step: 932/527, loss: 0.9555657505989075 2023-01-23 00:21:12.089511: step: 936/527, loss: 0.02001338079571724 2023-01-23 00:21:13.218069: step: 940/527, loss: 0.09285005927085876 2023-01-23 00:21:14.348107: step: 944/527, loss: 0.22490891814231873 2023-01-23 00:21:15.468554: step: 948/527, loss: 0.25561201572418213 2023-01-23 00:21:16.575474: step: 952/527, loss: 0.1803903579711914 2023-01-23 00:21:17.677985: step: 956/527, loss: 0.05386696010828018 2023-01-23 00:21:18.805835: step: 960/527, loss: 0.09731750190258026 2023-01-23 00:21:19.916836: step: 964/527, loss: 0.9887893199920654 2023-01-23 00:21:21.033257: step: 968/527, loss: 0.09517116844654083 2023-01-23 00:21:22.151549: step: 972/527, loss: 1.279137134552002 2023-01-23 00:21:23.284786: step: 976/527, loss: 0.05282296985387802 2023-01-23 00:21:24.374250: step: 980/527, loss: 0.09685230255126953 2023-01-23 00:21:25.473552: step: 984/527, loss: 0.5726767778396606 2023-01-23 00:21:26.623441: step: 988/527, loss: 0.052752211689949036 2023-01-23 00:21:27.737612: step: 992/527, loss: 0.21002750098705292 2023-01-23 00:21:28.865330: step: 996/527, loss: 0.18055109679698944 2023-01-23 00:21:30.015642: step: 1000/527, loss: 0.02756948582828045 2023-01-23 00:21:31.147552: step: 1004/527, loss: 0.11835814267396927 2023-01-23 00:21:32.277743: step: 1008/527, loss: 0.019714046269655228 2023-01-23 00:21:33.364242: step: 1012/527, loss: 0.17755194008350372 2023-01-23 00:21:34.473033: step: 1016/527, loss: 1.000170350074768 2023-01-23 00:21:35.621042: step: 1020/527, loss: 0.2504728436470032 2023-01-23 00:21:36.733793: step: 1024/527, loss: 1.7355129718780518 2023-01-23 00:21:37.842828: step: 1028/527, loss: 0.21115180850028992 2023-01-23 00:21:38.976424: step: 1032/527, loss: 0.10982055217027664 2023-01-23 00:21:40.167659: step: 1036/527, loss: 0.2593066990375519 2023-01-23 00:21:41.276758: step: 1040/527, loss: 0.08927793800830841 2023-01-23 00:21:42.394853: step: 1044/527, loss: 0.19734343886375427 2023-01-23 00:21:43.532515: step: 1048/527, loss: 0.04375801235437393 2023-01-23 00:21:44.676316: step: 1052/527, loss: 0.15594534575939178 2023-01-23 00:21:45.785274: step: 1056/527, loss: 0.2277686595916748 2023-01-23 00:21:46.865480: step: 1060/527, loss: 0.12292556464672089 2023-01-23 00:21:47.990950: step: 1064/527, loss: 0.08499407768249512 2023-01-23 00:21:49.082676: step: 1068/527, loss: 0.07709364593029022 2023-01-23 00:21:50.252901: step: 1072/527, loss: 1.4358896017074585 2023-01-23 00:21:51.348979: step: 1076/527, loss: 0.26507455110549927 2023-01-23 00:21:52.481510: step: 1080/527, loss: 0.15279458463191986 2023-01-23 00:21:53.608602: step: 1084/527, loss: 0.4682307243347168 2023-01-23 00:21:54.716272: step: 1088/527, loss: 0.09165802597999573 2023-01-23 00:21:55.814268: step: 1092/527, loss: 0.03277845308184624 2023-01-23 00:21:56.956949: step: 1096/527, loss: 0.04624834284186363 2023-01-23 00:21:58.058607: step: 1100/527, loss: 0.8685977458953857 2023-01-23 00:21:59.218273: step: 1104/527, loss: 0.22323516011238098 2023-01-23 00:22:00.331560: step: 1108/527, loss: 0.048952389508485794 2023-01-23 00:22:01.494225: step: 1112/527, loss: 0.26619797945022583 2023-01-23 00:22:02.629990: step: 1116/527, loss: 0.08635836094617844 2023-01-23 00:22:03.755070: step: 1120/527, loss: 0.07696514576673508 2023-01-23 00:22:04.872672: step: 1124/527, loss: 0.4653652012348175 2023-01-23 00:22:05.969331: step: 1128/527, loss: 0.4134241044521332 2023-01-23 00:22:07.063571: step: 1132/527, loss: 0.04846487194299698 2023-01-23 00:22:08.181275: step: 1136/527, loss: 0.09378290176391602 2023-01-23 00:22:09.298699: step: 1140/527, loss: 0.22416406869888306 2023-01-23 00:22:10.388594: step: 1144/527, loss: 0.0863683745265007 2023-01-23 00:22:11.502753: step: 1148/527, loss: 0.28976762294769287 2023-01-23 00:22:12.615688: step: 1152/527, loss: 0.10418453067541122 2023-01-23 00:22:13.758405: step: 1156/527, loss: 0.1785154789686203 2023-01-23 00:22:14.870930: step: 1160/527, loss: 0.5722765922546387 2023-01-23 00:22:15.967597: step: 1164/527, loss: 0.03684301674365997 2023-01-23 00:22:17.082856: step: 1168/527, loss: 0.039452649652957916 2023-01-23 00:22:18.175574: step: 1172/527, loss: 0.18124571442604065 2023-01-23 00:22:19.308915: step: 1176/527, loss: 0.07443609833717346 2023-01-23 00:22:20.430467: step: 1180/527, loss: 0.05713377147912979 2023-01-23 00:22:21.555893: step: 1184/527, loss: 0.1508459597826004 2023-01-23 00:22:22.690384: step: 1188/527, loss: 0.17405909299850464 2023-01-23 00:22:23.805933: step: 1192/527, loss: 0.9470130801200867 2023-01-23 00:22:24.920139: step: 1196/527, loss: 0.09310570359230042 2023-01-23 00:22:26.045083: step: 1200/527, loss: 0.1418277770280838 2023-01-23 00:22:27.138139: step: 1204/527, loss: 0.05359811708331108 2023-01-23 00:22:28.257200: step: 1208/527, loss: 0.14250335097312927 2023-01-23 00:22:29.361226: step: 1212/527, loss: 0.03633112832903862 2023-01-23 00:22:30.455180: step: 1216/527, loss: 0.24199900031089783 2023-01-23 00:22:31.564572: step: 1220/527, loss: 0.1411367505788803 2023-01-23 00:22:32.670343: step: 1224/527, loss: 0.21746310591697693 2023-01-23 00:22:33.780827: step: 1228/527, loss: 0.0654749870300293 2023-01-23 00:22:34.886912: step: 1232/527, loss: 0.1332019865512848 2023-01-23 00:22:36.000917: step: 1236/527, loss: 0.5273067355155945 2023-01-23 00:22:37.127749: step: 1240/527, loss: 0.09529409557580948 2023-01-23 00:22:38.250964: step: 1244/527, loss: 0.1311378926038742 2023-01-23 00:22:39.380215: step: 1248/527, loss: 0.9126175045967102 2023-01-23 00:22:40.496887: step: 1252/527, loss: 0.1864130049943924 2023-01-23 00:22:41.591324: step: 1256/527, loss: 0.5510692000389099 2023-01-23 00:22:42.720935: step: 1260/527, loss: 0.17051827907562256 2023-01-23 00:22:43.839010: step: 1264/527, loss: 0.5297226905822754 2023-01-23 00:22:44.941473: step: 1268/527, loss: 0.3993288278579712 2023-01-23 00:22:46.043512: step: 1272/527, loss: 0.20910167694091797 2023-01-23 00:22:47.163012: step: 1276/527, loss: 0.1272641271352768 2023-01-23 00:22:48.260817: step: 1280/527, loss: 0.1559308022260666 2023-01-23 00:22:49.397319: step: 1284/527, loss: 0.16905517876148224 2023-01-23 00:22:50.501609: step: 1288/527, loss: 0.3641207218170166 2023-01-23 00:22:51.657638: step: 1292/527, loss: 0.08693476021289825 2023-01-23 00:22:52.792361: step: 1296/527, loss: 0.3631511628627777 2023-01-23 00:22:53.916471: step: 1300/527, loss: 0.19771808385849 2023-01-23 00:22:55.022561: step: 1304/527, loss: 0.6395736932754517 2023-01-23 00:22:56.142761: step: 1308/527, loss: 0.16837921738624573 2023-01-23 00:22:57.279754: step: 1312/527, loss: 0.24905472993850708 2023-01-23 00:22:58.401732: step: 1316/527, loss: 0.2387489676475525 2023-01-23 00:22:59.537873: step: 1320/527, loss: 0.04274997487664223 2023-01-23 00:23:00.648955: step: 1324/527, loss: 0.09521861374378204 2023-01-23 00:23:01.756235: step: 1328/527, loss: 0.3333342671394348 2023-01-23 00:23:02.876903: step: 1332/527, loss: 0.1949302703142166 2023-01-23 00:23:04.017259: step: 1336/527, loss: 0.12058629840612411 2023-01-23 00:23:05.136046: step: 1340/527, loss: 0.0865846648812294 2023-01-23 00:23:06.262482: step: 1344/527, loss: 0.11176042258739471 2023-01-23 00:23:07.383011: step: 1348/527, loss: 0.3597325086593628 2023-01-23 00:23:08.492609: step: 1352/527, loss: 0.09371452778577805 2023-01-23 00:23:09.615840: step: 1356/527, loss: 0.14018574357032776 2023-01-23 00:23:10.725228: step: 1360/527, loss: 0.13045254349708557 2023-01-23 00:23:11.845000: step: 1364/527, loss: 0.22600993514060974 2023-01-23 00:23:12.969931: step: 1368/527, loss: 0.13538646697998047 2023-01-23 00:23:14.117194: step: 1372/527, loss: 0.13528437912464142 2023-01-23 00:23:15.231503: step: 1376/527, loss: 0.1861620396375656 2023-01-23 00:23:16.342234: step: 1380/527, loss: 0.24410080909729004 2023-01-23 00:23:17.434888: step: 1384/527, loss: 0.11294479668140411 2023-01-23 00:23:18.576988: step: 1388/527, loss: 0.3299015760421753 2023-01-23 00:23:19.720274: step: 1392/527, loss: 0.7020249962806702 2023-01-23 00:23:20.825160: step: 1396/527, loss: 0.1549573540687561 2023-01-23 00:23:21.923524: step: 1400/527, loss: 0.10581937432289124 2023-01-23 00:23:23.045693: step: 1404/527, loss: 0.18154793977737427 2023-01-23 00:23:24.180489: step: 1408/527, loss: 0.10994873195886612 2023-01-23 00:23:25.287630: step: 1412/527, loss: 0.10219564288854599 2023-01-23 00:23:26.401703: step: 1416/527, loss: 0.18357139825820923 2023-01-23 00:23:27.498794: step: 1420/527, loss: 0.1429995596408844 2023-01-23 00:23:28.629355: step: 1424/527, loss: 0.20690345764160156 2023-01-23 00:23:29.741929: step: 1428/527, loss: 0.03540918976068497 2023-01-23 00:23:30.869077: step: 1432/527, loss: 0.6291588544845581 2023-01-23 00:23:31.950825: step: 1436/527, loss: 0.023952938616275787 2023-01-23 00:23:33.080097: step: 1440/527, loss: 0.24245738983154297 2023-01-23 00:23:34.190731: step: 1444/527, loss: 0.3907793164253235 2023-01-23 00:23:35.296898: step: 1448/527, loss: 0.37806135416030884 2023-01-23 00:23:36.407112: step: 1452/527, loss: 0.08191061019897461 2023-01-23 00:23:37.488198: step: 1456/527, loss: 0.027959156781435013 2023-01-23 00:23:38.556941: step: 1460/527, loss: 1.3866853713989258 2023-01-23 00:23:39.665771: step: 1464/527, loss: 0.023647405207157135 2023-01-23 00:23:40.794087: step: 1468/527, loss: 0.4275180697441101 2023-01-23 00:23:41.924556: step: 1472/527, loss: 0.2591736614704132 2023-01-23 00:23:43.061715: step: 1476/527, loss: 0.20774689316749573 2023-01-23 00:23:44.175155: step: 1480/527, loss: 0.11025485396385193 2023-01-23 00:23:45.273828: step: 1484/527, loss: 0.14052283763885498 2023-01-23 00:23:46.385653: step: 1488/527, loss: 0.24376507103443146 2023-01-23 00:23:47.514686: step: 1492/527, loss: 0.15917068719863892 2023-01-23 00:23:48.605462: step: 1496/527, loss: 0.2980186343193054 2023-01-23 00:23:49.725712: step: 1500/527, loss: 0.05484504997730255 2023-01-23 00:23:50.864186: step: 1504/527, loss: 0.20234127342700958 2023-01-23 00:23:51.957697: step: 1508/527, loss: 0.1779327392578125 2023-01-23 00:23:53.094242: step: 1512/527, loss: 1.5668984651565552 2023-01-23 00:23:54.223898: step: 1516/527, loss: 0.20066681504249573 2023-01-23 00:23:55.372291: step: 1520/527, loss: 0.5626883506774902 2023-01-23 00:23:56.484514: step: 1524/527, loss: 0.10938291251659393 2023-01-23 00:23:57.580577: step: 1528/527, loss: 0.05707111209630966 2023-01-23 00:23:58.685392: step: 1532/527, loss: 0.0390351302921772 2023-01-23 00:23:59.805079: step: 1536/527, loss: 0.36508435010910034 2023-01-23 00:24:00.939461: step: 1540/527, loss: 2.013167142868042 2023-01-23 00:24:02.052949: step: 1544/527, loss: 0.5956695079803467 2023-01-23 00:24:03.132862: step: 1548/527, loss: 0.033182717859745026 2023-01-23 00:24:04.236295: step: 1552/527, loss: 0.24339599907398224 2023-01-23 00:24:05.373285: step: 1556/527, loss: 0.18161793053150177 2023-01-23 00:24:06.463346: step: 1560/527, loss: 0.0639905035495758 2023-01-23 00:24:07.564249: step: 1564/527, loss: 0.17906102538108826 2023-01-23 00:24:08.672746: step: 1568/527, loss: 0.1424793303012848 2023-01-23 00:24:09.766690: step: 1572/527, loss: 0.06548518687486649 2023-01-23 00:24:10.912344: step: 1576/527, loss: 0.47618475556373596 2023-01-23 00:24:12.031491: step: 1580/527, loss: 0.27528414130210876 2023-01-23 00:24:13.161468: step: 1584/527, loss: 0.10735063999891281 2023-01-23 00:24:14.268157: step: 1588/527, loss: 0.08886084705591202 2023-01-23 00:24:15.395827: step: 1592/527, loss: 0.19100405275821686 2023-01-23 00:24:16.511143: step: 1596/527, loss: 0.05273895710706711 2023-01-23 00:24:17.634352: step: 1600/527, loss: 0.10789547115564346 2023-01-23 00:24:18.755774: step: 1604/527, loss: 0.2897655963897705 2023-01-23 00:24:19.862530: step: 1608/527, loss: 0.12071742862462997 2023-01-23 00:24:20.986601: step: 1612/527, loss: 0.0781133696436882 2023-01-23 00:24:22.088658: step: 1616/527, loss: 0.29869768023490906 2023-01-23 00:24:23.215253: step: 1620/527, loss: 0.21792864799499512 2023-01-23 00:24:24.341448: step: 1624/527, loss: 0.1269351989030838 2023-01-23 00:24:25.455895: step: 1628/527, loss: 0.07096557319164276 2023-01-23 00:24:26.556849: step: 1632/527, loss: 0.017712164670228958 2023-01-23 00:24:27.670351: step: 1636/527, loss: 0.2003255933523178 2023-01-23 00:24:28.764855: step: 1640/527, loss: 0.5588818788528442 2023-01-23 00:24:29.891799: step: 1644/527, loss: 0.11722178757190704 2023-01-23 00:24:31.016322: step: 1648/527, loss: 0.1632780134677887 2023-01-23 00:24:32.181925: step: 1652/527, loss: 0.15093430876731873 2023-01-23 00:24:33.301226: step: 1656/527, loss: 1.1809275150299072 2023-01-23 00:24:34.432528: step: 1660/527, loss: 0.12432112544775009 2023-01-23 00:24:35.556348: step: 1664/527, loss: 0.02001657523214817 2023-01-23 00:24:36.704716: step: 1668/527, loss: 0.21676737070083618 2023-01-23 00:24:37.810355: step: 1672/527, loss: 0.1052466407418251 2023-01-23 00:24:38.913403: step: 1676/527, loss: 0.26912203431129456 2023-01-23 00:24:40.024342: step: 1680/527, loss: 0.042536139488220215 2023-01-23 00:24:41.137916: step: 1684/527, loss: 0.23775464296340942 2023-01-23 00:24:42.245349: step: 1688/527, loss: 0.07776384800672531 2023-01-23 00:24:43.457882: step: 1692/527, loss: 0.6749762296676636 2023-01-23 00:24:44.563666: step: 1696/527, loss: 0.03224661573767662 2023-01-23 00:24:45.673682: step: 1700/527, loss: 0.017928026616573334 2023-01-23 00:24:46.790843: step: 1704/527, loss: 0.06147823482751846 2023-01-23 00:24:47.899843: step: 1708/527, loss: 0.05404110252857208 2023-01-23 00:24:49.041378: step: 1712/527, loss: 0.17946463823318481 2023-01-23 00:24:50.172426: step: 1716/527, loss: 0.11328163743019104 2023-01-23 00:24:51.267685: step: 1720/527, loss: 0.11372726410627365 2023-01-23 00:24:52.426335: step: 1724/527, loss: 0.08207159489393234 2023-01-23 00:24:53.530671: step: 1728/527, loss: 0.13389606773853302 2023-01-23 00:24:54.659698: step: 1732/527, loss: 0.11606454849243164 2023-01-23 00:24:55.798732: step: 1736/527, loss: 0.07518234848976135 2023-01-23 00:24:56.900672: step: 1740/527, loss: 0.5552347898483276 2023-01-23 00:24:57.993565: step: 1744/527, loss: 0.07792038470506668 2023-01-23 00:24:59.114222: step: 1748/527, loss: 0.454611212015152 2023-01-23 00:25:00.218035: step: 1752/527, loss: 3.4578685760498047 2023-01-23 00:25:01.303205: step: 1756/527, loss: 0.10688943415880203 2023-01-23 00:25:02.417547: step: 1760/527, loss: 0.2683517336845398 2023-01-23 00:25:03.528904: step: 1764/527, loss: 0.11338291317224503 2023-01-23 00:25:04.655020: step: 1768/527, loss: 0.582638144493103 2023-01-23 00:25:05.759288: step: 1772/527, loss: 0.2119365781545639 2023-01-23 00:25:06.884621: step: 1776/527, loss: 0.013252854347229004 2023-01-23 00:25:07.972477: step: 1780/527, loss: 0.14856967329978943 2023-01-23 00:25:09.084320: step: 1784/527, loss: 0.06847252696752548 2023-01-23 00:25:10.195232: step: 1788/527, loss: 0.15705688297748566 2023-01-23 00:25:11.306406: step: 1792/527, loss: 0.5870369672775269 2023-01-23 00:25:12.437704: step: 1796/527, loss: 0.09677582234144211 2023-01-23 00:25:13.543655: step: 1800/527, loss: 0.03032945841550827 2023-01-23 00:25:14.697965: step: 1804/527, loss: 0.10984992980957031 2023-01-23 00:25:15.816525: step: 1808/527, loss: 0.09253750741481781 2023-01-23 00:25:16.912335: step: 1812/527, loss: 0.045008473098278046 2023-01-23 00:25:18.003658: step: 1816/527, loss: 0.07884550094604492 2023-01-23 00:25:19.123211: step: 1820/527, loss: 0.15091238915920258 2023-01-23 00:25:20.265196: step: 1824/527, loss: 0.9885495901107788 2023-01-23 00:25:21.364228: step: 1828/527, loss: 0.5015361905097961 2023-01-23 00:25:22.475550: step: 1832/527, loss: 0.027435969561338425 2023-01-23 00:25:23.591779: step: 1836/527, loss: 0.33904364705085754 2023-01-23 00:25:24.740127: step: 1840/527, loss: 0.17369243502616882 2023-01-23 00:25:25.871239: step: 1844/527, loss: 0.3427468240261078 2023-01-23 00:25:27.025387: step: 1848/527, loss: 0.076171875 2023-01-23 00:25:28.148394: step: 1852/527, loss: 1.4057501554489136 2023-01-23 00:25:29.265191: step: 1856/527, loss: 0.345808207988739 2023-01-23 00:25:30.408659: step: 1860/527, loss: 0.32316648960113525 2023-01-23 00:25:31.527303: step: 1864/527, loss: 0.15814723074436188 2023-01-23 00:25:32.658343: step: 1868/527, loss: 0.1599147915840149 2023-01-23 00:25:33.778569: step: 1872/527, loss: 0.1452324390411377 2023-01-23 00:25:34.905052: step: 1876/527, loss: 0.04798183590173721 2023-01-23 00:25:35.984069: step: 1880/527, loss: 0.7147918939590454 2023-01-23 00:25:37.100541: step: 1884/527, loss: 0.14694365859031677 2023-01-23 00:25:38.221393: step: 1888/527, loss: 0.11013995110988617 2023-01-23 00:25:39.338141: step: 1892/527, loss: 0.32118573784828186 2023-01-23 00:25:40.465520: step: 1896/527, loss: 0.14987125992774963 2023-01-23 00:25:41.589478: step: 1900/527, loss: 0.37460213899612427 2023-01-23 00:25:42.748841: step: 1904/527, loss: 0.47931337356567383 2023-01-23 00:25:43.894666: step: 1908/527, loss: 0.16490508615970612 2023-01-23 00:25:45.022797: step: 1912/527, loss: 0.3935590982437134 2023-01-23 00:25:46.163797: step: 1916/527, loss: 0.14565859735012054 2023-01-23 00:25:47.276474: step: 1920/527, loss: 0.02524557150900364 2023-01-23 00:25:48.374115: step: 1924/527, loss: 0.041150666773319244 2023-01-23 00:25:49.484648: step: 1928/527, loss: 0.08393120765686035 2023-01-23 00:25:50.608840: step: 1932/527, loss: 0.40841445326805115 2023-01-23 00:25:51.784919: step: 1936/527, loss: 0.04874372482299805 2023-01-23 00:25:52.901183: step: 1940/527, loss: 0.7660877108573914 2023-01-23 00:25:54.022976: step: 1944/527, loss: 0.04994244873523712 2023-01-23 00:25:55.087605: step: 1948/527, loss: 0.039541102945804596 2023-01-23 00:25:56.180894: step: 1952/527, loss: 0.8475232124328613 2023-01-23 00:25:57.330949: step: 1956/527, loss: 0.3495897352695465 2023-01-23 00:25:58.426985: step: 1960/527, loss: 0.23900824785232544 2023-01-23 00:25:59.521828: step: 1964/527, loss: 0.2032717615365982 2023-01-23 00:26:00.693878: step: 1968/527, loss: 0.13190627098083496 2023-01-23 00:26:01.826850: step: 1972/527, loss: 0.8238687515258789 2023-01-23 00:26:02.955851: step: 1976/527, loss: 0.27192696928977966 2023-01-23 00:26:04.084518: step: 1980/527, loss: 0.2585316300392151 2023-01-23 00:26:05.202172: step: 1984/527, loss: 0.14649590849876404 2023-01-23 00:26:06.322859: step: 1988/527, loss: 0.08172807097434998 2023-01-23 00:26:07.461167: step: 1992/527, loss: 5.662846565246582 2023-01-23 00:26:08.571576: step: 1996/527, loss: 0.1646881401538849 2023-01-23 00:26:09.685973: step: 2000/527, loss: 0.1943332850933075 2023-01-23 00:26:10.848522: step: 2004/527, loss: 0.08647593855857849 2023-01-23 00:26:12.006126: step: 2008/527, loss: 0.11360268294811249 2023-01-23 00:26:13.159272: step: 2012/527, loss: 0.6866371035575867 2023-01-23 00:26:14.278594: step: 2016/527, loss: 0.01580023765563965 2023-01-23 00:26:15.384541: step: 2020/527, loss: 0.3055339753627777 2023-01-23 00:26:16.472181: step: 2024/527, loss: 0.3335486352443695 2023-01-23 00:26:17.584029: step: 2028/527, loss: 0.14706259965896606 2023-01-23 00:26:18.716331: step: 2032/527, loss: 0.31563299894332886 2023-01-23 00:26:19.829748: step: 2036/527, loss: 0.07901842892169952 2023-01-23 00:26:20.938715: step: 2040/527, loss: 0.48686444759368896 2023-01-23 00:26:22.058525: step: 2044/527, loss: 0.06225104629993439 2023-01-23 00:26:23.167052: step: 2048/527, loss: 0.15736566483974457 2023-01-23 00:26:24.336991: step: 2052/527, loss: 0.1980557143688202 2023-01-23 00:26:25.437671: step: 2056/527, loss: 0.07029370963573456 2023-01-23 00:26:26.570952: step: 2060/527, loss: 0.19861502945423126 2023-01-23 00:26:27.696434: step: 2064/527, loss: 0.18253564834594727 2023-01-23 00:26:28.813634: step: 2068/527, loss: 0.919429361820221 2023-01-23 00:26:29.922041: step: 2072/527, loss: 0.1285136193037033 2023-01-23 00:26:31.041578: step: 2076/527, loss: 0.14362964034080505 2023-01-23 00:26:32.162033: step: 2080/527, loss: 0.045040179044008255 2023-01-23 00:26:33.284431: step: 2084/527, loss: 0.15095248818397522 2023-01-23 00:26:34.398968: step: 2088/527, loss: 0.034040164202451706 2023-01-23 00:26:35.497344: step: 2092/527, loss: 0.1280737817287445 2023-01-23 00:26:36.625773: step: 2096/527, loss: 0.0901232659816742 2023-01-23 00:26:37.764247: step: 2100/527, loss: 0.12633295357227325 2023-01-23 00:26:38.888548: step: 2104/527, loss: 0.040778160095214844 2023-01-23 00:26:40.002987: step: 2108/527, loss: 0.1386338174343109 ================================================== Loss: 0.265 -------------------- Dev: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Test: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Korean: {'event': {'p': 0.6444444444444445, 'r': 0.4603174603174603, 'f1': 0.537037037037037}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Russian: {'event': {'p': 0.5135135135135135, 'r': 0.5277777777777778, 'f1': 0.5205479452054794}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} New best chinese model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.623059866962306, 'r': 0.748335552596538, 'f1': 0.6799758015728978}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Eng Test for Korean: {'event': {'p': 0.6303294573643411, 'r': 0.7434285714285714, 'f1': 0.6822233875196644}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Korean: {'event': {'p': 0.725, 'r': 0.4603174603174603, 'f1': 0.5631067961165048}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 6 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:27:27.459032: step: 4/527, loss: 0.09809327125549316 2023-01-23 00:27:28.561562: step: 8/527, loss: 0.05085396766662598 2023-01-23 00:27:29.687487: step: 12/527, loss: 0.019361257553100586 2023-01-23 00:27:30.804883: step: 16/527, loss: 0.16312438249588013 2023-01-23 00:27:31.963032: step: 20/527, loss: 0.19972950220108032 2023-01-23 00:27:33.072748: step: 24/527, loss: 0.227146714925766 2023-01-23 00:27:34.202068: step: 28/527, loss: 0.13906002044677734 2023-01-23 00:27:35.297376: step: 32/527, loss: 0.00816967524588108 2023-01-23 00:27:36.415523: step: 36/527, loss: 0.08307237923145294 2023-01-23 00:27:37.573718: step: 40/527, loss: 0.2395917922258377 2023-01-23 00:27:38.710262: step: 44/527, loss: 0.34595829248428345 2023-01-23 00:27:39.813998: step: 48/527, loss: 0.09413719177246094 2023-01-23 00:27:40.937424: step: 52/527, loss: 0.13661280274391174 2023-01-23 00:27:42.068543: step: 56/527, loss: 0.07389058917760849 2023-01-23 00:27:43.224513: step: 60/527, loss: 0.318773478269577 2023-01-23 00:27:44.317754: step: 64/527, loss: 0.04285860061645508 2023-01-23 00:27:45.428525: step: 68/527, loss: 0.09297715127468109 2023-01-23 00:27:46.545417: step: 72/527, loss: 0.3081987202167511 2023-01-23 00:27:47.645125: step: 76/527, loss: 0.0583585761487484 2023-01-23 00:27:48.765368: step: 80/527, loss: 0.17066267132759094 2023-01-23 00:27:49.867209: step: 84/527, loss: 0.0739145278930664 2023-01-23 00:27:50.988733: step: 88/527, loss: 0.04386329650878906 2023-01-23 00:27:52.156944: step: 92/527, loss: 0.3526807725429535 2023-01-23 00:27:53.289935: step: 96/527, loss: 0.10786505043506622 2023-01-23 00:27:54.428941: step: 100/527, loss: 0.13941210508346558 2023-01-23 00:27:55.544654: step: 104/527, loss: 0.0392460823059082 2023-01-23 00:27:56.684928: step: 108/527, loss: 0.14055700600147247 2023-01-23 00:27:57.806086: step: 112/527, loss: 0.034890368580818176 2023-01-23 00:27:58.935975: step: 116/527, loss: 0.03204517439007759 2023-01-23 00:28:00.041433: step: 120/527, loss: 0.49269258975982666 2023-01-23 00:28:01.141378: step: 124/527, loss: 0.02144632488489151 2023-01-23 00:28:02.228644: step: 128/527, loss: 0.4735269546508789 2023-01-23 00:28:03.351630: step: 132/527, loss: 0.20490741729736328 2023-01-23 00:28:04.486957: step: 136/527, loss: 0.21948319673538208 2023-01-23 00:28:05.585285: step: 140/527, loss: 0.09926557540893555 2023-01-23 00:28:06.708398: step: 144/527, loss: 0.12586374580860138 2023-01-23 00:28:07.814480: step: 148/527, loss: 0.05742521584033966 2023-01-23 00:28:08.936799: step: 152/527, loss: 0.0542752742767334 2023-01-23 00:28:10.056014: step: 156/527, loss: 0.21279259026050568 2023-01-23 00:28:11.154529: step: 160/527, loss: 0.10694971680641174 2023-01-23 00:28:12.274111: step: 164/527, loss: 0.21865972876548767 2023-01-23 00:28:13.394126: step: 168/527, loss: 0.1016080379486084 2023-01-23 00:28:14.505024: step: 172/527, loss: 0.25411853194236755 2023-01-23 00:28:15.585466: step: 176/527, loss: 0.07370634377002716 2023-01-23 00:28:16.727100: step: 180/527, loss: 0.06605801731348038 2023-01-23 00:28:17.842235: step: 184/527, loss: 0.7231829166412354 2023-01-23 00:28:18.975002: step: 188/527, loss: 0.23981569707393646 2023-01-23 00:28:20.089600: step: 192/527, loss: 0.07329946011304855 2023-01-23 00:28:21.211131: step: 196/527, loss: 0.1070750504732132 2023-01-23 00:28:22.310406: step: 200/527, loss: 0.5285487174987793 2023-01-23 00:28:23.449933: step: 204/527, loss: 7.36193323135376 2023-01-23 00:28:24.562419: step: 208/527, loss: 1.5692079067230225 2023-01-23 00:28:25.690015: step: 212/527, loss: 0.2993311882019043 2023-01-23 00:28:26.789843: step: 216/527, loss: 0.24765712022781372 2023-01-23 00:28:27.893888: step: 220/527, loss: 0.06100483238697052 2023-01-23 00:28:28.986908: step: 224/527, loss: 0.28924334049224854 2023-01-23 00:28:30.112493: step: 228/527, loss: 0.13619975745677948 2023-01-23 00:28:31.220561: step: 232/527, loss: 0.023258520290255547 2023-01-23 00:28:32.355663: step: 236/527, loss: 0.18993701040744781 2023-01-23 00:28:33.452697: step: 240/527, loss: 0.04937797412276268 2023-01-23 00:28:34.568849: step: 244/527, loss: 0.38007059693336487 2023-01-23 00:28:35.688033: step: 248/527, loss: 0.07458534836769104 2023-01-23 00:28:36.794960: step: 252/527, loss: 0.09627914428710938 2023-01-23 00:28:37.911388: step: 256/527, loss: 0.1149645745754242 2023-01-23 00:28:39.031561: step: 260/527, loss: 0.22831077873706818 2023-01-23 00:28:40.146956: step: 264/527, loss: 0.16829939186573029 2023-01-23 00:28:41.261816: step: 268/527, loss: 0.026418304070830345 2023-01-23 00:28:42.389915: step: 272/527, loss: 0.08385644108057022 2023-01-23 00:28:43.496471: step: 276/527, loss: 0.19703388214111328 2023-01-23 00:28:44.619779: step: 280/527, loss: 0.5657750368118286 2023-01-23 00:28:45.753921: step: 284/527, loss: 0.443074107170105 2023-01-23 00:28:46.855237: step: 288/527, loss: 0.04611387103796005 2023-01-23 00:28:47.968672: step: 292/527, loss: 0.18945404887199402 2023-01-23 00:28:49.090929: step: 296/527, loss: 0.14467281103134155 2023-01-23 00:28:50.216617: step: 300/527, loss: 0.05260620266199112 2023-01-23 00:28:51.351246: step: 304/527, loss: 0.05623479187488556 2023-01-23 00:28:52.496758: step: 308/527, loss: 0.8390406370162964 2023-01-23 00:28:53.604470: step: 312/527, loss: 0.1849168837070465 2023-01-23 00:28:54.749357: step: 316/527, loss: 0.09425649791955948 2023-01-23 00:28:55.860504: step: 320/527, loss: 0.02252521552145481 2023-01-23 00:28:56.958192: step: 324/527, loss: 0.03591623529791832 2023-01-23 00:28:58.096850: step: 328/527, loss: 1.6285374164581299 2023-01-23 00:28:59.184722: step: 332/527, loss: 0.2274969518184662 2023-01-23 00:29:00.338639: step: 336/527, loss: 0.06355658173561096 2023-01-23 00:29:01.449603: step: 340/527, loss: 0.26239073276519775 2023-01-23 00:29:02.571682: step: 344/527, loss: 0.06323843449354172 2023-01-23 00:29:03.753455: step: 348/527, loss: 0.12584057450294495 2023-01-23 00:29:04.872735: step: 352/527, loss: 0.11452827602624893 2023-01-23 00:29:05.960912: step: 356/527, loss: 0.15861892700195312 2023-01-23 00:29:07.083399: step: 360/527, loss: 0.13074392080307007 2023-01-23 00:29:08.197636: step: 364/527, loss: 0.4625195562839508 2023-01-23 00:29:09.317550: step: 368/527, loss: 0.030454207211732864 2023-01-23 00:29:10.461685: step: 372/527, loss: 0.06957979500293732 2023-01-23 00:29:11.572683: step: 376/527, loss: 0.05140667036175728 2023-01-23 00:29:12.714357: step: 380/527, loss: 0.4253025949001312 2023-01-23 00:29:13.821653: step: 384/527, loss: 0.09943151473999023 2023-01-23 00:29:14.965266: step: 388/527, loss: 0.05313815921545029 2023-01-23 00:29:16.075912: step: 392/527, loss: 0.09376392513513565 2023-01-23 00:29:17.175810: step: 396/527, loss: 0.18427881598472595 2023-01-23 00:29:18.290392: step: 400/527, loss: 0.4059511423110962 2023-01-23 00:29:19.403642: step: 404/527, loss: 1.1235990524291992 2023-01-23 00:29:20.508560: step: 408/527, loss: 0.08466877788305283 2023-01-23 00:29:21.652777: step: 412/527, loss: 0.1253274828195572 2023-01-23 00:29:22.758522: step: 416/527, loss: 0.10244733095169067 2023-01-23 00:29:23.861397: step: 420/527, loss: 0.037759922444820404 2023-01-23 00:29:24.981637: step: 424/527, loss: 0.0929255485534668 2023-01-23 00:29:26.144952: step: 428/527, loss: 0.09901618957519531 2023-01-23 00:29:27.241961: step: 432/527, loss: 0.07042674720287323 2023-01-23 00:29:28.336897: step: 436/527, loss: 0.112521693110466 2023-01-23 00:29:29.504367: step: 440/527, loss: 0.06343488395214081 2023-01-23 00:29:30.612753: step: 444/527, loss: 0.2544664740562439 2023-01-23 00:29:31.725594: step: 448/527, loss: 0.10858345031738281 2023-01-23 00:29:32.863774: step: 452/527, loss: 0.1982189118862152 2023-01-23 00:29:33.958546: step: 456/527, loss: 0.462616503238678 2023-01-23 00:29:35.091418: step: 460/527, loss: 0.01927213743329048 2023-01-23 00:29:36.229614: step: 464/527, loss: 0.06015825644135475 2023-01-23 00:29:37.337388: step: 468/527, loss: 0.021100426092743874 2023-01-23 00:29:38.444778: step: 472/527, loss: 0.5226963758468628 2023-01-23 00:29:39.564077: step: 476/527, loss: 0.18448218703269958 2023-01-23 00:29:40.665723: step: 480/527, loss: 0.4267534017562866 2023-01-23 00:29:41.772188: step: 484/527, loss: 0.0962924063205719 2023-01-23 00:29:42.886789: step: 488/527, loss: 0.3233264088630676 2023-01-23 00:29:44.020110: step: 492/527, loss: 0.2946937382221222 2023-01-23 00:29:45.164171: step: 496/527, loss: 0.22921191155910492 2023-01-23 00:29:46.265438: step: 500/527, loss: 0.22058506309986115 2023-01-23 00:29:47.389831: step: 504/527, loss: 0.3670656085014343 2023-01-23 00:29:48.497200: step: 508/527, loss: 0.027770616114139557 2023-01-23 00:29:49.610570: step: 512/527, loss: 0.40341219305992126 2023-01-23 00:29:50.734283: step: 516/527, loss: 0.16455334424972534 2023-01-23 00:29:51.837856: step: 520/527, loss: 0.06385669857263565 2023-01-23 00:29:52.985056: step: 524/527, loss: 0.14708839356899261 2023-01-23 00:29:54.092957: step: 528/527, loss: 0.07627630233764648 2023-01-23 00:29:55.226599: step: 532/527, loss: 0.14719390869140625 2023-01-23 00:29:56.335379: step: 536/527, loss: 0.08033581078052521 2023-01-23 00:29:57.419412: step: 540/527, loss: 0.23325125873088837 2023-01-23 00:29:58.527605: step: 544/527, loss: 2.487367630004883 2023-01-23 00:29:59.628696: step: 548/527, loss: 0.06263256072998047 2023-01-23 00:30:00.756986: step: 552/527, loss: 0.06555996090173721 2023-01-23 00:30:01.857330: step: 556/527, loss: 0.6510850191116333 2023-01-23 00:30:03.010747: step: 560/527, loss: 0.051460932940244675 2023-01-23 00:30:04.168436: step: 564/527, loss: 0.2417098879814148 2023-01-23 00:30:05.265781: step: 568/527, loss: 0.15583720803260803 2023-01-23 00:30:06.352776: step: 572/527, loss: 0.09012976288795471 2023-01-23 00:30:07.492704: step: 576/527, loss: 0.09545796364545822 2023-01-23 00:30:08.604840: step: 580/527, loss: 0.1428963840007782 2023-01-23 00:30:09.700778: step: 584/527, loss: 0.02486112155020237 2023-01-23 00:30:10.825899: step: 588/527, loss: 0.6363394856452942 2023-01-23 00:30:11.925610: step: 592/527, loss: 0.15953369438648224 2023-01-23 00:30:13.010389: step: 596/527, loss: 0.09888716042041779 2023-01-23 00:30:14.140454: step: 600/527, loss: 0.17457886040210724 2023-01-23 00:30:15.280698: step: 604/527, loss: 0.12611256539821625 2023-01-23 00:30:16.385838: step: 608/527, loss: 0.1384473741054535 2023-01-23 00:30:17.506416: step: 612/527, loss: 0.5675891637802124 2023-01-23 00:30:18.607807: step: 616/527, loss: 0.04823513329029083 2023-01-23 00:30:19.747924: step: 620/527, loss: 0.1206444725394249 2023-01-23 00:30:20.852227: step: 624/527, loss: 0.13086660206317902 2023-01-23 00:30:21.955623: step: 628/527, loss: 0.03149242326617241 2023-01-23 00:30:23.067563: step: 632/527, loss: 1.693342924118042 2023-01-23 00:30:24.180631: step: 636/527, loss: 0.06635136902332306 2023-01-23 00:30:25.302264: step: 640/527, loss: 0.1403800994157791 2023-01-23 00:30:26.419563: step: 644/527, loss: 0.16130180656909943 2023-01-23 00:30:27.587497: step: 648/527, loss: 0.21009722352027893 2023-01-23 00:30:28.678222: step: 652/527, loss: 0.117999367415905 2023-01-23 00:30:29.761873: step: 656/527, loss: 0.06665630638599396 2023-01-23 00:30:30.861118: step: 660/527, loss: 0.09412749111652374 2023-01-23 00:30:31.977526: step: 664/527, loss: 0.0505555160343647 2023-01-23 00:30:33.080972: step: 668/527, loss: 0.10793104022741318 2023-01-23 00:30:34.197918: step: 672/527, loss: 0.09758541733026505 2023-01-23 00:30:35.297709: step: 676/527, loss: 0.028674745932221413 2023-01-23 00:30:36.407630: step: 680/527, loss: 0.17769642174243927 2023-01-23 00:30:37.527226: step: 684/527, loss: 0.05619053915143013 2023-01-23 00:30:38.655245: step: 688/527, loss: 0.1361272782087326 2023-01-23 00:30:39.801567: step: 692/527, loss: 0.047132205218076706 2023-01-23 00:30:40.902825: step: 696/527, loss: 0.02039952203631401 2023-01-23 00:30:42.014277: step: 700/527, loss: 0.2235385924577713 2023-01-23 00:30:43.128002: step: 704/527, loss: 0.5704684257507324 2023-01-23 00:30:44.245981: step: 708/527, loss: 0.06690311431884766 2023-01-23 00:30:45.340400: step: 712/527, loss: 0.04849109798669815 2023-01-23 00:30:46.445629: step: 716/527, loss: 0.04433765262365341 2023-01-23 00:30:47.575340: step: 720/527, loss: 0.1945153772830963 2023-01-23 00:30:48.716706: step: 724/527, loss: 1.1448918581008911 2023-01-23 00:30:49.853265: step: 728/527, loss: 0.12088766694068909 2023-01-23 00:30:50.945272: step: 732/527, loss: 0.15637846291065216 2023-01-23 00:30:52.063903: step: 736/527, loss: 0.16790294647216797 2023-01-23 00:30:53.194193: step: 740/527, loss: 0.22962361574172974 2023-01-23 00:30:54.321759: step: 744/527, loss: 0.2084241807460785 2023-01-23 00:30:55.426017: step: 748/527, loss: 0.1070745512843132 2023-01-23 00:30:56.544793: step: 752/527, loss: 0.044196177273988724 2023-01-23 00:30:57.650259: step: 756/527, loss: 0.08995571732521057 2023-01-23 00:30:58.772789: step: 760/527, loss: 0.13057290017604828 2023-01-23 00:30:59.891031: step: 764/527, loss: 0.042356349527835846 2023-01-23 00:31:00.984482: step: 768/527, loss: 0.06146450340747833 2023-01-23 00:31:02.087926: step: 772/527, loss: 0.8324622511863708 2023-01-23 00:31:03.219985: step: 776/527, loss: 0.10629577934741974 2023-01-23 00:31:04.340665: step: 780/527, loss: 0.1776294708251953 2023-01-23 00:31:05.467162: step: 784/527, loss: 0.09487247467041016 2023-01-23 00:31:06.608096: step: 788/527, loss: 0.17201289534568787 2023-01-23 00:31:07.712607: step: 792/527, loss: 0.09751377254724503 2023-01-23 00:31:08.802050: step: 796/527, loss: 0.06043205410242081 2023-01-23 00:31:09.907577: step: 800/527, loss: 0.6203104257583618 2023-01-23 00:31:11.036943: step: 804/527, loss: 0.1668541133403778 2023-01-23 00:31:12.145531: step: 808/527, loss: 0.019964074715971947 2023-01-23 00:31:13.283298: step: 812/527, loss: 0.10484543442726135 2023-01-23 00:31:14.380800: step: 816/527, loss: 0.06051645427942276 2023-01-23 00:31:15.482735: step: 820/527, loss: 0.12209854274988174 2023-01-23 00:31:16.605294: step: 824/527, loss: 0.04891569912433624 2023-01-23 00:31:17.701708: step: 828/527, loss: 0.15463551878929138 2023-01-23 00:31:18.830756: step: 832/527, loss: 0.04897718504071236 2023-01-23 00:31:20.033217: step: 836/527, loss: 0.07085323333740234 2023-01-23 00:31:21.193701: step: 840/527, loss: 0.22726650536060333 2023-01-23 00:31:22.320980: step: 844/527, loss: 0.1680097132921219 2023-01-23 00:31:23.461764: step: 848/527, loss: 0.05432019382715225 2023-01-23 00:31:24.549378: step: 852/527, loss: 0.20855771005153656 2023-01-23 00:31:25.659056: step: 856/527, loss: 0.09163542091846466 2023-01-23 00:31:26.773779: step: 860/527, loss: 0.6569075584411621 2023-01-23 00:31:27.910741: step: 864/527, loss: 0.20458078384399414 2023-01-23 00:31:29.031218: step: 868/527, loss: 0.03446998447179794 2023-01-23 00:31:30.139780: step: 872/527, loss: 0.09217791259288788 2023-01-23 00:31:31.283425: step: 876/527, loss: 0.12784966826438904 2023-01-23 00:31:32.422738: step: 880/527, loss: 0.3509131371974945 2023-01-23 00:31:33.556580: step: 884/527, loss: 0.09518971294164658 2023-01-23 00:31:34.690423: step: 888/527, loss: 0.12601538002490997 2023-01-23 00:31:35.798986: step: 892/527, loss: 0.37552404403686523 2023-01-23 00:31:36.900822: step: 896/527, loss: 0.027489997446537018 2023-01-23 00:31:37.996033: step: 900/527, loss: 0.5072728395462036 2023-01-23 00:31:39.111672: step: 904/527, loss: 0.15026246011257172 2023-01-23 00:31:40.205277: step: 908/527, loss: 0.03210487961769104 2023-01-23 00:31:41.300756: step: 912/527, loss: 0.33800965547561646 2023-01-23 00:31:42.414949: step: 916/527, loss: 0.1711035966873169 2023-01-23 00:31:43.533040: step: 920/527, loss: 0.27540695667266846 2023-01-23 00:31:44.664354: step: 924/527, loss: 0.14751377701759338 2023-01-23 00:31:45.799704: step: 928/527, loss: 0.1953502595424652 2023-01-23 00:31:46.952730: step: 932/527, loss: 0.11080723255872726 2023-01-23 00:31:48.081527: step: 936/527, loss: 0.27181875705718994 2023-01-23 00:31:49.241334: step: 940/527, loss: 0.2269311547279358 2023-01-23 00:31:50.347197: step: 944/527, loss: 0.1031261682510376 2023-01-23 00:31:51.483000: step: 948/527, loss: 0.22477851808071136 2023-01-23 00:31:52.608479: step: 952/527, loss: 0.2573592960834503 2023-01-23 00:31:53.736015: step: 956/527, loss: 0.01927013322710991 2023-01-23 00:31:54.834821: step: 960/527, loss: 0.10109458118677139 2023-01-23 00:31:55.939914: step: 964/527, loss: 0.26626792550086975 2023-01-23 00:31:57.040007: step: 968/527, loss: 0.06411395221948624 2023-01-23 00:31:58.135461: step: 972/527, loss: 0.07802343368530273 2023-01-23 00:31:59.248587: step: 976/527, loss: 0.05781755596399307 2023-01-23 00:32:00.352677: step: 980/527, loss: 0.19363024830818176 2023-01-23 00:32:01.447079: step: 984/527, loss: 0.04104762151837349 2023-01-23 00:32:02.604955: step: 988/527, loss: 0.10942211747169495 2023-01-23 00:32:03.695624: step: 992/527, loss: 0.24775008857250214 2023-01-23 00:32:04.794390: step: 996/527, loss: 0.059580616652965546 2023-01-23 00:32:05.910828: step: 1000/527, loss: 0.19865284860134125 2023-01-23 00:32:07.041975: step: 1004/527, loss: 0.07532624900341034 2023-01-23 00:32:08.181763: step: 1008/527, loss: 0.0459408275783062 2023-01-23 00:32:09.267921: step: 1012/527, loss: 0.19169330596923828 2023-01-23 00:32:10.386412: step: 1016/527, loss: 0.16757772862911224 2023-01-23 00:32:11.526298: step: 1020/527, loss: 0.18535690009593964 2023-01-23 00:32:12.637556: step: 1024/527, loss: 0.10012922435998917 2023-01-23 00:32:13.773621: step: 1028/527, loss: 0.33195796608924866 2023-01-23 00:32:14.878868: step: 1032/527, loss: 0.08743729442358017 2023-01-23 00:32:15.986822: step: 1036/527, loss: 0.11253470927476883 2023-01-23 00:32:17.110963: step: 1040/527, loss: 0.0761350616812706 2023-01-23 00:32:18.211370: step: 1044/527, loss: 0.4874473512172699 2023-01-23 00:32:19.362041: step: 1048/527, loss: 0.10132065415382385 2023-01-23 00:32:20.478437: step: 1052/527, loss: 0.06235170364379883 2023-01-23 00:32:21.573928: step: 1056/527, loss: 0.12807168066501617 2023-01-23 00:32:22.681638: step: 1060/527, loss: 0.07180386036634445 2023-01-23 00:32:23.784717: step: 1064/527, loss: 0.04184355586767197 2023-01-23 00:32:24.896583: step: 1068/527, loss: 0.17963910102844238 2023-01-23 00:32:26.005022: step: 1072/527, loss: 0.1311967819929123 2023-01-23 00:32:27.125519: step: 1076/527, loss: 0.15443992614746094 2023-01-23 00:32:28.256660: step: 1080/527, loss: 0.05564575642347336 2023-01-23 00:32:29.372289: step: 1084/527, loss: 0.10488510131835938 2023-01-23 00:32:30.507392: step: 1088/527, loss: 0.07519850879907608 2023-01-23 00:32:31.599314: step: 1092/527, loss: 0.03843050077557564 2023-01-23 00:32:32.694381: step: 1096/527, loss: 0.06683111190795898 2023-01-23 00:32:33.837389: step: 1100/527, loss: 0.11720244586467743 2023-01-23 00:32:34.937336: step: 1104/527, loss: 0.173865407705307 2023-01-23 00:32:36.037899: step: 1108/527, loss: 0.10403481125831604 2023-01-23 00:32:37.133961: step: 1112/527, loss: 0.14471574127674103 2023-01-23 00:32:38.258216: step: 1116/527, loss: 0.12222138047218323 2023-01-23 00:32:39.353257: step: 1120/527, loss: 0.4596376419067383 2023-01-23 00:32:40.463615: step: 1124/527, loss: 0.04904680326581001 2023-01-23 00:32:41.577935: step: 1128/527, loss: 0.05549602583050728 2023-01-23 00:32:42.661367: step: 1132/527, loss: 0.06192345544695854 2023-01-23 00:32:43.777196: step: 1136/527, loss: 0.21808868646621704 2023-01-23 00:32:44.874705: step: 1140/527, loss: 0.016543006524443626 2023-01-23 00:32:45.965852: step: 1144/527, loss: 0.03120427206158638 2023-01-23 00:32:47.079142: step: 1148/527, loss: 0.09503450989723206 2023-01-23 00:32:48.210065: step: 1152/527, loss: 0.13844004273414612 2023-01-23 00:32:49.313874: step: 1156/527, loss: 0.03258562088012695 2023-01-23 00:32:50.411618: step: 1160/527, loss: 0.16277696192264557 2023-01-23 00:32:51.541005: step: 1164/527, loss: 0.104385145008564 2023-01-23 00:32:52.636792: step: 1168/527, loss: 0.3542178273200989 2023-01-23 00:32:53.736768: step: 1172/527, loss: 0.03200025483965874 2023-01-23 00:32:54.863083: step: 1176/527, loss: 1.0159810781478882 2023-01-23 00:32:55.956479: step: 1180/527, loss: 0.02443542517721653 2023-01-23 00:32:57.064103: step: 1184/527, loss: 0.07146044075489044 2023-01-23 00:32:58.201081: step: 1188/527, loss: 0.07184791564941406 2023-01-23 00:32:59.314460: step: 1192/527, loss: 0.20940819382667542 2023-01-23 00:33:00.463741: step: 1196/527, loss: 0.04494314268231392 2023-01-23 00:33:01.588470: step: 1200/527, loss: 0.042475320398807526 2023-01-23 00:33:02.721638: step: 1204/527, loss: 0.20813989639282227 2023-01-23 00:33:03.812440: step: 1208/527, loss: 0.08634022623300552 2023-01-23 00:33:04.955815: step: 1212/527, loss: 0.10069496929645538 2023-01-23 00:33:06.078123: step: 1216/527, loss: 0.06731309741735458 2023-01-23 00:33:07.171940: step: 1220/527, loss: 0.042203620076179504 2023-01-23 00:33:08.290888: step: 1224/527, loss: 0.066911980509758 2023-01-23 00:33:09.434687: step: 1228/527, loss: 0.1049363911151886 2023-01-23 00:33:10.545009: step: 1232/527, loss: 0.13648033142089844 2023-01-23 00:33:11.634923: step: 1236/527, loss: 0.057218264788389206 2023-01-23 00:33:12.757280: step: 1240/527, loss: 0.09241737425327301 2023-01-23 00:33:13.881324: step: 1244/527, loss: 0.08478298783302307 2023-01-23 00:33:15.050087: step: 1248/527, loss: 0.04098280519247055 2023-01-23 00:33:16.173848: step: 1252/527, loss: 0.5447441935539246 2023-01-23 00:33:17.261878: step: 1256/527, loss: 0.10794176906347275 2023-01-23 00:33:18.382762: step: 1260/527, loss: 0.09547443687915802 2023-01-23 00:33:19.503337: step: 1264/527, loss: 0.6401312947273254 2023-01-23 00:33:20.633641: step: 1268/527, loss: 0.08853588253259659 2023-01-23 00:33:21.765368: step: 1272/527, loss: 0.06858411431312561 2023-01-23 00:33:22.859866: step: 1276/527, loss: 0.047343719750642776 2023-01-23 00:33:23.971488: step: 1280/527, loss: 0.07362031936645508 2023-01-23 00:33:25.099299: step: 1284/527, loss: 0.030834747478365898 2023-01-23 00:33:26.187293: step: 1288/527, loss: 0.09466381371021271 2023-01-23 00:33:27.287557: step: 1292/527, loss: 0.033593546599149704 2023-01-23 00:33:28.422218: step: 1296/527, loss: 0.7086694240570068 2023-01-23 00:33:29.526645: step: 1300/527, loss: 0.8558475375175476 2023-01-23 00:33:30.628060: step: 1304/527, loss: 0.2469218373298645 2023-01-23 00:33:31.722992: step: 1308/527, loss: 0.06691642105579376 2023-01-23 00:33:32.855531: step: 1312/527, loss: 0.2022683173418045 2023-01-23 00:33:33.979162: step: 1316/527, loss: 0.07356643676757812 2023-01-23 00:33:35.111129: step: 1320/527, loss: 0.3981837034225464 2023-01-23 00:33:36.231774: step: 1324/527, loss: 0.5606789588928223 2023-01-23 00:33:37.351920: step: 1328/527, loss: 0.04904594272375107 2023-01-23 00:33:38.473986: step: 1332/527, loss: 0.1244468241930008 2023-01-23 00:33:39.570638: step: 1336/527, loss: 0.07025432586669922 2023-01-23 00:33:40.681679: step: 1340/527, loss: 0.16344553232192993 2023-01-23 00:33:41.847894: step: 1344/527, loss: 0.32741180062294006 2023-01-23 00:33:42.992232: step: 1348/527, loss: 0.07296828925609589 2023-01-23 00:33:44.096000: step: 1352/527, loss: 0.27629411220550537 2023-01-23 00:33:45.199686: step: 1356/527, loss: 0.12563781440258026 2023-01-23 00:33:46.312949: step: 1360/527, loss: 0.027243472635746002 2023-01-23 00:33:47.433247: step: 1364/527, loss: 0.05697689205408096 2023-01-23 00:33:48.559119: step: 1368/527, loss: 0.1365257352590561 2023-01-23 00:33:49.656242: step: 1372/527, loss: 0.0675632506608963 2023-01-23 00:33:50.800980: step: 1376/527, loss: 0.104736328125 2023-01-23 00:33:51.929840: step: 1380/527, loss: 0.11787891387939453 2023-01-23 00:33:53.058023: step: 1384/527, loss: 0.08608703315258026 2023-01-23 00:33:54.208397: step: 1388/527, loss: 0.11276207119226456 2023-01-23 00:33:55.339286: step: 1392/527, loss: 0.05546441301703453 2023-01-23 00:33:56.463607: step: 1396/527, loss: 0.09754600375890732 2023-01-23 00:33:57.593334: step: 1400/527, loss: 0.11838255077600479 2023-01-23 00:33:58.741327: step: 1404/527, loss: 0.29293060302734375 2023-01-23 00:33:59.855500: step: 1408/527, loss: 0.2585332989692688 2023-01-23 00:34:01.036535: step: 1412/527, loss: 0.0649944320321083 2023-01-23 00:34:02.165999: step: 1416/527, loss: 0.4411774277687073 2023-01-23 00:34:03.310767: step: 1420/527, loss: 0.3748853802680969 2023-01-23 00:34:04.436867: step: 1424/527, loss: 0.14224694669246674 2023-01-23 00:34:05.533596: step: 1428/527, loss: 0.12376852333545685 2023-01-23 00:34:06.652181: step: 1432/527, loss: 0.0445614829659462 2023-01-23 00:34:07.766245: step: 1436/527, loss: 0.10244321823120117 2023-01-23 00:34:08.880068: step: 1440/527, loss: 0.049125123769044876 2023-01-23 00:34:09.991280: step: 1444/527, loss: 0.0648014098405838 2023-01-23 00:34:11.097210: step: 1448/527, loss: 0.043448470532894135 2023-01-23 00:34:12.203306: step: 1452/527, loss: 2.2965469360351562 2023-01-23 00:34:13.323820: step: 1456/527, loss: 0.08939695358276367 2023-01-23 00:34:14.418505: step: 1460/527, loss: 0.49545416235923767 2023-01-23 00:34:15.515116: step: 1464/527, loss: 0.2712860107421875 2023-01-23 00:34:16.679809: step: 1468/527, loss: 0.12156401574611664 2023-01-23 00:34:17.765209: step: 1472/527, loss: 0.06898212432861328 2023-01-23 00:34:18.880869: step: 1476/527, loss: 0.7155297994613647 2023-01-23 00:34:20.014442: step: 1480/527, loss: 0.5859881639480591 2023-01-23 00:34:21.140294: step: 1484/527, loss: 0.2621147036552429 2023-01-23 00:34:22.253816: step: 1488/527, loss: 0.2753903269767761 2023-01-23 00:34:23.378362: step: 1492/527, loss: 0.07720744609832764 2023-01-23 00:34:24.514565: step: 1496/527, loss: 0.3880332112312317 2023-01-23 00:34:25.626860: step: 1500/527, loss: 0.029722118750214577 2023-01-23 00:34:26.726377: step: 1504/527, loss: 0.12754212319850922 2023-01-23 00:34:27.883322: step: 1508/527, loss: 0.03767247498035431 2023-01-23 00:34:29.006877: step: 1512/527, loss: 0.1910894364118576 2023-01-23 00:34:30.101947: step: 1516/527, loss: 0.20364342629909515 2023-01-23 00:34:31.244802: step: 1520/527, loss: 0.10594988614320755 2023-01-23 00:34:32.351746: step: 1524/527, loss: 0.39296627044677734 2023-01-23 00:34:33.461691: step: 1528/527, loss: 0.20036441087722778 2023-01-23 00:34:34.562657: step: 1532/527, loss: 0.22469262778759003 2023-01-23 00:34:35.654389: step: 1536/527, loss: 0.7947580218315125 2023-01-23 00:34:36.744586: step: 1540/527, loss: 0.14896979928016663 2023-01-23 00:34:37.865095: step: 1544/527, loss: 0.07383136451244354 2023-01-23 00:34:38.969844: step: 1548/527, loss: 0.3677327036857605 2023-01-23 00:34:40.080184: step: 1552/527, loss: 0.2320484071969986 2023-01-23 00:34:41.157057: step: 1556/527, loss: 0.045197200030088425 2023-01-23 00:34:42.287056: step: 1560/527, loss: 0.04570980370044708 2023-01-23 00:34:43.389347: step: 1564/527, loss: 0.1397724151611328 2023-01-23 00:34:44.479845: step: 1568/527, loss: 0.050203919410705566 2023-01-23 00:34:45.592963: step: 1572/527, loss: 0.10314774513244629 2023-01-23 00:34:46.706356: step: 1576/527, loss: 0.14121532440185547 2023-01-23 00:34:47.818170: step: 1580/527, loss: 0.1390089988708496 2023-01-23 00:34:48.937790: step: 1584/527, loss: 0.16678285598754883 2023-01-23 00:34:50.081231: step: 1588/527, loss: 0.06695995479822159 2023-01-23 00:34:51.218262: step: 1592/527, loss: 0.1356671303510666 2023-01-23 00:34:52.361375: step: 1596/527, loss: 0.12512041628360748 2023-01-23 00:34:53.475299: step: 1600/527, loss: 0.15033353865146637 2023-01-23 00:34:54.577385: step: 1604/527, loss: 0.08384790271520615 2023-01-23 00:34:55.673762: step: 1608/527, loss: 0.12716065347194672 2023-01-23 00:34:56.785455: step: 1612/527, loss: 1.3559350967407227 2023-01-23 00:34:57.909067: step: 1616/527, loss: 0.2778850495815277 2023-01-23 00:34:59.004497: step: 1620/527, loss: 0.21592837572097778 2023-01-23 00:35:00.134921: step: 1624/527, loss: 1.6223134994506836 2023-01-23 00:35:01.255125: step: 1628/527, loss: 0.11719150841236115 2023-01-23 00:35:02.385647: step: 1632/527, loss: 0.02186565473675728 2023-01-23 00:35:03.515393: step: 1636/527, loss: 0.06387953460216522 2023-01-23 00:35:04.640609: step: 1640/527, loss: 0.28336697816848755 2023-01-23 00:35:05.739530: step: 1644/527, loss: 0.059557151049375534 2023-01-23 00:35:06.858839: step: 1648/527, loss: 0.10761547088623047 2023-01-23 00:35:07.962812: step: 1652/527, loss: 0.0354095958173275 2023-01-23 00:35:09.059739: step: 1656/527, loss: 0.023825978860259056 2023-01-23 00:35:10.211398: step: 1660/527, loss: 0.6483904123306274 2023-01-23 00:35:11.337279: step: 1664/527, loss: 0.13121700286865234 2023-01-23 00:35:12.480028: step: 1668/527, loss: 0.08512725681066513 2023-01-23 00:35:13.599839: step: 1672/527, loss: 0.24677523970603943 2023-01-23 00:35:14.761320: step: 1676/527, loss: 0.1847589612007141 2023-01-23 00:35:15.863746: step: 1680/527, loss: 0.05692744627594948 2023-01-23 00:35:16.965590: step: 1684/527, loss: 0.03351978957653046 2023-01-23 00:35:18.083869: step: 1688/527, loss: 0.20769162476062775 2023-01-23 00:35:19.197922: step: 1692/527, loss: 0.1787862777709961 2023-01-23 00:35:20.319077: step: 1696/527, loss: 0.11295967549085617 2023-01-23 00:35:21.467160: step: 1700/527, loss: 0.2951013445854187 2023-01-23 00:35:22.606087: step: 1704/527, loss: 0.7350033521652222 2023-01-23 00:35:23.710880: step: 1708/527, loss: 0.1782231330871582 2023-01-23 00:35:24.809112: step: 1712/527, loss: 0.06197957694530487 2023-01-23 00:35:25.956962: step: 1716/527, loss: 0.137616828083992 2023-01-23 00:35:27.081759: step: 1720/527, loss: 0.12330102920532227 2023-01-23 00:35:28.187071: step: 1724/527, loss: 0.07322268187999725 2023-01-23 00:35:29.319763: step: 1728/527, loss: 0.10931578278541565 2023-01-23 00:35:30.423050: step: 1732/527, loss: 0.21992158889770508 2023-01-23 00:35:31.536907: step: 1736/527, loss: 0.03928103670477867 2023-01-23 00:35:32.655346: step: 1740/527, loss: 0.1692342758178711 2023-01-23 00:35:33.784787: step: 1744/527, loss: 0.8762350678443909 2023-01-23 00:35:34.920713: step: 1748/527, loss: 0.08703046292066574 2023-01-23 00:35:36.030010: step: 1752/527, loss: 0.14437074959278107 2023-01-23 00:35:37.136178: step: 1756/527, loss: 0.6991404294967651 2023-01-23 00:35:38.269820: step: 1760/527, loss: 1.4307262897491455 2023-01-23 00:35:39.357639: step: 1764/527, loss: 0.11755216121673584 2023-01-23 00:35:40.474029: step: 1768/527, loss: 0.044787146151065826 2023-01-23 00:35:41.606961: step: 1772/527, loss: 0.3315788507461548 2023-01-23 00:35:42.741251: step: 1776/527, loss: 0.0509905107319355 2023-01-23 00:35:43.861656: step: 1780/527, loss: 0.10356564819812775 2023-01-23 00:35:44.990253: step: 1784/527, loss: 0.11475391685962677 2023-01-23 00:35:46.075629: step: 1788/527, loss: 0.07465533912181854 2023-01-23 00:35:47.168078: step: 1792/527, loss: 0.623273491859436 2023-01-23 00:35:48.266221: step: 1796/527, loss: 1.0531699657440186 2023-01-23 00:35:49.371784: step: 1800/527, loss: 0.16995516419410706 2023-01-23 00:35:50.488496: step: 1804/527, loss: 0.3234786093235016 2023-01-23 00:35:51.614274: step: 1808/527, loss: 0.07282867282629013 2023-01-23 00:35:52.724504: step: 1812/527, loss: 0.08937902003526688 2023-01-23 00:35:53.864790: step: 1816/527, loss: 0.08460541069507599 2023-01-23 00:35:54.967662: step: 1820/527, loss: 0.25552287697792053 2023-01-23 00:35:56.074689: step: 1824/527, loss: 0.511398434638977 2023-01-23 00:35:57.226395: step: 1828/527, loss: 0.2710249423980713 2023-01-23 00:35:58.325730: step: 1832/527, loss: 0.11441951245069504 2023-01-23 00:35:59.473550: step: 1836/527, loss: 0.08043336868286133 2023-01-23 00:36:00.592841: step: 1840/527, loss: 0.09224581718444824 2023-01-23 00:36:01.719240: step: 1844/527, loss: 0.05091162025928497 2023-01-23 00:36:02.831680: step: 1848/527, loss: 0.033593371510505676 2023-01-23 00:36:03.933245: step: 1852/527, loss: 0.10790939629077911 2023-01-23 00:36:05.021770: step: 1856/527, loss: 0.12572938203811646 2023-01-23 00:36:06.157373: step: 1860/527, loss: 0.26958292722702026 2023-01-23 00:36:07.273710: step: 1864/527, loss: 0.23192210495471954 2023-01-23 00:36:08.375549: step: 1868/527, loss: 0.25937768816947937 2023-01-23 00:36:09.511127: step: 1872/527, loss: 0.11124172806739807 2023-01-23 00:36:10.622645: step: 1876/527, loss: 0.14579525589942932 2023-01-23 00:36:11.744385: step: 1880/527, loss: 0.21267780661582947 2023-01-23 00:36:12.856940: step: 1884/527, loss: 0.13840064406394958 2023-01-23 00:36:13.977546: step: 1888/527, loss: 0.24192224442958832 2023-01-23 00:36:15.121915: step: 1892/527, loss: 0.21090145409107208 2023-01-23 00:36:16.271957: step: 1896/527, loss: 0.3962695002555847 2023-01-23 00:36:17.370333: step: 1900/527, loss: 0.45139873027801514 2023-01-23 00:36:18.463842: step: 1904/527, loss: 0.11555171757936478 2023-01-23 00:36:19.564035: step: 1908/527, loss: 0.07022295147180557 2023-01-23 00:36:20.676029: step: 1912/527, loss: 0.14107860624790192 2023-01-23 00:36:21.769491: step: 1916/527, loss: 0.06297574192285538 2023-01-23 00:36:22.868204: step: 1920/527, loss: 0.17644786834716797 2023-01-23 00:36:23.982546: step: 1924/527, loss: 0.8610503077507019 2023-01-23 00:36:25.097723: step: 1928/527, loss: 0.10697450488805771 2023-01-23 00:36:26.214673: step: 1932/527, loss: 0.16575948894023895 2023-01-23 00:36:27.330531: step: 1936/527, loss: 0.055447958409786224 2023-01-23 00:36:28.437539: step: 1940/527, loss: 0.04218924418091774 2023-01-23 00:36:29.526914: step: 1944/527, loss: 0.1842663288116455 2023-01-23 00:36:30.627263: step: 1948/527, loss: 0.03675966337323189 2023-01-23 00:36:31.747400: step: 1952/527, loss: 0.0435916893184185 2023-01-23 00:36:32.868735: step: 1956/527, loss: 0.283983439207077 2023-01-23 00:36:33.984575: step: 1960/527, loss: 0.1147836223244667 2023-01-23 00:36:35.100041: step: 1964/527, loss: 0.04272041469812393 2023-01-23 00:36:36.174823: step: 1968/527, loss: 0.04331010952591896 2023-01-23 00:36:37.297337: step: 1972/527, loss: 0.4950031340122223 2023-01-23 00:36:38.433196: step: 1976/527, loss: 0.0833769366145134 2023-01-23 00:36:39.525211: step: 1980/527, loss: 0.1930265873670578 2023-01-23 00:36:40.633918: step: 1984/527, loss: 0.3233078420162201 2023-01-23 00:36:41.786072: step: 1988/527, loss: 0.11017389595508575 2023-01-23 00:36:42.915751: step: 1992/527, loss: 0.2645139694213867 2023-01-23 00:36:44.014394: step: 1996/527, loss: 0.16826733946800232 2023-01-23 00:36:45.103538: step: 2000/527, loss: 0.21377840638160706 2023-01-23 00:36:46.210380: step: 2004/527, loss: 0.027663325890898705 2023-01-23 00:36:47.330800: step: 2008/527, loss: 1.346925139427185 2023-01-23 00:36:48.444883: step: 2012/527, loss: 0.12795507907867432 2023-01-23 00:36:49.569690: step: 2016/527, loss: 0.14580735564231873 2023-01-23 00:36:50.685987: step: 2020/527, loss: 0.06216411292552948 2023-01-23 00:36:51.796871: step: 2024/527, loss: 0.2667986750602722 2023-01-23 00:36:52.912562: step: 2028/527, loss: 0.2472350150346756 2023-01-23 00:36:54.023798: step: 2032/527, loss: 0.3867262005805969 2023-01-23 00:36:55.147453: step: 2036/527, loss: 0.1298755705356598 2023-01-23 00:36:56.251792: step: 2040/527, loss: 0.10681380331516266 2023-01-23 00:36:57.332885: step: 2044/527, loss: 0.07795391231775284 2023-01-23 00:36:58.469147: step: 2048/527, loss: 0.1490764617919922 2023-01-23 00:36:59.596997: step: 2052/527, loss: 0.045057106763124466 2023-01-23 00:37:00.700188: step: 2056/527, loss: 0.014367294497787952 2023-01-23 00:37:01.843895: step: 2060/527, loss: 0.11018409579992294 2023-01-23 00:37:02.944972: step: 2064/527, loss: 0.14450684189796448 2023-01-23 00:37:04.060991: step: 2068/527, loss: 0.20723925530910492 2023-01-23 00:37:05.193193: step: 2072/527, loss: 0.20739594101905823 2023-01-23 00:37:06.307957: step: 2076/527, loss: 0.28867560625076294 2023-01-23 00:37:07.457720: step: 2080/527, loss: 0.21481843292713165 2023-01-23 00:37:08.553356: step: 2084/527, loss: 0.019100641831755638 2023-01-23 00:37:09.665814: step: 2088/527, loss: 0.2526426315307617 2023-01-23 00:37:10.761524: step: 2092/527, loss: 0.04098861292004585 2023-01-23 00:37:11.886198: step: 2096/527, loss: 0.06792278587818146 2023-01-23 00:37:12.994430: step: 2100/527, loss: 0.4546794891357422 2023-01-23 00:37:14.097276: step: 2104/527, loss: 0.0580110177397728 2023-01-23 00:37:15.207319: step: 2108/527, loss: 0.07411313056945801 ================================================== Loss: 0.216 -------------------- Dev: {'event': {'p': 0.5728542914171657, 'r': 0.7643142476697736, 'f1': 0.6548773531089561}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Test: {'event': {'p': 0.6153136531365314, 'r': 0.7622857142857142, 'f1': 0.6809596733027055}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Chinese: {'event': {'p': 0.5764705882352941, 'r': 0.9074074074074074, 'f1': 0.7050359712230215}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Korean: {'event': {'p': 0.5833333333333334, 'r': 0.5555555555555556, 'f1': 0.5691056910569107}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Russian: {'event': {'p': 0.4222222222222222, 'r': 0.5277777777777778, 'f1': 0.46913580246913583}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.623059866962306, 'r': 0.748335552596538, 'f1': 0.6799758015728978}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Eng Test for Korean: {'event': {'p': 0.6303294573643411, 'r': 0.7434285714285714, 'f1': 0.6822233875196644}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Korean: {'event': {'p': 0.725, 'r': 0.4603174603174603, 'f1': 0.5631067961165048}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 7 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:37:56.949851: step: 4/527, loss: 0.4163857698440552 2023-01-23 00:37:58.069415: step: 8/527, loss: 0.1529022753238678 2023-01-23 00:37:59.176424: step: 12/527, loss: 0.05762157216668129 2023-01-23 00:38:00.269683: step: 16/527, loss: 0.08841285854578018 2023-01-23 00:38:01.376869: step: 20/527, loss: 0.0973958969116211 2023-01-23 00:38:02.502030: step: 24/527, loss: 0.08074665069580078 2023-01-23 00:38:03.643425: step: 28/527, loss: 0.041546061635017395 2023-01-23 00:38:04.766930: step: 32/527, loss: 0.08659076690673828 2023-01-23 00:38:05.889328: step: 36/527, loss: 0.03707285225391388 2023-01-23 00:38:07.011795: step: 40/527, loss: 0.034918975085020065 2023-01-23 00:38:08.135596: step: 44/527, loss: 1.883135437965393 2023-01-23 00:38:09.255756: step: 48/527, loss: 0.04940805584192276 2023-01-23 00:38:10.376209: step: 52/527, loss: 0.47926968336105347 2023-01-23 00:38:11.483158: step: 56/527, loss: 0.4219369888305664 2023-01-23 00:38:12.594419: step: 60/527, loss: 0.165618896484375 2023-01-23 00:38:13.697190: step: 64/527, loss: 0.09352541714906693 2023-01-23 00:38:14.807276: step: 68/527, loss: 0.10504341125488281 2023-01-23 00:38:15.939094: step: 72/527, loss: 0.06129646301269531 2023-01-23 00:38:17.043611: step: 76/527, loss: 0.09223870933055878 2023-01-23 00:38:18.166499: step: 80/527, loss: 0.041403770446777344 2023-01-23 00:38:19.304165: step: 84/527, loss: 0.18043842911720276 2023-01-23 00:38:20.417987: step: 88/527, loss: 0.05343092605471611 2023-01-23 00:38:21.554054: step: 92/527, loss: 0.1469569355249405 2023-01-23 00:38:22.686028: step: 96/527, loss: 0.049339961260557175 2023-01-23 00:38:23.809997: step: 100/527, loss: 0.3157622516155243 2023-01-23 00:38:24.934692: step: 104/527, loss: 0.060452986508607864 2023-01-23 00:38:26.026498: step: 108/527, loss: 0.08834151923656464 2023-01-23 00:38:27.137321: step: 112/527, loss: 0.0646904930472374 2023-01-23 00:38:28.311549: step: 116/527, loss: 0.18475113809108734 2023-01-23 00:38:29.417588: step: 120/527, loss: 0.06487848609685898 2023-01-23 00:38:30.567289: step: 124/527, loss: 0.018282031640410423 2023-01-23 00:38:31.691277: step: 128/527, loss: 0.72438645362854 2023-01-23 00:38:32.817887: step: 132/527, loss: 0.05598144605755806 2023-01-23 00:38:33.927547: step: 136/527, loss: 0.2264978438615799 2023-01-23 00:38:35.036104: step: 140/527, loss: 0.03982143476605415 2023-01-23 00:38:36.152774: step: 144/527, loss: 0.14689961075782776 2023-01-23 00:38:37.289385: step: 148/527, loss: 0.4916355311870575 2023-01-23 00:38:38.394684: step: 152/527, loss: 0.07981789112091064 2023-01-23 00:38:39.539995: step: 156/527, loss: 0.18129000067710876 2023-01-23 00:38:40.658974: step: 160/527, loss: 0.2122432291507721 2023-01-23 00:38:41.785598: step: 164/527, loss: 0.16031265258789062 2023-01-23 00:38:42.946298: step: 168/527, loss: 0.09366440773010254 2023-01-23 00:38:44.074869: step: 172/527, loss: 0.06383056938648224 2023-01-23 00:38:45.210147: step: 176/527, loss: 0.02323140949010849 2023-01-23 00:38:46.354683: step: 180/527, loss: 0.0427170991897583 2023-01-23 00:38:47.512932: step: 184/527, loss: 0.02111082151532173 2023-01-23 00:38:48.620296: step: 188/527, loss: 0.1825898289680481 2023-01-23 00:38:49.762770: step: 192/527, loss: 0.06691794097423553 2023-01-23 00:38:50.952805: step: 196/527, loss: 0.03627481311559677 2023-01-23 00:38:52.096738: step: 200/527, loss: 0.2426256686449051 2023-01-23 00:38:53.230322: step: 204/527, loss: 0.08774900436401367 2023-01-23 00:38:54.332043: step: 208/527, loss: 0.015432262793183327 2023-01-23 00:38:55.438386: step: 212/527, loss: 0.1281810998916626 2023-01-23 00:38:56.557917: step: 216/527, loss: 0.11239566653966904 2023-01-23 00:38:57.661481: step: 220/527, loss: 0.14200712740421295 2023-01-23 00:38:58.767185: step: 224/527, loss: 0.020856954157352448 2023-01-23 00:38:59.872269: step: 228/527, loss: 0.42918795347213745 2023-01-23 00:39:00.990363: step: 232/527, loss: 0.3628288507461548 2023-01-23 00:39:02.127910: step: 236/527, loss: 0.0947122573852539 2023-01-23 00:39:03.229720: step: 240/527, loss: 0.19834065437316895 2023-01-23 00:39:04.382222: step: 244/527, loss: 0.4911825954914093 2023-01-23 00:39:05.482796: step: 248/527, loss: 0.09635010361671448 2023-01-23 00:39:06.590741: step: 252/527, loss: 0.17898845672607422 2023-01-23 00:39:07.696528: step: 256/527, loss: 0.022247934713959694 2023-01-23 00:39:08.808160: step: 260/527, loss: 0.03129100799560547 2023-01-23 00:39:09.950670: step: 264/527, loss: 0.4095683991909027 2023-01-23 00:39:11.059341: step: 268/527, loss: 0.40701723098754883 2023-01-23 00:39:12.169782: step: 272/527, loss: 0.04488027095794678 2023-01-23 00:39:13.301936: step: 276/527, loss: 0.09322738647460938 2023-01-23 00:39:14.418168: step: 280/527, loss: 0.03998060151934624 2023-01-23 00:39:15.511142: step: 284/527, loss: 0.1009332686662674 2023-01-23 00:39:16.651407: step: 288/527, loss: 0.093017578125 2023-01-23 00:39:17.733659: step: 292/527, loss: 0.10761814564466476 2023-01-23 00:39:18.841487: step: 296/527, loss: 0.12481708824634552 2023-01-23 00:39:19.984271: step: 300/527, loss: 0.22298498451709747 2023-01-23 00:39:21.115292: step: 304/527, loss: 0.03874704986810684 2023-01-23 00:39:22.243974: step: 308/527, loss: 0.10267810523509979 2023-01-23 00:39:23.347754: step: 312/527, loss: 0.11853323131799698 2023-01-23 00:39:24.515026: step: 316/527, loss: 0.18502922356128693 2023-01-23 00:39:25.619298: step: 320/527, loss: 0.23269948363304138 2023-01-23 00:39:26.739217: step: 324/527, loss: 0.16119326651096344 2023-01-23 00:39:27.830567: step: 328/527, loss: 0.2053823471069336 2023-01-23 00:39:28.950991: step: 332/527, loss: 0.20369234681129456 2023-01-23 00:39:30.099569: step: 336/527, loss: 0.0752011314034462 2023-01-23 00:39:31.223494: step: 340/527, loss: 0.23168422281742096 2023-01-23 00:39:32.369375: step: 344/527, loss: 0.11187683045864105 2023-01-23 00:39:33.496745: step: 348/527, loss: 0.07700939476490021 2023-01-23 00:39:34.620838: step: 352/527, loss: 0.09568607807159424 2023-01-23 00:39:35.725206: step: 356/527, loss: 0.09235820919275284 2023-01-23 00:39:36.834753: step: 360/527, loss: 0.04071841016411781 2023-01-23 00:39:37.965279: step: 364/527, loss: 0.3631020486354828 2023-01-23 00:39:39.111288: step: 368/527, loss: 0.08378183841705322 2023-01-23 00:39:40.238391: step: 372/527, loss: 0.10895195603370667 2023-01-23 00:39:41.356760: step: 376/527, loss: 0.2277248501777649 2023-01-23 00:39:42.496066: step: 380/527, loss: 0.08755837380886078 2023-01-23 00:39:43.591026: step: 384/527, loss: 0.021001672372221947 2023-01-23 00:39:44.701586: step: 388/527, loss: 0.03189681097865105 2023-01-23 00:39:45.798593: step: 392/527, loss: 0.07882877439260483 2023-01-23 00:39:46.891081: step: 396/527, loss: 0.051482390612363815 2023-01-23 00:39:48.005517: step: 400/527, loss: 0.05202658474445343 2023-01-23 00:39:49.130719: step: 404/527, loss: 0.010090017691254616 2023-01-23 00:39:50.268127: step: 408/527, loss: 0.04227237403392792 2023-01-23 00:39:51.392407: step: 412/527, loss: 0.0582159049808979 2023-01-23 00:39:52.477316: step: 416/527, loss: 0.04844675213098526 2023-01-23 00:39:53.626449: step: 420/527, loss: 0.1275196075439453 2023-01-23 00:39:54.765646: step: 424/527, loss: 0.03432321548461914 2023-01-23 00:39:55.913654: step: 428/527, loss: 0.027138233184814453 2023-01-23 00:39:57.039625: step: 432/527, loss: 0.15999102592468262 2023-01-23 00:39:58.158890: step: 436/527, loss: 0.0599401481449604 2023-01-23 00:39:59.309698: step: 440/527, loss: 0.13749723136425018 2023-01-23 00:40:00.423669: step: 444/527, loss: 0.1396523416042328 2023-01-23 00:40:01.545071: step: 448/527, loss: 0.1558721512556076 2023-01-23 00:40:02.655252: step: 452/527, loss: 0.0588221549987793 2023-01-23 00:40:03.779211: step: 456/527, loss: 0.0807713121175766 2023-01-23 00:40:04.874329: step: 460/527, loss: 0.2337636947631836 2023-01-23 00:40:05.992552: step: 464/527, loss: 0.353346586227417 2023-01-23 00:40:07.087056: step: 468/527, loss: 0.3557613492012024 2023-01-23 00:40:08.192123: step: 472/527, loss: 0.0360901840031147 2023-01-23 00:40:09.307299: step: 476/527, loss: 0.057752180844545364 2023-01-23 00:40:10.411131: step: 480/527, loss: 0.05274629592895508 2023-01-23 00:40:11.550300: step: 484/527, loss: 0.21701183915138245 2023-01-23 00:40:12.674014: step: 488/527, loss: 0.11842170357704163 2023-01-23 00:40:13.811035: step: 492/527, loss: 0.07372350990772247 2023-01-23 00:40:14.932658: step: 496/527, loss: 0.1916522979736328 2023-01-23 00:40:16.072881: step: 500/527, loss: 0.026337623596191406 2023-01-23 00:40:17.209514: step: 504/527, loss: 0.1076316386461258 2023-01-23 00:40:18.317832: step: 508/527, loss: 0.0957251638174057 2023-01-23 00:40:19.471477: step: 512/527, loss: 0.1499813199043274 2023-01-23 00:40:20.570068: step: 516/527, loss: 0.08520378917455673 2023-01-23 00:40:21.684287: step: 520/527, loss: 0.016389036551117897 2023-01-23 00:40:22.789741: step: 524/527, loss: 0.18101529777050018 2023-01-23 00:40:23.913316: step: 528/527, loss: 0.867286205291748 2023-01-23 00:40:25.019440: step: 532/527, loss: 0.13916663825511932 2023-01-23 00:40:26.128816: step: 536/527, loss: 0.027466487139463425 2023-01-23 00:40:27.235442: step: 540/527, loss: 0.09202560782432556 2023-01-23 00:40:28.351725: step: 544/527, loss: 0.056922055780887604 2023-01-23 00:40:29.515002: step: 548/527, loss: 0.03030090220272541 2023-01-23 00:40:30.647808: step: 552/527, loss: 0.08011779934167862 2023-01-23 00:40:31.747848: step: 556/527, loss: 0.2224745750427246 2023-01-23 00:40:32.866046: step: 560/527, loss: 1.09000825881958 2023-01-23 00:40:33.983440: step: 564/527, loss: 0.11000881344079971 2023-01-23 00:40:35.113787: step: 568/527, loss: 0.10341700911521912 2023-01-23 00:40:36.203777: step: 572/527, loss: 0.09961043298244476 2023-01-23 00:40:37.315948: step: 576/527, loss: 0.08434166759252548 2023-01-23 00:40:38.409602: step: 580/527, loss: 1.3565940856933594 2023-01-23 00:40:39.531812: step: 584/527, loss: 0.061338040977716446 2023-01-23 00:40:40.661788: step: 588/527, loss: 0.09565553814172745 2023-01-23 00:40:41.762054: step: 592/527, loss: 0.058181095868349075 2023-01-23 00:40:42.881874: step: 596/527, loss: 0.028829671442508698 2023-01-23 00:40:43.997697: step: 600/527, loss: 0.624508798122406 2023-01-23 00:40:45.113461: step: 604/527, loss: 0.13610239326953888 2023-01-23 00:40:46.264671: step: 608/527, loss: 0.20758108794689178 2023-01-23 00:40:47.377921: step: 612/527, loss: 0.049506377428770065 2023-01-23 00:40:48.523312: step: 616/527, loss: 0.10307340323925018 2023-01-23 00:40:49.633158: step: 620/527, loss: 0.2556304931640625 2023-01-23 00:40:50.758691: step: 624/527, loss: 0.027541637420654297 2023-01-23 00:40:51.891264: step: 628/527, loss: 0.009839916601777077 2023-01-23 00:40:53.026543: step: 632/527, loss: 0.10135894268751144 2023-01-23 00:40:54.137029: step: 636/527, loss: 0.10231619328260422 2023-01-23 00:40:55.230429: step: 640/527, loss: 0.06040620803833008 2023-01-23 00:40:56.349057: step: 644/527, loss: 0.027965473011136055 2023-01-23 00:40:57.512082: step: 648/527, loss: 0.046386051923036575 2023-01-23 00:40:58.594902: step: 652/527, loss: 0.06775007396936417 2023-01-23 00:40:59.696628: step: 656/527, loss: 0.06058797985315323 2023-01-23 00:41:00.821903: step: 660/527, loss: 0.090027816593647 2023-01-23 00:41:01.926557: step: 664/527, loss: 0.07801564782857895 2023-01-23 00:41:03.060416: step: 668/527, loss: 0.08385339379310608 2023-01-23 00:41:04.148682: step: 672/527, loss: 0.08331829309463501 2023-01-23 00:41:05.246264: step: 676/527, loss: 0.6659325361251831 2023-01-23 00:41:06.359504: step: 680/527, loss: 0.7293831706047058 2023-01-23 00:41:07.542651: step: 684/527, loss: 0.05969591438770294 2023-01-23 00:41:08.641242: step: 688/527, loss: 0.6861038208007812 2023-01-23 00:41:09.734628: step: 692/527, loss: 0.04558753967285156 2023-01-23 00:41:10.865600: step: 696/527, loss: 0.01676201820373535 2023-01-23 00:41:11.947855: step: 700/527, loss: 0.09361982345581055 2023-01-23 00:41:13.052494: step: 704/527, loss: 0.09973125159740448 2023-01-23 00:41:14.174081: step: 708/527, loss: 0.06737489998340607 2023-01-23 00:41:15.306905: step: 712/527, loss: 0.19065189361572266 2023-01-23 00:41:16.420913: step: 716/527, loss: 0.030136965215206146 2023-01-23 00:41:17.531904: step: 720/527, loss: 0.19711259007453918 2023-01-23 00:41:18.629124: step: 724/527, loss: 0.10855312645435333 2023-01-23 00:41:19.799312: step: 728/527, loss: 0.19465523958206177 2023-01-23 00:41:20.905603: step: 732/527, loss: 0.26685845851898193 2023-01-23 00:41:22.028506: step: 736/527, loss: 0.22611409425735474 2023-01-23 00:41:23.142832: step: 740/527, loss: 0.1645619422197342 2023-01-23 00:41:24.250352: step: 744/527, loss: 0.20419436693191528 2023-01-23 00:41:25.388588: step: 748/527, loss: 0.3499457836151123 2023-01-23 00:41:26.506263: step: 752/527, loss: 0.06739039719104767 2023-01-23 00:41:27.629520: step: 756/527, loss: 0.06154441833496094 2023-01-23 00:41:28.750027: step: 760/527, loss: 0.12735189497470856 2023-01-23 00:41:29.867519: step: 764/527, loss: 0.15564127266407013 2023-01-23 00:41:30.978494: step: 768/527, loss: 0.08485320955514908 2023-01-23 00:41:32.113908: step: 772/527, loss: 0.11284656822681427 2023-01-23 00:41:33.261004: step: 776/527, loss: 0.06927724182605743 2023-01-23 00:41:34.349747: step: 780/527, loss: 0.32635122537612915 2023-01-23 00:41:35.474636: step: 784/527, loss: 0.10320340096950531 2023-01-23 00:41:36.577769: step: 788/527, loss: 0.10767364501953125 2023-01-23 00:41:37.679383: step: 792/527, loss: 0.09046483039855957 2023-01-23 00:41:38.807660: step: 796/527, loss: 0.11691588908433914 2023-01-23 00:41:39.913029: step: 800/527, loss: 0.3364820182323456 2023-01-23 00:41:41.062518: step: 804/527, loss: 0.0500788688659668 2023-01-23 00:41:42.171463: step: 808/527, loss: 0.12557896971702576 2023-01-23 00:41:43.253087: step: 812/527, loss: 0.054504796862602234 2023-01-23 00:41:44.394392: step: 816/527, loss: 0.1100677102804184 2023-01-23 00:41:45.515011: step: 820/527, loss: 0.3233882784843445 2023-01-23 00:41:46.631302: step: 824/527, loss: 0.09544192254543304 2023-01-23 00:41:47.765043: step: 828/527, loss: 0.7357593774795532 2023-01-23 00:41:48.859069: step: 832/527, loss: 0.2747243642807007 2023-01-23 00:41:50.012351: step: 836/527, loss: 0.08035686612129211 2023-01-23 00:41:51.146688: step: 840/527, loss: 0.08558931201696396 2023-01-23 00:41:52.289972: step: 844/527, loss: 0.13456383347511292 2023-01-23 00:41:53.408247: step: 848/527, loss: 0.21272344887256622 2023-01-23 00:41:54.533332: step: 852/527, loss: 0.17726688086986542 2023-01-23 00:41:55.660016: step: 856/527, loss: 0.12897071242332458 2023-01-23 00:41:56.785339: step: 860/527, loss: 0.8192094564437866 2023-01-23 00:41:57.919878: step: 864/527, loss: 0.16395854949951172 2023-01-23 00:41:59.039953: step: 868/527, loss: 0.12887009978294373 2023-01-23 00:42:00.182052: step: 872/527, loss: 0.13416939973831177 2023-01-23 00:42:01.280408: step: 876/527, loss: 0.055257562547922134 2023-01-23 00:42:02.397558: step: 880/527, loss: 0.24006567895412445 2023-01-23 00:42:03.483729: step: 884/527, loss: 2.345228910446167 2023-01-23 00:42:04.596622: step: 888/527, loss: 0.10308703780174255 2023-01-23 00:42:05.743336: step: 892/527, loss: 0.01845252513885498 2023-01-23 00:42:06.843230: step: 896/527, loss: 0.04147949442267418 2023-01-23 00:42:07.948382: step: 900/527, loss: 0.0654093325138092 2023-01-23 00:42:09.081589: step: 904/527, loss: 0.08820018917322159 2023-01-23 00:42:10.195887: step: 908/527, loss: 0.05852871015667915 2023-01-23 00:42:11.296865: step: 912/527, loss: 0.18398791551589966 2023-01-23 00:42:12.426379: step: 916/527, loss: 0.2626205384731293 2023-01-23 00:42:13.530043: step: 920/527, loss: 0.029584836214780807 2023-01-23 00:42:14.654112: step: 924/527, loss: 0.046279050409793854 2023-01-23 00:42:15.789320: step: 928/527, loss: 0.042564962059259415 2023-01-23 00:42:16.880876: step: 932/527, loss: 0.14204120635986328 2023-01-23 00:42:17.984133: step: 936/527, loss: 0.2715371251106262 2023-01-23 00:42:19.081994: step: 940/527, loss: 0.028046369552612305 2023-01-23 00:42:20.175094: step: 944/527, loss: 0.11839208751916885 2023-01-23 00:42:21.286695: step: 948/527, loss: 0.03891754150390625 2023-01-23 00:42:22.404422: step: 952/527, loss: 0.06709346920251846 2023-01-23 00:42:23.508641: step: 956/527, loss: 0.16514796018600464 2023-01-23 00:42:24.632106: step: 960/527, loss: 0.2718348503112793 2023-01-23 00:42:25.729825: step: 964/527, loss: 0.04165635257959366 2023-01-23 00:42:26.843063: step: 968/527, loss: 0.10216931998729706 2023-01-23 00:42:27.968293: step: 972/527, loss: 0.7923317551612854 2023-01-23 00:42:29.078146: step: 976/527, loss: 0.11979684978723526 2023-01-23 00:42:30.196850: step: 980/527, loss: 0.03786206617951393 2023-01-23 00:42:31.310228: step: 984/527, loss: 0.051749035716056824 2023-01-23 00:42:32.422177: step: 988/527, loss: 0.12070188671350479 2023-01-23 00:42:33.548696: step: 992/527, loss: 0.09267584979534149 2023-01-23 00:42:34.657059: step: 996/527, loss: 0.03320141136646271 2023-01-23 00:42:35.764989: step: 1000/527, loss: 0.06649112701416016 2023-01-23 00:42:36.881301: step: 1004/527, loss: 0.010967493057250977 2023-01-23 00:42:37.982466: step: 1008/527, loss: 0.057869818061590195 2023-01-23 00:42:39.086854: step: 1012/527, loss: 0.06803999841213226 2023-01-23 00:42:40.199517: step: 1016/527, loss: 0.08660201728343964 2023-01-23 00:42:41.297419: step: 1020/527, loss: 0.013811302371323109 2023-01-23 00:42:42.402049: step: 1024/527, loss: 0.1948748081922531 2023-01-23 00:42:43.522801: step: 1028/527, loss: 0.20716705918312073 2023-01-23 00:42:44.654974: step: 1032/527, loss: 0.08804524689912796 2023-01-23 00:42:45.788482: step: 1036/527, loss: 0.7179480791091919 2023-01-23 00:42:46.951910: step: 1040/527, loss: 0.055435944348573685 2023-01-23 00:42:48.070968: step: 1044/527, loss: 0.1197391003370285 2023-01-23 00:42:49.190663: step: 1048/527, loss: 0.03375749662518501 2023-01-23 00:42:50.312225: step: 1052/527, loss: 0.09264302253723145 2023-01-23 00:42:51.438006: step: 1056/527, loss: 0.06527318805456161 2023-01-23 00:42:52.571330: step: 1060/527, loss: 0.1340409219264984 2023-01-23 00:42:53.679610: step: 1064/527, loss: 0.0313778892159462 2023-01-23 00:42:54.802902: step: 1068/527, loss: 0.018294524401426315 2023-01-23 00:42:55.901813: step: 1072/527, loss: 0.25492602586746216 2023-01-23 00:42:56.990892: step: 1076/527, loss: 0.08791275322437286 2023-01-23 00:42:58.117504: step: 1080/527, loss: 0.061396028846502304 2023-01-23 00:42:59.225924: step: 1084/527, loss: 0.09266863018274307 2023-01-23 00:43:00.339610: step: 1088/527, loss: 0.6098822355270386 2023-01-23 00:43:01.459745: step: 1092/527, loss: 0.05562954023480415 2023-01-23 00:43:02.564073: step: 1096/527, loss: 0.08444322645664215 2023-01-23 00:43:03.690497: step: 1100/527, loss: 0.026590727269649506 2023-01-23 00:43:04.832246: step: 1104/527, loss: 0.2448311746120453 2023-01-23 00:43:05.947763: step: 1108/527, loss: 0.030560877174139023 2023-01-23 00:43:07.069157: step: 1112/527, loss: 0.12442392855882645 2023-01-23 00:43:08.206558: step: 1116/527, loss: 0.18386010825634003 2023-01-23 00:43:09.348061: step: 1120/527, loss: 0.05577525869011879 2023-01-23 00:43:10.485801: step: 1124/527, loss: 0.27393731474876404 2023-01-23 00:43:11.642777: step: 1128/527, loss: 0.12740007042884827 2023-01-23 00:43:12.780710: step: 1132/527, loss: 0.07875290513038635 2023-01-23 00:43:13.907289: step: 1136/527, loss: 0.08007045090198517 2023-01-23 00:43:15.001228: step: 1140/527, loss: 0.061043668538331985 2023-01-23 00:43:16.118011: step: 1144/527, loss: 0.5175440311431885 2023-01-23 00:43:17.219468: step: 1148/527, loss: 0.04177398979663849 2023-01-23 00:43:18.337957: step: 1152/527, loss: 0.11078719794750214 2023-01-23 00:43:19.491686: step: 1156/527, loss: 0.03102750889956951 2023-01-23 00:43:20.637660: step: 1160/527, loss: 1.415436029434204 2023-01-23 00:43:21.762728: step: 1164/527, loss: 0.1366496980190277 2023-01-23 00:43:22.870557: step: 1168/527, loss: 0.07057762145996094 2023-01-23 00:43:23.985948: step: 1172/527, loss: 0.08305162191390991 2023-01-23 00:43:25.110822: step: 1176/527, loss: 0.3974289000034332 2023-01-23 00:43:26.206387: step: 1180/527, loss: 0.12361840903759003 2023-01-23 00:43:27.317376: step: 1184/527, loss: 0.09007033705711365 2023-01-23 00:43:28.437135: step: 1188/527, loss: 0.0807943046092987 2023-01-23 00:43:29.545793: step: 1192/527, loss: 0.05922601372003555 2023-01-23 00:43:30.654778: step: 1196/527, loss: 0.12947070598602295 2023-01-23 00:43:31.757440: step: 1200/527, loss: 0.004709864035248756 2023-01-23 00:43:32.858607: step: 1204/527, loss: 0.1928752213716507 2023-01-23 00:43:33.980583: step: 1208/527, loss: 0.22018328309059143 2023-01-23 00:43:35.119633: step: 1212/527, loss: 0.11299343407154083 2023-01-23 00:43:36.238964: step: 1216/527, loss: 0.04332923889160156 2023-01-23 00:43:37.365510: step: 1220/527, loss: 0.30185890197753906 2023-01-23 00:43:38.508665: step: 1224/527, loss: 0.06659789383411407 2023-01-23 00:43:39.660741: step: 1228/527, loss: 0.13564452528953552 2023-01-23 00:43:40.781285: step: 1232/527, loss: 0.06867778301239014 2023-01-23 00:43:41.887334: step: 1236/527, loss: 0.11100301891565323 2023-01-23 00:43:43.003723: step: 1240/527, loss: 0.0544523224234581 2023-01-23 00:43:44.123906: step: 1244/527, loss: 0.0866757407784462 2023-01-23 00:43:45.242764: step: 1248/527, loss: 0.07921364158391953 2023-01-23 00:43:46.355228: step: 1252/527, loss: 0.05777778476476669 2023-01-23 00:43:47.471410: step: 1256/527, loss: 0.14223213493824005 2023-01-23 00:43:48.593946: step: 1260/527, loss: 0.030316973105072975 2023-01-23 00:43:49.744156: step: 1264/527, loss: 0.12432222068309784 2023-01-23 00:43:50.848558: step: 1268/527, loss: 0.0868983268737793 2023-01-23 00:43:51.976553: step: 1272/527, loss: 0.07804088294506073 2023-01-23 00:43:53.105394: step: 1276/527, loss: 0.04638509824872017 2023-01-23 00:43:54.209175: step: 1280/527, loss: 0.12446976453065872 2023-01-23 00:43:55.337864: step: 1284/527, loss: 0.09551630169153214 2023-01-23 00:43:56.440484: step: 1288/527, loss: 0.06230277940630913 2023-01-23 00:43:57.573276: step: 1292/527, loss: 0.09850476682186127 2023-01-23 00:43:58.692433: step: 1296/527, loss: 0.15821805596351624 2023-01-23 00:43:59.831721: step: 1300/527, loss: 0.2274303436279297 2023-01-23 00:44:00.937979: step: 1304/527, loss: 0.10555467754602432 2023-01-23 00:44:02.042877: step: 1308/527, loss: 0.14376716315746307 2023-01-23 00:44:03.132163: step: 1312/527, loss: 0.04370608553290367 2023-01-23 00:44:04.227001: step: 1316/527, loss: 0.16287164390087128 2023-01-23 00:44:05.312311: step: 1320/527, loss: 0.0780348852276802 2023-01-23 00:44:06.415330: step: 1324/527, loss: 0.30689454078674316 2023-01-23 00:44:07.536532: step: 1328/527, loss: 0.22151941061019897 2023-01-23 00:44:08.667917: step: 1332/527, loss: 0.11925353854894638 2023-01-23 00:44:09.781574: step: 1336/527, loss: 0.04480161517858505 2023-01-23 00:44:10.898599: step: 1340/527, loss: 0.3220299482345581 2023-01-23 00:44:12.047754: step: 1344/527, loss: 0.05163154751062393 2023-01-23 00:44:13.188421: step: 1348/527, loss: 0.5688139200210571 2023-01-23 00:44:14.286478: step: 1352/527, loss: 0.07556018978357315 2023-01-23 00:44:15.419517: step: 1356/527, loss: 0.18702572584152222 2023-01-23 00:44:16.571151: step: 1360/527, loss: 0.1352296769618988 2023-01-23 00:44:17.669910: step: 1364/527, loss: 0.06931314617395401 2023-01-23 00:44:18.752416: step: 1368/527, loss: 0.07552079856395721 2023-01-23 00:44:19.869773: step: 1372/527, loss: 0.19485560059547424 2023-01-23 00:44:21.015723: step: 1376/527, loss: 0.202061265707016 2023-01-23 00:44:22.099010: step: 1380/527, loss: 0.14096584916114807 2023-01-23 00:44:23.248201: step: 1384/527, loss: 0.06458797305822372 2023-01-23 00:44:24.343382: step: 1388/527, loss: 0.0766574889421463 2023-01-23 00:44:25.449796: step: 1392/527, loss: 0.09638500213623047 2023-01-23 00:44:26.547772: step: 1396/527, loss: 0.032813072204589844 2023-01-23 00:44:27.667190: step: 1400/527, loss: 1.4972236156463623 2023-01-23 00:44:28.782777: step: 1404/527, loss: 0.08110485225915909 2023-01-23 00:44:29.890122: step: 1408/527, loss: 0.03479523956775665 2023-01-23 00:44:31.021114: step: 1412/527, loss: 0.16209346055984497 2023-01-23 00:44:32.160052: step: 1416/527, loss: 0.128150075674057 2023-01-23 00:44:33.337272: step: 1420/527, loss: 0.5813602805137634 2023-01-23 00:44:34.464202: step: 1424/527, loss: 0.032004550099372864 2023-01-23 00:44:35.574635: step: 1428/527, loss: 0.0892861932516098 2023-01-23 00:44:36.715729: step: 1432/527, loss: 0.4798944890499115 2023-01-23 00:44:37.862883: step: 1436/527, loss: 0.15733404457569122 2023-01-23 00:44:38.978883: step: 1440/527, loss: 0.04112134128808975 2023-01-23 00:44:40.083142: step: 1444/527, loss: 0.13937664031982422 2023-01-23 00:44:41.186639: step: 1448/527, loss: 0.12938299775123596 2023-01-23 00:44:42.289214: step: 1452/527, loss: 0.17721039056777954 2023-01-23 00:44:43.364441: step: 1456/527, loss: 0.07398548722267151 2023-01-23 00:44:44.485098: step: 1460/527, loss: 0.07432279735803604 2023-01-23 00:44:45.635828: step: 1464/527, loss: 0.24189713597297668 2023-01-23 00:44:46.756709: step: 1468/527, loss: 0.20307216048240662 2023-01-23 00:44:47.837429: step: 1472/527, loss: 0.07236690819263458 2023-01-23 00:44:48.938835: step: 1476/527, loss: 0.09924187511205673 2023-01-23 00:44:50.069870: step: 1480/527, loss: 0.017452048137784004 2023-01-23 00:44:51.208271: step: 1484/527, loss: 0.21310366690158844 2023-01-23 00:44:52.306115: step: 1488/527, loss: 0.08805880695581436 2023-01-23 00:44:53.432327: step: 1492/527, loss: 0.060295961797237396 2023-01-23 00:44:54.526998: step: 1496/527, loss: 0.5504152774810791 2023-01-23 00:44:55.608609: step: 1500/527, loss: 0.2574518322944641 2023-01-23 00:44:56.710909: step: 1504/527, loss: 0.13807792961597443 2023-01-23 00:44:57.830422: step: 1508/527, loss: 0.1989975869655609 2023-01-23 00:44:58.925016: step: 1512/527, loss: 0.33166128396987915 2023-01-23 00:45:00.019956: step: 1516/527, loss: 0.7003905773162842 2023-01-23 00:45:01.164956: step: 1520/527, loss: 0.05732369422912598 2023-01-23 00:45:02.253108: step: 1524/527, loss: 0.03219633176922798 2023-01-23 00:45:03.368642: step: 1528/527, loss: 0.09347958117723465 2023-01-23 00:45:04.500029: step: 1532/527, loss: 0.6772009134292603 2023-01-23 00:45:05.604404: step: 1536/527, loss: 0.1210743859410286 2023-01-23 00:45:06.706764: step: 1540/527, loss: 0.09642343968153 2023-01-23 00:45:07.831030: step: 1544/527, loss: 0.09271785616874695 2023-01-23 00:45:08.930880: step: 1548/527, loss: 0.011908723041415215 2023-01-23 00:45:10.082298: step: 1552/527, loss: 0.25633230805397034 2023-01-23 00:45:11.221357: step: 1556/527, loss: 0.1170506477355957 2023-01-23 00:45:12.316514: step: 1560/527, loss: 0.14003857970237732 2023-01-23 00:45:13.427806: step: 1564/527, loss: 0.08557999134063721 2023-01-23 00:45:14.555493: step: 1568/527, loss: 0.048417091369628906 2023-01-23 00:45:15.682829: step: 1572/527, loss: 0.15803876519203186 2023-01-23 00:45:16.798277: step: 1576/527, loss: 0.029480792582035065 2023-01-23 00:45:17.911716: step: 1580/527, loss: 0.03046315908432007 2023-01-23 00:45:19.050978: step: 1584/527, loss: 0.1353861689567566 2023-01-23 00:45:20.152404: step: 1588/527, loss: 0.1786683052778244 2023-01-23 00:45:21.274760: step: 1592/527, loss: 0.523049533367157 2023-01-23 00:45:22.400604: step: 1596/527, loss: 0.27000704407691956 2023-01-23 00:45:23.507504: step: 1600/527, loss: 0.5134299993515015 2023-01-23 00:45:24.663286: step: 1604/527, loss: 0.064641572535038 2023-01-23 00:45:25.811382: step: 1608/527, loss: 0.2145313173532486 2023-01-23 00:45:26.933211: step: 1612/527, loss: 0.08705604076385498 2023-01-23 00:45:28.062866: step: 1616/527, loss: 0.02277245558798313 2023-01-23 00:45:29.146177: step: 1620/527, loss: 0.014551687985658646 2023-01-23 00:45:30.270182: step: 1624/527, loss: 0.04566545411944389 2023-01-23 00:45:31.387511: step: 1628/527, loss: 0.14497403800487518 2023-01-23 00:45:32.493939: step: 1632/527, loss: 0.11718883365392685 2023-01-23 00:45:33.616037: step: 1636/527, loss: 0.01563549041748047 2023-01-23 00:45:34.763490: step: 1640/527, loss: 0.03902807459235191 2023-01-23 00:45:35.879469: step: 1644/527, loss: 0.07829437404870987 2023-01-23 00:45:36.977651: step: 1648/527, loss: 0.08640975505113602 2023-01-23 00:45:38.117586: step: 1652/527, loss: 0.12538833916187286 2023-01-23 00:45:39.211684: step: 1656/527, loss: 0.08356447517871857 2023-01-23 00:45:40.304653: step: 1660/527, loss: 0.08952084183692932 2023-01-23 00:45:41.409108: step: 1664/527, loss: 0.09460973739624023 2023-01-23 00:45:42.497882: step: 1668/527, loss: 0.14672046899795532 2023-01-23 00:45:43.591080: step: 1672/527, loss: 0.03532552719116211 2023-01-23 00:45:44.695054: step: 1676/527, loss: 0.49581262469291687 2023-01-23 00:45:45.827944: step: 1680/527, loss: 0.07083387672901154 2023-01-23 00:45:46.952884: step: 1684/527, loss: 0.17162543535232544 2023-01-23 00:45:48.094051: step: 1688/527, loss: 0.22113552689552307 2023-01-23 00:45:49.255670: step: 1692/527, loss: 0.1550757884979248 2023-01-23 00:45:50.365930: step: 1696/527, loss: 0.05860729515552521 2023-01-23 00:45:51.458405: step: 1700/527, loss: 0.035207558423280716 2023-01-23 00:45:52.576803: step: 1704/527, loss: 0.3124457597732544 2023-01-23 00:45:53.667297: step: 1708/527, loss: 0.17580759525299072 2023-01-23 00:45:54.818589: step: 1712/527, loss: 0.0979655310511589 2023-01-23 00:45:55.931777: step: 1716/527, loss: 0.0509522445499897 2023-01-23 00:45:57.030364: step: 1720/527, loss: 0.6463108658790588 2023-01-23 00:45:58.140264: step: 1724/527, loss: 0.08071768283843994 2023-01-23 00:45:59.266774: step: 1728/527, loss: 0.15986299514770508 2023-01-23 00:46:00.402493: step: 1732/527, loss: 0.08089923858642578 2023-01-23 00:46:01.526876: step: 1736/527, loss: 0.025244617834687233 2023-01-23 00:46:02.654042: step: 1740/527, loss: 0.0182831771671772 2023-01-23 00:46:03.750444: step: 1744/527, loss: 0.3164002299308777 2023-01-23 00:46:04.861715: step: 1748/527, loss: 0.20178470015525818 2023-01-23 00:46:05.962448: step: 1752/527, loss: 0.09134826809167862 2023-01-23 00:46:07.061108: step: 1756/527, loss: 0.01455754041671753 2023-01-23 00:46:08.180657: step: 1760/527, loss: 0.08732642978429794 2023-01-23 00:46:09.278353: step: 1764/527, loss: 0.09809169918298721 2023-01-23 00:46:10.398722: step: 1768/527, loss: 0.4199890196323395 2023-01-23 00:46:11.520279: step: 1772/527, loss: 0.04358282312750816 2023-01-23 00:46:12.644261: step: 1776/527, loss: 0.1687115728855133 2023-01-23 00:46:13.798589: step: 1780/527, loss: 0.07507472485303879 2023-01-23 00:46:14.902684: step: 1784/527, loss: 0.2308574616909027 2023-01-23 00:46:16.020155: step: 1788/527, loss: 0.014399625360965729 2023-01-23 00:46:17.133941: step: 1792/527, loss: 0.11223955452442169 2023-01-23 00:46:18.251194: step: 1796/527, loss: 0.08046016842126846 2023-01-23 00:46:19.343714: step: 1800/527, loss: 0.04589500278234482 2023-01-23 00:46:20.466737: step: 1804/527, loss: 0.19525334239006042 2023-01-23 00:46:21.603784: step: 1808/527, loss: 0.0960565134882927 2023-01-23 00:46:22.744506: step: 1812/527, loss: 0.12751737236976624 2023-01-23 00:46:23.854240: step: 1816/527, loss: 0.7705336809158325 2023-01-23 00:46:24.959945: step: 1820/527, loss: 0.11391840130090714 2023-01-23 00:46:26.081033: step: 1824/527, loss: 0.06742487102746964 2023-01-23 00:46:27.221585: step: 1828/527, loss: 0.0063932896591722965 2023-01-23 00:46:28.320200: step: 1832/527, loss: 0.08857765793800354 2023-01-23 00:46:29.430392: step: 1836/527, loss: 0.29340478777885437 2023-01-23 00:46:30.556649: step: 1840/527, loss: 0.027070429176092148 2023-01-23 00:46:31.685148: step: 1844/527, loss: 0.12149587273597717 2023-01-23 00:46:32.788795: step: 1848/527, loss: 0.0981663316488266 2023-01-23 00:46:33.927081: step: 1852/527, loss: 0.016504479572176933 2023-01-23 00:46:35.050368: step: 1856/527, loss: 0.6940372586250305 2023-01-23 00:46:36.181923: step: 1860/527, loss: 0.058921121060848236 2023-01-23 00:46:37.329513: step: 1864/527, loss: 0.2871111035346985 2023-01-23 00:46:38.467403: step: 1868/527, loss: 0.07852087169885635 2023-01-23 00:46:39.553456: step: 1872/527, loss: 0.038393642753362656 2023-01-23 00:46:40.665610: step: 1876/527, loss: 0.02996664121747017 2023-01-23 00:46:41.789539: step: 1880/527, loss: 1.200020670890808 2023-01-23 00:46:42.884128: step: 1884/527, loss: 0.08861713856458664 2023-01-23 00:46:43.979424: step: 1888/527, loss: 0.036472223699092865 2023-01-23 00:46:45.082349: step: 1892/527, loss: 0.10223941504955292 2023-01-23 00:46:46.191697: step: 1896/527, loss: 0.155283123254776 2023-01-23 00:46:47.278970: step: 1900/527, loss: 0.00884790439158678 2023-01-23 00:46:48.425622: step: 1904/527, loss: 0.08166541904211044 2023-01-23 00:46:49.572132: step: 1908/527, loss: 0.0051506999880075455 2023-01-23 00:46:50.697503: step: 1912/527, loss: 0.08418627083301544 2023-01-23 00:46:51.816126: step: 1916/527, loss: 0.0670836865901947 2023-01-23 00:46:52.960745: step: 1920/527, loss: 0.24358788132667542 2023-01-23 00:46:54.064331: step: 1924/527, loss: 0.0745553970336914 2023-01-23 00:46:55.174639: step: 1928/527, loss: 0.14394140243530273 2023-01-23 00:46:56.283777: step: 1932/527, loss: 0.05049104988574982 2023-01-23 00:46:57.390530: step: 1936/527, loss: 0.5881040096282959 2023-01-23 00:46:58.486951: step: 1940/527, loss: 0.08709597587585449 2023-01-23 00:46:59.594284: step: 1944/527, loss: 0.13468265533447266 2023-01-23 00:47:00.749109: step: 1948/527, loss: 0.1144346296787262 2023-01-23 00:47:01.857951: step: 1952/527, loss: 0.1711897850036621 2023-01-23 00:47:02.967606: step: 1956/527, loss: 0.03764772415161133 2023-01-23 00:47:04.097135: step: 1960/527, loss: 0.03174591064453125 2023-01-23 00:47:05.222617: step: 1964/527, loss: 0.03121805191040039 2023-01-23 00:47:06.353126: step: 1968/527, loss: 0.11928224563598633 2023-01-23 00:47:07.472364: step: 1972/527, loss: 0.14947627484798431 2023-01-23 00:47:08.574235: step: 1976/527, loss: 0.1037091314792633 2023-01-23 00:47:09.699314: step: 1980/527, loss: 0.05550746992230415 2023-01-23 00:47:10.801184: step: 1984/527, loss: 0.026887917891144753 2023-01-23 00:47:11.936302: step: 1988/527, loss: 0.11578197777271271 2023-01-23 00:47:13.029926: step: 1992/527, loss: 0.15976767241954803 2023-01-23 00:47:14.157200: step: 1996/527, loss: 0.11969108134508133 2023-01-23 00:47:15.249511: step: 2000/527, loss: 0.055633485317230225 2023-01-23 00:47:16.342354: step: 2004/527, loss: 0.019266510382294655 2023-01-23 00:47:17.466420: step: 2008/527, loss: 0.0732610747218132 2023-01-23 00:47:18.541107: step: 2012/527, loss: 0.07769189029932022 2023-01-23 00:47:19.660640: step: 2016/527, loss: 0.06608028709888458 2023-01-23 00:47:20.772374: step: 2020/527, loss: 0.06438541412353516 2023-01-23 00:47:21.854324: step: 2024/527, loss: 0.02088642120361328 2023-01-23 00:47:22.972559: step: 2028/527, loss: 0.014204931445419788 2023-01-23 00:47:24.085336: step: 2032/527, loss: 0.062448449432849884 2023-01-23 00:47:25.207538: step: 2036/527, loss: 0.2478632926940918 2023-01-23 00:47:26.322033: step: 2040/527, loss: 0.13856564462184906 2023-01-23 00:47:27.442738: step: 2044/527, loss: 0.025881171226501465 2023-01-23 00:47:28.544540: step: 2048/527, loss: 0.06553387641906738 2023-01-23 00:47:29.674670: step: 2052/527, loss: 0.09359939396381378 2023-01-23 00:47:30.785191: step: 2056/527, loss: 0.09992627799510956 2023-01-23 00:47:31.907280: step: 2060/527, loss: 0.044696759432554245 2023-01-23 00:47:33.002500: step: 2064/527, loss: 0.0404149554669857 2023-01-23 00:47:34.093150: step: 2068/527, loss: 0.08958516269922256 2023-01-23 00:47:35.228425: step: 2072/527, loss: 0.03951120376586914 2023-01-23 00:47:36.355604: step: 2076/527, loss: 0.11337490379810333 2023-01-23 00:47:37.469386: step: 2080/527, loss: 0.20729884505271912 2023-01-23 00:47:38.609100: step: 2084/527, loss: 0.09457206726074219 2023-01-23 00:47:39.695610: step: 2088/527, loss: 0.10443481802940369 2023-01-23 00:47:40.799058: step: 2092/527, loss: 0.16657915711402893 2023-01-23 00:47:41.934738: step: 2096/527, loss: 0.15816065669059753 2023-01-23 00:47:43.069180: step: 2100/527, loss: 0.9830401539802551 2023-01-23 00:47:44.200666: step: 2104/527, loss: 0.03774967044591904 2023-01-23 00:47:45.289571: step: 2108/527, loss: 0.08231468498706818 ================================================== Loss: 0.160 -------------------- Dev: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Test: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Chinese: {'event': {'p': 0.5512820512820513, 'r': 0.7962962962962963, 'f1': 0.6515151515151515}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Russian: {'event': {'p': 0.5555555555555556, 'r': 0.5555555555555556, 'f1': 0.5555555555555556}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} New best korean model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Eng Test for Korean: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 8 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:48:31.579582: step: 4/527, loss: 0.0662505179643631 2023-01-23 00:48:32.710111: step: 8/527, loss: 0.02305145189166069 2023-01-23 00:48:33.860590: step: 12/527, loss: 0.04371337592601776 2023-01-23 00:48:34.958349: step: 16/527, loss: 0.10356616973876953 2023-01-23 00:48:36.055217: step: 20/527, loss: 0.10588302463293076 2023-01-23 00:48:37.167388: step: 24/527, loss: 0.11706677079200745 2023-01-23 00:48:38.254484: step: 28/527, loss: 0.009508872404694557 2023-01-23 00:48:39.370312: step: 32/527, loss: 0.08286657929420471 2023-01-23 00:48:40.483493: step: 36/527, loss: 0.0660085678100586 2023-01-23 00:48:41.594619: step: 40/527, loss: 0.06506600230932236 2023-01-23 00:48:42.714005: step: 44/527, loss: 0.04293499141931534 2023-01-23 00:48:43.812853: step: 48/527, loss: 0.005032634828239679 2023-01-23 00:48:44.932852: step: 52/527, loss: 0.24146828055381775 2023-01-23 00:48:46.046789: step: 56/527, loss: 0.024000121280550957 2023-01-23 00:48:47.164681: step: 60/527, loss: 0.021087074652314186 2023-01-23 00:48:48.239761: step: 64/527, loss: 0.12115335464477539 2023-01-23 00:48:49.354633: step: 68/527, loss: 0.2353973388671875 2023-01-23 00:48:50.482174: step: 72/527, loss: 0.03964404761791229 2023-01-23 00:48:51.603678: step: 76/527, loss: 0.11088452488183975 2023-01-23 00:48:52.702189: step: 80/527, loss: 0.04457240179181099 2023-01-23 00:48:53.823244: step: 84/527, loss: 0.39419201016426086 2023-01-23 00:48:54.963905: step: 88/527, loss: 0.1277870237827301 2023-01-23 00:48:56.132054: step: 92/527, loss: 0.22232437133789062 2023-01-23 00:48:57.270961: step: 96/527, loss: 0.06264851242303848 2023-01-23 00:48:58.385966: step: 100/527, loss: 0.09544029831886292 2023-01-23 00:48:59.516829: step: 104/527, loss: 0.05071617290377617 2023-01-23 00:49:00.624825: step: 108/527, loss: 0.08496695011854172 2023-01-23 00:49:01.744002: step: 112/527, loss: 0.1718399077653885 2023-01-23 00:49:02.871717: step: 116/527, loss: 0.5687503814697266 2023-01-23 00:49:03.975424: step: 120/527, loss: 0.0852271094918251 2023-01-23 00:49:05.090381: step: 124/527, loss: 0.0876384750008583 2023-01-23 00:49:06.182104: step: 128/527, loss: 0.03890504688024521 2023-01-23 00:49:07.284516: step: 132/527, loss: 0.011740398593246937 2023-01-23 00:49:08.436607: step: 136/527, loss: 0.04613227769732475 2023-01-23 00:49:09.539155: step: 140/527, loss: 0.02544255182147026 2023-01-23 00:49:10.676448: step: 144/527, loss: 0.07478688657283783 2023-01-23 00:49:11.765076: step: 148/527, loss: 0.02361168898642063 2023-01-23 00:49:12.891432: step: 152/527, loss: 0.10690093040466309 2023-01-23 00:49:14.002065: step: 156/527, loss: 0.05181805044412613 2023-01-23 00:49:15.124714: step: 160/527, loss: 0.005225944332778454 2023-01-23 00:49:16.215845: step: 164/527, loss: 0.033143140375614166 2023-01-23 00:49:17.330560: step: 168/527, loss: 0.13845062255859375 2023-01-23 00:49:18.453218: step: 172/527, loss: 0.007614517118781805 2023-01-23 00:49:19.589286: step: 176/527, loss: 0.03459987789392471 2023-01-23 00:49:20.705381: step: 180/527, loss: 0.051847122609615326 2023-01-23 00:49:21.802147: step: 184/527, loss: 0.09796829521656036 2023-01-23 00:49:22.927371: step: 188/527, loss: 2.120713710784912 2023-01-23 00:49:24.037042: step: 192/527, loss: 0.07133102416992188 2023-01-23 00:49:25.169059: step: 196/527, loss: 0.027388669550418854 2023-01-23 00:49:26.283003: step: 200/527, loss: 0.09015445411205292 2023-01-23 00:49:27.405210: step: 204/527, loss: 0.14747075736522675 2023-01-23 00:49:28.541534: step: 208/527, loss: 0.06787262111902237 2023-01-23 00:49:29.701690: step: 212/527, loss: 0.037381600588560104 2023-01-23 00:49:30.840195: step: 216/527, loss: 0.08827958256006241 2023-01-23 00:49:31.948173: step: 220/527, loss: 0.06868445873260498 2023-01-23 00:49:33.081462: step: 224/527, loss: 2.1922600269317627 2023-01-23 00:49:34.217417: step: 228/527, loss: 0.9606868624687195 2023-01-23 00:49:35.335041: step: 232/527, loss: 0.1942111998796463 2023-01-23 00:49:36.446224: step: 236/527, loss: 0.014286041259765625 2023-01-23 00:49:37.569577: step: 240/527, loss: 0.03378438949584961 2023-01-23 00:49:38.681787: step: 244/527, loss: 0.21601086854934692 2023-01-23 00:49:39.779267: step: 248/527, loss: 0.020242786034941673 2023-01-23 00:49:40.854551: step: 252/527, loss: 0.028825949877500534 2023-01-23 00:49:41.949163: step: 256/527, loss: 0.1523379385471344 2023-01-23 00:49:43.060194: step: 260/527, loss: 0.11253499984741211 2023-01-23 00:49:44.169527: step: 264/527, loss: 0.12275314331054688 2023-01-23 00:49:45.315811: step: 268/527, loss: 0.060677219182252884 2023-01-23 00:49:46.440086: step: 272/527, loss: 0.2576586604118347 2023-01-23 00:49:47.560168: step: 276/527, loss: 0.0753047913312912 2023-01-23 00:49:48.679693: step: 280/527, loss: 0.054239656776189804 2023-01-23 00:49:49.794182: step: 284/527, loss: 0.027918243780732155 2023-01-23 00:49:50.899600: step: 288/527, loss: 0.09006933867931366 2023-01-23 00:49:52.011425: step: 292/527, loss: 0.13811007142066956 2023-01-23 00:49:53.169108: step: 296/527, loss: 0.08516483008861542 2023-01-23 00:49:54.265425: step: 300/527, loss: 0.04768932983279228 2023-01-23 00:49:55.386784: step: 304/527, loss: 0.09100484848022461 2023-01-23 00:49:56.491209: step: 308/527, loss: 0.06292619556188583 2023-01-23 00:49:57.619254: step: 312/527, loss: 0.09011035412549973 2023-01-23 00:49:58.754281: step: 316/527, loss: 0.10717058181762695 2023-01-23 00:49:59.842849: step: 320/527, loss: 0.02911229245364666 2023-01-23 00:50:00.953456: step: 324/527, loss: 0.006568622775375843 2023-01-23 00:50:02.084812: step: 328/527, loss: 0.09261999279260635 2023-01-23 00:50:03.195989: step: 332/527, loss: 0.24732133746147156 2023-01-23 00:50:04.337029: step: 336/527, loss: 0.24520960450172424 2023-01-23 00:50:05.425064: step: 340/527, loss: 0.05025935173034668 2023-01-23 00:50:06.521944: step: 344/527, loss: 0.8846839070320129 2023-01-23 00:50:07.619392: step: 348/527, loss: 0.11841163784265518 2023-01-23 00:50:08.776666: step: 352/527, loss: 0.2878393828868866 2023-01-23 00:50:09.887575: step: 356/527, loss: 0.048734474927186966 2023-01-23 00:50:11.032944: step: 360/527, loss: 0.08063755184412003 2023-01-23 00:50:12.142878: step: 364/527, loss: 0.1054808646440506 2023-01-23 00:50:13.262900: step: 368/527, loss: 0.2780340611934662 2023-01-23 00:50:14.343924: step: 372/527, loss: 0.02704916149377823 2023-01-23 00:50:15.443181: step: 376/527, loss: 0.023357870057225227 2023-01-23 00:50:16.572851: step: 380/527, loss: 0.008502197451889515 2023-01-23 00:50:17.696474: step: 384/527, loss: 0.01613173447549343 2023-01-23 00:50:18.803859: step: 388/527, loss: 0.06635679304599762 2023-01-23 00:50:19.935660: step: 392/527, loss: 0.3218851089477539 2023-01-23 00:50:21.057801: step: 396/527, loss: 0.017478466033935547 2023-01-23 00:50:22.179357: step: 400/527, loss: 0.07034631073474884 2023-01-23 00:50:23.327240: step: 404/527, loss: 0.013752173632383347 2023-01-23 00:50:24.434617: step: 408/527, loss: 0.03642425686120987 2023-01-23 00:50:25.531983: step: 412/527, loss: 0.015242003835737705 2023-01-23 00:50:26.648916: step: 416/527, loss: 0.2196635752916336 2023-01-23 00:50:27.727583: step: 420/527, loss: 0.07315365970134735 2023-01-23 00:50:28.842198: step: 424/527, loss: 0.07526598125696182 2023-01-23 00:50:29.953123: step: 428/527, loss: 0.21751748025417328 2023-01-23 00:50:31.075679: step: 432/527, loss: 0.09427070617675781 2023-01-23 00:50:32.164667: step: 436/527, loss: 0.05080757290124893 2023-01-23 00:50:33.307687: step: 440/527, loss: 0.0573611743748188 2023-01-23 00:50:34.417549: step: 444/527, loss: 0.023981476202607155 2023-01-23 00:50:35.558230: step: 448/527, loss: 0.2168409377336502 2023-01-23 00:50:36.683763: step: 452/527, loss: 0.6849062442779541 2023-01-23 00:50:37.809746: step: 456/527, loss: 0.13787278532981873 2023-01-23 00:50:38.913332: step: 460/527, loss: 0.04414663091301918 2023-01-23 00:50:40.030400: step: 464/527, loss: 0.13336792588233948 2023-01-23 00:50:41.128897: step: 468/527, loss: 0.0023910521995276213 2023-01-23 00:50:42.238947: step: 472/527, loss: 0.03397655487060547 2023-01-23 00:50:43.354980: step: 476/527, loss: 0.061437513679265976 2023-01-23 00:50:44.504374: step: 480/527, loss: 0.032884836196899414 2023-01-23 00:50:45.623150: step: 484/527, loss: 0.020434189587831497 2023-01-23 00:50:46.744383: step: 488/527, loss: 0.11455907672643661 2023-01-23 00:50:47.841523: step: 492/527, loss: 0.04375505447387695 2023-01-23 00:50:48.949484: step: 496/527, loss: 0.015088570304214954 2023-01-23 00:50:50.072239: step: 500/527, loss: 0.24854516983032227 2023-01-23 00:50:51.181001: step: 504/527, loss: 0.11807260662317276 2023-01-23 00:50:52.276258: step: 508/527, loss: 0.02047448232769966 2023-01-23 00:50:53.399320: step: 512/527, loss: 0.04716825485229492 2023-01-23 00:50:54.499480: step: 516/527, loss: 0.017246628180146217 2023-01-23 00:50:55.604232: step: 520/527, loss: 0.009725379757583141 2023-01-23 00:50:56.732975: step: 524/527, loss: 0.08599434047937393 2023-01-23 00:50:57.870636: step: 528/527, loss: 0.01509561575949192 2023-01-23 00:50:59.024764: step: 532/527, loss: 0.051949284970760345 2023-01-23 00:51:00.133206: step: 536/527, loss: 0.06595317274332047 2023-01-23 00:51:01.274116: step: 540/527, loss: 0.006334829144179821 2023-01-23 00:51:02.397935: step: 544/527, loss: 0.13365554809570312 2023-01-23 00:51:03.494479: step: 548/527, loss: 0.078265480697155 2023-01-23 00:51:04.569232: step: 552/527, loss: 0.3172203004360199 2023-01-23 00:51:05.671058: step: 556/527, loss: 0.07690558582544327 2023-01-23 00:51:06.764124: step: 560/527, loss: 0.04610862582921982 2023-01-23 00:51:07.910726: step: 564/527, loss: 0.10414648056030273 2023-01-23 00:51:09.016980: step: 568/527, loss: 1.6091641187667847 2023-01-23 00:51:10.144595: step: 572/527, loss: 0.0679062157869339 2023-01-23 00:51:11.284227: step: 576/527, loss: 0.1298515349626541 2023-01-23 00:51:12.429433: step: 580/527, loss: 0.09296919405460358 2023-01-23 00:51:13.571858: step: 584/527, loss: 0.06620721518993378 2023-01-23 00:51:14.699628: step: 588/527, loss: 0.17447753250598907 2023-01-23 00:51:15.829755: step: 592/527, loss: 0.11061620712280273 2023-01-23 00:51:16.980689: step: 596/527, loss: 0.022596264258027077 2023-01-23 00:51:18.057981: step: 600/527, loss: 0.009488009847700596 2023-01-23 00:51:19.154903: step: 604/527, loss: 0.08111634850502014 2023-01-23 00:51:20.273696: step: 608/527, loss: 0.2065192312002182 2023-01-23 00:51:21.375632: step: 612/527, loss: 0.15944424271583557 2023-01-23 00:51:22.454655: step: 616/527, loss: 0.0411435142159462 2023-01-23 00:51:23.584945: step: 620/527, loss: 0.4751095771789551 2023-01-23 00:51:24.784546: step: 624/527, loss: 0.017486954107880592 2023-01-23 00:51:25.914484: step: 628/527, loss: 0.10820036381483078 2023-01-23 00:51:27.026948: step: 632/527, loss: 0.07209649682044983 2023-01-23 00:51:28.137164: step: 636/527, loss: 0.3613487482070923 2023-01-23 00:51:29.250745: step: 640/527, loss: 0.052332207560539246 2023-01-23 00:51:30.375009: step: 644/527, loss: 0.042186737060546875 2023-01-23 00:51:31.503777: step: 648/527, loss: 0.09314222633838654 2023-01-23 00:51:32.609825: step: 652/527, loss: 0.05167350918054581 2023-01-23 00:51:33.724417: step: 656/527, loss: 0.18223591148853302 2023-01-23 00:51:34.848319: step: 660/527, loss: 0.049448300153017044 2023-01-23 00:51:36.054736: step: 664/527, loss: 0.25445327162742615 2023-01-23 00:51:37.153838: step: 668/527, loss: 0.050452426075935364 2023-01-23 00:51:38.291642: step: 672/527, loss: 0.048032473772764206 2023-01-23 00:51:39.397789: step: 676/527, loss: 0.005077171139419079 2023-01-23 00:51:40.489601: step: 680/527, loss: 0.07325573265552521 2023-01-23 00:51:41.591993: step: 684/527, loss: 0.02732701227068901 2023-01-23 00:51:42.711153: step: 688/527, loss: 0.04781174659729004 2023-01-23 00:51:43.838690: step: 692/527, loss: 0.012380600906908512 2023-01-23 00:51:44.985209: step: 696/527, loss: 0.45908185839653015 2023-01-23 00:51:46.109245: step: 700/527, loss: 0.06139354780316353 2023-01-23 00:51:47.234568: step: 704/527, loss: 0.030178356915712357 2023-01-23 00:51:48.383249: step: 708/527, loss: 0.04643435403704643 2023-01-23 00:51:49.474148: step: 712/527, loss: 0.052681878209114075 2023-01-23 00:51:50.599364: step: 716/527, loss: 0.27830156683921814 2023-01-23 00:51:51.722574: step: 720/527, loss: 0.3921051025390625 2023-01-23 00:51:52.867305: step: 724/527, loss: 0.0639440268278122 2023-01-23 00:51:53.996438: step: 728/527, loss: 0.10487000644207001 2023-01-23 00:51:55.057388: step: 732/527, loss: 0.007948875427246094 2023-01-23 00:51:56.154324: step: 736/527, loss: 0.05195184051990509 2023-01-23 00:51:57.263173: step: 740/527, loss: 0.09556989371776581 2023-01-23 00:51:58.400313: step: 744/527, loss: 0.0831850990653038 2023-01-23 00:51:59.500527: step: 748/527, loss: 0.9573855400085449 2023-01-23 00:52:00.662839: step: 752/527, loss: 0.03609342873096466 2023-01-23 00:52:01.750605: step: 756/527, loss: 0.16214951872825623 2023-01-23 00:52:02.869844: step: 760/527, loss: 0.07001495361328125 2023-01-23 00:52:04.003179: step: 764/527, loss: 0.035993482917547226 2023-01-23 00:52:05.105515: step: 768/527, loss: 0.08164720982313156 2023-01-23 00:52:06.207739: step: 772/527, loss: 0.08554460853338242 2023-01-23 00:52:07.306778: step: 776/527, loss: 0.034188173711299896 2023-01-23 00:52:08.419451: step: 780/527, loss: 0.17033347487449646 2023-01-23 00:52:09.526274: step: 784/527, loss: 0.6016769409179688 2023-01-23 00:52:10.633078: step: 788/527, loss: 0.059973862022161484 2023-01-23 00:52:11.753939: step: 792/527, loss: 0.038151081651449203 2023-01-23 00:52:12.864087: step: 796/527, loss: 0.03735866770148277 2023-01-23 00:52:13.972998: step: 800/527, loss: 0.022502470761537552 2023-01-23 00:52:15.095537: step: 804/527, loss: 0.06727603077888489 2023-01-23 00:52:16.258866: step: 808/527, loss: 0.12131595611572266 2023-01-23 00:52:17.369710: step: 812/527, loss: 0.028738070279359818 2023-01-23 00:52:18.527981: step: 816/527, loss: 0.13612107932567596 2023-01-23 00:52:19.631495: step: 820/527, loss: 0.06285762786865234 2023-01-23 00:52:20.754963: step: 824/527, loss: 0.015120506286621094 2023-01-23 00:52:21.882896: step: 828/527, loss: 0.01286010816693306 2023-01-23 00:52:22.957781: step: 832/527, loss: 0.03013935126364231 2023-01-23 00:52:24.104900: step: 836/527, loss: 0.045346833765506744 2023-01-23 00:52:25.238795: step: 840/527, loss: 0.03209085762500763 2023-01-23 00:52:26.337100: step: 844/527, loss: 0.07587175071239471 2023-01-23 00:52:27.433678: step: 848/527, loss: 0.060262441635131836 2023-01-23 00:52:28.547699: step: 852/527, loss: 0.03813629224896431 2023-01-23 00:52:29.649100: step: 856/527, loss: 0.054195404052734375 2023-01-23 00:52:30.752994: step: 860/527, loss: 0.06379751861095428 2023-01-23 00:52:31.877973: step: 864/527, loss: 0.17835655808448792 2023-01-23 00:52:33.011535: step: 868/527, loss: 0.13484200835227966 2023-01-23 00:52:34.142806: step: 872/527, loss: 0.09741239994764328 2023-01-23 00:52:35.242303: step: 876/527, loss: 0.20531876385211945 2023-01-23 00:52:36.355261: step: 880/527, loss: 0.04844360426068306 2023-01-23 00:52:37.467234: step: 884/527, loss: 0.13682469725608826 2023-01-23 00:52:38.611267: step: 888/527, loss: 0.021317481994628906 2023-01-23 00:52:39.748220: step: 892/527, loss: 0.08924126625061035 2023-01-23 00:52:40.856552: step: 896/527, loss: 0.08994856476783752 2023-01-23 00:52:41.978503: step: 900/527, loss: 0.09318752586841583 2023-01-23 00:52:43.110688: step: 904/527, loss: 0.09816179424524307 2023-01-23 00:52:44.225823: step: 908/527, loss: 0.06105213239789009 2023-01-23 00:52:45.301460: step: 912/527, loss: 0.12363509833812714 2023-01-23 00:52:46.401029: step: 916/527, loss: 0.3785005807876587 2023-01-23 00:52:47.488713: step: 920/527, loss: 0.029609298333525658 2023-01-23 00:52:48.616840: step: 924/527, loss: 0.038342855870723724 2023-01-23 00:52:49.741364: step: 928/527, loss: 0.09694366157054901 2023-01-23 00:52:50.856043: step: 932/527, loss: 0.021542739123106003 2023-01-23 00:52:52.017733: step: 936/527, loss: 0.08838529884815216 2023-01-23 00:52:53.119543: step: 940/527, loss: 0.15060663223266602 2023-01-23 00:52:54.227516: step: 944/527, loss: 0.0634339302778244 2023-01-23 00:52:55.360143: step: 948/527, loss: 0.01123666763305664 2023-01-23 00:52:56.482034: step: 952/527, loss: 0.0901309996843338 2023-01-23 00:52:57.571684: step: 956/527, loss: 0.2298179566860199 2023-01-23 00:52:58.698516: step: 960/527, loss: 0.025294208899140358 2023-01-23 00:52:59.802855: step: 964/527, loss: 0.08347129821777344 2023-01-23 00:53:00.901317: step: 968/527, loss: 0.2022933065891266 2023-01-23 00:53:02.039346: step: 972/527, loss: 0.07445468753576279 2023-01-23 00:53:03.126535: step: 976/527, loss: 0.19276714324951172 2023-01-23 00:53:04.242251: step: 980/527, loss: 0.6211450695991516 2023-01-23 00:53:05.338533: step: 984/527, loss: 0.10394144058227539 2023-01-23 00:53:06.471795: step: 988/527, loss: 0.08870887756347656 2023-01-23 00:53:07.578314: step: 992/527, loss: 0.05664863809943199 2023-01-23 00:53:08.675867: step: 996/527, loss: 0.03477358818054199 2023-01-23 00:53:09.789087: step: 1000/527, loss: 0.062087155878543854 2023-01-23 00:53:10.911879: step: 1004/527, loss: 0.20270757377147675 2023-01-23 00:53:12.005371: step: 1008/527, loss: 0.04581394046545029 2023-01-23 00:53:13.125232: step: 1012/527, loss: 0.24383649230003357 2023-01-23 00:53:14.218588: step: 1016/527, loss: 0.07283799350261688 2023-01-23 00:53:15.349157: step: 1020/527, loss: 0.05839822441339493 2023-01-23 00:53:16.457293: step: 1024/527, loss: 0.028377056121826172 2023-01-23 00:53:17.615839: step: 1028/527, loss: 0.02766857109963894 2023-01-23 00:53:18.722447: step: 1032/527, loss: 0.4884529411792755 2023-01-23 00:53:19.823465: step: 1036/527, loss: 0.12447874248027802 2023-01-23 00:53:20.919051: step: 1040/527, loss: 0.07007427513599396 2023-01-23 00:53:22.039675: step: 1044/527, loss: 0.06323814392089844 2023-01-23 00:53:23.141712: step: 1048/527, loss: 0.1427680104970932 2023-01-23 00:53:24.272476: step: 1052/527, loss: 0.04865298420190811 2023-01-23 00:53:25.406137: step: 1056/527, loss: 0.14291763305664062 2023-01-23 00:53:26.546134: step: 1060/527, loss: 6.43531608581543 2023-01-23 00:53:27.669621: step: 1064/527, loss: 0.5440343618392944 2023-01-23 00:53:28.813287: step: 1068/527, loss: 0.11009788513183594 2023-01-23 00:53:29.922520: step: 1072/527, loss: 0.07011604309082031 2023-01-23 00:53:31.015308: step: 1076/527, loss: 0.03946361318230629 2023-01-23 00:53:32.100237: step: 1080/527, loss: 0.8340146541595459 2023-01-23 00:53:33.220341: step: 1084/527, loss: 0.4368932843208313 2023-01-23 00:53:34.361702: step: 1088/527, loss: 0.11110344529151917 2023-01-23 00:53:35.452307: step: 1092/527, loss: 0.05119132995605469 2023-01-23 00:53:36.583663: step: 1096/527, loss: 0.2475181519985199 2023-01-23 00:53:37.704338: step: 1100/527, loss: 0.019387532025575638 2023-01-23 00:53:38.808532: step: 1104/527, loss: 0.05043792724609375 2023-01-23 00:53:39.944014: step: 1108/527, loss: 0.1348879039287567 2023-01-23 00:53:41.103005: step: 1112/527, loss: 0.0670267641544342 2023-01-23 00:53:42.222892: step: 1116/527, loss: 0.37143874168395996 2023-01-23 00:53:43.320137: step: 1120/527, loss: 1.0088536739349365 2023-01-23 00:53:44.470318: step: 1124/527, loss: 0.13461600244045258 2023-01-23 00:53:45.587598: step: 1128/527, loss: 0.08546924591064453 2023-01-23 00:53:46.684062: step: 1132/527, loss: 0.05917949602007866 2023-01-23 00:53:47.819340: step: 1136/527, loss: 0.1073429137468338 2023-01-23 00:53:48.946787: step: 1140/527, loss: 0.057706646621227264 2023-01-23 00:53:50.043207: step: 1144/527, loss: 0.07149829715490341 2023-01-23 00:53:51.178952: step: 1148/527, loss: 0.41345348954200745 2023-01-23 00:53:52.339340: step: 1152/527, loss: 0.04643116146326065 2023-01-23 00:53:53.477964: step: 1156/527, loss: 0.0728122740983963 2023-01-23 00:53:54.588470: step: 1160/527, loss: 0.098331980407238 2023-01-23 00:53:55.686637: step: 1164/527, loss: 0.13591843843460083 2023-01-23 00:53:56.800133: step: 1168/527, loss: 0.07631073147058487 2023-01-23 00:53:57.900773: step: 1172/527, loss: 0.015086389146745205 2023-01-23 00:53:59.031669: step: 1176/527, loss: 0.04788418114185333 2023-01-23 00:54:00.131978: step: 1180/527, loss: 0.017382431775331497 2023-01-23 00:54:01.255178: step: 1184/527, loss: 0.08957886695861816 2023-01-23 00:54:02.381010: step: 1188/527, loss: 0.011996269226074219 2023-01-23 00:54:03.513954: step: 1192/527, loss: 0.0798896849155426 2023-01-23 00:54:04.636323: step: 1196/527, loss: 0.03217463567852974 2023-01-23 00:54:05.742606: step: 1200/527, loss: 0.17385807633399963 2023-01-23 00:54:06.858896: step: 1204/527, loss: 0.07956424355506897 2023-01-23 00:54:08.000201: step: 1208/527, loss: 0.12261418998241425 2023-01-23 00:54:09.125009: step: 1212/527, loss: 0.16994795203208923 2023-01-23 00:54:10.246937: step: 1216/527, loss: 0.08262792229652405 2023-01-23 00:54:11.379511: step: 1220/527, loss: 0.1975012719631195 2023-01-23 00:54:12.508261: step: 1224/527, loss: 0.3106329143047333 2023-01-23 00:54:13.620866: step: 1228/527, loss: 0.1893273890018463 2023-01-23 00:54:14.727214: step: 1232/527, loss: 0.045536234974861145 2023-01-23 00:54:15.850795: step: 1236/527, loss: 0.14887762069702148 2023-01-23 00:54:16.922646: step: 1240/527, loss: 0.012140464968979359 2023-01-23 00:54:18.024611: step: 1244/527, loss: 0.08412811160087585 2023-01-23 00:54:19.168853: step: 1248/527, loss: 0.19463062286376953 2023-01-23 00:54:20.282501: step: 1252/527, loss: 0.16118593513965607 2023-01-23 00:54:21.397292: step: 1256/527, loss: 0.0313691608607769 2023-01-23 00:54:22.499124: step: 1260/527, loss: 0.026839066296815872 2023-01-23 00:54:23.612742: step: 1264/527, loss: 0.25529128313064575 2023-01-23 00:54:24.731560: step: 1268/527, loss: 0.007850361056625843 2023-01-23 00:54:25.843198: step: 1272/527, loss: 0.15709514915943146 2023-01-23 00:54:26.989556: step: 1276/527, loss: 0.040537551045417786 2023-01-23 00:54:28.104127: step: 1280/527, loss: 3.074796676635742 2023-01-23 00:54:29.218819: step: 1284/527, loss: 0.008968162350356579 2023-01-23 00:54:30.324475: step: 1288/527, loss: 0.09053463488817215 2023-01-23 00:54:31.461847: step: 1292/527, loss: 0.1288224160671234 2023-01-23 00:54:32.614268: step: 1296/527, loss: 0.07565007358789444 2023-01-23 00:54:33.755160: step: 1300/527, loss: 0.04186534881591797 2023-01-23 00:54:34.874503: step: 1304/527, loss: 0.08322028815746307 2023-01-23 00:54:35.987561: step: 1308/527, loss: 0.01934673823416233 2023-01-23 00:54:37.093160: step: 1312/527, loss: 0.01737365871667862 2023-01-23 00:54:38.202925: step: 1316/527, loss: 0.9277999997138977 2023-01-23 00:54:39.307509: step: 1320/527, loss: 0.2980154752731323 2023-01-23 00:54:40.465293: step: 1324/527, loss: 0.13807877898216248 2023-01-23 00:54:41.576146: step: 1328/527, loss: 1.058421015739441 2023-01-23 00:54:42.673058: step: 1332/527, loss: 0.10170484334230423 2023-01-23 00:54:43.805690: step: 1336/527, loss: 0.04638261720538139 2023-01-23 00:54:44.943261: step: 1340/527, loss: 0.21932242810726166 2023-01-23 00:54:46.067693: step: 1344/527, loss: 0.06194324418902397 2023-01-23 00:54:47.197752: step: 1348/527, loss: 0.23304852843284607 2023-01-23 00:54:48.256392: step: 1352/527, loss: 0.09255866706371307 2023-01-23 00:54:49.391175: step: 1356/527, loss: 0.09989690780639648 2023-01-23 00:54:50.504908: step: 1360/527, loss: 0.2880953848361969 2023-01-23 00:54:51.590220: step: 1364/527, loss: 0.254019558429718 2023-01-23 00:54:52.706498: step: 1368/527, loss: 0.1618337631225586 2023-01-23 00:54:53.823740: step: 1372/527, loss: 0.07139015197753906 2023-01-23 00:54:54.947836: step: 1376/527, loss: 0.08707895874977112 2023-01-23 00:54:56.067314: step: 1380/527, loss: 0.03160696104168892 2023-01-23 00:54:57.172654: step: 1384/527, loss: 0.07711543887853622 2023-01-23 00:54:58.316368: step: 1388/527, loss: 0.06767632067203522 2023-01-23 00:54:59.418897: step: 1392/527, loss: 0.02076435089111328 2023-01-23 00:55:00.535243: step: 1396/527, loss: 0.1154114231467247 2023-01-23 00:55:01.636846: step: 1400/527, loss: 1.3009544610977173 2023-01-23 00:55:02.760747: step: 1404/527, loss: 0.3502483367919922 2023-01-23 00:55:03.868234: step: 1408/527, loss: 0.012606620788574219 2023-01-23 00:55:04.970539: step: 1412/527, loss: 0.15717962384223938 2023-01-23 00:55:06.068380: step: 1416/527, loss: 0.23339462280273438 2023-01-23 00:55:07.167103: step: 1420/527, loss: 0.07521486282348633 2023-01-23 00:55:08.296809: step: 1424/527, loss: 0.115093894302845 2023-01-23 00:55:09.408759: step: 1428/527, loss: 0.008766556158661842 2023-01-23 00:55:10.519409: step: 1432/527, loss: 0.7830890417098999 2023-01-23 00:55:11.650763: step: 1436/527, loss: 0.011635685339570045 2023-01-23 00:55:12.758249: step: 1440/527, loss: 0.03912472724914551 2023-01-23 00:55:13.854154: step: 1444/527, loss: 0.07074785232543945 2023-01-23 00:55:14.949922: step: 1448/527, loss: 0.08525390923023224 2023-01-23 00:55:16.068684: step: 1452/527, loss: 0.005463886074721813 2023-01-23 00:55:17.206615: step: 1456/527, loss: 0.08118104934692383 2023-01-23 00:55:18.346175: step: 1460/527, loss: 0.07672515511512756 2023-01-23 00:55:19.490996: step: 1464/527, loss: 0.015753936022520065 2023-01-23 00:55:20.607475: step: 1468/527, loss: 0.06355476379394531 2023-01-23 00:55:21.750769: step: 1472/527, loss: 0.04105234146118164 2023-01-23 00:55:22.877399: step: 1476/527, loss: 0.052450992166996 2023-01-23 00:55:23.982481: step: 1480/527, loss: 0.11526508629322052 2023-01-23 00:55:25.106352: step: 1484/527, loss: 0.26573240756988525 2023-01-23 00:55:26.249792: step: 1488/527, loss: 0.02306346967816353 2023-01-23 00:55:27.373591: step: 1492/527, loss: 0.04622216522693634 2023-01-23 00:55:28.495017: step: 1496/527, loss: 0.07649651169776917 2023-01-23 00:55:29.614991: step: 1500/527, loss: 0.0812816172838211 2023-01-23 00:55:30.720233: step: 1504/527, loss: 0.06730327755212784 2023-01-23 00:55:31.830062: step: 1508/527, loss: 0.17887631058692932 2023-01-23 00:55:32.941735: step: 1512/527, loss: 0.08821187168359756 2023-01-23 00:55:34.072391: step: 1516/527, loss: 0.1024264395236969 2023-01-23 00:55:35.171652: step: 1520/527, loss: 0.18608751893043518 2023-01-23 00:55:36.268158: step: 1524/527, loss: 0.10426501929759979 2023-01-23 00:55:37.388007: step: 1528/527, loss: 0.08230405300855637 2023-01-23 00:55:38.525618: step: 1532/527, loss: 0.1259574443101883 2023-01-23 00:55:39.641909: step: 1536/527, loss: 0.06192807853221893 2023-01-23 00:55:40.746271: step: 1540/527, loss: 0.3448143005371094 2023-01-23 00:55:41.847402: step: 1544/527, loss: 0.05571703985333443 2023-01-23 00:55:42.961945: step: 1548/527, loss: 0.12170401215553284 2023-01-23 00:55:44.095064: step: 1552/527, loss: 0.25000256299972534 2023-01-23 00:55:45.199460: step: 1556/527, loss: 0.03165683522820473 2023-01-23 00:55:46.307328: step: 1560/527, loss: 0.42499417066574097 2023-01-23 00:55:47.438999: step: 1564/527, loss: 0.0724034309387207 2023-01-23 00:55:48.551815: step: 1568/527, loss: 0.23406529426574707 2023-01-23 00:55:49.665797: step: 1572/527, loss: 0.1629871428012848 2023-01-23 00:55:50.826755: step: 1576/527, loss: 0.39764395356178284 2023-01-23 00:55:51.919019: step: 1580/527, loss: 0.06674136966466904 2023-01-23 00:55:53.013784: step: 1584/527, loss: 0.23009684681892395 2023-01-23 00:55:54.132978: step: 1588/527, loss: 0.31569528579711914 2023-01-23 00:55:55.245725: step: 1592/527, loss: 0.10268383473157883 2023-01-23 00:55:56.363462: step: 1596/527, loss: 0.23501887917518616 2023-01-23 00:55:57.475120: step: 1600/527, loss: 0.0585942268371582 2023-01-23 00:55:58.587067: step: 1604/527, loss: 0.17699089646339417 2023-01-23 00:55:59.684764: step: 1608/527, loss: 0.07348638027906418 2023-01-23 00:56:00.791344: step: 1612/527, loss: 0.8916938900947571 2023-01-23 00:56:01.895643: step: 1616/527, loss: 0.16198918223381042 2023-01-23 00:56:02.998718: step: 1620/527, loss: 0.11379070580005646 2023-01-23 00:56:04.124243: step: 1624/527, loss: 0.0705508291721344 2023-01-23 00:56:05.234250: step: 1628/527, loss: 0.052710454910993576 2023-01-23 00:56:06.392532: step: 1632/527, loss: 0.04200391843914986 2023-01-23 00:56:07.554897: step: 1636/527, loss: 0.1498866081237793 2023-01-23 00:56:08.693889: step: 1640/527, loss: 0.05908498913049698 2023-01-23 00:56:09.826115: step: 1644/527, loss: 0.06964989006519318 2023-01-23 00:56:10.915329: step: 1648/527, loss: 0.025357915088534355 2023-01-23 00:56:12.032810: step: 1652/527, loss: 0.1391225904226303 2023-01-23 00:56:13.133787: step: 1656/527, loss: 0.0820837989449501 2023-01-23 00:56:14.261660: step: 1660/527, loss: 0.23472963273525238 2023-01-23 00:56:15.368979: step: 1664/527, loss: 0.028299950063228607 2023-01-23 00:56:16.481363: step: 1668/527, loss: 0.05475912243127823 2023-01-23 00:56:17.624251: step: 1672/527, loss: 0.18757067620754242 2023-01-23 00:56:18.707268: step: 1676/527, loss: 0.05311885103583336 2023-01-23 00:56:19.826061: step: 1680/527, loss: 0.045778512954711914 2023-01-23 00:56:20.912293: step: 1684/527, loss: 0.06054845079779625 2023-01-23 00:56:22.027292: step: 1688/527, loss: 0.49073266983032227 2023-01-23 00:56:23.159564: step: 1692/527, loss: 0.016210127621889114 2023-01-23 00:56:24.285843: step: 1696/527, loss: 0.1042594462633133 2023-01-23 00:56:25.425823: step: 1700/527, loss: 0.08621196448802948 2023-01-23 00:56:26.541039: step: 1704/527, loss: 0.025224018841981888 2023-01-23 00:56:27.647225: step: 1708/527, loss: 0.06634263694286346 2023-01-23 00:56:28.748172: step: 1712/527, loss: 0.026540659368038177 2023-01-23 00:56:29.836984: step: 1716/527, loss: 0.010688591748476028 2023-01-23 00:56:30.920679: step: 1720/527, loss: 0.09580745548009872 2023-01-23 00:56:32.073265: step: 1724/527, loss: 0.03691267967224121 2023-01-23 00:56:33.205476: step: 1728/527, loss: 0.03740682452917099 2023-01-23 00:56:34.318074: step: 1732/527, loss: 0.011236190795898438 2023-01-23 00:56:35.416942: step: 1736/527, loss: 0.2811966836452484 2023-01-23 00:56:36.519744: step: 1740/527, loss: 0.05300874635577202 2023-01-23 00:56:37.620576: step: 1744/527, loss: 0.09903955459594727 2023-01-23 00:56:38.738537: step: 1748/527, loss: 0.21401633322238922 2023-01-23 00:56:39.875208: step: 1752/527, loss: 0.09192991256713867 2023-01-23 00:56:40.992909: step: 1756/527, loss: 0.2722126245498657 2023-01-23 00:56:42.113391: step: 1760/527, loss: 0.1748226284980774 2023-01-23 00:56:43.235055: step: 1764/527, loss: 0.01491079293191433 2023-01-23 00:56:44.356175: step: 1768/527, loss: 0.2708101272583008 2023-01-23 00:56:45.510398: step: 1772/527, loss: 0.06429481506347656 2023-01-23 00:56:46.600534: step: 1776/527, loss: 0.0377623587846756 2023-01-23 00:56:47.747543: step: 1780/527, loss: 0.6399953961372375 2023-01-23 00:56:48.864905: step: 1784/527, loss: 0.1107591912150383 2023-01-23 00:56:49.988863: step: 1788/527, loss: 0.07851023972034454 2023-01-23 00:56:51.122507: step: 1792/527, loss: 0.04165377840399742 2023-01-23 00:56:52.204004: step: 1796/527, loss: 0.09852257370948792 2023-01-23 00:56:53.337257: step: 1800/527, loss: 0.07205762714147568 2023-01-23 00:56:54.462939: step: 1804/527, loss: 0.08528709411621094 2023-01-23 00:56:55.589698: step: 1808/527, loss: 0.12615692615509033 2023-01-23 00:56:56.717416: step: 1812/527, loss: 0.06534786522388458 2023-01-23 00:56:57.847606: step: 1816/527, loss: 0.03077840805053711 2023-01-23 00:56:58.956549: step: 1820/527, loss: 0.024987507611513138 2023-01-23 00:57:00.088856: step: 1824/527, loss: 0.0980696976184845 2023-01-23 00:57:01.213047: step: 1828/527, loss: 0.05786591023206711 2023-01-23 00:57:02.367220: step: 1832/527, loss: 0.1295120120048523 2023-01-23 00:57:03.466333: step: 1836/527, loss: 0.027859877794981003 2023-01-23 00:57:04.547095: step: 1840/527, loss: 0.17273612320423126 2023-01-23 00:57:05.678915: step: 1844/527, loss: 0.12468204647302628 2023-01-23 00:57:06.801671: step: 1848/527, loss: 0.4620359539985657 2023-01-23 00:57:07.903555: step: 1852/527, loss: 0.03260040283203125 2023-01-23 00:57:09.027276: step: 1856/527, loss: 0.23308992385864258 2023-01-23 00:57:10.178078: step: 1860/527, loss: 0.1158839762210846 2023-01-23 00:57:11.300080: step: 1864/527, loss: 0.7057000398635864 2023-01-23 00:57:12.432289: step: 1868/527, loss: 0.12176495045423508 2023-01-23 00:57:13.529363: step: 1872/527, loss: 0.25947055220603943 2023-01-23 00:57:14.623550: step: 1876/527, loss: 0.012261772528290749 2023-01-23 00:57:15.758867: step: 1880/527, loss: 0.09982604533433914 2023-01-23 00:57:16.859944: step: 1884/527, loss: 0.34849119186401367 2023-01-23 00:57:17.975920: step: 1888/527, loss: 0.11010236293077469 2023-01-23 00:57:19.087385: step: 1892/527, loss: 0.043065883219242096 2023-01-23 00:57:20.177201: step: 1896/527, loss: 0.00511436490342021 2023-01-23 00:57:21.302524: step: 1900/527, loss: 0.2329554557800293 2023-01-23 00:57:22.436992: step: 1904/527, loss: 0.024020005017518997 2023-01-23 00:57:23.579576: step: 1908/527, loss: 0.03548374027013779 2023-01-23 00:57:24.670086: step: 1912/527, loss: 0.1007990837097168 2023-01-23 00:57:25.762311: step: 1916/527, loss: 0.1330731064081192 2023-01-23 00:57:26.873851: step: 1920/527, loss: 0.05790015682578087 2023-01-23 00:57:28.052766: step: 1924/527, loss: 0.031973741948604584 2023-01-23 00:57:29.145800: step: 1928/527, loss: 0.13544607162475586 2023-01-23 00:57:30.234248: step: 1932/527, loss: 0.10375213623046875 2023-01-23 00:57:31.340479: step: 1936/527, loss: 0.00409278878942132 2023-01-23 00:57:32.449876: step: 1940/527, loss: 0.22216644883155823 2023-01-23 00:57:33.561960: step: 1944/527, loss: 0.08315802365541458 2023-01-23 00:57:34.728982: step: 1948/527, loss: 0.16365459561347961 2023-01-23 00:57:35.836163: step: 1952/527, loss: 0.10047474503517151 2023-01-23 00:57:36.948100: step: 1956/527, loss: 0.023608017712831497 2023-01-23 00:57:38.077228: step: 1960/527, loss: 0.14685048162937164 2023-01-23 00:57:39.181776: step: 1964/527, loss: 0.4481789469718933 2023-01-23 00:57:40.283096: step: 1968/527, loss: 0.01799626462161541 2023-01-23 00:57:41.396646: step: 1972/527, loss: 0.13384322822093964 2023-01-23 00:57:42.542214: step: 1976/527, loss: 0.05929103121161461 2023-01-23 00:57:43.654397: step: 1980/527, loss: 0.04733085632324219 2023-01-23 00:57:44.795295: step: 1984/527, loss: 0.1961507797241211 2023-01-23 00:57:45.927162: step: 1988/527, loss: 0.0054912567138671875 2023-01-23 00:57:47.017977: step: 1992/527, loss: 0.010316896252334118 2023-01-23 00:57:48.119096: step: 1996/527, loss: 0.04214811325073242 2023-01-23 00:57:49.252377: step: 2000/527, loss: 0.02843298949301243 2023-01-23 00:57:50.363418: step: 2004/527, loss: 0.1185968816280365 2023-01-23 00:57:51.473652: step: 2008/527, loss: 0.042241476476192474 2023-01-23 00:57:52.572300: step: 2012/527, loss: 0.035033226013183594 2023-01-23 00:57:53.670588: step: 2016/527, loss: 0.10732424259185791 2023-01-23 00:57:54.794492: step: 2020/527, loss: 0.2094259262084961 2023-01-23 00:57:55.910126: step: 2024/527, loss: 0.029109908267855644 2023-01-23 00:57:57.040037: step: 2028/527, loss: 0.11315536499023438 2023-01-23 00:57:58.146849: step: 2032/527, loss: 0.31974098086357117 2023-01-23 00:57:59.268922: step: 2036/527, loss: 0.17729873955249786 2023-01-23 00:58:00.388957: step: 2040/527, loss: 0.1334826797246933 2023-01-23 00:58:01.505414: step: 2044/527, loss: 0.08637714385986328 2023-01-23 00:58:02.602451: step: 2048/527, loss: 0.7767112255096436 2023-01-23 00:58:03.691925: step: 2052/527, loss: 0.07197284698486328 2023-01-23 00:58:04.810917: step: 2056/527, loss: 0.06646418571472168 2023-01-23 00:58:05.913996: step: 2060/527, loss: 0.09405043721199036 2023-01-23 00:58:07.009010: step: 2064/527, loss: 0.10352182388305664 2023-01-23 00:58:08.112523: step: 2068/527, loss: 0.014022446237504482 2023-01-23 00:58:09.252682: step: 2072/527, loss: 0.18049511313438416 2023-01-23 00:58:10.385155: step: 2076/527, loss: 0.1650785505771637 2023-01-23 00:58:11.526048: step: 2080/527, loss: 0.0005835056654177606 2023-01-23 00:58:12.639431: step: 2084/527, loss: 0.1145477294921875 2023-01-23 00:58:13.739629: step: 2088/527, loss: 0.05340452119708061 2023-01-23 00:58:14.815844: step: 2092/527, loss: 0.04760236665606499 2023-01-23 00:58:15.919620: step: 2096/527, loss: 0.016291523352265358 2023-01-23 00:58:17.053442: step: 2100/527, loss: 0.43703246116638184 2023-01-23 00:58:18.198058: step: 2104/527, loss: 0.033960867673158646 2023-01-23 00:58:19.304078: step: 2108/527, loss: 0.09452714771032333 ================================================== Loss: 0.155 -------------------- Dev: {'event': {'p': 0.6291759465478842, 'r': 0.7523302263648469, 'f1': 0.6852637962401456}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Test: {'event': {'p': 0.6327412385981757, 'r': 0.7531428571428571, 'f1': 0.6877119749543438}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Chinese: {'event': {'p': 0.5443037974683544, 'r': 0.7962962962962963, 'f1': 0.6466165413533835}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Korean: {'event': {'p': 0.5681818181818182, 'r': 0.3968253968253968, 'f1': 0.4672897196261682}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Russian: {'event': {'p': 0.5277777777777778, 'r': 0.5277777777777778, 'f1': 0.5277777777777778}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Eng Test for Korean: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 9 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 00:59:00.582199: step: 4/527, loss: 0.051595307886600494 2023-01-23 00:59:01.714804: step: 8/527, loss: 0.21795311570167542 2023-01-23 00:59:02.809375: step: 12/527, loss: 0.014342594891786575 2023-01-23 00:59:03.935670: step: 16/527, loss: 0.007032871246337891 2023-01-23 00:59:05.096400: step: 20/527, loss: 0.056672193109989166 2023-01-23 00:59:06.227779: step: 24/527, loss: 0.09952010959386826 2023-01-23 00:59:07.364641: step: 28/527, loss: 0.0350005142390728 2023-01-23 00:59:08.473011: step: 32/527, loss: 0.04804582521319389 2023-01-23 00:59:09.576454: step: 36/527, loss: 0.0372166633605957 2023-01-23 00:59:10.673127: step: 40/527, loss: 0.14153394103050232 2023-01-23 00:59:11.779483: step: 44/527, loss: 0.0940588042140007 2023-01-23 00:59:12.879938: step: 48/527, loss: 0.07738695293664932 2023-01-23 00:59:14.002787: step: 52/527, loss: 0.019930411130189896 2023-01-23 00:59:15.112365: step: 56/527, loss: 0.07731838524341583 2023-01-23 00:59:16.240854: step: 60/527, loss: 0.026688480749726295 2023-01-23 00:59:17.345592: step: 64/527, loss: 0.2156534194946289 2023-01-23 00:59:18.461236: step: 68/527, loss: 0.07055337727069855 2023-01-23 00:59:19.565086: step: 72/527, loss: 0.03282523527741432 2023-01-23 00:59:20.704736: step: 76/527, loss: 0.07980633527040482 2023-01-23 00:59:21.823671: step: 80/527, loss: 0.05249176174402237 2023-01-23 00:59:22.964093: step: 84/527, loss: 0.08545837551355362 2023-01-23 00:59:24.084515: step: 88/527, loss: 0.06726416945457458 2023-01-23 00:59:25.188838: step: 92/527, loss: 0.02964954450726509 2023-01-23 00:59:26.301509: step: 96/527, loss: 0.586465060710907 2023-01-23 00:59:27.403843: step: 100/527, loss: 0.06918954849243164 2023-01-23 00:59:28.524467: step: 104/527, loss: 0.03303222730755806 2023-01-23 00:59:29.630675: step: 108/527, loss: 0.11050129681825638 2023-01-23 00:59:30.752917: step: 112/527, loss: 0.13752880692481995 2023-01-23 00:59:31.885818: step: 116/527, loss: 0.013901901431381702 2023-01-23 00:59:32.981729: step: 120/527, loss: 0.020580053329467773 2023-01-23 00:59:34.098536: step: 124/527, loss: 0.0653480589389801 2023-01-23 00:59:35.202627: step: 128/527, loss: 0.02235393412411213 2023-01-23 00:59:36.316551: step: 132/527, loss: 0.0476410910487175 2023-01-23 00:59:37.432306: step: 136/527, loss: 0.1430215835571289 2023-01-23 00:59:38.576438: step: 140/527, loss: 0.6804027557373047 2023-01-23 00:59:39.698050: step: 144/527, loss: 0.052741244435310364 2023-01-23 00:59:40.803286: step: 148/527, loss: 0.11618385463953018 2023-01-23 00:59:41.921916: step: 152/527, loss: 0.028086375445127487 2023-01-23 00:59:43.003426: step: 156/527, loss: 0.01134040392935276 2023-01-23 00:59:44.127838: step: 160/527, loss: 0.10427971184253693 2023-01-23 00:59:45.215896: step: 164/527, loss: 0.037943027913570404 2023-01-23 00:59:46.342507: step: 168/527, loss: 0.024988938122987747 2023-01-23 00:59:47.468890: step: 172/527, loss: 0.01216268539428711 2023-01-23 00:59:48.562650: step: 176/527, loss: 0.2157118022441864 2023-01-23 00:59:49.687670: step: 180/527, loss: 0.007859420962631702 2023-01-23 00:59:50.784067: step: 184/527, loss: 0.04901857674121857 2023-01-23 00:59:51.916288: step: 188/527, loss: 0.05857124179601669 2023-01-23 00:59:53.043428: step: 192/527, loss: 0.0880930945277214 2023-01-23 00:59:54.140644: step: 196/527, loss: 0.08765526115894318 2023-01-23 00:59:55.244631: step: 200/527, loss: 0.012512397952377796 2023-01-23 00:59:56.358061: step: 204/527, loss: 0.08281493186950684 2023-01-23 00:59:57.495576: step: 208/527, loss: 0.2987131178379059 2023-01-23 00:59:58.592907: step: 212/527, loss: 0.03581047058105469 2023-01-23 00:59:59.704836: step: 216/527, loss: 0.04344940185546875 2023-01-23 01:00:00.826478: step: 220/527, loss: 0.038758471608161926 2023-01-23 01:00:01.942034: step: 224/527, loss: 0.016405964270234108 2023-01-23 01:00:03.101575: step: 228/527, loss: 0.009473991580307484 2023-01-23 01:00:04.255435: step: 232/527, loss: 0.01953125 2023-01-23 01:00:05.365606: step: 236/527, loss: 0.5504188537597656 2023-01-23 01:00:06.470482: step: 240/527, loss: 0.13292884826660156 2023-01-23 01:00:07.601627: step: 244/527, loss: 0.0989723950624466 2023-01-23 01:00:08.694048: step: 248/527, loss: 0.024614572525024414 2023-01-23 01:00:09.827101: step: 252/527, loss: 0.05763568729162216 2023-01-23 01:00:10.917396: step: 256/527, loss: 0.031978532671928406 2023-01-23 01:00:12.038906: step: 260/527, loss: 0.059507131576538086 2023-01-23 01:00:13.151889: step: 264/527, loss: 0.11678314208984375 2023-01-23 01:00:14.310296: step: 268/527, loss: 0.05213165283203125 2023-01-23 01:00:15.419433: step: 272/527, loss: 0.3329939842224121 2023-01-23 01:00:16.569763: step: 276/527, loss: 0.0064781904220581055 2023-01-23 01:00:17.698409: step: 280/527, loss: 0.1207832396030426 2023-01-23 01:00:18.827092: step: 284/527, loss: 0.15894566476345062 2023-01-23 01:00:19.943241: step: 288/527, loss: 0.018662452697753906 2023-01-23 01:00:21.097310: step: 292/527, loss: 0.1356135457754135 2023-01-23 01:00:22.219841: step: 296/527, loss: 0.021311473101377487 2023-01-23 01:00:23.315745: step: 300/527, loss: 0.039720773696899414 2023-01-23 01:00:24.427199: step: 304/527, loss: 0.03100752830505371 2023-01-23 01:00:25.541100: step: 308/527, loss: 0.36052972078323364 2023-01-23 01:00:26.661013: step: 312/527, loss: 0.14400386810302734 2023-01-23 01:00:27.767931: step: 316/527, loss: 0.18612852692604065 2023-01-23 01:00:28.863631: step: 320/527, loss: 0.006817149929702282 2023-01-23 01:00:29.982262: step: 324/527, loss: 0.03348485380411148 2023-01-23 01:00:31.086844: step: 328/527, loss: 0.03515777736902237 2023-01-23 01:00:32.206016: step: 332/527, loss: 0.1866414099931717 2023-01-23 01:00:33.354890: step: 336/527, loss: 0.1255602389574051 2023-01-23 01:00:34.485832: step: 340/527, loss: 0.06024637073278427 2023-01-23 01:00:35.583422: step: 344/527, loss: 0.11093979328870773 2023-01-23 01:00:36.703912: step: 348/527, loss: 0.07445193827152252 2023-01-23 01:00:37.806782: step: 352/527, loss: 0.12680628895759583 2023-01-23 01:00:38.930504: step: 356/527, loss: 0.012469196692109108 2023-01-23 01:00:40.039023: step: 360/527, loss: 0.04769272729754448 2023-01-23 01:00:41.158716: step: 364/527, loss: 0.028695013374090195 2023-01-23 01:00:42.287151: step: 368/527, loss: 0.02932453155517578 2023-01-23 01:00:43.397072: step: 372/527, loss: 0.08361206203699112 2023-01-23 01:00:44.516994: step: 376/527, loss: 0.015822220593690872 2023-01-23 01:00:45.671426: step: 380/527, loss: 0.9458737373352051 2023-01-23 01:00:46.808755: step: 384/527, loss: 0.07193779945373535 2023-01-23 01:00:47.917204: step: 388/527, loss: 0.041545186191797256 2023-01-23 01:00:49.038591: step: 392/527, loss: 0.07402548938989639 2023-01-23 01:00:50.182415: step: 396/527, loss: 0.2643594741821289 2023-01-23 01:00:51.285432: step: 400/527, loss: 0.05531501770019531 2023-01-23 01:00:52.390987: step: 404/527, loss: 0.140653595328331 2023-01-23 01:00:53.499078: step: 408/527, loss: 0.19587451219558716 2023-01-23 01:00:54.596634: step: 412/527, loss: 0.10895223915576935 2023-01-23 01:00:55.687709: step: 416/527, loss: 0.03143281862139702 2023-01-23 01:00:56.818050: step: 420/527, loss: 0.1968051940202713 2023-01-23 01:00:57.944193: step: 424/527, loss: 0.13352827727794647 2023-01-23 01:00:59.067497: step: 428/527, loss: 0.06161308288574219 2023-01-23 01:01:00.189631: step: 432/527, loss: 0.05079159513115883 2023-01-23 01:01:01.309837: step: 436/527, loss: 0.027446651831269264 2023-01-23 01:01:02.459565: step: 440/527, loss: 0.893958568572998 2023-01-23 01:01:03.577632: step: 444/527, loss: 0.0050172386690974236 2023-01-23 01:01:04.683300: step: 448/527, loss: 0.17577290534973145 2023-01-23 01:01:05.789215: step: 452/527, loss: 0.07095552980899811 2023-01-23 01:01:06.882813: step: 456/527, loss: 0.046774961054325104 2023-01-23 01:01:07.992453: step: 460/527, loss: 0.03888244554400444 2023-01-23 01:01:09.114671: step: 464/527, loss: 0.07091875374317169 2023-01-23 01:01:10.259008: step: 468/527, loss: 0.05677299574017525 2023-01-23 01:01:11.389294: step: 472/527, loss: 0.028757859021425247 2023-01-23 01:01:12.518232: step: 476/527, loss: 0.0855398178100586 2023-01-23 01:01:13.615278: step: 480/527, loss: 0.018016815185546875 2023-01-23 01:01:14.719375: step: 484/527, loss: 0.723493218421936 2023-01-23 01:01:15.849511: step: 488/527, loss: 0.023238373920321465 2023-01-23 01:01:16.983449: step: 492/527, loss: 0.0139433853328228 2023-01-23 01:01:18.107112: step: 496/527, loss: 0.03217935562133789 2023-01-23 01:01:19.245643: step: 500/527, loss: 0.08146496117115021 2023-01-23 01:01:20.338830: step: 504/527, loss: 0.16402435302734375 2023-01-23 01:01:21.446173: step: 508/527, loss: 0.09143076092004776 2023-01-23 01:01:22.544757: step: 512/527, loss: 0.18203610181808472 2023-01-23 01:01:23.661922: step: 516/527, loss: 0.05439407750964165 2023-01-23 01:01:24.796737: step: 520/527, loss: 0.040465641766786575 2023-01-23 01:01:25.930630: step: 524/527, loss: 0.15628820657730103 2023-01-23 01:01:27.035907: step: 528/527, loss: 0.024280261248350143 2023-01-23 01:01:28.168271: step: 532/527, loss: 0.9812090396881104 2023-01-23 01:01:29.266832: step: 536/527, loss: 0.041773274540901184 2023-01-23 01:01:30.404391: step: 540/527, loss: 0.014004421420395374 2023-01-23 01:01:31.479950: step: 544/527, loss: 0.28797128796577454 2023-01-23 01:01:32.592316: step: 548/527, loss: 0.15383762121200562 2023-01-23 01:01:33.740595: step: 552/527, loss: 0.03226127475500107 2023-01-23 01:01:34.861761: step: 556/527, loss: 0.07663097232580185 2023-01-23 01:01:35.979103: step: 560/527, loss: 0.157141774892807 2023-01-23 01:01:37.098447: step: 564/527, loss: 0.030769873410463333 2023-01-23 01:01:38.246484: step: 568/527, loss: 0.021959971636533737 2023-01-23 01:01:39.368522: step: 572/527, loss: 0.3176236152648926 2023-01-23 01:01:40.474870: step: 576/527, loss: 0.09017715603113174 2023-01-23 01:01:41.602084: step: 580/527, loss: 0.11581927537918091 2023-01-23 01:01:42.704977: step: 584/527, loss: 0.025803565979003906 2023-01-23 01:01:43.796632: step: 588/527, loss: 0.11176472157239914 2023-01-23 01:01:44.950258: step: 592/527, loss: 0.17112714052200317 2023-01-23 01:01:46.081790: step: 596/527, loss: 0.04026642069220543 2023-01-23 01:01:47.207706: step: 600/527, loss: 0.04269104078412056 2023-01-23 01:01:48.328004: step: 604/527, loss: 0.001627254532650113 2023-01-23 01:01:49.479017: step: 608/527, loss: 0.06278057396411896 2023-01-23 01:01:50.589853: step: 612/527, loss: 0.12767010927200317 2023-01-23 01:01:51.716498: step: 616/527, loss: 0.16504952311515808 2023-01-23 01:01:52.826278: step: 620/527, loss: 0.8820794224739075 2023-01-23 01:01:53.975442: step: 624/527, loss: 0.06617297977209091 2023-01-23 01:01:55.088001: step: 628/527, loss: 0.10420079529285431 2023-01-23 01:01:56.181151: step: 632/527, loss: 0.1489923745393753 2023-01-23 01:01:57.281512: step: 636/527, loss: 0.035975027829408646 2023-01-23 01:01:58.386753: step: 640/527, loss: 0.006674480624496937 2023-01-23 01:01:59.496301: step: 644/527, loss: 0.05110277980566025 2023-01-23 01:02:00.624956: step: 648/527, loss: 0.10432196408510208 2023-01-23 01:02:01.730826: step: 652/527, loss: 0.12818804383277893 2023-01-23 01:02:02.875774: step: 656/527, loss: 0.08545484393835068 2023-01-23 01:02:04.005175: step: 660/527, loss: 0.1794532835483551 2023-01-23 01:02:05.101782: step: 664/527, loss: 0.02562990039587021 2023-01-23 01:02:06.215442: step: 668/527, loss: 0.11701121181249619 2023-01-23 01:02:07.329619: step: 672/527, loss: 0.0627840980887413 2023-01-23 01:02:08.442617: step: 676/527, loss: 0.028455354273319244 2023-01-23 01:02:09.623052: step: 680/527, loss: 0.08318942785263062 2023-01-23 01:02:10.719124: step: 684/527, loss: 0.058298543095588684 2023-01-23 01:02:11.816749: step: 688/527, loss: 0.11548461765050888 2023-01-23 01:02:12.912142: step: 692/527, loss: 0.05990896373987198 2023-01-23 01:02:14.041462: step: 696/527, loss: 0.04069337993860245 2023-01-23 01:02:15.158401: step: 700/527, loss: 0.0668603926897049 2023-01-23 01:02:16.241702: step: 704/527, loss: 0.0790155902504921 2023-01-23 01:02:17.378375: step: 708/527, loss: 0.0729002058506012 2023-01-23 01:02:18.482397: step: 712/527, loss: 0.04533720016479492 2023-01-23 01:02:19.593527: step: 716/527, loss: 0.00871977861970663 2023-01-23 01:02:20.701776: step: 720/527, loss: 0.24235667288303375 2023-01-23 01:02:21.823060: step: 724/527, loss: 0.16085177659988403 2023-01-23 01:02:22.922525: step: 728/527, loss: 0.06069374084472656 2023-01-23 01:02:24.027638: step: 732/527, loss: 0.8698010444641113 2023-01-23 01:02:25.137401: step: 736/527, loss: 0.08304710686206818 2023-01-23 01:02:26.272753: step: 740/527, loss: 0.04545240476727486 2023-01-23 01:02:27.409196: step: 744/527, loss: 0.17627717554569244 2023-01-23 01:02:28.556000: step: 748/527, loss: 0.10222113132476807 2023-01-23 01:02:29.683560: step: 752/527, loss: 0.04582156985998154 2023-01-23 01:02:30.809330: step: 756/527, loss: 0.12691383063793182 2023-01-23 01:02:31.926177: step: 760/527, loss: 0.015666961669921875 2023-01-23 01:02:33.066296: step: 764/527, loss: 0.023395538330078125 2023-01-23 01:02:34.167608: step: 768/527, loss: 0.12584558129310608 2023-01-23 01:02:35.272130: step: 772/527, loss: 0.2768898010253906 2023-01-23 01:02:36.373543: step: 776/527, loss: 0.061646413058042526 2023-01-23 01:02:37.468440: step: 780/527, loss: 0.14021873474121094 2023-01-23 01:02:38.588822: step: 784/527, loss: 0.022058581933379173 2023-01-23 01:02:39.742725: step: 788/527, loss: 0.09420184791088104 2023-01-23 01:02:40.898118: step: 792/527, loss: 0.006947755813598633 2023-01-23 01:02:42.024735: step: 796/527, loss: 0.035112954676151276 2023-01-23 01:02:43.132604: step: 800/527, loss: 0.05607881397008896 2023-01-23 01:02:44.278326: step: 804/527, loss: 0.13283759355545044 2023-01-23 01:02:45.392564: step: 808/527, loss: 0.2162027209997177 2023-01-23 01:02:46.476808: step: 812/527, loss: 0.010124064050614834 2023-01-23 01:02:47.560566: step: 816/527, loss: 0.04513978958129883 2023-01-23 01:02:48.677969: step: 820/527, loss: 0.08695545047521591 2023-01-23 01:02:49.811132: step: 824/527, loss: 0.028638172894716263 2023-01-23 01:02:50.909724: step: 828/527, loss: 0.07570729404687881 2023-01-23 01:02:52.033555: step: 832/527, loss: 0.031127024441957474 2023-01-23 01:02:53.180549: step: 836/527, loss: 0.147850900888443 2023-01-23 01:02:54.340307: step: 840/527, loss: 0.13352108001708984 2023-01-23 01:02:55.457427: step: 844/527, loss: 0.008698338642716408 2023-01-23 01:02:56.578498: step: 848/527, loss: 0.06953773647546768 2023-01-23 01:02:57.694861: step: 852/527, loss: 0.19496451318264008 2023-01-23 01:02:58.803681: step: 856/527, loss: 0.012446021661162376 2023-01-23 01:02:59.926348: step: 860/527, loss: 0.016244173049926758 2023-01-23 01:03:01.015242: step: 864/527, loss: 0.02343278005719185 2023-01-23 01:03:02.107815: step: 868/527, loss: 0.06877613067626953 2023-01-23 01:03:03.220316: step: 872/527, loss: 0.08043460547924042 2023-01-23 01:03:04.340217: step: 876/527, loss: 0.10054989159107208 2023-01-23 01:03:05.471390: step: 880/527, loss: 0.022923266515135765 2023-01-23 01:03:06.600820: step: 884/527, loss: 0.008685588836669922 2023-01-23 01:03:07.732889: step: 888/527, loss: 0.04530506208539009 2023-01-23 01:03:08.858983: step: 892/527, loss: 0.05135536193847656 2023-01-23 01:03:09.999487: step: 896/527, loss: 0.12792836129665375 2023-01-23 01:03:11.073302: step: 900/527, loss: 0.1459997296333313 2023-01-23 01:03:12.191976: step: 904/527, loss: 0.012559604831039906 2023-01-23 01:03:13.314632: step: 908/527, loss: 0.049593355506658554 2023-01-23 01:03:14.410247: step: 912/527, loss: 0.06437063217163086 2023-01-23 01:03:15.507447: step: 916/527, loss: 0.31694433093070984 2023-01-23 01:03:16.636216: step: 920/527, loss: 0.06975831836462021 2023-01-23 01:03:17.768977: step: 924/527, loss: 0.0027571157552301884 2023-01-23 01:03:18.886120: step: 928/527, loss: 0.02089233323931694 2023-01-23 01:03:19.990285: step: 932/527, loss: 0.12079944461584091 2023-01-23 01:03:21.087009: step: 936/527, loss: 0.025944948196411133 2023-01-23 01:03:22.202858: step: 940/527, loss: 0.060515787452459335 2023-01-23 01:03:23.301804: step: 944/527, loss: 0.06139860302209854 2023-01-23 01:03:24.390841: step: 948/527, loss: 0.0793663039803505 2023-01-23 01:03:25.491297: step: 952/527, loss: 0.03343363106250763 2023-01-23 01:03:26.621704: step: 956/527, loss: 0.058388423174619675 2023-01-23 01:03:27.729395: step: 960/527, loss: 0.003471088595688343 2023-01-23 01:03:28.801987: step: 964/527, loss: 0.0008508682367391884 2023-01-23 01:03:29.924362: step: 968/527, loss: 0.21581801772117615 2023-01-23 01:03:31.042123: step: 972/527, loss: 0.1659446656703949 2023-01-23 01:03:32.147941: step: 976/527, loss: 0.02426748350262642 2023-01-23 01:03:33.245686: step: 980/527, loss: 0.07811923325061798 2023-01-23 01:03:34.350804: step: 984/527, loss: 0.03855371102690697 2023-01-23 01:03:35.452343: step: 988/527, loss: 0.044640637934207916 2023-01-23 01:03:36.572588: step: 992/527, loss: 0.09066858887672424 2023-01-23 01:03:37.684485: step: 996/527, loss: 0.25727522373199463 2023-01-23 01:03:38.819990: step: 1000/527, loss: 0.030248023569583893 2023-01-23 01:03:39.926110: step: 1004/527, loss: 0.1124546080827713 2023-01-23 01:03:41.041145: step: 1008/527, loss: 0.050176240503787994 2023-01-23 01:03:42.165708: step: 1012/527, loss: 0.023157360032200813 2023-01-23 01:03:43.283944: step: 1016/527, loss: 0.051595114171504974 2023-01-23 01:03:44.435539: step: 1020/527, loss: 0.08982644230127335 2023-01-23 01:03:45.585106: step: 1024/527, loss: 0.056859780102968216 2023-01-23 01:03:46.678043: step: 1028/527, loss: 0.0330626480281353 2023-01-23 01:03:47.782865: step: 1032/527, loss: 0.15032851696014404 2023-01-23 01:03:48.919950: step: 1036/527, loss: 0.013005781918764114 2023-01-23 01:03:50.010420: step: 1040/527, loss: 0.010991288349032402 2023-01-23 01:03:51.129086: step: 1044/527, loss: 0.03911247104406357 2023-01-23 01:03:52.257592: step: 1048/527, loss: 0.21045923233032227 2023-01-23 01:03:53.382362: step: 1052/527, loss: 0.06488379091024399 2023-01-23 01:03:54.494440: step: 1056/527, loss: 0.09807959198951721 2023-01-23 01:03:55.620625: step: 1060/527, loss: 0.10139675438404083 2023-01-23 01:03:56.724082: step: 1064/527, loss: 0.05873613432049751 2023-01-23 01:03:57.870442: step: 1068/527, loss: 0.04523877799510956 2023-01-23 01:03:58.965476: step: 1072/527, loss: 0.03674621507525444 2023-01-23 01:04:00.065236: step: 1076/527, loss: 0.08303405344486237 2023-01-23 01:04:01.195177: step: 1080/527, loss: 0.054634857922792435 2023-01-23 01:04:02.312024: step: 1084/527, loss: 0.01169128529727459 2023-01-23 01:04:03.417378: step: 1088/527, loss: 0.0008047103765420616 2023-01-23 01:04:04.501315: step: 1092/527, loss: 0.034918248653411865 2023-01-23 01:04:05.615676: step: 1096/527, loss: 0.016103506088256836 2023-01-23 01:04:06.736726: step: 1100/527, loss: 0.014906119555234909 2023-01-23 01:04:07.874241: step: 1104/527, loss: 0.4689117670059204 2023-01-23 01:04:08.988436: step: 1108/527, loss: 0.06313753128051758 2023-01-23 01:04:10.084968: step: 1112/527, loss: 0.04947681352496147 2023-01-23 01:04:11.213548: step: 1116/527, loss: 0.1910746544599533 2023-01-23 01:04:12.336139: step: 1120/527, loss: 0.05156998708844185 2023-01-23 01:04:13.455593: step: 1124/527, loss: 0.10549669712781906 2023-01-23 01:04:14.576537: step: 1128/527, loss: 0.08306846767663956 2023-01-23 01:04:15.699155: step: 1132/527, loss: 0.2798956036567688 2023-01-23 01:04:16.837201: step: 1136/527, loss: 0.020055418834090233 2023-01-23 01:04:17.949198: step: 1140/527, loss: 0.12009506672620773 2023-01-23 01:04:19.089248: step: 1144/527, loss: 0.07453088462352753 2023-01-23 01:04:20.229660: step: 1148/527, loss: 0.05657706409692764 2023-01-23 01:04:21.337260: step: 1152/527, loss: 0.009659100323915482 2023-01-23 01:04:22.437429: step: 1156/527, loss: 0.07631902396678925 2023-01-23 01:04:23.539380: step: 1160/527, loss: 0.013288307003676891 2023-01-23 01:04:24.640449: step: 1164/527, loss: 0.03264322504401207 2023-01-23 01:04:25.782062: step: 1168/527, loss: 0.10932694375514984 2023-01-23 01:04:26.893529: step: 1172/527, loss: 0.12802904844284058 2023-01-23 01:04:28.049597: step: 1176/527, loss: 0.03888988494873047 2023-01-23 01:04:29.118400: step: 1180/527, loss: 0.09084005653858185 2023-01-23 01:04:30.219264: step: 1184/527, loss: 0.04941602051258087 2023-01-23 01:04:31.335630: step: 1188/527, loss: 0.204171285033226 2023-01-23 01:04:32.463290: step: 1192/527, loss: 0.10325238853693008 2023-01-23 01:04:33.597085: step: 1196/527, loss: 0.13256892561912537 2023-01-23 01:04:34.703484: step: 1200/527, loss: 0.3588072955608368 2023-01-23 01:04:35.790327: step: 1204/527, loss: 0.0320286750793457 2023-01-23 01:04:36.937126: step: 1208/527, loss: 0.07419176399707794 2023-01-23 01:04:38.076604: step: 1212/527, loss: 0.2340150773525238 2023-01-23 01:04:39.179899: step: 1216/527, loss: 0.0997081771492958 2023-01-23 01:04:40.311662: step: 1220/527, loss: 0.03382454067468643 2023-01-23 01:04:41.433990: step: 1224/527, loss: 0.3190089166164398 2023-01-23 01:04:42.536334: step: 1228/527, loss: 0.06604671478271484 2023-01-23 01:04:43.670261: step: 1232/527, loss: 0.021700192242860794 2023-01-23 01:04:44.806692: step: 1236/527, loss: 0.08422355353832245 2023-01-23 01:04:45.920235: step: 1240/527, loss: 0.04974422603845596 2023-01-23 01:04:47.063371: step: 1244/527, loss: 0.050561144948005676 2023-01-23 01:04:48.145750: step: 1248/527, loss: 0.09664221107959747 2023-01-23 01:04:49.298490: step: 1252/527, loss: 0.07937774807214737 2023-01-23 01:04:50.456838: step: 1256/527, loss: 0.06734037399291992 2023-01-23 01:04:51.558510: step: 1260/527, loss: 0.12246689945459366 2023-01-23 01:04:52.757510: step: 1264/527, loss: 0.16401682794094086 2023-01-23 01:04:53.904194: step: 1268/527, loss: 0.04839286953210831 2023-01-23 01:04:55.023311: step: 1272/527, loss: 0.03941002115607262 2023-01-23 01:04:56.135433: step: 1276/527, loss: 0.406686395406723 2023-01-23 01:04:57.244505: step: 1280/527, loss: 0.10335026681423187 2023-01-23 01:04:58.341757: step: 1284/527, loss: 0.027607250958681107 2023-01-23 01:04:59.421100: step: 1288/527, loss: 0.0010218620300292969 2023-01-23 01:05:00.551177: step: 1292/527, loss: 0.18122625350952148 2023-01-23 01:05:01.708804: step: 1296/527, loss: 0.1968582272529602 2023-01-23 01:05:02.849314: step: 1300/527, loss: 0.3424030840396881 2023-01-23 01:05:03.977275: step: 1304/527, loss: 0.07962293922901154 2023-01-23 01:05:05.091795: step: 1308/527, loss: 0.02208681032061577 2023-01-23 01:05:06.214419: step: 1312/527, loss: 0.06310930848121643 2023-01-23 01:05:07.315682: step: 1316/527, loss: 0.011055802926421165 2023-01-23 01:05:08.424479: step: 1320/527, loss: 0.14942240715026855 2023-01-23 01:05:09.509407: step: 1324/527, loss: 0.02859310992062092 2023-01-23 01:05:10.651398: step: 1328/527, loss: 0.20906075835227966 2023-01-23 01:05:11.768724: step: 1332/527, loss: 0.21689942479133606 2023-01-23 01:05:12.840794: step: 1336/527, loss: 0.0136909494176507 2023-01-23 01:05:13.963489: step: 1340/527, loss: 0.03338046371936798 2023-01-23 01:05:15.092083: step: 1344/527, loss: 0.051303669810295105 2023-01-23 01:05:16.241802: step: 1348/527, loss: 0.32002657651901245 2023-01-23 01:05:17.403068: step: 1352/527, loss: 0.22659428417682648 2023-01-23 01:05:18.527075: step: 1356/527, loss: 0.06208410859107971 2023-01-23 01:05:19.663542: step: 1360/527, loss: 0.13741245865821838 2023-01-23 01:05:20.811215: step: 1364/527, loss: 0.2033875584602356 2023-01-23 01:05:21.950868: step: 1368/527, loss: 0.043901439756155014 2023-01-23 01:05:23.072639: step: 1372/527, loss: 0.0405488982796669 2023-01-23 01:05:24.183177: step: 1376/527, loss: 0.027832698076963425 2023-01-23 01:05:25.288108: step: 1380/527, loss: 0.07191906124353409 2023-01-23 01:05:26.377417: step: 1384/527, loss: 0.03992345556616783 2023-01-23 01:05:27.493061: step: 1388/527, loss: 0.046364497393369675 2023-01-23 01:05:28.630588: step: 1392/527, loss: 0.044442370533943176 2023-01-23 01:05:29.753275: step: 1396/527, loss: 0.16097697615623474 2023-01-23 01:05:30.872668: step: 1400/527, loss: 0.053278401494026184 2023-01-23 01:05:31.970658: step: 1404/527, loss: 0.12819309532642365 2023-01-23 01:05:33.103639: step: 1408/527, loss: 0.008398199453949928 2023-01-23 01:05:34.219028: step: 1412/527, loss: 0.05243821069598198 2023-01-23 01:05:35.364291: step: 1416/527, loss: 0.11450280994176865 2023-01-23 01:05:36.445893: step: 1420/527, loss: 0.11117667704820633 2023-01-23 01:05:37.567198: step: 1424/527, loss: 0.022314930334687233 2023-01-23 01:05:38.679198: step: 1428/527, loss: 0.08874158561229706 2023-01-23 01:05:39.794173: step: 1432/527, loss: 0.08282079547643661 2023-01-23 01:05:40.941070: step: 1436/527, loss: 0.030485058203339577 2023-01-23 01:05:42.085305: step: 1440/527, loss: 0.0038703917525708675 2023-01-23 01:05:43.198479: step: 1444/527, loss: 0.012000465765595436 2023-01-23 01:05:44.299209: step: 1448/527, loss: 0.23242883384227753 2023-01-23 01:05:45.421900: step: 1452/527, loss: 0.1509992629289627 2023-01-23 01:05:46.527195: step: 1456/527, loss: 0.6384310722351074 2023-01-23 01:05:47.661333: step: 1460/527, loss: 0.21907207369804382 2023-01-23 01:05:48.783920: step: 1464/527, loss: 0.021789217367768288 2023-01-23 01:05:49.934972: step: 1468/527, loss: 0.03293295204639435 2023-01-23 01:05:51.071637: step: 1472/527, loss: 0.034748006612062454 2023-01-23 01:05:52.237066: step: 1476/527, loss: 0.3407320976257324 2023-01-23 01:05:53.347675: step: 1480/527, loss: 0.00953826867043972 2023-01-23 01:05:54.474190: step: 1484/527, loss: 0.0968172550201416 2023-01-23 01:05:55.616586: step: 1488/527, loss: 0.20362672209739685 2023-01-23 01:05:56.717218: step: 1492/527, loss: 0.0702076181769371 2023-01-23 01:05:57.816332: step: 1496/527, loss: 0.042742349207401276 2023-01-23 01:05:58.925556: step: 1500/527, loss: 0.07776002585887909 2023-01-23 01:06:00.067670: step: 1504/527, loss: 0.006988907232880592 2023-01-23 01:06:01.176756: step: 1508/527, loss: 0.13713455200195312 2023-01-23 01:06:02.315950: step: 1512/527, loss: 0.11749449372291565 2023-01-23 01:06:03.434443: step: 1516/527, loss: 0.04220447689294815 2023-01-23 01:06:04.576307: step: 1520/527, loss: 0.0352315679192543 2023-01-23 01:06:05.693513: step: 1524/527, loss: 0.21461772918701172 2023-01-23 01:06:06.815480: step: 1528/527, loss: 0.3125115633010864 2023-01-23 01:06:07.939231: step: 1532/527, loss: 0.03313593938946724 2023-01-23 01:06:09.048418: step: 1536/527, loss: 0.16639859974384308 2023-01-23 01:06:10.153994: step: 1540/527, loss: 0.0716957077383995 2023-01-23 01:06:11.256768: step: 1544/527, loss: 0.036715127527713776 2023-01-23 01:06:12.391869: step: 1548/527, loss: 0.09695421904325485 2023-01-23 01:06:13.495843: step: 1552/527, loss: 1.7821764945983887 2023-01-23 01:06:14.633291: step: 1556/527, loss: 0.007926560007035732 2023-01-23 01:06:15.802982: step: 1560/527, loss: 0.2573583126068115 2023-01-23 01:06:16.915288: step: 1564/527, loss: 0.09893312305212021 2023-01-23 01:06:18.012103: step: 1568/527, loss: 0.0410795658826828 2023-01-23 01:06:19.138428: step: 1572/527, loss: 0.01193923968821764 2023-01-23 01:06:20.251164: step: 1576/527, loss: 0.04817276448011398 2023-01-23 01:06:21.374105: step: 1580/527, loss: 0.19141235947608948 2023-01-23 01:06:22.493341: step: 1584/527, loss: 0.09637489914894104 2023-01-23 01:06:23.621836: step: 1588/527, loss: 0.08508166670799255 2023-01-23 01:06:24.769352: step: 1592/527, loss: 0.04195718839764595 2023-01-23 01:06:25.887474: step: 1596/527, loss: 0.01997981034219265 2023-01-23 01:06:26.999592: step: 1600/527, loss: 0.6615036725997925 2023-01-23 01:06:28.115514: step: 1604/527, loss: 0.22085018455982208 2023-01-23 01:06:29.249390: step: 1608/527, loss: 0.11679039150476456 2023-01-23 01:06:30.381643: step: 1612/527, loss: 0.32708579301834106 2023-01-23 01:06:31.524793: step: 1616/527, loss: 0.04775838926434517 2023-01-23 01:06:32.639900: step: 1620/527, loss: 0.11859016120433807 2023-01-23 01:06:33.742165: step: 1624/527, loss: 0.053553201258182526 2023-01-23 01:06:34.864665: step: 1628/527, loss: 0.085336834192276 2023-01-23 01:06:35.988332: step: 1632/527, loss: 0.06629399955272675 2023-01-23 01:06:37.121918: step: 1636/527, loss: 0.0947992354631424 2023-01-23 01:06:38.246104: step: 1640/527, loss: 0.0417722724378109 2023-01-23 01:06:39.362884: step: 1644/527, loss: 0.15947164595127106 2023-01-23 01:06:40.482629: step: 1648/527, loss: 0.12869195640087128 2023-01-23 01:06:41.587690: step: 1652/527, loss: 0.10346432030200958 2023-01-23 01:06:42.709735: step: 1656/527, loss: 0.05017418786883354 2023-01-23 01:06:43.823646: step: 1660/527, loss: 0.030830956995487213 2023-01-23 01:06:44.955149: step: 1664/527, loss: 0.025107383728027344 2023-01-23 01:06:46.071802: step: 1668/527, loss: 0.05479329824447632 2023-01-23 01:06:47.180460: step: 1672/527, loss: 0.025297069922089577 2023-01-23 01:06:48.276978: step: 1676/527, loss: 0.02036609686911106 2023-01-23 01:06:49.407885: step: 1680/527, loss: 0.1163950115442276 2023-01-23 01:06:50.496434: step: 1684/527, loss: 0.10086068511009216 2023-01-23 01:06:51.587592: step: 1688/527, loss: 0.01263041514903307 2023-01-23 01:06:52.683860: step: 1692/527, loss: 0.07123565673828125 2023-01-23 01:06:53.775065: step: 1696/527, loss: 0.24792207777500153 2023-01-23 01:06:54.894963: step: 1700/527, loss: 0.018088627606630325 2023-01-23 01:06:56.000643: step: 1704/527, loss: 0.0056847576051950455 2023-01-23 01:06:57.108137: step: 1708/527, loss: 0.2932374179363251 2023-01-23 01:06:58.241912: step: 1712/527, loss: 0.02307887189090252 2023-01-23 01:06:59.355750: step: 1716/527, loss: 0.022414302453398705 2023-01-23 01:07:00.497583: step: 1720/527, loss: 0.11815033853054047 2023-01-23 01:07:01.616314: step: 1724/527, loss: 0.2428234964609146 2023-01-23 01:07:02.721523: step: 1728/527, loss: 0.016019631177186966 2023-01-23 01:07:03.861082: step: 1732/527, loss: 0.04156522825360298 2023-01-23 01:07:04.994156: step: 1736/527, loss: 0.13247385621070862 2023-01-23 01:07:06.098271: step: 1740/527, loss: 0.03684301674365997 2023-01-23 01:07:07.191916: step: 1744/527, loss: 0.06472799181938171 2023-01-23 01:07:08.296701: step: 1748/527, loss: 0.02149028703570366 2023-01-23 01:07:09.431016: step: 1752/527, loss: 0.18686771392822266 2023-01-23 01:07:10.534914: step: 1756/527, loss: 0.04778309166431427 2023-01-23 01:07:11.651543: step: 1760/527, loss: 0.12145087867975235 2023-01-23 01:07:12.774212: step: 1764/527, loss: 0.1457628309726715 2023-01-23 01:07:13.895761: step: 1768/527, loss: 0.12799444794654846 2023-01-23 01:07:15.057451: step: 1772/527, loss: 0.0712505429983139 2023-01-23 01:07:16.173686: step: 1776/527, loss: 0.04652068391442299 2023-01-23 01:07:17.287551: step: 1780/527, loss: 0.05784625932574272 2023-01-23 01:07:18.369389: step: 1784/527, loss: 0.07610654830932617 2023-01-23 01:07:19.501718: step: 1788/527, loss: 0.0842500701546669 2023-01-23 01:07:20.608173: step: 1792/527, loss: 0.14114198088645935 2023-01-23 01:07:21.704389: step: 1796/527, loss: 0.012464619241654873 2023-01-23 01:07:22.830079: step: 1800/527, loss: 0.060916490852832794 2023-01-23 01:07:23.952087: step: 1804/527, loss: 0.04095430299639702 2023-01-23 01:07:25.054734: step: 1808/527, loss: 0.020702935755252838 2023-01-23 01:07:26.170934: step: 1812/527, loss: 0.07763157039880753 2023-01-23 01:07:27.291625: step: 1816/527, loss: 0.053613949567079544 2023-01-23 01:07:28.411768: step: 1820/527, loss: 0.14593087136745453 2023-01-23 01:07:29.548825: step: 1824/527, loss: 0.1952143758535385 2023-01-23 01:07:30.681253: step: 1828/527, loss: 0.024495698511600494 2023-01-23 01:07:31.781855: step: 1832/527, loss: 0.44712674617767334 2023-01-23 01:07:32.902036: step: 1836/527, loss: 0.20110397040843964 2023-01-23 01:07:34.052356: step: 1840/527, loss: 0.6827542185783386 2023-01-23 01:07:35.205733: step: 1844/527, loss: 0.06904802471399307 2023-01-23 01:07:36.328821: step: 1848/527, loss: 0.1400633305311203 2023-01-23 01:07:37.446122: step: 1852/527, loss: 0.031157206743955612 2023-01-23 01:07:38.563635: step: 1856/527, loss: 0.07655920833349228 2023-01-23 01:07:39.681513: step: 1860/527, loss: 0.17182445526123047 2023-01-23 01:07:40.783578: step: 1864/527, loss: 0.12290334701538086 2023-01-23 01:07:41.912161: step: 1868/527, loss: 0.02534008026123047 2023-01-23 01:07:43.041511: step: 1872/527, loss: 0.1496855765581131 2023-01-23 01:07:44.152602: step: 1876/527, loss: 0.07068276405334473 2023-01-23 01:07:45.286820: step: 1880/527, loss: 0.10964031517505646 2023-01-23 01:07:46.426018: step: 1884/527, loss: 0.07338638603687286 2023-01-23 01:07:47.539578: step: 1888/527, loss: 0.051754143089056015 2023-01-23 01:07:48.686947: step: 1892/527, loss: 0.029028750956058502 2023-01-23 01:07:49.808574: step: 1896/527, loss: 0.03004312515258789 2023-01-23 01:07:50.925626: step: 1900/527, loss: 0.9311063885688782 2023-01-23 01:07:52.045079: step: 1904/527, loss: 0.034515380859375 2023-01-23 01:07:53.171814: step: 1908/527, loss: 0.06688328832387924 2023-01-23 01:07:54.280150: step: 1912/527, loss: 0.08460120856761932 2023-01-23 01:07:55.389804: step: 1916/527, loss: 0.21299296617507935 2023-01-23 01:07:56.481242: step: 1920/527, loss: 3.4084885120391846 2023-01-23 01:07:57.593973: step: 1924/527, loss: 0.06421475857496262 2023-01-23 01:07:58.728845: step: 1928/527, loss: 0.06366176903247833 2023-01-23 01:07:59.817175: step: 1932/527, loss: 0.10714731365442276 2023-01-23 01:08:00.956630: step: 1936/527, loss: 0.0551820769906044 2023-01-23 01:08:02.058627: step: 1940/527, loss: 0.001966929528862238 2023-01-23 01:08:03.151101: step: 1944/527, loss: 0.02919469028711319 2023-01-23 01:08:04.283403: step: 1948/527, loss: 0.04343147575855255 2023-01-23 01:08:05.407744: step: 1952/527, loss: 0.00323486328125 2023-01-23 01:08:06.512319: step: 1956/527, loss: 0.0796491950750351 2023-01-23 01:08:07.625053: step: 1960/527, loss: 0.24456629157066345 2023-01-23 01:08:08.734682: step: 1964/527, loss: 0.06715717166662216 2023-01-23 01:08:09.840204: step: 1968/527, loss: 0.2161039412021637 2023-01-23 01:08:10.950740: step: 1972/527, loss: 0.058657363057136536 2023-01-23 01:08:12.073218: step: 1976/527, loss: 0.04664459079504013 2023-01-23 01:08:13.179049: step: 1980/527, loss: 0.15254350006580353 2023-01-23 01:08:14.311596: step: 1984/527, loss: 0.08862748742103577 2023-01-23 01:08:15.419854: step: 1988/527, loss: 0.12920647859573364 2023-01-23 01:08:16.511164: step: 1992/527, loss: 0.06137437745928764 2023-01-23 01:08:17.619205: step: 1996/527, loss: 0.02515232004225254 2023-01-23 01:08:18.757256: step: 2000/527, loss: 0.03706379234790802 2023-01-23 01:08:19.862451: step: 2004/527, loss: 0.1036413162946701 2023-01-23 01:08:20.987106: step: 2008/527, loss: 0.027640534564852715 2023-01-23 01:08:22.096900: step: 2012/527, loss: 0.05735301971435547 2023-01-23 01:08:23.194109: step: 2016/527, loss: 0.24364295601844788 2023-01-23 01:08:24.305038: step: 2020/527, loss: 0.011335229501128197 2023-01-23 01:08:25.392037: step: 2024/527, loss: 0.010156822390854359 2023-01-23 01:08:26.509506: step: 2028/527, loss: 0.1353379189968109 2023-01-23 01:08:27.634418: step: 2032/527, loss: 0.8149141073226929 2023-01-23 01:08:28.784592: step: 2036/527, loss: 0.05819511413574219 2023-01-23 01:08:29.922139: step: 2040/527, loss: 0.0748991072177887 2023-01-23 01:08:31.035659: step: 2044/527, loss: 0.030807018280029297 2023-01-23 01:08:32.152721: step: 2048/527, loss: 0.1352270096540451 2023-01-23 01:08:33.294657: step: 2052/527, loss: 0.06663999706506729 2023-01-23 01:08:34.414083: step: 2056/527, loss: 0.08670587837696075 2023-01-23 01:08:35.550058: step: 2060/527, loss: 0.48546478152275085 2023-01-23 01:08:36.650489: step: 2064/527, loss: 0.007636452093720436 2023-01-23 01:08:37.789418: step: 2068/527, loss: 0.08466167747974396 2023-01-23 01:08:38.895468: step: 2072/527, loss: 0.06291475147008896 2023-01-23 01:08:40.033804: step: 2076/527, loss: 0.23888997733592987 2023-01-23 01:08:41.149125: step: 2080/527, loss: 0.303070068359375 2023-01-23 01:08:42.257761: step: 2084/527, loss: 0.038504458963871 2023-01-23 01:08:43.386976: step: 2088/527, loss: 0.15526457130908966 2023-01-23 01:08:44.500363: step: 2092/527, loss: 0.03289389610290527 2023-01-23 01:08:45.612069: step: 2096/527, loss: 0.0832575187087059 2023-01-23 01:08:46.702955: step: 2100/527, loss: 0.05071668699383736 2023-01-23 01:08:47.800907: step: 2104/527, loss: 0.07595052570104599 2023-01-23 01:08:48.919328: step: 2108/527, loss: 0.08094806969165802 ================================================== Loss: 0.116 -------------------- Dev: {'event': {'p': 0.590818363273453, 'r': 0.7882822902796272, 'f1': 0.6754135767256132}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Test: {'event': {'p': 0.6228622862286228, 'r': 0.7908571428571428, 'f1': 0.6968781470292045}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Chinese: {'event': {'p': 0.5609756097560976, 'r': 0.8518518518518519, 'f1': 0.6764705882352942}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Korean: {'event': {'p': 0.5423728813559322, 'r': 0.5079365079365079, 'f1': 0.5245901639344263}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Russian: {'event': {'p': 0.40476190476190477, 'r': 0.4722222222222222, 'f1': 0.4358974358974359}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Eng Test for Korean: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 10 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:09:30.716390: step: 4/527, loss: 0.003954839892685413 2023-01-23 01:09:31.830553: step: 8/527, loss: 0.046971939504146576 2023-01-23 01:09:32.959607: step: 12/527, loss: 0.04838142544031143 2023-01-23 01:09:34.092996: step: 16/527, loss: 0.07612638175487518 2023-01-23 01:09:35.210759: step: 20/527, loss: 0.0632898360490799 2023-01-23 01:09:36.314180: step: 24/527, loss: 0.03009018860757351 2023-01-23 01:09:37.433571: step: 28/527, loss: 0.011864805594086647 2023-01-23 01:09:38.596717: step: 32/527, loss: 0.026328755542635918 2023-01-23 01:09:39.677831: step: 36/527, loss: 0.017726421356201172 2023-01-23 01:09:40.796275: step: 40/527, loss: 0.017998887225985527 2023-01-23 01:09:41.909962: step: 44/527, loss: 0.024239160120487213 2023-01-23 01:09:43.075753: step: 48/527, loss: 0.2858274281024933 2023-01-23 01:09:44.214633: step: 52/527, loss: 0.13892985880374908 2023-01-23 01:09:45.323306: step: 56/527, loss: 0.03566465526819229 2023-01-23 01:09:46.443585: step: 60/527, loss: 0.04515719413757324 2023-01-23 01:09:47.545857: step: 64/527, loss: 0.09078221768140793 2023-01-23 01:09:48.656027: step: 68/527, loss: 0.008511638268828392 2023-01-23 01:09:49.766809: step: 72/527, loss: 0.04063911736011505 2023-01-23 01:09:50.898236: step: 76/527, loss: 0.27636852860450745 2023-01-23 01:09:52.020027: step: 80/527, loss: 0.013401126489043236 2023-01-23 01:09:53.124606: step: 84/527, loss: 0.03900165483355522 2023-01-23 01:09:54.215967: step: 88/527, loss: 0.059256602078676224 2023-01-23 01:09:55.327698: step: 92/527, loss: 0.003378374967724085 2023-01-23 01:09:56.428014: step: 96/527, loss: 0.0051441197283566 2023-01-23 01:09:57.530617: step: 100/527, loss: 0.03346522897481918 2023-01-23 01:09:58.625263: step: 104/527, loss: 0.0983375608921051 2023-01-23 01:09:59.726829: step: 108/527, loss: 0.010539722628891468 2023-01-23 01:10:00.839047: step: 112/527, loss: 0.008619308471679688 2023-01-23 01:10:01.960325: step: 116/527, loss: 0.03800782933831215 2023-01-23 01:10:03.094263: step: 120/527, loss: 0.01792139932513237 2023-01-23 01:10:04.210288: step: 124/527, loss: 0.34092769026756287 2023-01-23 01:10:05.337572: step: 128/527, loss: 0.04823474958539009 2023-01-23 01:10:06.433564: step: 132/527, loss: 0.035067055374383926 2023-01-23 01:10:07.545256: step: 136/527, loss: 0.07441334426403046 2023-01-23 01:10:08.675357: step: 140/527, loss: 0.45805856585502625 2023-01-23 01:10:09.768909: step: 144/527, loss: 0.14782772958278656 2023-01-23 01:10:10.867757: step: 148/527, loss: 0.003230953123420477 2023-01-23 01:10:11.982732: step: 152/527, loss: 0.1826217621564865 2023-01-23 01:10:13.077054: step: 156/527, loss: 0.027864886447787285 2023-01-23 01:10:14.180217: step: 160/527, loss: 0.04507408291101456 2023-01-23 01:10:15.308848: step: 164/527, loss: 0.08217363059520721 2023-01-23 01:10:16.430213: step: 168/527, loss: 0.05337662994861603 2023-01-23 01:10:17.545778: step: 172/527, loss: 0.01218261756002903 2023-01-23 01:10:18.665548: step: 176/527, loss: 0.02252178266644478 2023-01-23 01:10:19.762280: step: 180/527, loss: 0.031838610768318176 2023-01-23 01:10:20.908315: step: 184/527, loss: 0.04049744829535484 2023-01-23 01:10:22.028984: step: 188/527, loss: 0.04686784744262695 2023-01-23 01:10:23.141330: step: 192/527, loss: 0.03474388271570206 2023-01-23 01:10:24.242353: step: 196/527, loss: 0.07600340247154236 2023-01-23 01:10:25.363043: step: 200/527, loss: 0.03720531612634659 2023-01-23 01:10:26.471244: step: 204/527, loss: 0.02830514870584011 2023-01-23 01:10:27.572256: step: 208/527, loss: 0.06913509964942932 2023-01-23 01:10:28.664360: step: 212/527, loss: 0.04269905388355255 2023-01-23 01:10:29.783955: step: 216/527, loss: 0.0749143585562706 2023-01-23 01:10:30.912133: step: 220/527, loss: 0.024440767243504524 2023-01-23 01:10:32.044752: step: 224/527, loss: 0.10924110561609268 2023-01-23 01:10:33.124401: step: 228/527, loss: 0.0377313606441021 2023-01-23 01:10:34.244969: step: 232/527, loss: 0.05104675143957138 2023-01-23 01:10:35.348409: step: 236/527, loss: 0.04650368541479111 2023-01-23 01:10:36.468170: step: 240/527, loss: 0.03903999179601669 2023-01-23 01:10:37.604659: step: 244/527, loss: 0.1586379110813141 2023-01-23 01:10:38.702470: step: 248/527, loss: 0.024387264624238014 2023-01-23 01:10:39.872634: step: 252/527, loss: 0.034582994878292084 2023-01-23 01:10:41.012624: step: 256/527, loss: 0.11938118934631348 2023-01-23 01:10:42.133231: step: 260/527, loss: 0.05850648880004883 2023-01-23 01:10:43.246994: step: 264/527, loss: 0.10497408360242844 2023-01-23 01:10:44.350443: step: 268/527, loss: 0.08671379089355469 2023-01-23 01:10:45.471895: step: 272/527, loss: 0.14994125068187714 2023-01-23 01:10:46.566412: step: 276/527, loss: 0.04751171916723251 2023-01-23 01:10:47.698860: step: 280/527, loss: 0.004595470614731312 2023-01-23 01:10:48.803034: step: 284/527, loss: 0.03405923768877983 2023-01-23 01:10:49.930875: step: 288/527, loss: 0.03524913638830185 2023-01-23 01:10:51.048351: step: 292/527, loss: 0.040092941373586655 2023-01-23 01:10:52.207954: step: 296/527, loss: 0.16224071383476257 2023-01-23 01:10:53.321962: step: 300/527, loss: 0.05617475509643555 2023-01-23 01:10:54.435475: step: 304/527, loss: 0.02535400539636612 2023-01-23 01:10:55.550934: step: 308/527, loss: 0.12030944228172302 2023-01-23 01:10:56.670957: step: 312/527, loss: 0.09914655238389969 2023-01-23 01:10:57.781709: step: 316/527, loss: 0.03183479607105255 2023-01-23 01:10:58.903266: step: 320/527, loss: 0.3562818467617035 2023-01-23 01:10:59.992185: step: 324/527, loss: 0.012703514657914639 2023-01-23 01:11:01.120131: step: 328/527, loss: 0.18833065032958984 2023-01-23 01:11:02.248732: step: 332/527, loss: 0.08275914192199707 2023-01-23 01:11:03.365455: step: 336/527, loss: 0.0039010047912597656 2023-01-23 01:11:04.480788: step: 340/527, loss: 0.00875091552734375 2023-01-23 01:11:05.583962: step: 344/527, loss: 0.36065831780433655 2023-01-23 01:11:06.710952: step: 348/527, loss: 0.044191304594278336 2023-01-23 01:11:07.810377: step: 352/527, loss: 0.016362953931093216 2023-01-23 01:11:08.953007: step: 356/527, loss: 0.13473300635814667 2023-01-23 01:11:10.064705: step: 360/527, loss: 0.21554070711135864 2023-01-23 01:11:11.160064: step: 364/527, loss: 0.013247680850327015 2023-01-23 01:11:12.264170: step: 368/527, loss: 0.04358730465173721 2023-01-23 01:11:13.376544: step: 372/527, loss: 0.11040067672729492 2023-01-23 01:11:14.507985: step: 376/527, loss: 0.28858065605163574 2023-01-23 01:11:15.620396: step: 380/527, loss: 0.03753490373492241 2023-01-23 01:11:16.727534: step: 384/527, loss: 0.045523930341005325 2023-01-23 01:11:17.847251: step: 388/527, loss: 0.06086587905883789 2023-01-23 01:11:18.955174: step: 392/527, loss: 0.01715068705379963 2023-01-23 01:11:20.086907: step: 396/527, loss: 0.090460404753685 2023-01-23 01:11:21.174275: step: 400/527, loss: 0.03120708465576172 2023-01-23 01:11:22.287668: step: 404/527, loss: 0.06381330639123917 2023-01-23 01:11:23.399902: step: 408/527, loss: 0.08668927848339081 2023-01-23 01:11:24.488809: step: 412/527, loss: 0.0744566023349762 2023-01-23 01:11:25.611580: step: 416/527, loss: 0.10327544808387756 2023-01-23 01:11:26.715812: step: 420/527, loss: 0.012917709536850452 2023-01-23 01:11:27.826737: step: 424/527, loss: 0.1444985270500183 2023-01-23 01:11:28.960415: step: 428/527, loss: 0.009100962430238724 2023-01-23 01:11:30.067743: step: 432/527, loss: 0.023868083953857422 2023-01-23 01:11:31.159779: step: 436/527, loss: 0.004426383879035711 2023-01-23 01:11:32.243947: step: 440/527, loss: 0.05390510708093643 2023-01-23 01:11:33.380224: step: 444/527, loss: 0.06055927276611328 2023-01-23 01:11:34.482556: step: 448/527, loss: 0.039765167981386185 2023-01-23 01:11:35.629731: step: 452/527, loss: 0.05110931769013405 2023-01-23 01:11:36.755506: step: 456/527, loss: 0.029856158420443535 2023-01-23 01:11:37.886945: step: 460/527, loss: 0.059945348650217056 2023-01-23 01:11:39.001828: step: 464/527, loss: 0.10481533408164978 2023-01-23 01:11:40.128156: step: 468/527, loss: 0.003098726272583008 2023-01-23 01:11:41.244865: step: 472/527, loss: 0.21219158172607422 2023-01-23 01:11:42.381570: step: 476/527, loss: 0.027169417589902878 2023-01-23 01:11:43.480242: step: 480/527, loss: 0.12153196334838867 2023-01-23 01:11:44.586518: step: 484/527, loss: 0.0643930435180664 2023-01-23 01:11:45.707766: step: 488/527, loss: 0.018788719549775124 2023-01-23 01:11:46.840887: step: 492/527, loss: 0.12497644126415253 2023-01-23 01:11:47.939480: step: 496/527, loss: 0.05850248411297798 2023-01-23 01:11:49.052416: step: 500/527, loss: 0.142948716878891 2023-01-23 01:11:50.164296: step: 504/527, loss: 0.06825704872608185 2023-01-23 01:11:51.274933: step: 508/527, loss: 0.05962572246789932 2023-01-23 01:11:52.422515: step: 512/527, loss: 0.17915363609790802 2023-01-23 01:11:53.515717: step: 516/527, loss: 0.0045237066224217415 2023-01-23 01:11:54.639838: step: 520/527, loss: 0.043069079518318176 2023-01-23 01:11:55.761400: step: 524/527, loss: 0.07603655010461807 2023-01-23 01:11:56.890891: step: 528/527, loss: 0.06310272216796875 2023-01-23 01:11:58.008512: step: 532/527, loss: 0.028141213580965996 2023-01-23 01:11:59.134899: step: 536/527, loss: 0.04646243900060654 2023-01-23 01:12:00.277643: step: 540/527, loss: 0.03706321865320206 2023-01-23 01:12:01.387310: step: 544/527, loss: 0.000804901123046875 2023-01-23 01:12:02.513744: step: 548/527, loss: 0.05564718320965767 2023-01-23 01:12:03.622776: step: 552/527, loss: 0.013531303033232689 2023-01-23 01:12:04.747272: step: 556/527, loss: 0.022708892822265625 2023-01-23 01:12:05.845439: step: 560/527, loss: 0.012738799676299095 2023-01-23 01:12:06.965014: step: 564/527, loss: 0.12732820212841034 2023-01-23 01:12:08.106578: step: 568/527, loss: 0.03637869656085968 2023-01-23 01:12:09.235741: step: 572/527, loss: 0.010107708163559437 2023-01-23 01:12:10.355047: step: 576/527, loss: 0.11684437096118927 2023-01-23 01:12:11.486621: step: 580/527, loss: 0.09836645424365997 2023-01-23 01:12:12.603385: step: 584/527, loss: 0.6022077798843384 2023-01-23 01:12:13.735113: step: 588/527, loss: 0.013932609930634499 2023-01-23 01:12:14.837771: step: 592/527, loss: 0.04637398570775986 2023-01-23 01:12:15.956013: step: 596/527, loss: 0.05373978987336159 2023-01-23 01:12:17.054675: step: 600/527, loss: 0.07080250233411789 2023-01-23 01:12:18.163412: step: 604/527, loss: 0.0334104523062706 2023-01-23 01:12:19.304311: step: 608/527, loss: 0.029811764135956764 2023-01-23 01:12:20.459662: step: 612/527, loss: 0.03693685680627823 2023-01-23 01:12:21.551245: step: 616/527, loss: 0.01927213743329048 2023-01-23 01:12:22.708722: step: 620/527, loss: 0.040345385670661926 2023-01-23 01:12:23.872111: step: 624/527, loss: 0.026490308344364166 2023-01-23 01:12:24.994535: step: 628/527, loss: 0.04458937793970108 2023-01-23 01:12:26.082851: step: 632/527, loss: 0.08664774894714355 2023-01-23 01:12:27.176534: step: 636/527, loss: 0.05152931436896324 2023-01-23 01:12:28.305529: step: 640/527, loss: 0.06606302410364151 2023-01-23 01:12:29.415052: step: 644/527, loss: 0.08554963767528534 2023-01-23 01:12:30.525028: step: 648/527, loss: 0.06100320816040039 2023-01-23 01:12:31.634402: step: 652/527, loss: 0.08864001929759979 2023-01-23 01:12:32.757894: step: 656/527, loss: 0.00786581076681614 2023-01-23 01:12:33.865807: step: 660/527, loss: 0.026302147656679153 2023-01-23 01:12:34.954654: step: 664/527, loss: 0.028510475531220436 2023-01-23 01:12:36.063973: step: 668/527, loss: 0.020524216815829277 2023-01-23 01:12:37.213012: step: 672/527, loss: 0.004625129513442516 2023-01-23 01:12:38.324473: step: 676/527, loss: 0.06390111893415451 2023-01-23 01:12:39.459736: step: 680/527, loss: 0.02110319212079048 2023-01-23 01:12:40.587433: step: 684/527, loss: 0.12578515708446503 2023-01-23 01:12:41.707148: step: 688/527, loss: 0.07842636108398438 2023-01-23 01:12:42.863825: step: 692/527, loss: 0.056389905512332916 2023-01-23 01:12:43.972359: step: 696/527, loss: 0.09628009796142578 2023-01-23 01:12:45.059506: step: 700/527, loss: 0.03165140002965927 2023-01-23 01:12:46.177732: step: 704/527, loss: 0.013393402099609375 2023-01-23 01:12:47.300053: step: 708/527, loss: 0.08585198223590851 2023-01-23 01:12:48.411143: step: 712/527, loss: 0.02230224572122097 2023-01-23 01:12:49.557574: step: 716/527, loss: 0.025845587253570557 2023-01-23 01:12:50.652855: step: 720/527, loss: 0.048845671117305756 2023-01-23 01:12:51.787241: step: 724/527, loss: 0.1077117919921875 2023-01-23 01:12:52.929632: step: 728/527, loss: 0.032929327338933945 2023-01-23 01:12:54.068474: step: 732/527, loss: 0.009232711978256702 2023-01-23 01:12:55.184232: step: 736/527, loss: 0.056148529052734375 2023-01-23 01:12:56.306837: step: 740/527, loss: 0.07523298263549805 2023-01-23 01:12:57.412585: step: 744/527, loss: 0.025061702355742455 2023-01-23 01:12:58.532726: step: 748/527, loss: 0.0389246940612793 2023-01-23 01:12:59.628079: step: 752/527, loss: 0.046741485595703125 2023-01-23 01:13:00.736919: step: 756/527, loss: 0.10398922115564346 2023-01-23 01:13:01.904819: step: 760/527, loss: 0.09913244843482971 2023-01-23 01:13:03.014447: step: 764/527, loss: 0.11089535802602768 2023-01-23 01:13:04.140422: step: 768/527, loss: 0.015854155644774437 2023-01-23 01:13:05.262526: step: 772/527, loss: 0.034119509160518646 2023-01-23 01:13:06.393645: step: 776/527, loss: 0.005016374867409468 2023-01-23 01:13:07.507452: step: 780/527, loss: 0.09810104966163635 2023-01-23 01:13:08.603907: step: 784/527, loss: 0.050821878015995026 2023-01-23 01:13:09.701614: step: 788/527, loss: 0.04132270812988281 2023-01-23 01:13:10.817511: step: 792/527, loss: 0.00894010066986084 2023-01-23 01:13:11.920438: step: 796/527, loss: 0.026571275666356087 2023-01-23 01:13:13.043039: step: 800/527, loss: 0.03814687952399254 2023-01-23 01:13:14.142793: step: 804/527, loss: 0.0411250963807106 2023-01-23 01:13:15.266002: step: 808/527, loss: 0.12153854966163635 2023-01-23 01:13:16.414901: step: 812/527, loss: 0.1451946347951889 2023-01-23 01:13:17.532360: step: 816/527, loss: 0.05243368446826935 2023-01-23 01:13:18.646425: step: 820/527, loss: 0.008421516045928001 2023-01-23 01:13:19.784010: step: 824/527, loss: 0.0033766748383641243 2023-01-23 01:13:20.914480: step: 828/527, loss: 0.02733621746301651 2023-01-23 01:13:22.023624: step: 832/527, loss: 0.0715036392211914 2023-01-23 01:13:23.154310: step: 836/527, loss: 0.06504850834608078 2023-01-23 01:13:24.273276: step: 840/527, loss: 0.020229816436767578 2023-01-23 01:13:25.402496: step: 844/527, loss: 0.005469238851219416 2023-01-23 01:13:26.531023: step: 848/527, loss: 0.15612564980983734 2023-01-23 01:13:27.682228: step: 852/527, loss: 0.12590274214744568 2023-01-23 01:13:28.811082: step: 856/527, loss: 0.01723346672952175 2023-01-23 01:13:29.926104: step: 860/527, loss: 0.21716849505901337 2023-01-23 01:13:31.093260: step: 864/527, loss: 0.01270294189453125 2023-01-23 01:13:32.215999: step: 868/527, loss: 0.06347303092479706 2023-01-23 01:13:33.331163: step: 872/527, loss: 0.050580885261297226 2023-01-23 01:13:34.443010: step: 876/527, loss: 0.033860303461551666 2023-01-23 01:13:35.587440: step: 880/527, loss: 0.04086499288678169 2023-01-23 01:13:36.697983: step: 884/527, loss: 0.13762474060058594 2023-01-23 01:13:37.788806: step: 888/527, loss: 0.03726176917552948 2023-01-23 01:13:38.916238: step: 892/527, loss: 0.017440414056181908 2023-01-23 01:13:40.054869: step: 896/527, loss: 0.004000520799309015 2023-01-23 01:13:41.197960: step: 900/527, loss: 0.19654139876365662 2023-01-23 01:13:42.311561: step: 904/527, loss: 0.038222648203372955 2023-01-23 01:13:43.455236: step: 908/527, loss: 0.012129020877182484 2023-01-23 01:13:44.588855: step: 912/527, loss: 0.28940287232398987 2023-01-23 01:13:45.687318: step: 916/527, loss: 0.035015106201171875 2023-01-23 01:13:46.778978: step: 920/527, loss: 0.040676262229681015 2023-01-23 01:13:47.916240: step: 924/527, loss: 0.10807066410779953 2023-01-23 01:13:49.061617: step: 928/527, loss: 0.19691458344459534 2023-01-23 01:13:50.226490: step: 932/527, loss: 0.03291444852948189 2023-01-23 01:13:51.350833: step: 936/527, loss: 0.3481800854206085 2023-01-23 01:13:52.480687: step: 940/527, loss: 0.026858510449528694 2023-01-23 01:13:53.625197: step: 944/527, loss: 0.02380075491964817 2023-01-23 01:13:54.745204: step: 948/527, loss: 0.025948908179998398 2023-01-23 01:13:55.851427: step: 952/527, loss: 0.021019816398620605 2023-01-23 01:13:56.966518: step: 956/527, loss: 0.11807326972484589 2023-01-23 01:13:58.073557: step: 960/527, loss: 0.047972775995731354 2023-01-23 01:13:59.187773: step: 964/527, loss: 0.0017597198020666838 2023-01-23 01:14:00.286642: step: 968/527, loss: 0.05020351707935333 2023-01-23 01:14:01.393191: step: 972/527, loss: 0.46636271476745605 2023-01-23 01:14:02.547629: step: 976/527, loss: 0.017462920397520065 2023-01-23 01:14:03.671044: step: 980/527, loss: 0.06998234242200851 2023-01-23 01:14:04.789128: step: 984/527, loss: 0.0309771541506052 2023-01-23 01:14:05.935727: step: 988/527, loss: 0.04553508758544922 2023-01-23 01:14:07.052186: step: 992/527, loss: 0.08498553931713104 2023-01-23 01:14:08.156915: step: 996/527, loss: 0.0004536628839559853 2023-01-23 01:14:09.249159: step: 1000/527, loss: 0.025867082178592682 2023-01-23 01:14:10.373867: step: 1004/527, loss: 0.04738101735711098 2023-01-23 01:14:11.475124: step: 1008/527, loss: 0.04738654941320419 2023-01-23 01:14:12.583623: step: 1012/527, loss: 0.08932209014892578 2023-01-23 01:14:13.704641: step: 1016/527, loss: 0.21987590193748474 2023-01-23 01:14:14.840337: step: 1020/527, loss: 0.05395636707544327 2023-01-23 01:14:16.011273: step: 1024/527, loss: 0.05484752729535103 2023-01-23 01:14:17.142423: step: 1028/527, loss: 0.02601451985538006 2023-01-23 01:14:18.255929: step: 1032/527, loss: 0.04179678112268448 2023-01-23 01:14:19.357945: step: 1036/527, loss: 0.08130798488855362 2023-01-23 01:14:20.494144: step: 1040/527, loss: 0.09006405621767044 2023-01-23 01:14:21.595923: step: 1044/527, loss: 0.07062435150146484 2023-01-23 01:14:22.713471: step: 1048/527, loss: 0.0372396856546402 2023-01-23 01:14:23.808023: step: 1052/527, loss: 0.06186370551586151 2023-01-23 01:14:24.922595: step: 1056/527, loss: 0.0354766845703125 2023-01-23 01:14:26.034095: step: 1060/527, loss: 0.7298870086669922 2023-01-23 01:14:27.142241: step: 1064/527, loss: 0.08513374626636505 2023-01-23 01:14:28.233399: step: 1068/527, loss: 0.14668826758861542 2023-01-23 01:14:29.374779: step: 1072/527, loss: 0.16648262739181519 2023-01-23 01:14:30.494895: step: 1076/527, loss: 0.06459064781665802 2023-01-23 01:14:31.579249: step: 1080/527, loss: 0.020952749997377396 2023-01-23 01:14:32.679833: step: 1084/527, loss: 0.06892142444849014 2023-01-23 01:14:33.816114: step: 1088/527, loss: 0.01784381829202175 2023-01-23 01:14:34.948332: step: 1092/527, loss: 0.030136872082948685 2023-01-23 01:14:36.056000: step: 1096/527, loss: 0.0029964924324303865 2023-01-23 01:14:37.168259: step: 1100/527, loss: 0.052674200385808945 2023-01-23 01:14:38.269594: step: 1104/527, loss: 0.14742393791675568 2023-01-23 01:14:39.398430: step: 1108/527, loss: 0.043134596198797226 2023-01-23 01:14:40.549402: step: 1112/527, loss: 0.010724497027695179 2023-01-23 01:14:41.657054: step: 1116/527, loss: 0.09624017775058746 2023-01-23 01:14:42.787689: step: 1120/527, loss: 0.02912454679608345 2023-01-23 01:14:43.911529: step: 1124/527, loss: 0.07969585061073303 2023-01-23 01:14:45.023843: step: 1128/527, loss: 0.08891606330871582 2023-01-23 01:14:46.136278: step: 1132/527, loss: 0.01089935377240181 2023-01-23 01:14:47.253318: step: 1136/527, loss: 0.049041748046875 2023-01-23 01:14:48.364082: step: 1140/527, loss: 0.019298363476991653 2023-01-23 01:14:49.489188: step: 1144/527, loss: 0.09605112671852112 2023-01-23 01:14:50.611484: step: 1148/527, loss: 0.06948347389698029 2023-01-23 01:14:51.726813: step: 1152/527, loss: 0.024088477715849876 2023-01-23 01:14:52.840097: step: 1156/527, loss: 0.0025262834969908 2023-01-23 01:14:53.965322: step: 1160/527, loss: 0.01917247660458088 2023-01-23 01:14:55.122630: step: 1164/527, loss: 0.12587738037109375 2023-01-23 01:14:56.242196: step: 1168/527, loss: 0.10613860934972763 2023-01-23 01:14:57.375832: step: 1172/527, loss: 0.025966167449951172 2023-01-23 01:14:58.517524: step: 1176/527, loss: 0.044365692883729935 2023-01-23 01:14:59.614636: step: 1180/527, loss: 0.012591457925736904 2023-01-23 01:15:00.703665: step: 1184/527, loss: 0.419198602437973 2023-01-23 01:15:01.820576: step: 1188/527, loss: 0.032729052007198334 2023-01-23 01:15:02.943215: step: 1192/527, loss: 0.06766434013843536 2023-01-23 01:15:04.075125: step: 1196/527, loss: 0.03297629579901695 2023-01-23 01:15:05.202407: step: 1200/527, loss: 0.03873659670352936 2023-01-23 01:15:06.308777: step: 1204/527, loss: 0.07039690017700195 2023-01-23 01:15:07.433534: step: 1208/527, loss: 0.08883514255285263 2023-01-23 01:15:08.538232: step: 1212/527, loss: 0.09126751869916916 2023-01-23 01:15:09.654210: step: 1216/527, loss: 0.29734936356544495 2023-01-23 01:15:10.776214: step: 1220/527, loss: 0.0026366233360022306 2023-01-23 01:15:11.903377: step: 1224/527, loss: 0.036997318267822266 2023-01-23 01:15:13.021639: step: 1228/527, loss: 0.15535296499729156 2023-01-23 01:15:14.127730: step: 1232/527, loss: 0.21307361125946045 2023-01-23 01:15:15.255707: step: 1236/527, loss: 0.04601650685071945 2023-01-23 01:15:16.372973: step: 1240/527, loss: 0.04376354068517685 2023-01-23 01:15:17.523200: step: 1244/527, loss: 0.01740141026675701 2023-01-23 01:15:18.618769: step: 1248/527, loss: 0.08658389747142792 2023-01-23 01:15:19.754592: step: 1252/527, loss: 0.2184923142194748 2023-01-23 01:15:20.859517: step: 1256/527, loss: 0.021442987024784088 2023-01-23 01:15:22.027847: step: 1260/527, loss: 0.03345203399658203 2023-01-23 01:15:23.133346: step: 1264/527, loss: 0.03281059116125107 2023-01-23 01:15:24.255895: step: 1268/527, loss: 0.03055848926305771 2023-01-23 01:15:25.382941: step: 1272/527, loss: 0.16485300660133362 2023-01-23 01:15:26.480632: step: 1276/527, loss: 0.0439058318734169 2023-01-23 01:15:27.608720: step: 1280/527, loss: 0.006540775299072266 2023-01-23 01:15:28.744711: step: 1284/527, loss: 0.007890892215073109 2023-01-23 01:15:29.858665: step: 1288/527, loss: 0.006539535708725452 2023-01-23 01:15:30.977310: step: 1292/527, loss: 0.04639873653650284 2023-01-23 01:15:32.077655: step: 1296/527, loss: 0.30003756284713745 2023-01-23 01:15:33.182326: step: 1300/527, loss: 0.26014575362205505 2023-01-23 01:15:34.308174: step: 1304/527, loss: 0.04884805530309677 2023-01-23 01:15:35.415627: step: 1308/527, loss: 0.0315403938293457 2023-01-23 01:15:36.520572: step: 1312/527, loss: 0.03044353611767292 2023-01-23 01:15:37.628670: step: 1316/527, loss: 0.05885648727416992 2023-01-23 01:15:38.751043: step: 1320/527, loss: 0.04441490024328232 2023-01-23 01:15:39.888533: step: 1324/527, loss: 0.09061069786548615 2023-01-23 01:15:40.995054: step: 1328/527, loss: 0.016812991350889206 2023-01-23 01:15:42.110321: step: 1332/527, loss: 0.037131693214178085 2023-01-23 01:15:43.220325: step: 1336/527, loss: 0.010467815212905407 2023-01-23 01:15:44.361634: step: 1340/527, loss: 0.08517646789550781 2023-01-23 01:15:45.462043: step: 1344/527, loss: 0.09999943524599075 2023-01-23 01:15:46.585952: step: 1348/527, loss: 0.0068035125732421875 2023-01-23 01:15:47.696411: step: 1352/527, loss: 0.03772592544555664 2023-01-23 01:15:48.814320: step: 1356/527, loss: 0.022836869582533836 2023-01-23 01:15:49.909795: step: 1360/527, loss: 0.03131914138793945 2023-01-23 01:15:51.034361: step: 1364/527, loss: 0.05937071144580841 2023-01-23 01:15:52.166046: step: 1368/527, loss: 0.02063455618917942 2023-01-23 01:15:53.264293: step: 1372/527, loss: 0.0737181305885315 2023-01-23 01:15:54.383537: step: 1376/527, loss: 0.04599037021398544 2023-01-23 01:15:55.495331: step: 1380/527, loss: 0.07567653805017471 2023-01-23 01:15:56.664946: step: 1384/527, loss: 0.06350994110107422 2023-01-23 01:15:57.784684: step: 1388/527, loss: 0.049146365374326706 2023-01-23 01:15:58.898992: step: 1392/527, loss: 0.02857999876141548 2023-01-23 01:16:00.012342: step: 1396/527, loss: 0.08483944088220596 2023-01-23 01:16:01.148205: step: 1400/527, loss: 0.0027028084732592106 2023-01-23 01:16:02.252417: step: 1404/527, loss: 0.030332984402775764 2023-01-23 01:16:03.399350: step: 1408/527, loss: 0.005330848973244429 2023-01-23 01:16:04.520906: step: 1412/527, loss: 0.0036644935607910156 2023-01-23 01:16:05.629201: step: 1416/527, loss: 0.03710651397705078 2023-01-23 01:16:06.734443: step: 1420/527, loss: 0.0002440452662995085 2023-01-23 01:16:07.872916: step: 1424/527, loss: 0.06374912708997726 2023-01-23 01:16:08.991453: step: 1428/527, loss: 0.07393179088830948 2023-01-23 01:16:10.080268: step: 1432/527, loss: 0.021627523005008698 2023-01-23 01:16:11.194173: step: 1436/527, loss: 0.05135336145758629 2023-01-23 01:16:12.299342: step: 1440/527, loss: 0.05861806496977806 2023-01-23 01:16:13.391654: step: 1444/527, loss: 0.011415171436965466 2023-01-23 01:16:14.492069: step: 1448/527, loss: 0.07422810047864914 2023-01-23 01:16:15.624812: step: 1452/527, loss: 0.04888172075152397 2023-01-23 01:16:16.742769: step: 1456/527, loss: 0.033731650561094284 2023-01-23 01:16:17.877550: step: 1460/527, loss: 0.033193159848451614 2023-01-23 01:16:19.017295: step: 1464/527, loss: 0.27129611372947693 2023-01-23 01:16:20.122428: step: 1468/527, loss: 0.13051776587963104 2023-01-23 01:16:21.249621: step: 1472/527, loss: 0.21012958884239197 2023-01-23 01:16:22.354934: step: 1476/527, loss: 0.009187507443130016 2023-01-23 01:16:23.451751: step: 1480/527, loss: 0.05515923723578453 2023-01-23 01:16:24.577763: step: 1484/527, loss: 0.06073570251464844 2023-01-23 01:16:25.698427: step: 1488/527, loss: 0.1984787881374359 2023-01-23 01:16:26.837061: step: 1492/527, loss: 0.19858984649181366 2023-01-23 01:16:27.943449: step: 1496/527, loss: 0.023435020819306374 2023-01-23 01:16:29.063924: step: 1500/527, loss: 0.10996846854686737 2023-01-23 01:16:30.208716: step: 1504/527, loss: 0.061175063252449036 2023-01-23 01:16:31.293704: step: 1508/527, loss: 0.29333925247192383 2023-01-23 01:16:32.399964: step: 1512/527, loss: 0.04473914951086044 2023-01-23 01:16:33.503797: step: 1516/527, loss: 0.054048679769039154 2023-01-23 01:16:34.614236: step: 1520/527, loss: 0.03265733644366264 2023-01-23 01:16:35.717065: step: 1524/527, loss: 0.02030506171286106 2023-01-23 01:16:36.806255: step: 1528/527, loss: 0.0635310709476471 2023-01-23 01:16:37.942167: step: 1532/527, loss: 0.0662098377943039 2023-01-23 01:16:39.035957: step: 1536/527, loss: 0.33434516191482544 2023-01-23 01:16:40.160964: step: 1540/527, loss: 0.14279527962207794 2023-01-23 01:16:41.293235: step: 1544/527, loss: 0.1918402761220932 2023-01-23 01:16:42.399224: step: 1548/527, loss: 0.05609443411231041 2023-01-23 01:16:43.492064: step: 1552/527, loss: 0.035256434231996536 2023-01-23 01:16:44.604348: step: 1556/527, loss: 0.1993013471364975 2023-01-23 01:16:45.721418: step: 1560/527, loss: 0.19217254221439362 2023-01-23 01:16:46.833693: step: 1564/527, loss: 0.049072038382291794 2023-01-23 01:16:47.938565: step: 1568/527, loss: 0.03501834720373154 2023-01-23 01:16:49.049999: step: 1572/527, loss: 0.01261987630277872 2023-01-23 01:16:50.171597: step: 1576/527, loss: 0.12150402367115021 2023-01-23 01:16:51.301633: step: 1580/527, loss: 0.10841389000415802 2023-01-23 01:16:52.459677: step: 1584/527, loss: 0.5932765007019043 2023-01-23 01:16:53.579977: step: 1588/527, loss: 0.08304176479578018 2023-01-23 01:16:54.695979: step: 1592/527, loss: 0.3327920138835907 2023-01-23 01:16:55.825365: step: 1596/527, loss: 0.030359935015439987 2023-01-23 01:16:56.934243: step: 1600/527, loss: 0.24892206490039825 2023-01-23 01:16:58.051330: step: 1604/527, loss: 0.1113288402557373 2023-01-23 01:16:59.187620: step: 1608/527, loss: 0.08322592079639435 2023-01-23 01:17:00.314610: step: 1612/527, loss: 0.06382560729980469 2023-01-23 01:17:01.451277: step: 1616/527, loss: 0.09297456592321396 2023-01-23 01:17:02.583938: step: 1620/527, loss: 0.029099320992827415 2023-01-23 01:17:03.696556: step: 1624/527, loss: 0.071587473154068 2023-01-23 01:17:04.843717: step: 1628/527, loss: 0.0069536687806248665 2023-01-23 01:17:06.006394: step: 1632/527, loss: 0.04498634487390518 2023-01-23 01:17:07.130440: step: 1636/527, loss: 0.007207393646240234 2023-01-23 01:17:08.259075: step: 1640/527, loss: 0.00955953635275364 2023-01-23 01:17:09.356067: step: 1644/527, loss: 0.09483642876148224 2023-01-23 01:17:10.461252: step: 1648/527, loss: 0.08596763759851456 2023-01-23 01:17:11.554095: step: 1652/527, loss: 0.050000667572021484 2023-01-23 01:17:12.666413: step: 1656/527, loss: 0.6716679334640503 2023-01-23 01:17:13.748728: step: 1660/527, loss: 0.03650989755988121 2023-01-23 01:17:14.860340: step: 1664/527, loss: 0.005126953125 2023-01-23 01:17:16.010104: step: 1668/527, loss: 0.08572454750537872 2023-01-23 01:17:17.140120: step: 1672/527, loss: 0.058576539158821106 2023-01-23 01:17:18.261116: step: 1676/527, loss: 0.018717478960752487 2023-01-23 01:17:19.366024: step: 1680/527, loss: 0.05393677204847336 2023-01-23 01:17:20.445592: step: 1684/527, loss: 0.04691710323095322 2023-01-23 01:17:21.550433: step: 1688/527, loss: 0.0034494400024414062 2023-01-23 01:17:22.676040: step: 1692/527, loss: 0.06828327476978302 2023-01-23 01:17:23.774359: step: 1696/527, loss: 0.052118588238954544 2023-01-23 01:17:24.894918: step: 1700/527, loss: 0.14473260939121246 2023-01-23 01:17:25.974601: step: 1704/527, loss: 0.18068094551563263 2023-01-23 01:17:27.076319: step: 1708/527, loss: 0.044092558324337006 2023-01-23 01:17:28.200857: step: 1712/527, loss: 0.13796940445899963 2023-01-23 01:17:29.319501: step: 1716/527, loss: 0.0381770133972168 2023-01-23 01:17:30.457637: step: 1720/527, loss: 0.07415933907032013 2023-01-23 01:17:31.587833: step: 1724/527, loss: 0.06726837158203125 2023-01-23 01:17:32.707198: step: 1728/527, loss: 0.00650444021448493 2023-01-23 01:17:33.808444: step: 1732/527, loss: 0.08492393046617508 2023-01-23 01:17:34.954390: step: 1736/527, loss: 0.01097936648875475 2023-01-23 01:17:36.052904: step: 1740/527, loss: 0.042238976806402206 2023-01-23 01:17:37.143201: step: 1744/527, loss: 0.1559789776802063 2023-01-23 01:17:38.271377: step: 1748/527, loss: 0.12835398316383362 2023-01-23 01:17:39.391905: step: 1752/527, loss: 1.0934780836105347 2023-01-23 01:17:40.514636: step: 1756/527, loss: 0.2530114948749542 2023-01-23 01:17:41.630240: step: 1760/527, loss: 0.020436763763427734 2023-01-23 01:17:42.757551: step: 1764/527, loss: 0.024929428473114967 2023-01-23 01:17:43.912597: step: 1768/527, loss: 0.1303289532661438 2023-01-23 01:17:45.057002: step: 1772/527, loss: 0.07172908633947372 2023-01-23 01:17:46.173780: step: 1776/527, loss: 0.06466171145439148 2023-01-23 01:17:47.295747: step: 1780/527, loss: 0.02956414222717285 2023-01-23 01:17:48.408549: step: 1784/527, loss: 0.09388022869825363 2023-01-23 01:17:49.525505: step: 1788/527, loss: 0.025692511349916458 2023-01-23 01:17:50.628268: step: 1792/527, loss: 0.03701906278729439 2023-01-23 01:17:51.761117: step: 1796/527, loss: 0.025358105078339577 2023-01-23 01:17:52.873034: step: 1800/527, loss: 0.08769816905260086 2023-01-23 01:17:54.015633: step: 1804/527, loss: 0.05394699424505234 2023-01-23 01:17:55.160804: step: 1808/527, loss: 0.08894510567188263 2023-01-23 01:17:56.246196: step: 1812/527, loss: 0.03907956928014755 2023-01-23 01:17:57.336806: step: 1816/527, loss: 0.04012441635131836 2023-01-23 01:17:58.422529: step: 1820/527, loss: 0.029288865625858307 2023-01-23 01:17:59.523966: step: 1824/527, loss: 0.00815882720053196 2023-01-23 01:18:00.637715: step: 1828/527, loss: 0.04130678251385689 2023-01-23 01:18:01.764695: step: 1832/527, loss: 0.0510869026184082 2023-01-23 01:18:02.878049: step: 1836/527, loss: 0.050705622881650925 2023-01-23 01:18:03.996585: step: 1840/527, loss: 0.1016816571354866 2023-01-23 01:18:05.101322: step: 1844/527, loss: 0.5428416132926941 2023-01-23 01:18:06.224283: step: 1848/527, loss: 0.14480562508106232 2023-01-23 01:18:07.317912: step: 1852/527, loss: 0.03458847850561142 2023-01-23 01:18:08.464612: step: 1856/527, loss: 0.0006271362071856856 2023-01-23 01:18:09.571701: step: 1860/527, loss: 0.07497739791870117 2023-01-23 01:18:10.723805: step: 1864/527, loss: 0.7174075245857239 2023-01-23 01:18:11.836028: step: 1868/527, loss: 0.035933688282966614 2023-01-23 01:18:12.956049: step: 1872/527, loss: 0.13449449837207794 2023-01-23 01:18:14.099314: step: 1876/527, loss: 0.023523710668087006 2023-01-23 01:18:15.217445: step: 1880/527, loss: 0.07532148063182831 2023-01-23 01:18:16.314931: step: 1884/527, loss: 0.1139548271894455 2023-01-23 01:18:17.446914: step: 1888/527, loss: 0.05602336302399635 2023-01-23 01:18:18.597672: step: 1892/527, loss: 0.10650572925806046 2023-01-23 01:18:19.738487: step: 1896/527, loss: 0.025278665125370026 2023-01-23 01:18:20.835151: step: 1900/527, loss: 0.16824665665626526 2023-01-23 01:18:21.972121: step: 1904/527, loss: 0.05643191188573837 2023-01-23 01:18:23.112085: step: 1908/527, loss: 0.14879217743873596 2023-01-23 01:18:24.248635: step: 1912/527, loss: 0.20584160089492798 2023-01-23 01:18:25.388242: step: 1916/527, loss: 0.003959703724831343 2023-01-23 01:18:26.542854: step: 1920/527, loss: 0.07251858711242676 2023-01-23 01:18:27.665636: step: 1924/527, loss: 0.2500073313713074 2023-01-23 01:18:28.771141: step: 1928/527, loss: 0.0089385025203228 2023-01-23 01:18:29.911837: step: 1932/527, loss: 0.024903345853090286 2023-01-23 01:18:31.017992: step: 1936/527, loss: 0.033914946019649506 2023-01-23 01:18:32.126949: step: 1940/527, loss: 0.06158857420086861 2023-01-23 01:18:33.246719: step: 1944/527, loss: 0.11178407818078995 2023-01-23 01:18:34.353850: step: 1948/527, loss: 0.003274250077083707 2023-01-23 01:18:35.466369: step: 1952/527, loss: 0.02889099158346653 2023-01-23 01:18:36.585044: step: 1956/527, loss: 0.10508528351783752 2023-01-23 01:18:37.700417: step: 1960/527, loss: 0.05529346689581871 2023-01-23 01:18:38.803814: step: 1964/527, loss: 0.0734872817993164 2023-01-23 01:18:39.929645: step: 1968/527, loss: 0.06625165790319443 2023-01-23 01:18:41.069314: step: 1972/527, loss: 0.006357860751450062 2023-01-23 01:18:42.259360: step: 1976/527, loss: 0.06232185661792755 2023-01-23 01:18:43.383270: step: 1980/527, loss: 0.05676531791687012 2023-01-23 01:18:44.490932: step: 1984/527, loss: 0.21562395989894867 2023-01-23 01:18:45.616804: step: 1988/527, loss: 0.10727329552173615 2023-01-23 01:18:46.721728: step: 1992/527, loss: 0.045992087572813034 2023-01-23 01:18:47.834751: step: 1996/527, loss: 0.013383293524384499 2023-01-23 01:18:48.919423: step: 2000/527, loss: 0.008558845147490501 2023-01-23 01:18:50.016419: step: 2004/527, loss: 0.31395137310028076 2023-01-23 01:18:51.128687: step: 2008/527, loss: 0.006506443023681641 2023-01-23 01:18:52.217953: step: 2012/527, loss: 0.03435507044196129 2023-01-23 01:18:53.319641: step: 2016/527, loss: 0.4994511306285858 2023-01-23 01:18:54.423015: step: 2020/527, loss: 0.0842091292142868 2023-01-23 01:18:55.557037: step: 2024/527, loss: 0.13641434907913208 2023-01-23 01:18:56.678239: step: 2028/527, loss: 0.06823810935020447 2023-01-23 01:18:57.785954: step: 2032/527, loss: 0.2879948616027832 2023-01-23 01:18:58.906084: step: 2036/527, loss: 0.1547890305519104 2023-01-23 01:19:00.037608: step: 2040/527, loss: 0.024220729246735573 2023-01-23 01:19:01.161963: step: 2044/527, loss: 0.10612693428993225 2023-01-23 01:19:02.281073: step: 2048/527, loss: 0.07173619419336319 2023-01-23 01:19:03.369775: step: 2052/527, loss: 0.04888134077191353 2023-01-23 01:19:04.480848: step: 2056/527, loss: 0.04329204559326172 2023-01-23 01:19:05.586726: step: 2060/527, loss: 0.10113182663917542 2023-01-23 01:19:06.688810: step: 2064/527, loss: 0.11236695945262909 2023-01-23 01:19:07.795959: step: 2068/527, loss: 0.01601400412619114 2023-01-23 01:19:08.912493: step: 2072/527, loss: 0.0417846217751503 2023-01-23 01:19:09.997893: step: 2076/527, loss: 0.011335277929902077 2023-01-23 01:19:11.131344: step: 2080/527, loss: 0.6258201003074646 2023-01-23 01:19:12.279219: step: 2084/527, loss: 0.09557032585144043 2023-01-23 01:19:13.395138: step: 2088/527, loss: 0.3243850767612457 2023-01-23 01:19:14.566126: step: 2092/527, loss: 0.06134028360247612 2023-01-23 01:19:15.668939: step: 2096/527, loss: 0.04106311872601509 2023-01-23 01:19:16.774671: step: 2100/527, loss: 0.5914804339408875 2023-01-23 01:19:17.897156: step: 2104/527, loss: 0.01369571778923273 2023-01-23 01:19:18.983902: step: 2108/527, loss: 0.026768207550048828 ================================================== Loss: 0.083 -------------------- Dev: {'event': {'p': 0.6084275436793423, 'r': 0.7882822902796272, 'f1': 0.6867749419953597}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Test: {'event': {'p': 0.6218149307107733, 'r': 0.7948571428571428, 'f1': 0.6977677451718083}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Chinese: {'event': {'p': 0.5595238095238095, 'r': 0.8703703703703703, 'f1': 0.6811594202898551}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Korean: {'event': {'p': 0.6382978723404256, 'r': 0.47619047619047616, 'f1': 0.5454545454545455}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Russian: {'event': {'p': 0.475, 'r': 0.5277777777777778, 'f1': 0.5}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Eng Test for Korean: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 11 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:20:00.540112: step: 4/527, loss: 0.08067798614501953 2023-01-23 01:20:01.642521: step: 8/527, loss: 0.04230527952313423 2023-01-23 01:20:02.778100: step: 12/527, loss: 0.04908175766468048 2023-01-23 01:20:03.890010: step: 16/527, loss: 0.03007793426513672 2023-01-23 01:20:05.025707: step: 20/527, loss: 0.01850900612771511 2023-01-23 01:20:06.146268: step: 24/527, loss: 0.020067691802978516 2023-01-23 01:20:07.257759: step: 28/527, loss: 0.025117874145507812 2023-01-23 01:20:08.360622: step: 32/527, loss: 0.038509272038936615 2023-01-23 01:20:09.460450: step: 36/527, loss: 0.000927352870348841 2023-01-23 01:20:10.575296: step: 40/527, loss: 0.046708106994628906 2023-01-23 01:20:11.681177: step: 44/527, loss: 0.01110992394387722 2023-01-23 01:20:12.806657: step: 48/527, loss: 0.04392719268798828 2023-01-23 01:20:13.895685: step: 52/527, loss: 0.052697114646434784 2023-01-23 01:20:15.005468: step: 56/527, loss: 0.015467071905732155 2023-01-23 01:20:16.135947: step: 60/527, loss: 0.02496185339987278 2023-01-23 01:20:17.245511: step: 64/527, loss: 0.001474666642025113 2023-01-23 01:20:18.382492: step: 68/527, loss: 0.04135601222515106 2023-01-23 01:20:19.553653: step: 72/527, loss: 0.12225857377052307 2023-01-23 01:20:20.682185: step: 76/527, loss: 0.12115774303674698 2023-01-23 01:20:21.784272: step: 80/527, loss: 0.0560920275747776 2023-01-23 01:20:22.907174: step: 84/527, loss: 0.022900868207216263 2023-01-23 01:20:24.052451: step: 88/527, loss: 0.0537288673222065 2023-01-23 01:20:25.179959: step: 92/527, loss: 0.0813688263297081 2023-01-23 01:20:26.294416: step: 96/527, loss: 0.1555161476135254 2023-01-23 01:20:27.416144: step: 100/527, loss: 0.1209169402718544 2023-01-23 01:20:28.510475: step: 104/527, loss: 0.04698324203491211 2023-01-23 01:20:29.638807: step: 108/527, loss: 0.06603773683309555 2023-01-23 01:20:30.753928: step: 112/527, loss: 0.18267536163330078 2023-01-23 01:20:31.878267: step: 116/527, loss: 0.0006688118446618319 2023-01-23 01:20:33.002329: step: 120/527, loss: 0.022388862445950508 2023-01-23 01:20:34.098688: step: 124/527, loss: 0.021190166473388672 2023-01-23 01:20:35.229073: step: 128/527, loss: 0.06566648185253143 2023-01-23 01:20:36.331562: step: 132/527, loss: 0.03349723666906357 2023-01-23 01:20:37.428716: step: 136/527, loss: 0.07213124632835388 2023-01-23 01:20:38.547181: step: 140/527, loss: 0.13203755021095276 2023-01-23 01:20:39.668369: step: 144/527, loss: 0.09245087206363678 2023-01-23 01:20:40.776058: step: 148/527, loss: 0.030596591532230377 2023-01-23 01:20:41.877218: step: 152/527, loss: 0.006259727291762829 2023-01-23 01:20:42.964166: step: 156/527, loss: 0.03528337553143501 2023-01-23 01:20:44.069768: step: 160/527, loss: 0.04161083698272705 2023-01-23 01:20:45.243359: step: 164/527, loss: 0.057959459722042084 2023-01-23 01:20:46.383313: step: 168/527, loss: 0.007411670871078968 2023-01-23 01:20:47.501852: step: 172/527, loss: 0.028269432485103607 2023-01-23 01:20:48.613443: step: 176/527, loss: 0.03533497080206871 2023-01-23 01:20:49.752227: step: 180/527, loss: 0.05302934721112251 2023-01-23 01:20:50.863838: step: 184/527, loss: 0.03431425243616104 2023-01-23 01:20:52.024740: step: 188/527, loss: 0.013964367099106312 2023-01-23 01:20:53.154792: step: 192/527, loss: 0.0025426866486668587 2023-01-23 01:20:54.268458: step: 196/527, loss: 0.014343834482133389 2023-01-23 01:20:55.393115: step: 200/527, loss: 0.04323830455541611 2023-01-23 01:20:56.510510: step: 204/527, loss: 0.022257614880800247 2023-01-23 01:20:57.643593: step: 208/527, loss: 0.05538616329431534 2023-01-23 01:20:58.774934: step: 212/527, loss: 0.015301132574677467 2023-01-23 01:20:59.867955: step: 216/527, loss: 0.0043236734345555305 2023-01-23 01:21:00.984293: step: 220/527, loss: 0.17806226015090942 2023-01-23 01:21:02.119657: step: 224/527, loss: 0.059856511652469635 2023-01-23 01:21:03.249915: step: 228/527, loss: 0.002015733625739813 2023-01-23 01:21:04.349833: step: 232/527, loss: 0.07700204849243164 2023-01-23 01:21:05.466678: step: 236/527, loss: 0.007751799188554287 2023-01-23 01:21:06.614124: step: 240/527, loss: 0.05842466652393341 2023-01-23 01:21:07.731868: step: 244/527, loss: 0.03313078731298447 2023-01-23 01:21:08.837621: step: 248/527, loss: 0.0014502524863928556 2023-01-23 01:21:09.946594: step: 252/527, loss: 0.05644569545984268 2023-01-23 01:21:11.084349: step: 256/527, loss: 0.028081322088837624 2023-01-23 01:21:12.213597: step: 260/527, loss: 0.21366047859191895 2023-01-23 01:21:13.312649: step: 264/527, loss: 0.007308769039809704 2023-01-23 01:21:14.402146: step: 268/527, loss: 0.003892803331837058 2023-01-23 01:21:15.553810: step: 272/527, loss: 0.007554054260253906 2023-01-23 01:21:16.672347: step: 276/527, loss: 0.08761701732873917 2023-01-23 01:21:17.791102: step: 280/527, loss: 0.025003815069794655 2023-01-23 01:21:18.909256: step: 284/527, loss: 0.20140905678272247 2023-01-23 01:21:19.993908: step: 288/527, loss: 0.11489498615264893 2023-01-23 01:21:21.122778: step: 292/527, loss: 0.04347868263721466 2023-01-23 01:21:22.224383: step: 296/527, loss: 0.5309319496154785 2023-01-23 01:21:23.349705: step: 300/527, loss: 0.033547595143318176 2023-01-23 01:21:24.493151: step: 304/527, loss: 0.0021164892241358757 2023-01-23 01:21:25.604423: step: 308/527, loss: 0.02490692213177681 2023-01-23 01:21:26.718633: step: 312/527, loss: 0.030603598803281784 2023-01-23 01:21:27.822944: step: 316/527, loss: 0.16288775205612183 2023-01-23 01:21:28.950370: step: 320/527, loss: 0.13951349258422852 2023-01-23 01:21:30.076596: step: 324/527, loss: 0.026932813227176666 2023-01-23 01:21:31.238590: step: 328/527, loss: 0.07625216990709305 2023-01-23 01:21:32.360214: step: 332/527, loss: 0.046442318707704544 2023-01-23 01:21:33.509517: step: 336/527, loss: 0.08221641182899475 2023-01-23 01:21:34.624718: step: 340/527, loss: 0.036043357104063034 2023-01-23 01:21:35.740697: step: 344/527, loss: 0.5584023594856262 2023-01-23 01:21:36.850351: step: 348/527, loss: 0.036026570945978165 2023-01-23 01:21:37.979192: step: 352/527, loss: 0.04068164899945259 2023-01-23 01:21:39.125875: step: 356/527, loss: 0.05809221416711807 2023-01-23 01:21:40.255094: step: 360/527, loss: 0.06390233337879181 2023-01-23 01:21:41.368484: step: 364/527, loss: 0.017533399164676666 2023-01-23 01:21:42.507294: step: 368/527, loss: 0.017421532422304153 2023-01-23 01:21:43.598809: step: 372/527, loss: 0.048799898475408554 2023-01-23 01:21:44.693607: step: 376/527, loss: 0.04963326454162598 2023-01-23 01:21:45.815568: step: 380/527, loss: 0.9355354309082031 2023-01-23 01:21:46.932708: step: 384/527, loss: 0.05899477005004883 2023-01-23 01:21:48.048334: step: 388/527, loss: 0.051241204142570496 2023-01-23 01:21:49.173576: step: 392/527, loss: 0.08990363776683807 2023-01-23 01:21:50.304851: step: 396/527, loss: 0.033190157264471054 2023-01-23 01:21:51.425765: step: 400/527, loss: 0.06903620064258575 2023-01-23 01:21:52.519368: step: 404/527, loss: 0.016281701624393463 2023-01-23 01:21:53.642425: step: 408/527, loss: 0.04275636747479439 2023-01-23 01:21:54.758229: step: 412/527, loss: 0.09220924228429794 2023-01-23 01:21:55.903563: step: 416/527, loss: 0.7665678262710571 2023-01-23 01:21:56.999909: step: 420/527, loss: 0.0925624817609787 2023-01-23 01:21:58.109656: step: 424/527, loss: 0.04685230180621147 2023-01-23 01:21:59.210198: step: 428/527, loss: 0.07393841445446014 2023-01-23 01:22:00.298654: step: 432/527, loss: 0.019419383257627487 2023-01-23 01:22:01.402478: step: 436/527, loss: 0.06814035773277283 2023-01-23 01:22:02.484548: step: 440/527, loss: 0.00699958810582757 2023-01-23 01:22:03.602262: step: 444/527, loss: 0.03958435356616974 2023-01-23 01:22:04.724812: step: 448/527, loss: 0.0026604654267430305 2023-01-23 01:22:05.819906: step: 452/527, loss: 0.0662652999162674 2023-01-23 01:22:06.931810: step: 456/527, loss: 0.004737568087875843 2023-01-23 01:22:08.048406: step: 460/527, loss: 0.050614356994628906 2023-01-23 01:22:09.138900: step: 464/527, loss: 0.0025129318237304688 2023-01-23 01:22:10.268530: step: 468/527, loss: 0.013509177602827549 2023-01-23 01:22:11.411942: step: 472/527, loss: 0.08616548031568527 2023-01-23 01:22:12.551044: step: 476/527, loss: 0.050011731684207916 2023-01-23 01:22:13.695053: step: 480/527, loss: 0.06006794050335884 2023-01-23 01:22:14.805980: step: 484/527, loss: 0.025157546624541283 2023-01-23 01:22:15.909562: step: 488/527, loss: 0.0459684394299984 2023-01-23 01:22:17.023320: step: 492/527, loss: 0.003686761949211359 2023-01-23 01:22:18.130717: step: 496/527, loss: 0.09003839641809464 2023-01-23 01:22:19.286848: step: 500/527, loss: 0.332754522562027 2023-01-23 01:22:20.390006: step: 504/527, loss: 0.004305028822273016 2023-01-23 01:22:21.504669: step: 508/527, loss: 0.01154098566621542 2023-01-23 01:22:22.621801: step: 512/527, loss: 0.008574867621064186 2023-01-23 01:22:23.713892: step: 516/527, loss: 0.029845476150512695 2023-01-23 01:22:24.840339: step: 520/527, loss: 0.030349839478731155 2023-01-23 01:22:25.952385: step: 524/527, loss: 0.13709792494773865 2023-01-23 01:22:27.063354: step: 528/527, loss: 0.008143424987792969 2023-01-23 01:22:28.207337: step: 532/527, loss: 0.006602668669074774 2023-01-23 01:22:29.345522: step: 536/527, loss: 0.009474802762269974 2023-01-23 01:22:30.441506: step: 540/527, loss: 0.1374446004629135 2023-01-23 01:22:31.583343: step: 544/527, loss: 0.020861387252807617 2023-01-23 01:22:32.669747: step: 548/527, loss: 0.06290445476770401 2023-01-23 01:22:33.758178: step: 552/527, loss: 0.08680963516235352 2023-01-23 01:22:34.842024: step: 556/527, loss: 0.020316505804657936 2023-01-23 01:22:35.970771: step: 560/527, loss: 0.22358588874340057 2023-01-23 01:22:37.092442: step: 564/527, loss: 0.029083536937832832 2023-01-23 01:22:38.202249: step: 568/527, loss: 0.012924958020448685 2023-01-23 01:22:39.326688: step: 572/527, loss: 0.1281568557024002 2023-01-23 01:22:40.411163: step: 576/527, loss: 0.07554297149181366 2023-01-23 01:22:41.549358: step: 580/527, loss: 0.11508617550134659 2023-01-23 01:22:42.707041: step: 584/527, loss: 0.022138023748993874 2023-01-23 01:22:43.805800: step: 588/527, loss: 0.008836365304887295 2023-01-23 01:22:44.934070: step: 592/527, loss: 0.06644859910011292 2023-01-23 01:22:46.109253: step: 596/527, loss: 0.08165421336889267 2023-01-23 01:22:47.224853: step: 600/527, loss: 0.12328466773033142 2023-01-23 01:22:48.366967: step: 604/527, loss: 0.023034285753965378 2023-01-23 01:22:49.488360: step: 608/527, loss: 0.08964891731739044 2023-01-23 01:22:50.575386: step: 612/527, loss: 0.008318711072206497 2023-01-23 01:22:51.700654: step: 616/527, loss: 0.06161961704492569 2023-01-23 01:22:52.785375: step: 620/527, loss: 0.022675370797514915 2023-01-23 01:22:53.882734: step: 624/527, loss: 0.3809185028076172 2023-01-23 01:22:54.983688: step: 628/527, loss: 0.00697560328990221 2023-01-23 01:22:56.112814: step: 632/527, loss: 0.015552139841020107 2023-01-23 01:22:57.224307: step: 636/527, loss: 0.10790614783763885 2023-01-23 01:22:58.357795: step: 640/527, loss: 0.124384306371212 2023-01-23 01:22:59.481410: step: 644/527, loss: 0.02337665669620037 2023-01-23 01:23:00.601214: step: 648/527, loss: 0.05483550950884819 2023-01-23 01:23:01.694112: step: 652/527, loss: 0.0023277283180505037 2023-01-23 01:23:02.843395: step: 656/527, loss: 0.03931853920221329 2023-01-23 01:23:03.963262: step: 660/527, loss: 0.053301528096199036 2023-01-23 01:23:05.103550: step: 664/527, loss: 0.028059815987944603 2023-01-23 01:23:06.251048: step: 668/527, loss: 0.2204572856426239 2023-01-23 01:23:07.406604: step: 672/527, loss: 0.10398168861865997 2023-01-23 01:23:08.525975: step: 676/527, loss: 0.08302703499794006 2023-01-23 01:23:09.646010: step: 680/527, loss: 0.007313197944313288 2023-01-23 01:23:10.755282: step: 684/527, loss: 0.03017606772482395 2023-01-23 01:23:11.859717: step: 688/527, loss: 0.03649766743183136 2023-01-23 01:23:12.983570: step: 692/527, loss: 0.02656860277056694 2023-01-23 01:23:14.109310: step: 696/527, loss: 0.03309822082519531 2023-01-23 01:23:15.234137: step: 700/527, loss: 0.03545217588543892 2023-01-23 01:23:16.355617: step: 704/527, loss: 0.004703187849372625 2023-01-23 01:23:17.486354: step: 708/527, loss: 0.02754044532775879 2023-01-23 01:23:18.591994: step: 712/527, loss: 0.015688514336943626 2023-01-23 01:23:19.735774: step: 716/527, loss: 0.006097698118537664 2023-01-23 01:23:20.834858: step: 720/527, loss: 0.005704117007553577 2023-01-23 01:23:21.946652: step: 724/527, loss: 0.02433185465633869 2023-01-23 01:23:23.038723: step: 728/527, loss: 0.05707826837897301 2023-01-23 01:23:24.153056: step: 732/527, loss: 0.17664051055908203 2023-01-23 01:23:25.271309: step: 736/527, loss: 0.489203542470932 2023-01-23 01:23:26.365658: step: 740/527, loss: 0.03538022190332413 2023-01-23 01:23:27.473968: step: 744/527, loss: 0.30911150574684143 2023-01-23 01:23:28.560416: step: 748/527, loss: 0.12843838334083557 2023-01-23 01:23:29.700819: step: 752/527, loss: 0.17990216612815857 2023-01-23 01:23:30.787946: step: 756/527, loss: 0.07838056236505508 2023-01-23 01:23:31.892908: step: 760/527, loss: 0.3793204426765442 2023-01-23 01:23:33.023834: step: 764/527, loss: 0.012790108099579811 2023-01-23 01:23:34.154743: step: 768/527, loss: 0.04151914268732071 2023-01-23 01:23:35.286657: step: 772/527, loss: 0.025007151067256927 2023-01-23 01:23:36.405541: step: 776/527, loss: 0.06588192284107208 2023-01-23 01:23:37.550781: step: 780/527, loss: 0.006550026126205921 2023-01-23 01:23:38.695535: step: 784/527, loss: 0.11618877202272415 2023-01-23 01:23:39.788744: step: 788/527, loss: 0.0011536121601238847 2023-01-23 01:23:40.935622: step: 792/527, loss: 0.009385443292558193 2023-01-23 01:23:42.044245: step: 796/527, loss: 0.09038610756397247 2023-01-23 01:23:43.203459: step: 800/527, loss: 0.48065185546875 2023-01-23 01:23:44.355778: step: 804/527, loss: 0.03906438499689102 2023-01-23 01:23:45.456279: step: 808/527, loss: 0.043680667877197266 2023-01-23 01:23:46.572512: step: 812/527, loss: 0.010559607297182083 2023-01-23 01:23:47.681886: step: 816/527, loss: 0.04532623291015625 2023-01-23 01:23:48.790743: step: 820/527, loss: 0.06716423481702805 2023-01-23 01:23:49.895490: step: 824/527, loss: 0.03185443952679634 2023-01-23 01:23:51.004787: step: 828/527, loss: 0.050204064697027206 2023-01-23 01:23:52.127018: step: 832/527, loss: 0.013174057006835938 2023-01-23 01:23:53.252183: step: 836/527, loss: 0.029108811169862747 2023-01-23 01:23:54.323964: step: 840/527, loss: 0.00281867990270257 2023-01-23 01:23:55.499804: step: 844/527, loss: 0.09427967667579651 2023-01-23 01:23:56.631452: step: 848/527, loss: 0.014589118771255016 2023-01-23 01:23:57.747241: step: 852/527, loss: 0.12413787841796875 2023-01-23 01:23:58.861785: step: 856/527, loss: 0.0020724297501146793 2023-01-23 01:23:59.970309: step: 860/527, loss: 0.13680876791477203 2023-01-23 01:24:01.075909: step: 864/527, loss: 0.0005034446949139237 2023-01-23 01:24:02.211218: step: 868/527, loss: 0.005815601442009211 2023-01-23 01:24:03.309611: step: 872/527, loss: 0.08673496544361115 2023-01-23 01:24:04.441138: step: 876/527, loss: 0.05387873575091362 2023-01-23 01:24:05.555737: step: 880/527, loss: 0.0957273468375206 2023-01-23 01:24:06.655171: step: 884/527, loss: 0.12776851654052734 2023-01-23 01:24:07.799750: step: 888/527, loss: 0.034682273864746094 2023-01-23 01:24:08.920075: step: 892/527, loss: 0.013392925262451172 2023-01-23 01:24:10.067130: step: 896/527, loss: 0.1638377159833908 2023-01-23 01:24:11.162356: step: 900/527, loss: 0.03973083943128586 2023-01-23 01:24:12.270718: step: 904/527, loss: 0.01538238488137722 2023-01-23 01:24:13.368397: step: 908/527, loss: 0.021627331152558327 2023-01-23 01:24:14.481028: step: 912/527, loss: 0.005932795815169811 2023-01-23 01:24:15.597712: step: 916/527, loss: 0.01120824832469225 2023-01-23 01:24:16.720373: step: 920/527, loss: 0.28511470556259155 2023-01-23 01:24:17.870145: step: 924/527, loss: 0.007882118225097656 2023-01-23 01:24:18.977680: step: 928/527, loss: 0.033103276044130325 2023-01-23 01:24:20.130602: step: 932/527, loss: 0.019672680646181107 2023-01-23 01:24:21.274992: step: 936/527, loss: 0.005472755525261164 2023-01-23 01:24:22.384557: step: 940/527, loss: 0.08514904975891113 2023-01-23 01:24:23.492936: step: 944/527, loss: 0.711452305316925 2023-01-23 01:24:24.605285: step: 948/527, loss: 0.04166736826300621 2023-01-23 01:24:25.776512: step: 952/527, loss: 0.013927007094025612 2023-01-23 01:24:26.888627: step: 956/527, loss: 0.28360825777053833 2023-01-23 01:24:27.983439: step: 960/527, loss: 0.03162822872400284 2023-01-23 01:24:29.143396: step: 964/527, loss: 0.005666160956025124 2023-01-23 01:24:30.298858: step: 968/527, loss: 0.08374099433422089 2023-01-23 01:24:31.418676: step: 972/527, loss: 0.005027103237807751 2023-01-23 01:24:32.505125: step: 976/527, loss: 0.043906304985284805 2023-01-23 01:24:33.596855: step: 980/527, loss: 0.01409902609884739 2023-01-23 01:24:34.692046: step: 984/527, loss: 0.08769810199737549 2023-01-23 01:24:35.826059: step: 988/527, loss: 0.35960617661476135 2023-01-23 01:24:36.942537: step: 992/527, loss: 0.001975440885871649 2023-01-23 01:24:38.044988: step: 996/527, loss: 0.0035919665824621916 2023-01-23 01:24:39.168902: step: 1000/527, loss: 0.04291825369000435 2023-01-23 01:24:40.288963: step: 1004/527, loss: 0.019007397815585136 2023-01-23 01:24:41.408264: step: 1008/527, loss: 0.030747700482606888 2023-01-23 01:24:42.540933: step: 1012/527, loss: 0.05249347910284996 2023-01-23 01:24:43.651053: step: 1016/527, loss: 0.00330104841850698 2023-01-23 01:24:44.822363: step: 1020/527, loss: 0.05303707346320152 2023-01-23 01:24:45.930980: step: 1024/527, loss: 0.002799081616103649 2023-01-23 01:24:47.075466: step: 1028/527, loss: 0.13503780961036682 2023-01-23 01:24:48.209505: step: 1032/527, loss: 0.04094085842370987 2023-01-23 01:24:49.337602: step: 1036/527, loss: 0.06127309799194336 2023-01-23 01:24:50.495700: step: 1040/527, loss: 0.10088405758142471 2023-01-23 01:24:51.630083: step: 1044/527, loss: 0.038503360003232956 2023-01-23 01:24:52.775212: step: 1048/527, loss: 0.31427013874053955 2023-01-23 01:24:53.908419: step: 1052/527, loss: 0.022220849990844727 2023-01-23 01:24:55.016243: step: 1056/527, loss: 0.06624408066272736 2023-01-23 01:24:56.132994: step: 1060/527, loss: 0.03269600868225098 2023-01-23 01:24:57.263940: step: 1064/527, loss: 0.015769578516483307 2023-01-23 01:24:58.360993: step: 1068/527, loss: 0.12198818475008011 2023-01-23 01:24:59.449047: step: 1072/527, loss: 0.00968790054321289 2023-01-23 01:25:00.550614: step: 1076/527, loss: 0.007360649295151234 2023-01-23 01:25:01.661639: step: 1080/527, loss: 0.09726858139038086 2023-01-23 01:25:02.781421: step: 1084/527, loss: 0.02572495862841606 2023-01-23 01:25:03.912206: step: 1088/527, loss: 0.06369371712207794 2023-01-23 01:25:05.048677: step: 1092/527, loss: 0.14304561913013458 2023-01-23 01:25:06.166839: step: 1096/527, loss: 0.07329888641834259 2023-01-23 01:25:07.249059: step: 1100/527, loss: 0.009450912475585938 2023-01-23 01:25:08.365425: step: 1104/527, loss: 0.17298002541065216 2023-01-23 01:25:09.477030: step: 1108/527, loss: 0.0022864341735839844 2023-01-23 01:25:10.613831: step: 1112/527, loss: 0.006158983800560236 2023-01-23 01:25:11.735319: step: 1116/527, loss: 0.014201736077666283 2023-01-23 01:25:12.869543: step: 1120/527, loss: 0.013581514358520508 2023-01-23 01:25:13.977097: step: 1124/527, loss: 0.07646389305591583 2023-01-23 01:25:15.078427: step: 1128/527, loss: 0.6779316067695618 2023-01-23 01:25:16.198722: step: 1132/527, loss: 0.14257989823818207 2023-01-23 01:25:17.308412: step: 1136/527, loss: 0.11525392532348633 2023-01-23 01:25:18.443519: step: 1140/527, loss: 0.07272128760814667 2023-01-23 01:25:19.550095: step: 1144/527, loss: 0.030729390680789948 2023-01-23 01:25:20.669794: step: 1148/527, loss: 0.01055831927806139 2023-01-23 01:25:21.796642: step: 1152/527, loss: 0.015939807519316673 2023-01-23 01:25:22.918450: step: 1156/527, loss: 0.03135376051068306 2023-01-23 01:25:24.046168: step: 1160/527, loss: 0.0008707046508789062 2023-01-23 01:25:25.164140: step: 1164/527, loss: 0.016486549749970436 2023-01-23 01:25:26.282277: step: 1168/527, loss: 0.2746197283267975 2023-01-23 01:25:27.428018: step: 1172/527, loss: 0.21655531227588654 2023-01-23 01:25:28.542568: step: 1176/527, loss: 0.3059563636779785 2023-01-23 01:25:29.648803: step: 1180/527, loss: 0.03283210098743439 2023-01-23 01:25:30.788880: step: 1184/527, loss: 0.033354759216308594 2023-01-23 01:25:31.895877: step: 1188/527, loss: 0.05663085728883743 2023-01-23 01:25:33.019675: step: 1192/527, loss: 0.024155616760253906 2023-01-23 01:25:34.142309: step: 1196/527, loss: 0.042307090014219284 2023-01-23 01:25:35.242339: step: 1200/527, loss: 0.04537970945239067 2023-01-23 01:25:36.381577: step: 1204/527, loss: 0.014900971204042435 2023-01-23 01:25:37.500508: step: 1208/527, loss: 0.016940975561738014 2023-01-23 01:25:38.627794: step: 1212/527, loss: 0.019524481147527695 2023-01-23 01:25:39.737460: step: 1216/527, loss: 0.0033661366906017065 2023-01-23 01:25:40.870193: step: 1220/527, loss: 0.024695778265595436 2023-01-23 01:25:41.998635: step: 1224/527, loss: 0.011055564507842064 2023-01-23 01:25:43.114960: step: 1228/527, loss: 0.09283050894737244 2023-01-23 01:25:44.207788: step: 1232/527, loss: 0.19912710785865784 2023-01-23 01:25:45.338561: step: 1236/527, loss: 0.04730741307139397 2023-01-23 01:25:46.452656: step: 1240/527, loss: 0.09515562653541565 2023-01-23 01:25:47.545149: step: 1244/527, loss: 0.09216327965259552 2023-01-23 01:25:48.654790: step: 1248/527, loss: 0.009247112087905407 2023-01-23 01:25:49.760839: step: 1252/527, loss: 0.056338027119636536 2023-01-23 01:25:50.914402: step: 1256/527, loss: 0.05807933956384659 2023-01-23 01:25:52.043775: step: 1260/527, loss: 0.012791823595762253 2023-01-23 01:25:53.145448: step: 1264/527, loss: 0.018391896039247513 2023-01-23 01:25:54.282879: step: 1268/527, loss: 0.03221721574664116 2023-01-23 01:25:55.420735: step: 1272/527, loss: 0.052113912999629974 2023-01-23 01:25:56.539432: step: 1276/527, loss: 0.055801428854465485 2023-01-23 01:25:57.638794: step: 1280/527, loss: 0.007173895835876465 2023-01-23 01:25:58.751042: step: 1284/527, loss: 0.04002103954553604 2023-01-23 01:25:59.827306: step: 1288/527, loss: 0.012015294283628464 2023-01-23 01:26:00.946660: step: 1292/527, loss: 0.032076645642519 2023-01-23 01:26:02.067659: step: 1296/527, loss: 0.22438450157642365 2023-01-23 01:26:03.211954: step: 1300/527, loss: 0.16272085905075073 2023-01-23 01:26:04.317502: step: 1304/527, loss: 0.02991161309182644 2023-01-23 01:26:05.432451: step: 1308/527, loss: 0.06720085442066193 2023-01-23 01:26:06.546319: step: 1312/527, loss: 0.623330295085907 2023-01-23 01:26:07.643456: step: 1316/527, loss: 0.0005444050184451044 2023-01-23 01:26:08.737537: step: 1320/527, loss: 0.020223617553710938 2023-01-23 01:26:09.878836: step: 1324/527, loss: 0.031185531988739967 2023-01-23 01:26:11.006380: step: 1328/527, loss: 0.022219086065888405 2023-01-23 01:26:12.137302: step: 1332/527, loss: 0.031058311462402344 2023-01-23 01:26:13.283198: step: 1336/527, loss: 0.4829002320766449 2023-01-23 01:26:14.408139: step: 1340/527, loss: 0.0422300361096859 2023-01-23 01:26:15.525003: step: 1344/527, loss: 0.01845226250588894 2023-01-23 01:26:16.635191: step: 1348/527, loss: 0.01965484581887722 2023-01-23 01:26:17.740444: step: 1352/527, loss: 0.013964558020234108 2023-01-23 01:26:18.843592: step: 1356/527, loss: 0.018819045275449753 2023-01-23 01:26:19.996645: step: 1360/527, loss: 0.07471399754285812 2023-01-23 01:26:21.119343: step: 1364/527, loss: 0.06723332405090332 2023-01-23 01:26:22.252534: step: 1368/527, loss: 0.12190437316894531 2023-01-23 01:26:23.338187: step: 1372/527, loss: 0.07430973649024963 2023-01-23 01:26:24.482742: step: 1376/527, loss: 0.10568561404943466 2023-01-23 01:26:25.576859: step: 1380/527, loss: 0.05476503074169159 2023-01-23 01:26:26.682567: step: 1384/527, loss: 0.04120798036456108 2023-01-23 01:26:27.802099: step: 1388/527, loss: 0.17752857506275177 2023-01-23 01:26:28.918850: step: 1392/527, loss: 0.09932174533605576 2023-01-23 01:26:30.026426: step: 1396/527, loss: 0.0008258819580078125 2023-01-23 01:26:31.154038: step: 1400/527, loss: 0.025541117414832115 2023-01-23 01:26:32.305296: step: 1404/527, loss: 0.011778187938034534 2023-01-23 01:26:33.413650: step: 1408/527, loss: 0.23266057670116425 2023-01-23 01:26:34.501645: step: 1412/527, loss: 0.02049694024026394 2023-01-23 01:26:35.596693: step: 1416/527, loss: 0.03534431755542755 2023-01-23 01:26:36.707486: step: 1420/527, loss: 0.0729069784283638 2023-01-23 01:26:37.811773: step: 1424/527, loss: 0.09496541321277618 2023-01-23 01:26:38.908557: step: 1428/527, loss: 0.017676448449492455 2023-01-23 01:26:40.026995: step: 1432/527, loss: 0.011694718152284622 2023-01-23 01:26:41.108940: step: 1436/527, loss: 0.02203083038330078 2023-01-23 01:26:42.247075: step: 1440/527, loss: 0.08038368076086044 2023-01-23 01:26:43.355589: step: 1444/527, loss: 0.07521028816699982 2023-01-23 01:26:44.473055: step: 1448/527, loss: 0.11142826080322266 2023-01-23 01:26:45.579204: step: 1452/527, loss: 0.016503525897860527 2023-01-23 01:26:46.759647: step: 1456/527, loss: 0.07093420624732971 2023-01-23 01:26:47.902574: step: 1460/527, loss: 0.009356880560517311 2023-01-23 01:26:49.025712: step: 1464/527, loss: 0.04651136323809624 2023-01-23 01:26:50.145321: step: 1468/527, loss: 0.046209149062633514 2023-01-23 01:26:51.240931: step: 1472/527, loss: 0.07755289226770401 2023-01-23 01:26:52.374639: step: 1476/527, loss: 0.0940558910369873 2023-01-23 01:26:53.522214: step: 1480/527, loss: 0.058814577758312225 2023-01-23 01:26:54.608092: step: 1484/527, loss: 0.03854780271649361 2023-01-23 01:26:55.717625: step: 1488/527, loss: 0.04230472818017006 2023-01-23 01:26:56.844645: step: 1492/527, loss: 0.026504946872591972 2023-01-23 01:26:57.955237: step: 1496/527, loss: 0.015051460824906826 2023-01-23 01:26:59.086808: step: 1500/527, loss: 0.08310193568468094 2023-01-23 01:27:00.226572: step: 1504/527, loss: 0.10364484786987305 2023-01-23 01:27:01.326027: step: 1508/527, loss: 0.0023200989235192537 2023-01-23 01:27:02.419547: step: 1512/527, loss: 0.030957406386733055 2023-01-23 01:27:03.542047: step: 1516/527, loss: 0.034165095537900925 2023-01-23 01:27:04.665657: step: 1520/527, loss: 0.571515679359436 2023-01-23 01:27:05.791275: step: 1524/527, loss: 0.08922509849071503 2023-01-23 01:27:06.909008: step: 1528/527, loss: 0.0038133144844323397 2023-01-23 01:27:08.032029: step: 1532/527, loss: 0.03225760906934738 2023-01-23 01:27:09.124616: step: 1536/527, loss: 0.014249228872358799 2023-01-23 01:27:10.223689: step: 1540/527, loss: 0.04622488096356392 2023-01-23 01:27:11.337143: step: 1544/527, loss: 0.12354183197021484 2023-01-23 01:27:12.439021: step: 1548/527, loss: 0.019068825989961624 2023-01-23 01:27:13.541087: step: 1552/527, loss: 0.09823817759752274 2023-01-23 01:27:14.645587: step: 1556/527, loss: 0.03968248516321182 2023-01-23 01:27:15.779879: step: 1560/527, loss: 0.16510942578315735 2023-01-23 01:27:16.886980: step: 1564/527, loss: 0.087456613779068 2023-01-23 01:27:17.992806: step: 1568/527, loss: 0.015306759625673294 2023-01-23 01:27:19.079709: step: 1572/527, loss: 0.04706630855798721 2023-01-23 01:27:20.210719: step: 1576/527, loss: 0.019093656912446022 2023-01-23 01:27:21.320233: step: 1580/527, loss: 0.11068210750818253 2023-01-23 01:27:22.415456: step: 1584/527, loss: 0.04693346470594406 2023-01-23 01:27:23.528869: step: 1588/527, loss: 0.03644957393407822 2023-01-23 01:27:24.645446: step: 1592/527, loss: 0.009153318591415882 2023-01-23 01:27:25.757996: step: 1596/527, loss: 0.015159226022660732 2023-01-23 01:27:26.890679: step: 1600/527, loss: 0.22866153717041016 2023-01-23 01:27:27.995985: step: 1604/527, loss: 0.01996641233563423 2023-01-23 01:27:29.111600: step: 1608/527, loss: 0.014081764966249466 2023-01-23 01:27:30.230046: step: 1612/527, loss: 0.02429504320025444 2023-01-23 01:27:31.357663: step: 1616/527, loss: 0.0254758819937706 2023-01-23 01:27:32.467556: step: 1620/527, loss: 0.00548896798864007 2023-01-23 01:27:33.560796: step: 1624/527, loss: 0.01192316971719265 2023-01-23 01:27:34.691859: step: 1628/527, loss: 0.029481984674930573 2023-01-23 01:27:35.815033: step: 1632/527, loss: 0.10827651619911194 2023-01-23 01:27:36.915765: step: 1636/527, loss: 0.03793678060173988 2023-01-23 01:27:38.033228: step: 1640/527, loss: 0.017445897683501244 2023-01-23 01:27:39.148288: step: 1644/527, loss: 0.13484449684619904 2023-01-23 01:27:40.282001: step: 1648/527, loss: 0.030888747423887253 2023-01-23 01:27:41.391594: step: 1652/527, loss: 0.014526368118822575 2023-01-23 01:27:42.508025: step: 1656/527, loss: 0.008530902676284313 2023-01-23 01:27:43.626223: step: 1660/527, loss: 0.05581941455602646 2023-01-23 01:27:44.748086: step: 1664/527, loss: 0.0075164795853197575 2023-01-23 01:27:45.921516: step: 1668/527, loss: 0.016440771520137787 2023-01-23 01:27:47.064096: step: 1672/527, loss: 0.06240687519311905 2023-01-23 01:27:48.179481: step: 1676/527, loss: 0.022716714069247246 2023-01-23 01:27:49.304421: step: 1680/527, loss: 0.0809703841805458 2023-01-23 01:27:50.428911: step: 1684/527, loss: 0.113915354013443 2023-01-23 01:27:51.538628: step: 1688/527, loss: 0.025805557146668434 2023-01-23 01:27:52.643181: step: 1692/527, loss: 0.014315415173768997 2023-01-23 01:27:53.760571: step: 1696/527, loss: 0.013329410925507545 2023-01-23 01:27:54.898957: step: 1700/527, loss: 0.0910153016448021 2023-01-23 01:27:56.016767: step: 1704/527, loss: 0.11901970207691193 2023-01-23 01:27:57.103467: step: 1708/527, loss: 0.013755107298493385 2023-01-23 01:27:58.216717: step: 1712/527, loss: 0.04601598158478737 2023-01-23 01:27:59.349395: step: 1716/527, loss: 0.07870922237634659 2023-01-23 01:28:00.435223: step: 1720/527, loss: 0.013075113296508789 2023-01-23 01:28:01.565858: step: 1724/527, loss: 0.035787202417850494 2023-01-23 01:28:02.672274: step: 1728/527, loss: 0.04321441799402237 2023-01-23 01:28:03.827063: step: 1732/527, loss: 0.08494377136230469 2023-01-23 01:28:04.930243: step: 1736/527, loss: 0.04179525375366211 2023-01-23 01:28:06.039855: step: 1740/527, loss: 0.05915093421936035 2023-01-23 01:28:07.127610: step: 1744/527, loss: 0.023685265332460403 2023-01-23 01:28:08.283984: step: 1748/527, loss: 0.031377315521240234 2023-01-23 01:28:09.380610: step: 1752/527, loss: 0.005942630581557751 2023-01-23 01:28:10.489804: step: 1756/527, loss: 0.04503745958209038 2023-01-23 01:28:11.604031: step: 1760/527, loss: 0.04873838648200035 2023-01-23 01:28:12.711789: step: 1764/527, loss: 0.007004499435424805 2023-01-23 01:28:13.811997: step: 1768/527, loss: 0.04896259307861328 2023-01-23 01:28:14.916808: step: 1772/527, loss: 0.2488558292388916 2023-01-23 01:28:16.008417: step: 1776/527, loss: 0.12036170810461044 2023-01-23 01:28:17.125451: step: 1780/527, loss: 0.06050090864300728 2023-01-23 01:28:18.223474: step: 1784/527, loss: 0.07888466864824295 2023-01-23 01:28:19.332908: step: 1788/527, loss: 0.050199463963508606 2023-01-23 01:28:20.441543: step: 1792/527, loss: 0.011244607158005238 2023-01-23 01:28:21.561576: step: 1796/527, loss: 0.049538515508174896 2023-01-23 01:28:22.641268: step: 1800/527, loss: 0.09805078059434891 2023-01-23 01:28:23.784371: step: 1804/527, loss: 0.04809122160077095 2023-01-23 01:28:24.895222: step: 1808/527, loss: 0.035685352981090546 2023-01-23 01:28:26.034024: step: 1812/527, loss: 0.05141716077923775 2023-01-23 01:28:27.165277: step: 1816/527, loss: 0.21552257239818573 2023-01-23 01:28:28.285685: step: 1820/527, loss: 0.06490226089954376 2023-01-23 01:28:29.407909: step: 1824/527, loss: 0.06039810553193092 2023-01-23 01:28:30.527747: step: 1828/527, loss: 0.021173859015107155 2023-01-23 01:28:31.626638: step: 1832/527, loss: 0.06303635239601135 2023-01-23 01:28:32.730622: step: 1836/527, loss: 0.010195828042924404 2023-01-23 01:28:33.857028: step: 1840/527, loss: 0.00405507069081068 2023-01-23 01:28:35.013892: step: 1844/527, loss: 0.005920981988310814 2023-01-23 01:28:36.153696: step: 1848/527, loss: 0.36158448457717896 2023-01-23 01:28:37.284498: step: 1852/527, loss: 0.0351228229701519 2023-01-23 01:28:38.401154: step: 1856/527, loss: 0.09659843146800995 2023-01-23 01:28:39.514852: step: 1860/527, loss: 0.027343153953552246 2023-01-23 01:28:40.624694: step: 1864/527, loss: 0.04684047773480415 2023-01-23 01:28:41.741793: step: 1868/527, loss: 0.10664357990026474 2023-01-23 01:28:42.818153: step: 1872/527, loss: 4.849433753406629e-05 2023-01-23 01:28:43.932147: step: 1876/527, loss: 0.08119240403175354 2023-01-23 01:28:45.062670: step: 1880/527, loss: 0.11827459931373596 2023-01-23 01:28:46.180035: step: 1884/527, loss: 0.06516905128955841 2023-01-23 01:28:47.298384: step: 1888/527, loss: 0.2907160818576813 2023-01-23 01:28:48.393825: step: 1892/527, loss: 0.013770675286650658 2023-01-23 01:28:49.523213: step: 1896/527, loss: 0.4469691216945648 2023-01-23 01:28:50.609506: step: 1900/527, loss: 0.00012354851060081273 2023-01-23 01:28:51.678372: step: 1904/527, loss: 0.00045781134394928813 2023-01-23 01:28:52.781694: step: 1908/527, loss: 0.09767188876867294 2023-01-23 01:28:53.908711: step: 1912/527, loss: 0.10007724910974503 2023-01-23 01:28:55.066020: step: 1916/527, loss: 0.2843441963195801 2023-01-23 01:28:56.199194: step: 1920/527, loss: 0.007694816682487726 2023-01-23 01:28:57.305517: step: 1924/527, loss: 0.08538040518760681 2023-01-23 01:28:58.455969: step: 1928/527, loss: 0.05739450454711914 2023-01-23 01:28:59.549417: step: 1932/527, loss: 0.05072937160730362 2023-01-23 01:29:00.693218: step: 1936/527, loss: 0.009163284674286842 2023-01-23 01:29:01.830545: step: 1940/527, loss: 0.10809822380542755 2023-01-23 01:29:02.965960: step: 1944/527, loss: 0.042459968477487564 2023-01-23 01:29:04.081267: step: 1948/527, loss: 0.01316680945456028 2023-01-23 01:29:05.182225: step: 1952/527, loss: 0.004237270448356867 2023-01-23 01:29:06.279544: step: 1956/527, loss: 0.046647265553474426 2023-01-23 01:29:07.426597: step: 1960/527, loss: 0.10077590495347977 2023-01-23 01:29:08.536451: step: 1964/527, loss: 0.10086269676685333 2023-01-23 01:29:09.659054: step: 1968/527, loss: 0.004143047612160444 2023-01-23 01:29:10.763225: step: 1972/527, loss: 0.14455413818359375 2023-01-23 01:29:11.892067: step: 1976/527, loss: 0.10469808429479599 2023-01-23 01:29:13.030394: step: 1980/527, loss: 0.1698492020368576 2023-01-23 01:29:14.152452: step: 1984/527, loss: 0.020453739911317825 2023-01-23 01:29:15.309047: step: 1988/527, loss: 0.0641474723815918 2023-01-23 01:29:16.407176: step: 1992/527, loss: 0.03961925581097603 2023-01-23 01:29:17.503848: step: 1996/527, loss: 0.04423222318291664 2023-01-23 01:29:18.623995: step: 2000/527, loss: 0.15131813287734985 2023-01-23 01:29:19.732539: step: 2004/527, loss: 0.004822921939194202 2023-01-23 01:29:20.828352: step: 2008/527, loss: 0.03845958784222603 2023-01-23 01:29:21.952377: step: 2012/527, loss: 0.09592628479003906 2023-01-23 01:29:23.068943: step: 2016/527, loss: 0.083295539021492 2023-01-23 01:29:24.174906: step: 2020/527, loss: 0.03820948675274849 2023-01-23 01:29:25.319444: step: 2024/527, loss: 0.08798827975988388 2023-01-23 01:29:26.440190: step: 2028/527, loss: 0.13839396834373474 2023-01-23 01:29:27.562426: step: 2032/527, loss: 0.0405183807015419 2023-01-23 01:29:28.668393: step: 2036/527, loss: 0.046586133539676666 2023-01-23 01:29:29.772806: step: 2040/527, loss: 0.01942768134176731 2023-01-23 01:29:30.902095: step: 2044/527, loss: 0.015798378735780716 2023-01-23 01:29:32.037739: step: 2048/527, loss: 0.009473991580307484 2023-01-23 01:29:33.145118: step: 2052/527, loss: 0.0479372963309288 2023-01-23 01:29:34.263463: step: 2056/527, loss: 0.13619175553321838 2023-01-23 01:29:35.360441: step: 2060/527, loss: 0.3218957781791687 2023-01-23 01:29:36.491770: step: 2064/527, loss: 0.08143134415149689 2023-01-23 01:29:37.590437: step: 2068/527, loss: 0.037137579172849655 2023-01-23 01:29:38.748038: step: 2072/527, loss: 0.04535358399152756 2023-01-23 01:29:39.856508: step: 2076/527, loss: 0.08916588127613068 2023-01-23 01:29:40.989162: step: 2080/527, loss: 0.09756284207105637 2023-01-23 01:29:42.105337: step: 2084/527, loss: 0.11427002400159836 2023-01-23 01:29:43.211308: step: 2088/527, loss: 0.08184423297643661 2023-01-23 01:29:44.320605: step: 2092/527, loss: 0.009721899405121803 2023-01-23 01:29:45.436599: step: 2096/527, loss: 0.06943559646606445 2023-01-23 01:29:46.533309: step: 2100/527, loss: 0.06615275889635086 2023-01-23 01:29:47.663519: step: 2104/527, loss: 0.05557527393102646 2023-01-23 01:29:48.768302: step: 2108/527, loss: 0.10674263536930084 ================================================== Loss: 0.072 -------------------- Dev: {'event': {'p': 0.6122004357298475, 'r': 0.748335552596538, 'f1': 0.6734571599760335}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Test: {'event': {'p': 0.652636671504596, 'r': 0.7708571428571429, 'f1': 0.706837830757139}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Chinese: {'event': {'p': 0.5769230769230769, 'r': 0.8333333333333334, 'f1': 0.6818181818181818}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Korean: {'event': {'p': 0.603448275862069, 'r': 0.5555555555555556, 'f1': 0.5785123966942148}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Russian: {'event': {'p': 0.5833333333333334, 'r': 0.5833333333333334, 'f1': 0.5833333333333334}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Eng Test for Korean: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 12 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:30:30.025574: step: 4/527, loss: 0.0090796472504735 2023-01-23 01:30:31.135985: step: 8/527, loss: 1.1015418767929077 2023-01-23 01:30:32.249230: step: 12/527, loss: 0.038718223571777344 2023-01-23 01:30:33.360081: step: 16/527, loss: 0.014827347360551357 2023-01-23 01:30:34.457373: step: 20/527, loss: 0.04376039654016495 2023-01-23 01:30:35.924997: step: 24/527, loss: 0.22693099081516266 2023-01-23 01:30:37.030946: step: 28/527, loss: 0.08030052483081818 2023-01-23 01:30:38.150963: step: 32/527, loss: 0.08119454979896545 2023-01-23 01:30:39.258611: step: 36/527, loss: 0.007630682084709406 2023-01-23 01:30:40.357303: step: 40/527, loss: 0.024507999420166016 2023-01-23 01:30:41.487945: step: 44/527, loss: 0.04251156002283096 2023-01-23 01:30:42.593004: step: 48/527, loss: 0.0012950897216796875 2023-01-23 01:30:43.725441: step: 52/527, loss: 0.009382450953125954 2023-01-23 01:30:44.860397: step: 56/527, loss: 0.07246322929859161 2023-01-23 01:30:45.952222: step: 60/527, loss: 0.1203889399766922 2023-01-23 01:30:47.057264: step: 64/527, loss: 0.050376035273075104 2023-01-23 01:30:48.174965: step: 68/527, loss: 0.07015237957239151 2023-01-23 01:30:49.309701: step: 72/527, loss: 0.029198456555604935 2023-01-23 01:30:50.436328: step: 76/527, loss: 0.03815422207117081 2023-01-23 01:30:51.514883: step: 80/527, loss: 0.06336124241352081 2023-01-23 01:30:52.618796: step: 84/527, loss: 0.06620607525110245 2023-01-23 01:30:53.739699: step: 88/527, loss: 0.023171139881014824 2023-01-23 01:30:54.863658: step: 92/527, loss: 0.1522786170244217 2023-01-23 01:30:56.027227: step: 96/527, loss: 0.025907421484589577 2023-01-23 01:30:57.166742: step: 100/527, loss: 0.020841384306550026 2023-01-23 01:30:58.266385: step: 104/527, loss: 0.019943714141845703 2023-01-23 01:30:59.371053: step: 108/527, loss: 0.05508747324347496 2023-01-23 01:31:00.479754: step: 112/527, loss: 0.01639232598245144 2023-01-23 01:31:01.596311: step: 116/527, loss: 0.006540966220200062 2023-01-23 01:31:02.700888: step: 120/527, loss: 0.006244754884392023 2023-01-23 01:31:03.825614: step: 124/527, loss: 0.3628656268119812 2023-01-23 01:31:04.939630: step: 128/527, loss: 0.007829857058823109 2023-01-23 01:31:06.134007: step: 132/527, loss: 0.015318060293793678 2023-01-23 01:31:07.247650: step: 136/527, loss: 0.0073646544478833675 2023-01-23 01:31:08.377596: step: 140/527, loss: 0.04042835161089897 2023-01-23 01:31:09.494648: step: 144/527, loss: 0.04568615183234215 2023-01-23 01:31:10.574107: step: 148/527, loss: 0.004329228773713112 2023-01-23 01:31:11.690530: step: 152/527, loss: 0.03024768829345703 2023-01-23 01:31:12.830007: step: 156/527, loss: 0.07341212779283524 2023-01-23 01:31:13.942253: step: 160/527, loss: 0.034290507435798645 2023-01-23 01:31:15.048936: step: 164/527, loss: 0.0008838654030114412 2023-01-23 01:31:16.150044: step: 168/527, loss: 0.024438858032226562 2023-01-23 01:31:17.257251: step: 172/527, loss: 0.017620373517274857 2023-01-23 01:31:18.361865: step: 176/527, loss: 0.0392850898206234 2023-01-23 01:31:19.489621: step: 180/527, loss: 0.0028553963638842106 2023-01-23 01:31:20.611107: step: 184/527, loss: 0.0015016555553302169 2023-01-23 01:31:21.746140: step: 188/527, loss: 0.016286849975585938 2023-01-23 01:31:22.847119: step: 192/527, loss: 0.16449928283691406 2023-01-23 01:31:23.971378: step: 196/527, loss: 0.023264314979314804 2023-01-23 01:31:25.074645: step: 200/527, loss: 0.0547269843518734 2023-01-23 01:31:26.223859: step: 204/527, loss: 0.014321994967758656 2023-01-23 01:31:27.350680: step: 208/527, loss: 0.04382476955652237 2023-01-23 01:31:28.478196: step: 212/527, loss: 0.045938681811094284 2023-01-23 01:31:29.625939: step: 216/527, loss: 0.019191646948456764 2023-01-23 01:31:30.766475: step: 220/527, loss: 0.045766159892082214 2023-01-23 01:31:31.872925: step: 224/527, loss: 0.20786495506763458 2023-01-23 01:31:32.986895: step: 228/527, loss: 0.326227605342865 2023-01-23 01:31:34.092939: step: 232/527, loss: 0.026268385350704193 2023-01-23 01:31:35.197906: step: 236/527, loss: 0.015949726104736328 2023-01-23 01:31:36.315431: step: 240/527, loss: 0.03268437460064888 2023-01-23 01:31:37.432129: step: 244/527, loss: 0.01395649928599596 2023-01-23 01:31:38.528699: step: 248/527, loss: 0.04610037803649902 2023-01-23 01:31:39.669802: step: 252/527, loss: 0.047167327255010605 2023-01-23 01:31:40.794241: step: 256/527, loss: 0.34132567048072815 2023-01-23 01:31:41.932054: step: 260/527, loss: 0.06189899146556854 2023-01-23 01:31:43.046340: step: 264/527, loss: 0.0043406011536717415 2023-01-23 01:31:44.141720: step: 268/527, loss: 0.007656860630959272 2023-01-23 01:31:45.268875: step: 272/527, loss: 0.07890200614929199 2023-01-23 01:31:46.406069: step: 276/527, loss: 0.015041542239487171 2023-01-23 01:31:47.491742: step: 280/527, loss: 0.018963146954774857 2023-01-23 01:31:48.645969: step: 284/527, loss: 0.06734047830104828 2023-01-23 01:31:49.759488: step: 288/527, loss: 0.0008373260498046875 2023-01-23 01:31:50.861704: step: 292/527, loss: 0.03211965784430504 2023-01-23 01:31:51.971842: step: 296/527, loss: 0.04786090925335884 2023-01-23 01:31:53.108615: step: 300/527, loss: 0.06224679946899414 2023-01-23 01:31:54.238447: step: 304/527, loss: 0.0392850898206234 2023-01-23 01:31:55.350834: step: 308/527, loss: 0.012391472235321999 2023-01-23 01:31:56.462019: step: 312/527, loss: 0.015016973949968815 2023-01-23 01:31:57.581350: step: 316/527, loss: 0.04218854755163193 2023-01-23 01:31:58.679446: step: 320/527, loss: 0.06763195991516113 2023-01-23 01:31:59.790897: step: 324/527, loss: 0.04276075214147568 2023-01-23 01:32:00.931555: step: 328/527, loss: 0.06996660679578781 2023-01-23 01:32:02.056397: step: 332/527, loss: 0.004106283187866211 2023-01-23 01:32:03.195002: step: 336/527, loss: 0.004105043597519398 2023-01-23 01:32:04.311959: step: 340/527, loss: 0.10584679245948792 2023-01-23 01:32:05.428599: step: 344/527, loss: 0.6630697250366211 2023-01-23 01:32:06.562262: step: 348/527, loss: 0.06550198048353195 2023-01-23 01:32:07.694898: step: 352/527, loss: 0.013426780700683594 2023-01-23 01:32:08.787683: step: 356/527, loss: 0.047193121165037155 2023-01-23 01:32:09.904350: step: 360/527, loss: 0.015097999945282936 2023-01-23 01:32:11.019316: step: 364/527, loss: 0.010131550021469593 2023-01-23 01:32:12.118426: step: 368/527, loss: 0.059766773134469986 2023-01-23 01:32:13.226886: step: 372/527, loss: 0.04051628336310387 2023-01-23 01:32:14.345616: step: 376/527, loss: 0.11368946731090546 2023-01-23 01:32:15.495093: step: 380/527, loss: 0.010639620013535023 2023-01-23 01:32:16.628316: step: 384/527, loss: 0.05690011754631996 2023-01-23 01:32:17.748042: step: 388/527, loss: 0.02503385581076145 2023-01-23 01:32:18.861116: step: 392/527, loss: 0.030107783153653145 2023-01-23 01:32:20.014545: step: 396/527, loss: 0.0432429313659668 2023-01-23 01:32:21.155196: step: 400/527, loss: 0.062355659902095795 2023-01-23 01:32:22.265648: step: 404/527, loss: 0.004106044769287109 2023-01-23 01:32:23.407410: step: 408/527, loss: 0.0394282341003418 2023-01-23 01:32:24.492330: step: 412/527, loss: 0.016356086358428 2023-01-23 01:32:25.600258: step: 416/527, loss: 0.02437877655029297 2023-01-23 01:32:26.720691: step: 420/527, loss: 0.06344123184680939 2023-01-23 01:32:27.838944: step: 424/527, loss: 0.008895492181181908 2023-01-23 01:32:28.929419: step: 428/527, loss: 0.018272925168275833 2023-01-23 01:32:30.025079: step: 432/527, loss: 0.010533380322158337 2023-01-23 01:32:31.115567: step: 436/527, loss: 0.006784630008041859 2023-01-23 01:32:32.226969: step: 440/527, loss: 0.06552910804748535 2023-01-23 01:32:33.347425: step: 444/527, loss: 0.030863476917147636 2023-01-23 01:32:34.471202: step: 448/527, loss: 0.0025686263106763363 2023-01-23 01:32:35.603183: step: 452/527, loss: 0.025992775335907936 2023-01-23 01:32:36.733655: step: 456/527, loss: 0.06317071616649628 2023-01-23 01:32:37.856339: step: 460/527, loss: 0.03235797956585884 2023-01-23 01:32:38.991878: step: 464/527, loss: 0.004155921749770641 2023-01-23 01:32:40.115081: step: 468/527, loss: 0.007954597473144531 2023-01-23 01:32:41.226076: step: 472/527, loss: 0.005334401037544012 2023-01-23 01:32:42.344158: step: 476/527, loss: 0.015418529510498047 2023-01-23 01:32:43.504118: step: 480/527, loss: 0.05910225212574005 2023-01-23 01:32:44.618587: step: 484/527, loss: 0.03519077226519585 2023-01-23 01:32:45.696007: step: 488/527, loss: 0.009540462866425514 2023-01-23 01:32:46.854452: step: 492/527, loss: 0.03551778569817543 2023-01-23 01:32:47.962745: step: 496/527, loss: 0.010079383850097656 2023-01-23 01:32:49.081112: step: 500/527, loss: 0.03679781034588814 2023-01-23 01:32:50.207903: step: 504/527, loss: 0.10027079284191132 2023-01-23 01:32:51.332560: step: 508/527, loss: 0.05423159524798393 2023-01-23 01:32:52.447317: step: 512/527, loss: 0.11605750024318695 2023-01-23 01:32:53.550430: step: 516/527, loss: 0.03766822814941406 2023-01-23 01:32:54.653681: step: 520/527, loss: 0.01925363577902317 2023-01-23 01:32:55.780293: step: 524/527, loss: 0.037119291722774506 2023-01-23 01:32:56.892528: step: 528/527, loss: 0.0038480341900140047 2023-01-23 01:32:57.999557: step: 532/527, loss: 0.11628389358520508 2023-01-23 01:32:59.139264: step: 536/527, loss: 0.01611914671957493 2023-01-23 01:33:00.264397: step: 540/527, loss: 0.012683678418397903 2023-01-23 01:33:01.361616: step: 544/527, loss: 0.09335164725780487 2023-01-23 01:33:02.464907: step: 548/527, loss: 0.001932764076627791 2023-01-23 01:33:03.565688: step: 552/527, loss: 0.007652664091438055 2023-01-23 01:33:04.686186: step: 556/527, loss: 0.009312629699707031 2023-01-23 01:33:05.796259: step: 560/527, loss: 0.033277321606874466 2023-01-23 01:33:06.889388: step: 564/527, loss: 0.03681612014770508 2023-01-23 01:33:08.036257: step: 568/527, loss: 0.14951543509960175 2023-01-23 01:33:09.145620: step: 572/527, loss: 0.03504066541790962 2023-01-23 01:33:10.241694: step: 576/527, loss: 0.032768476754426956 2023-01-23 01:33:11.344374: step: 580/527, loss: 0.02105121500790119 2023-01-23 01:33:12.461816: step: 584/527, loss: 0.01645069196820259 2023-01-23 01:33:13.598758: step: 588/527, loss: 0.01897439919412136 2023-01-23 01:33:14.705300: step: 592/527, loss: 0.07848014682531357 2023-01-23 01:33:15.812950: step: 596/527, loss: 0.03618254512548447 2023-01-23 01:33:16.920898: step: 600/527, loss: 0.004900741390883923 2023-01-23 01:33:18.064463: step: 604/527, loss: 0.015542412176728249 2023-01-23 01:33:19.198345: step: 608/527, loss: 0.028797341510653496 2023-01-23 01:33:20.306204: step: 612/527, loss: 0.1684170812368393 2023-01-23 01:33:21.415520: step: 616/527, loss: 0.13710841536521912 2023-01-23 01:33:22.539885: step: 620/527, loss: 0.08882565796375275 2023-01-23 01:33:23.673849: step: 624/527, loss: 0.03710470348596573 2023-01-23 01:33:24.804635: step: 628/527, loss: 0.039847325533628464 2023-01-23 01:33:25.938911: step: 632/527, loss: 0.06346473842859268 2023-01-23 01:33:27.065203: step: 636/527, loss: 0.1964806616306305 2023-01-23 01:33:28.144991: step: 640/527, loss: 0.025510216131806374 2023-01-23 01:33:29.299589: step: 644/527, loss: 0.2508466839790344 2023-01-23 01:33:30.393540: step: 648/527, loss: 0.019208097830414772 2023-01-23 01:33:31.542750: step: 652/527, loss: 0.02165374904870987 2023-01-23 01:33:32.676041: step: 656/527, loss: 0.024995042011141777 2023-01-23 01:33:33.829572: step: 660/527, loss: 0.0688634142279625 2023-01-23 01:33:34.941113: step: 664/527, loss: 0.06251154094934464 2023-01-23 01:33:36.051245: step: 668/527, loss: 0.002140820026397705 2023-01-23 01:33:37.178828: step: 672/527, loss: 0.568588376045227 2023-01-23 01:33:38.308367: step: 676/527, loss: 0.012835502624511719 2023-01-23 01:33:39.427850: step: 680/527, loss: 0.01817493513226509 2023-01-23 01:33:40.588682: step: 684/527, loss: 0.009250259958207607 2023-01-23 01:33:41.691168: step: 688/527, loss: 0.00530743645504117 2023-01-23 01:33:42.827025: step: 692/527, loss: 0.01853466033935547 2023-01-23 01:33:43.925298: step: 696/527, loss: 0.020940016955137253 2023-01-23 01:33:45.052305: step: 700/527, loss: 0.033652354031801224 2023-01-23 01:33:46.183188: step: 704/527, loss: 0.014206647872924805 2023-01-23 01:33:47.303973: step: 708/527, loss: 0.01654529571533203 2023-01-23 01:33:48.424122: step: 712/527, loss: 0.06393428146839142 2023-01-23 01:33:49.564267: step: 716/527, loss: 0.04209652170538902 2023-01-23 01:33:50.701781: step: 720/527, loss: 0.008309269323945045 2023-01-23 01:33:51.826453: step: 724/527, loss: 0.1493232101202011 2023-01-23 01:33:52.929180: step: 728/527, loss: 0.08231983333826065 2023-01-23 01:33:54.046142: step: 732/527, loss: 0.006633186247199774 2023-01-23 01:33:55.187088: step: 736/527, loss: 0.04080267250537872 2023-01-23 01:33:56.285455: step: 740/527, loss: 0.0752757117152214 2023-01-23 01:33:57.407680: step: 744/527, loss: 0.05169324949383736 2023-01-23 01:33:58.521883: step: 748/527, loss: 0.19992037117481232 2023-01-23 01:33:59.643631: step: 752/527, loss: 0.00885772705078125 2023-01-23 01:34:00.737038: step: 756/527, loss: 0.04186449199914932 2023-01-23 01:34:01.833861: step: 760/527, loss: 0.0403934046626091 2023-01-23 01:34:02.944333: step: 764/527, loss: 0.07358036190271378 2023-01-23 01:34:04.022854: step: 768/527, loss: 0.060792163014411926 2023-01-23 01:34:05.146914: step: 772/527, loss: 0.1698535829782486 2023-01-23 01:34:06.273632: step: 776/527, loss: 0.06591920554637909 2023-01-23 01:34:07.371683: step: 780/527, loss: 0.03862934187054634 2023-01-23 01:34:08.460890: step: 784/527, loss: 0.03184995800256729 2023-01-23 01:34:09.596635: step: 788/527, loss: 0.17610104382038116 2023-01-23 01:34:10.704307: step: 792/527, loss: 0.0256805419921875 2023-01-23 01:34:11.851532: step: 796/527, loss: 0.05252866819500923 2023-01-23 01:34:12.993766: step: 800/527, loss: 0.06755819916725159 2023-01-23 01:34:14.095980: step: 804/527, loss: 0.028908347710967064 2023-01-23 01:34:15.246119: step: 808/527, loss: 0.05783233791589737 2023-01-23 01:34:16.381674: step: 812/527, loss: 0.005163860507309437 2023-01-23 01:34:17.491971: step: 816/527, loss: 0.047847796231508255 2023-01-23 01:34:18.579269: step: 820/527, loss: 0.3479452133178711 2023-01-23 01:34:19.737990: step: 824/527, loss: 0.03642444685101509 2023-01-23 01:34:20.853938: step: 828/527, loss: 0.03324108198285103 2023-01-23 01:34:21.958430: step: 832/527, loss: 0.06421055644750595 2023-01-23 01:34:23.063684: step: 836/527, loss: 0.01081104390323162 2023-01-23 01:34:24.191096: step: 840/527, loss: 0.012990022078156471 2023-01-23 01:34:25.297730: step: 844/527, loss: 0.02893071249127388 2023-01-23 01:34:26.445763: step: 848/527, loss: 0.2745036482810974 2023-01-23 01:34:27.569934: step: 852/527, loss: 0.06306762993335724 2023-01-23 01:34:28.668964: step: 856/527, loss: 0.05304155498743057 2023-01-23 01:34:29.790693: step: 860/527, loss: 0.009493160992860794 2023-01-23 01:34:30.922310: step: 864/527, loss: 0.05348839983344078 2023-01-23 01:34:32.024734: step: 868/527, loss: 0.015718460083007812 2023-01-23 01:34:33.134848: step: 872/527, loss: 0.04415760189294815 2023-01-23 01:34:34.257477: step: 876/527, loss: 0.030023908242583275 2023-01-23 01:34:35.380817: step: 880/527, loss: 0.01096811331808567 2023-01-23 01:34:36.483860: step: 884/527, loss: 0.0032626152969896793 2023-01-23 01:34:37.606074: step: 888/527, loss: 0.06071672588586807 2023-01-23 01:34:38.724358: step: 892/527, loss: 0.1628277748823166 2023-01-23 01:34:39.812105: step: 896/527, loss: 0.0060142045840620995 2023-01-23 01:34:40.949985: step: 900/527, loss: 0.04828405752778053 2023-01-23 01:34:42.083166: step: 904/527, loss: 0.030677415430545807 2023-01-23 01:34:43.205859: step: 908/527, loss: 0.05100831016898155 2023-01-23 01:34:44.325676: step: 912/527, loss: 0.007142952177673578 2023-01-23 01:34:45.438446: step: 916/527, loss: 0.015167188830673695 2023-01-23 01:34:46.568556: step: 920/527, loss: 0.028451919555664062 2023-01-23 01:34:47.703560: step: 924/527, loss: 0.01021499652415514 2023-01-23 01:34:48.796000: step: 928/527, loss: 0.06605949252843857 2023-01-23 01:34:49.903976: step: 932/527, loss: 0.0361669547855854 2023-01-23 01:34:51.007265: step: 936/527, loss: 0.0746254026889801 2023-01-23 01:34:52.143978: step: 940/527, loss: 0.08664393424987793 2023-01-23 01:34:53.234725: step: 944/527, loss: 0.04396582022309303 2023-01-23 01:34:54.345149: step: 948/527, loss: 0.09045977890491486 2023-01-23 01:34:55.450080: step: 952/527, loss: 0.024817848578095436 2023-01-23 01:34:56.543732: step: 956/527, loss: 0.39009198546409607 2023-01-23 01:34:57.618440: step: 960/527, loss: 0.046346187591552734 2023-01-23 01:34:58.734752: step: 964/527, loss: 0.05146045982837677 2023-01-23 01:34:59.842605: step: 968/527, loss: 0.040400173515081406 2023-01-23 01:35:00.963753: step: 972/527, loss: 0.020724773406982422 2023-01-23 01:35:02.083912: step: 976/527, loss: 0.014262771233916283 2023-01-23 01:35:03.192969: step: 980/527, loss: 0.00963220652192831 2023-01-23 01:35:04.303967: step: 984/527, loss: 0.0022412778344005346 2023-01-23 01:35:05.436096: step: 988/527, loss: 0.030391409993171692 2023-01-23 01:35:06.571941: step: 992/527, loss: 0.07612724602222443 2023-01-23 01:35:07.698046: step: 996/527, loss: 0.02101168781518936 2023-01-23 01:35:08.862464: step: 1000/527, loss: 0.0004650593036785722 2023-01-23 01:35:09.997498: step: 1004/527, loss: 0.1222929060459137 2023-01-23 01:35:11.131531: step: 1008/527, loss: 0.016438627615571022 2023-01-23 01:35:12.268655: step: 1012/527, loss: 0.04703950881958008 2023-01-23 01:35:13.393811: step: 1016/527, loss: 0.23692750930786133 2023-01-23 01:35:14.482981: step: 1020/527, loss: 0.03495221212506294 2023-01-23 01:35:15.588706: step: 1024/527, loss: 0.04385042190551758 2023-01-23 01:35:16.700267: step: 1028/527, loss: 0.06741132587194443 2023-01-23 01:35:17.823443: step: 1032/527, loss: 0.014417458325624466 2023-01-23 01:35:18.945740: step: 1036/527, loss: 0.0696842223405838 2023-01-23 01:35:20.059779: step: 1040/527, loss: 0.0762234702706337 2023-01-23 01:35:21.166462: step: 1044/527, loss: 0.026877976953983307 2023-01-23 01:35:22.260373: step: 1048/527, loss: 0.5681314468383789 2023-01-23 01:35:23.353218: step: 1052/527, loss: 0.027744673192501068 2023-01-23 01:35:24.476158: step: 1056/527, loss: 0.033165834844112396 2023-01-23 01:35:25.604628: step: 1060/527, loss: 0.04122314229607582 2023-01-23 01:35:26.718835: step: 1064/527, loss: 0.11802501231431961 2023-01-23 01:35:27.824143: step: 1068/527, loss: 0.058196358382701874 2023-01-23 01:35:28.943476: step: 1072/527, loss: 0.03922281414270401 2023-01-23 01:35:30.072250: step: 1076/527, loss: 0.07330923527479172 2023-01-23 01:35:31.188297: step: 1080/527, loss: 0.07492885738611221 2023-01-23 01:35:32.292745: step: 1084/527, loss: 0.048386767506599426 2023-01-23 01:35:33.397850: step: 1088/527, loss: 0.0008638381841592491 2023-01-23 01:35:34.549241: step: 1092/527, loss: 0.01000371016561985 2023-01-23 01:35:35.672850: step: 1096/527, loss: 0.060792066156864166 2023-01-23 01:35:36.840464: step: 1100/527, loss: 0.08706007152795792 2023-01-23 01:35:37.972454: step: 1104/527, loss: 0.05498151481151581 2023-01-23 01:35:39.061522: step: 1108/527, loss: 0.04267864301800728 2023-01-23 01:35:40.182705: step: 1112/527, loss: 0.07260684669017792 2023-01-23 01:35:41.287035: step: 1116/527, loss: 0.07278890907764435 2023-01-23 01:35:42.414186: step: 1120/527, loss: 0.9936366677284241 2023-01-23 01:35:43.514877: step: 1124/527, loss: 0.0083169462159276 2023-01-23 01:35:44.653456: step: 1128/527, loss: 0.02300419844686985 2023-01-23 01:35:45.752385: step: 1132/527, loss: 0.08628582954406738 2023-01-23 01:35:46.909945: step: 1136/527, loss: 0.11872711032629013 2023-01-23 01:35:47.982187: step: 1140/527, loss: 0.004105567932128906 2023-01-23 01:35:49.103553: step: 1144/527, loss: 0.011553764343261719 2023-01-23 01:35:50.259115: step: 1148/527, loss: 0.12733431160449982 2023-01-23 01:35:51.365376: step: 1152/527, loss: 0.0121329789981246 2023-01-23 01:35:52.466634: step: 1156/527, loss: 0.07633152604103088 2023-01-23 01:35:53.571139: step: 1160/527, loss: 0.01623370125889778 2023-01-23 01:35:54.679269: step: 1164/527, loss: 0.013297795318067074 2023-01-23 01:35:55.789344: step: 1168/527, loss: 0.019066954031586647 2023-01-23 01:35:56.888893: step: 1172/527, loss: 0.033713530749082565 2023-01-23 01:35:58.008592: step: 1176/527, loss: 0.08863174170255661 2023-01-23 01:35:59.159920: step: 1180/527, loss: 0.059471987187862396 2023-01-23 01:36:00.322222: step: 1184/527, loss: 0.038346003741025925 2023-01-23 01:36:01.437985: step: 1188/527, loss: 0.8626817464828491 2023-01-23 01:36:02.555471: step: 1192/527, loss: 0.014910697937011719 2023-01-23 01:36:03.665319: step: 1196/527, loss: 0.008986949920654297 2023-01-23 01:36:04.792034: step: 1200/527, loss: 0.02667694166302681 2023-01-23 01:36:05.899699: step: 1204/527, loss: 0.01373071689158678 2023-01-23 01:36:07.038729: step: 1208/527, loss: 0.0757112056016922 2023-01-23 01:36:08.147293: step: 1212/527, loss: 0.6679500937461853 2023-01-23 01:36:09.271480: step: 1216/527, loss: 0.06474065780639648 2023-01-23 01:36:10.367648: step: 1220/527, loss: 0.005532789509743452 2023-01-23 01:36:11.487828: step: 1224/527, loss: 0.002811431884765625 2023-01-23 01:36:12.608699: step: 1228/527, loss: 0.04421323910355568 2023-01-23 01:36:13.743075: step: 1232/527, loss: 0.01188964769244194 2023-01-23 01:36:14.854544: step: 1236/527, loss: 0.029691221192479134 2023-01-23 01:36:15.949905: step: 1240/527, loss: 0.0054779052734375 2023-01-23 01:36:17.122337: step: 1244/527, loss: 0.9309937357902527 2023-01-23 01:36:18.213217: step: 1248/527, loss: 0.06322555989027023 2023-01-23 01:36:19.342402: step: 1252/527, loss: 0.01869661919772625 2023-01-23 01:36:20.438468: step: 1256/527, loss: 0.05436287075281143 2023-01-23 01:36:21.563764: step: 1260/527, loss: 0.5027929544448853 2023-01-23 01:36:22.674569: step: 1264/527, loss: 0.0004455566522665322 2023-01-23 01:36:23.778680: step: 1268/527, loss: 0.02612585946917534 2023-01-23 01:36:24.958132: step: 1272/527, loss: 0.1235879436135292 2023-01-23 01:36:26.054696: step: 1276/527, loss: 0.006830072030425072 2023-01-23 01:36:27.161335: step: 1280/527, loss: 0.006117630284279585 2023-01-23 01:36:28.299742: step: 1284/527, loss: 0.032392311841249466 2023-01-23 01:36:29.377456: step: 1288/527, loss: 0.08437366783618927 2023-01-23 01:36:30.532732: step: 1292/527, loss: 0.014499664306640625 2023-01-23 01:36:31.671479: step: 1296/527, loss: 0.024416828528046608 2023-01-23 01:36:32.787209: step: 1300/527, loss: 0.08685169368982315 2023-01-23 01:36:33.895298: step: 1304/527, loss: 0.0749754011631012 2023-01-23 01:36:35.019465: step: 1308/527, loss: 0.06223135069012642 2023-01-23 01:36:36.140116: step: 1312/527, loss: 0.019977666437625885 2023-01-23 01:36:37.227159: step: 1316/527, loss: 0.04885587841272354 2023-01-23 01:36:38.347156: step: 1320/527, loss: 0.09427537769079208 2023-01-23 01:36:39.444834: step: 1324/527, loss: 0.01026086788624525 2023-01-23 01:36:40.532496: step: 1328/527, loss: 0.003559684846550226 2023-01-23 01:36:41.629938: step: 1332/527, loss: 0.02853412553668022 2023-01-23 01:36:42.754792: step: 1336/527, loss: 0.04844551160931587 2023-01-23 01:36:43.857701: step: 1340/527, loss: 0.06234912946820259 2023-01-23 01:36:44.981010: step: 1344/527, loss: 0.09103145450353622 2023-01-23 01:36:46.087272: step: 1348/527, loss: 0.12149371951818466 2023-01-23 01:36:47.170737: step: 1352/527, loss: 0.011648845858871937 2023-01-23 01:36:48.257884: step: 1356/527, loss: 0.024466516450047493 2023-01-23 01:36:49.381598: step: 1360/527, loss: 0.0091400146484375 2023-01-23 01:36:50.494836: step: 1364/527, loss: 0.008623885922133923 2023-01-23 01:36:51.612503: step: 1368/527, loss: 0.0035786153748631477 2023-01-23 01:36:52.718153: step: 1372/527, loss: 0.05435457453131676 2023-01-23 01:36:53.825653: step: 1376/527, loss: 0.02626619301736355 2023-01-23 01:36:54.952128: step: 1380/527, loss: 0.0870644599199295 2023-01-23 01:36:56.075549: step: 1384/527, loss: 0.016962861642241478 2023-01-23 01:36:57.193199: step: 1388/527, loss: 0.03856506198644638 2023-01-23 01:36:58.328902: step: 1392/527, loss: 0.2629574239253998 2023-01-23 01:36:59.427825: step: 1396/527, loss: 0.01939401775598526 2023-01-23 01:37:00.526699: step: 1400/527, loss: 0.004716205410659313 2023-01-23 01:37:01.668838: step: 1404/527, loss: 0.07821989059448242 2023-01-23 01:37:02.773732: step: 1408/527, loss: 0.0006430625799112022 2023-01-23 01:37:03.879988: step: 1412/527, loss: 0.018964242190122604 2023-01-23 01:37:04.979665: step: 1416/527, loss: 0.04407491534948349 2023-01-23 01:37:06.093381: step: 1420/527, loss: 0.0819365456700325 2023-01-23 01:37:07.201125: step: 1424/527, loss: 0.06758375465869904 2023-01-23 01:37:08.332605: step: 1428/527, loss: 0.0044998168013989925 2023-01-23 01:37:09.426925: step: 1432/527, loss: 0.5633466243743896 2023-01-23 01:37:10.550135: step: 1436/527, loss: 0.06056986004114151 2023-01-23 01:37:11.672772: step: 1440/527, loss: 0.0574704185128212 2023-01-23 01:37:12.763159: step: 1444/527, loss: 0.23752641677856445 2023-01-23 01:37:13.901640: step: 1448/527, loss: 0.4880952537059784 2023-01-23 01:37:15.028195: step: 1452/527, loss: 0.04465198516845703 2023-01-23 01:37:16.145222: step: 1456/527, loss: 0.004951906390488148 2023-01-23 01:37:17.272977: step: 1460/527, loss: 0.010083580389618874 2023-01-23 01:37:18.396865: step: 1464/527, loss: 0.04278833791613579 2023-01-23 01:37:19.520993: step: 1468/527, loss: 0.049361709505319595 2023-01-23 01:37:20.620939: step: 1472/527, loss: 0.004602623172104359 2023-01-23 01:37:21.714867: step: 1476/527, loss: 0.04504900053143501 2023-01-23 01:37:22.833226: step: 1480/527, loss: 0.05486660078167915 2023-01-23 01:37:23.913676: step: 1484/527, loss: 0.04359569400548935 2023-01-23 01:37:25.041610: step: 1488/527, loss: 0.09672889858484268 2023-01-23 01:37:26.166711: step: 1492/527, loss: 0.06715402752161026 2023-01-23 01:37:27.282933: step: 1496/527, loss: 0.030863190069794655 2023-01-23 01:37:28.393938: step: 1500/527, loss: 0.042561959475278854 2023-01-23 01:37:29.522747: step: 1504/527, loss: 0.25667861104011536 2023-01-23 01:37:30.622010: step: 1508/527, loss: 0.03336620330810547 2023-01-23 01:37:31.739939: step: 1512/527, loss: 0.03314628452062607 2023-01-23 01:37:32.857521: step: 1516/527, loss: 0.020035363733768463 2023-01-23 01:37:33.946087: step: 1520/527, loss: 0.026305008679628372 2023-01-23 01:37:35.043253: step: 1524/527, loss: 0.029089193791151047 2023-01-23 01:37:36.154003: step: 1528/527, loss: 0.02731633372604847 2023-01-23 01:37:37.264921: step: 1532/527, loss: 0.04406256601214409 2023-01-23 01:37:38.369296: step: 1536/527, loss: 0.07775793224573135 2023-01-23 01:37:39.492683: step: 1540/527, loss: 0.06643657386302948 2023-01-23 01:37:40.596997: step: 1544/527, loss: 0.012332677841186523 2023-01-23 01:37:41.730557: step: 1548/527, loss: 0.012622261419892311 2023-01-23 01:37:42.848188: step: 1552/527, loss: 0.08494973182678223 2023-01-23 01:37:43.970460: step: 1556/527, loss: 0.10484905540943146 2023-01-23 01:37:45.090304: step: 1560/527, loss: 0.020217323675751686 2023-01-23 01:37:46.213001: step: 1564/527, loss: 0.027220916002988815 2023-01-23 01:37:47.347067: step: 1568/527, loss: 0.05368633568286896 2023-01-23 01:37:48.482896: step: 1572/527, loss: 0.04664745181798935 2023-01-23 01:37:49.597029: step: 1576/527, loss: 0.024153614416718483 2023-01-23 01:37:50.695482: step: 1580/527, loss: 0.010813355445861816 2023-01-23 01:37:51.796036: step: 1584/527, loss: 0.028812408447265625 2023-01-23 01:37:52.899184: step: 1588/527, loss: 0.05734281614422798 2023-01-23 01:37:54.005423: step: 1592/527, loss: 0.03375907242298126 2023-01-23 01:37:55.099991: step: 1596/527, loss: 0.014395713806152344 2023-01-23 01:37:56.201307: step: 1600/527, loss: 0.03240685537457466 2023-01-23 01:37:57.294050: step: 1604/527, loss: 0.7524027824401855 2023-01-23 01:37:58.418379: step: 1608/527, loss: 0.00562329264357686 2023-01-23 01:37:59.570799: step: 1612/527, loss: 0.06875276565551758 2023-01-23 01:38:00.693893: step: 1616/527, loss: 0.061720944941043854 2023-01-23 01:38:01.805937: step: 1620/527, loss: 0.022563554346561432 2023-01-23 01:38:02.933701: step: 1624/527, loss: 0.07205381244421005 2023-01-23 01:38:04.062937: step: 1628/527, loss: 0.029694175347685814 2023-01-23 01:38:05.202250: step: 1632/527, loss: 0.07472991943359375 2023-01-23 01:38:06.328469: step: 1636/527, loss: 0.054024696350097656 2023-01-23 01:38:07.413863: step: 1640/527, loss: 0.0029277324210852385 2023-01-23 01:38:08.530069: step: 1644/527, loss: 0.0031118392944335938 2023-01-23 01:38:09.649121: step: 1648/527, loss: 0.08872871100902557 2023-01-23 01:38:10.755083: step: 1652/527, loss: 0.08338575810194016 2023-01-23 01:38:11.894192: step: 1656/527, loss: 0.020903684198856354 2023-01-23 01:38:13.037143: step: 1660/527, loss: 0.016211891546845436 2023-01-23 01:38:14.129420: step: 1664/527, loss: 0.029464852064847946 2023-01-23 01:38:15.291864: step: 1668/527, loss: 0.2940831184387207 2023-01-23 01:38:16.414752: step: 1672/527, loss: 1.0269176959991455 2023-01-23 01:38:17.518715: step: 1676/527, loss: 0.03849220275878906 2023-01-23 01:38:18.649336: step: 1680/527, loss: 0.018545055761933327 2023-01-23 01:38:19.757729: step: 1684/527, loss: 0.0028781890869140625 2023-01-23 01:38:20.853906: step: 1688/527, loss: 0.07716947048902512 2023-01-23 01:38:21.959937: step: 1692/527, loss: 0.01875319518148899 2023-01-23 01:38:23.079672: step: 1696/527, loss: 0.005181122105568647 2023-01-23 01:38:24.181694: step: 1700/527, loss: 0.04525575786828995 2023-01-23 01:38:25.288229: step: 1704/527, loss: 0.02023792453110218 2023-01-23 01:38:26.428146: step: 1708/527, loss: 0.026078414171934128 2023-01-23 01:38:27.578766: step: 1712/527, loss: 0.04098577797412872 2023-01-23 01:38:28.690190: step: 1716/527, loss: 0.008429097943007946 2023-01-23 01:38:29.816767: step: 1720/527, loss: 0.09411545097827911 2023-01-23 01:38:30.914837: step: 1724/527, loss: 0.019701480865478516 2023-01-23 01:38:32.035782: step: 1728/527, loss: 0.0583980567753315 2023-01-23 01:38:33.137819: step: 1732/527, loss: 0.028992796316742897 2023-01-23 01:38:34.277093: step: 1736/527, loss: 0.09212350845336914 2023-01-23 01:38:35.407374: step: 1740/527, loss: 0.00423011789098382 2023-01-23 01:38:36.537879: step: 1744/527, loss: 0.10672563314437866 2023-01-23 01:38:37.678037: step: 1748/527, loss: 0.056700803339481354 2023-01-23 01:38:38.788346: step: 1752/527, loss: 0.05688953399658203 2023-01-23 01:38:39.896707: step: 1756/527, loss: 0.04488696902990341 2023-01-23 01:38:41.000012: step: 1760/527, loss: 0.04690838232636452 2023-01-23 01:38:42.134472: step: 1764/527, loss: 0.005692005157470703 2023-01-23 01:38:43.231508: step: 1768/527, loss: 0.014497661963105202 2023-01-23 01:38:44.335605: step: 1772/527, loss: 0.021654700860381126 2023-01-23 01:38:45.472439: step: 1776/527, loss: 0.03479165956377983 2023-01-23 01:38:46.567965: step: 1780/527, loss: 0.015383625403046608 2023-01-23 01:38:47.663673: step: 1784/527, loss: 0.05962171405553818 2023-01-23 01:38:48.774386: step: 1788/527, loss: 0.03440684825181961 2023-01-23 01:38:49.893113: step: 1792/527, loss: 0.022205591201782227 2023-01-23 01:38:51.013791: step: 1796/527, loss: 0.025052262470126152 2023-01-23 01:38:52.109532: step: 1800/527, loss: 0.041197583079338074 2023-01-23 01:38:53.249585: step: 1804/527, loss: 0.011138200759887695 2023-01-23 01:38:54.373341: step: 1808/527, loss: 0.005200004205107689 2023-01-23 01:38:55.520912: step: 1812/527, loss: 0.0211184024810791 2023-01-23 01:38:56.619002: step: 1816/527, loss: 0.0179640781134367 2023-01-23 01:38:57.763252: step: 1820/527, loss: 0.05691700056195259 2023-01-23 01:38:58.887479: step: 1824/527, loss: 0.0329742468893528 2023-01-23 01:38:59.985936: step: 1828/527, loss: 0.007965756580233574 2023-01-23 01:39:01.111235: step: 1832/527, loss: 0.03431396186351776 2023-01-23 01:39:02.219210: step: 1836/527, loss: 0.00861892756074667 2023-01-23 01:39:03.339859: step: 1840/527, loss: 0.0369756706058979 2023-01-23 01:39:04.476285: step: 1844/527, loss: 0.003796482225880027 2023-01-23 01:39:05.590308: step: 1848/527, loss: 0.004118728451430798 2023-01-23 01:39:06.728649: step: 1852/527, loss: 0.028210068121552467 2023-01-23 01:39:07.847334: step: 1856/527, loss: 0.05536289140582085 2023-01-23 01:39:08.946574: step: 1860/527, loss: 0.022899245843291283 2023-01-23 01:39:10.083284: step: 1864/527, loss: 0.06281552463769913 2023-01-23 01:39:11.210208: step: 1868/527, loss: 0.03542738035321236 2023-01-23 01:39:12.349831: step: 1872/527, loss: 0.04755659028887749 2023-01-23 01:39:13.447407: step: 1876/527, loss: 0.05237589031457901 2023-01-23 01:39:14.584128: step: 1880/527, loss: 0.026126384735107422 2023-01-23 01:39:15.680258: step: 1884/527, loss: 0.03187618404626846 2023-01-23 01:39:16.820129: step: 1888/527, loss: 0.16256046295166016 2023-01-23 01:39:17.927732: step: 1892/527, loss: 0.02219209633767605 2023-01-23 01:39:19.060600: step: 1896/527, loss: 0.0382721908390522 2023-01-23 01:39:20.181201: step: 1900/527, loss: 0.024348163977265358 2023-01-23 01:39:21.296737: step: 1904/527, loss: 0.019045734778046608 2023-01-23 01:39:22.422984: step: 1908/527, loss: 0.0006413937080651522 2023-01-23 01:39:23.535417: step: 1912/527, loss: 0.3515731990337372 2023-01-23 01:39:24.653172: step: 1916/527, loss: 0.04123964160680771 2023-01-23 01:39:25.780635: step: 1920/527, loss: 0.047490693628787994 2023-01-23 01:39:26.889448: step: 1924/527, loss: 0.01776914671063423 2023-01-23 01:39:28.003481: step: 1928/527, loss: 0.009844970889389515 2023-01-23 01:39:29.071672: step: 1932/527, loss: 0.009848546236753464 2023-01-23 01:39:30.167618: step: 1936/527, loss: 0.006745052058249712 2023-01-23 01:39:31.284647: step: 1940/527, loss: 0.08625411987304688 2023-01-23 01:39:32.379358: step: 1944/527, loss: 0.012703323736786842 2023-01-23 01:39:33.512804: step: 1948/527, loss: 0.05798950046300888 2023-01-23 01:39:34.645725: step: 1952/527, loss: 0.0014046193100512028 2023-01-23 01:39:35.763011: step: 1956/527, loss: 0.744663417339325 2023-01-23 01:39:36.872783: step: 1960/527, loss: 0.0025683403946459293 2023-01-23 01:39:37.987874: step: 1964/527, loss: 0.0832613930106163 2023-01-23 01:39:39.140951: step: 1968/527, loss: 0.033837415277957916 2023-01-23 01:39:40.257279: step: 1972/527, loss: 0.03206272050738335 2023-01-23 01:39:41.357041: step: 1976/527, loss: 0.11563168466091156 2023-01-23 01:39:42.439892: step: 1980/527, loss: 0.01674327813088894 2023-01-23 01:39:43.566902: step: 1984/527, loss: 0.055724527686834335 2023-01-23 01:39:44.677741: step: 1988/527, loss: 0.07718362659215927 2023-01-23 01:39:45.823698: step: 1992/527, loss: 0.49698877334594727 2023-01-23 01:39:46.912172: step: 1996/527, loss: 0.0918058454990387 2023-01-23 01:39:48.013929: step: 2000/527, loss: 0.07412996888160706 2023-01-23 01:39:49.140415: step: 2004/527, loss: 0.8807516694068909 2023-01-23 01:39:50.224953: step: 2008/527, loss: 0.012332153506577015 2023-01-23 01:39:51.372412: step: 2012/527, loss: 0.015344763174653053 2023-01-23 01:39:52.519678: step: 2016/527, loss: 0.04353685304522514 2023-01-23 01:39:53.609487: step: 2020/527, loss: 0.02442960813641548 2023-01-23 01:39:54.737510: step: 2024/527, loss: 0.0745411366224289 2023-01-23 01:39:55.847581: step: 2028/527, loss: 0.020720958709716797 2023-01-23 01:39:56.987859: step: 2032/527, loss: 0.039470769464969635 2023-01-23 01:39:58.116338: step: 2036/527, loss: 0.020329762250185013 2023-01-23 01:39:59.204710: step: 2040/527, loss: 0.024169350042939186 2023-01-23 01:40:00.343221: step: 2044/527, loss: 0.0059812068939208984 2023-01-23 01:40:01.459053: step: 2048/527, loss: 0.011644172482192516 2023-01-23 01:40:02.570995: step: 2052/527, loss: 0.046210192143917084 2023-01-23 01:40:03.715212: step: 2056/527, loss: 0.03106365166604519 2023-01-23 01:40:04.827498: step: 2060/527, loss: 0.05386839061975479 2023-01-23 01:40:05.943058: step: 2064/527, loss: 0.17499685287475586 2023-01-23 01:40:07.058387: step: 2068/527, loss: 0.05699767917394638 2023-01-23 01:40:08.169536: step: 2072/527, loss: 0.10650528967380524 2023-01-23 01:40:09.298638: step: 2076/527, loss: 0.01688671112060547 2023-01-23 01:40:10.450819: step: 2080/527, loss: 0.0027322769165039062 2023-01-23 01:40:11.586011: step: 2084/527, loss: 0.16313126683235168 2023-01-23 01:40:12.688691: step: 2088/527, loss: 0.11428767442703247 2023-01-23 01:40:13.792465: step: 2092/527, loss: 0.015866708010435104 2023-01-23 01:40:14.945981: step: 2096/527, loss: 0.019983459264039993 2023-01-23 01:40:16.069026: step: 2100/527, loss: 0.05142416059970856 2023-01-23 01:40:17.178122: step: 2104/527, loss: 0.007344258017838001 2023-01-23 01:40:18.292303: step: 2108/527, loss: 0.1389904022216797 ================================================== Loss: 0.070 -------------------- Dev: {'event': {'p': 0.592479674796748, 'r': 0.7762982689747004, 'f1': 0.6720461095100865}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Test: {'event': {'p': 0.6183136899365367, 'r': 0.7794285714285715, 'f1': 0.6895854398382204}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Chinese: {'event': {'p': 0.5595238095238095, 'r': 0.8703703703703703, 'f1': 0.6811594202898551}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Korean: {'event': {'p': 0.603448275862069, 'r': 0.5555555555555556, 'f1': 0.5785123966942148}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Russian: {'event': {'p': 0.48717948717948717, 'r': 0.5277777777777778, 'f1': 0.5066666666666667}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Eng Test for Korean: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 13 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:40:59.807155: step: 4/527, loss: 0.05483436957001686 2023-01-23 01:41:00.942476: step: 8/527, loss: 0.013237237930297852 2023-01-23 01:41:02.091046: step: 12/527, loss: 0.048719264566898346 2023-01-23 01:41:03.214635: step: 16/527, loss: 0.0151824951171875 2023-01-23 01:41:04.343301: step: 20/527, loss: 0.021438216790556908 2023-01-23 01:41:05.487728: step: 24/527, loss: 0.0351351723074913 2023-01-23 01:41:06.590670: step: 28/527, loss: 0.23648852109909058 2023-01-23 01:41:07.692815: step: 32/527, loss: 0.07371443510055542 2023-01-23 01:41:08.816281: step: 36/527, loss: 0.031215669587254524 2023-01-23 01:41:09.931151: step: 40/527, loss: 0.0012390136253088713 2023-01-23 01:41:11.059110: step: 44/527, loss: 0.141058549284935 2023-01-23 01:41:12.203286: step: 48/527, loss: 0.0750507339835167 2023-01-23 01:41:13.330364: step: 52/527, loss: 0.033423613756895065 2023-01-23 01:41:14.442780: step: 56/527, loss: 0.28667691349983215 2023-01-23 01:41:15.581022: step: 60/527, loss: 0.018096160143613815 2023-01-23 01:41:16.682187: step: 64/527, loss: 0.040366366505622864 2023-01-23 01:41:17.802959: step: 68/527, loss: 0.04597120359539986 2023-01-23 01:41:18.924342: step: 72/527, loss: 0.05049419403076172 2023-01-23 01:41:20.029861: step: 76/527, loss: 0.032770346850156784 2023-01-23 01:41:21.152267: step: 80/527, loss: 0.033068373799324036 2023-01-23 01:41:22.296344: step: 84/527, loss: 0.025774337351322174 2023-01-23 01:41:23.392425: step: 88/527, loss: 0.1166144385933876 2023-01-23 01:41:24.519127: step: 92/527, loss: 0.21611842513084412 2023-01-23 01:41:25.605839: step: 96/527, loss: 0.03145551681518555 2023-01-23 01:41:26.740193: step: 100/527, loss: 0.012934256345033646 2023-01-23 01:41:27.868754: step: 104/527, loss: 0.01688366010785103 2023-01-23 01:41:28.968179: step: 108/527, loss: 0.01282196119427681 2023-01-23 01:41:30.103298: step: 112/527, loss: 0.0035398483742028475 2023-01-23 01:41:31.219736: step: 116/527, loss: 0.021576501429080963 2023-01-23 01:41:32.343361: step: 120/527, loss: 0.031232357025146484 2023-01-23 01:41:33.449036: step: 124/527, loss: 0.012781786732375622 2023-01-23 01:41:34.550817: step: 128/527, loss: 0.006638812832534313 2023-01-23 01:41:35.675621: step: 132/527, loss: 0.0774417445063591 2023-01-23 01:41:36.816903: step: 136/527, loss: 0.031858157366514206 2023-01-23 01:41:37.926102: step: 140/527, loss: 0.020217036828398705 2023-01-23 01:41:39.047257: step: 144/527, loss: 0.34765854477882385 2023-01-23 01:41:40.181902: step: 148/527, loss: 0.004458904266357422 2023-01-23 01:41:41.333955: step: 152/527, loss: 0.04680920019745827 2023-01-23 01:41:42.456842: step: 156/527, loss: 0.001400089357048273 2023-01-23 01:41:43.589243: step: 160/527, loss: 0.010438013821840286 2023-01-23 01:41:44.697484: step: 164/527, loss: 0.03862609714269638 2023-01-23 01:41:45.833881: step: 168/527, loss: 0.017327118664979935 2023-01-23 01:41:46.981292: step: 172/527, loss: 0.11928883194923401 2023-01-23 01:41:48.105547: step: 176/527, loss: 0.002925062319263816 2023-01-23 01:41:49.232363: step: 180/527, loss: 0.035975027829408646 2023-01-23 01:41:50.342296: step: 184/527, loss: 0.008519936352968216 2023-01-23 01:41:51.444291: step: 188/527, loss: 0.009121417999267578 2023-01-23 01:41:52.587269: step: 192/527, loss: 0.007589531131088734 2023-01-23 01:41:53.691254: step: 196/527, loss: 0.0016632080078125 2023-01-23 01:41:54.856273: step: 200/527, loss: 0.018468666821718216 2023-01-23 01:41:55.986682: step: 204/527, loss: 0.07905006408691406 2023-01-23 01:41:57.098275: step: 208/527, loss: 0.05135173723101616 2023-01-23 01:41:58.220980: step: 212/527, loss: 0.008401012979447842 2023-01-23 01:41:59.367071: step: 216/527, loss: 0.011823464184999466 2023-01-23 01:42:00.466915: step: 220/527, loss: 0.03426084667444229 2023-01-23 01:42:01.570056: step: 224/527, loss: 0.0034529685508459806 2023-01-23 01:42:02.699033: step: 228/527, loss: 0.07303076237440109 2023-01-23 01:42:03.821906: step: 232/527, loss: 0.0059465887024998665 2023-01-23 01:42:04.924617: step: 236/527, loss: 0.010770061053335667 2023-01-23 01:42:06.008046: step: 240/527, loss: 0.0003796577802859247 2023-01-23 01:42:07.113894: step: 244/527, loss: 0.033928871154785156 2023-01-23 01:42:08.214762: step: 248/527, loss: 0.019697191193699837 2023-01-23 01:42:09.346161: step: 252/527, loss: 0.005572700873017311 2023-01-23 01:42:10.457659: step: 256/527, loss: 0.04180946201086044 2023-01-23 01:42:11.582980: step: 260/527, loss: 0.017916489392518997 2023-01-23 01:42:12.728547: step: 264/527, loss: 0.2511996328830719 2023-01-23 01:42:13.826651: step: 268/527, loss: 0.00015192032151389867 2023-01-23 01:42:14.930810: step: 272/527, loss: 0.025321578606963158 2023-01-23 01:42:16.056488: step: 276/527, loss: 0.09013523906469345 2023-01-23 01:42:17.174329: step: 280/527, loss: 0.8055596351623535 2023-01-23 01:42:18.299278: step: 284/527, loss: 0.0018808366730809212 2023-01-23 01:42:19.415277: step: 288/527, loss: 0.038468360900878906 2023-01-23 01:42:20.500434: step: 292/527, loss: 0.05327153205871582 2023-01-23 01:42:21.637729: step: 296/527, loss: 0.0043884990736842155 2023-01-23 01:42:22.765277: step: 300/527, loss: 0.01793527603149414 2023-01-23 01:42:23.866797: step: 304/527, loss: 0.0072252992540597916 2023-01-23 01:42:24.997826: step: 308/527, loss: 0.02978992462158203 2023-01-23 01:42:26.100472: step: 312/527, loss: 0.14135456085205078 2023-01-23 01:42:27.245009: step: 316/527, loss: 0.038083840161561966 2023-01-23 01:42:28.374209: step: 320/527, loss: 0.06750774383544922 2023-01-23 01:42:29.514486: step: 324/527, loss: 0.0014193535316735506 2023-01-23 01:42:30.609445: step: 328/527, loss: 0.031321145594120026 2023-01-23 01:42:31.721977: step: 332/527, loss: 0.015298556536436081 2023-01-23 01:42:32.852198: step: 336/527, loss: 0.10010266304016113 2023-01-23 01:42:33.984525: step: 340/527, loss: 0.021171577274799347 2023-01-23 01:42:35.147021: step: 344/527, loss: 0.0022035599686205387 2023-01-23 01:42:36.269974: step: 348/527, loss: 0.4072292447090149 2023-01-23 01:42:37.358776: step: 352/527, loss: 0.0101292310282588 2023-01-23 01:42:38.501474: step: 356/527, loss: 0.014102363958954811 2023-01-23 01:42:39.628559: step: 360/527, loss: 0.0020759582985192537 2023-01-23 01:42:40.751061: step: 364/527, loss: 0.00048065185546875 2023-01-23 01:42:41.844893: step: 368/527, loss: 0.011178016662597656 2023-01-23 01:42:42.940413: step: 372/527, loss: 0.043828632682561874 2023-01-23 01:42:44.044657: step: 376/527, loss: 0.001505184220150113 2023-01-23 01:42:45.139074: step: 380/527, loss: 0.298762708902359 2023-01-23 01:42:46.266515: step: 384/527, loss: 0.00553207378834486 2023-01-23 01:42:47.379520: step: 388/527, loss: 0.01676025427877903 2023-01-23 01:42:48.560351: step: 392/527, loss: 0.0258955005556345 2023-01-23 01:42:49.661958: step: 396/527, loss: 0.020168591290712357 2023-01-23 01:42:50.775914: step: 400/527, loss: 0.022164441645145416 2023-01-23 01:42:51.904335: step: 404/527, loss: 0.014228916727006435 2023-01-23 01:42:53.001393: step: 408/527, loss: 0.0035377503372728825 2023-01-23 01:42:54.088086: step: 412/527, loss: 0.09039898216724396 2023-01-23 01:42:55.225565: step: 416/527, loss: 0.07017679512500763 2023-01-23 01:42:56.308275: step: 420/527, loss: 0.057897478342056274 2023-01-23 01:42:57.417864: step: 424/527, loss: 0.03418402746319771 2023-01-23 01:42:58.529830: step: 428/527, loss: 0.11395101994276047 2023-01-23 01:42:59.650944: step: 432/527, loss: 0.005800294689834118 2023-01-23 01:43:00.767346: step: 436/527, loss: 0.06565552204847336 2023-01-23 01:43:01.871972: step: 440/527, loss: 0.015845298767089844 2023-01-23 01:43:02.977317: step: 444/527, loss: 0.09393219649791718 2023-01-23 01:43:04.103143: step: 448/527, loss: 0.021622370928525925 2023-01-23 01:43:05.219730: step: 452/527, loss: 0.024625588208436966 2023-01-23 01:43:06.365021: step: 456/527, loss: 0.013593673706054688 2023-01-23 01:43:07.508123: step: 460/527, loss: 0.04725513607263565 2023-01-23 01:43:08.611371: step: 464/527, loss: 0.015504170209169388 2023-01-23 01:43:09.724771: step: 468/527, loss: 0.0030059814453125 2023-01-23 01:43:10.822743: step: 472/527, loss: 0.044334981590509415 2023-01-23 01:43:11.937879: step: 476/527, loss: 0.04174797609448433 2023-01-23 01:43:13.064498: step: 480/527, loss: 0.05864086374640465 2023-01-23 01:43:14.173948: step: 484/527, loss: 0.04286251589655876 2023-01-23 01:43:15.260012: step: 488/527, loss: 0.01697230339050293 2023-01-23 01:43:16.402517: step: 492/527, loss: 0.07252369076013565 2023-01-23 01:43:17.508278: step: 496/527, loss: 0.012589598074555397 2023-01-23 01:43:18.634321: step: 500/527, loss: 0.01417403295636177 2023-01-23 01:43:19.750853: step: 504/527, loss: 0.049610234797000885 2023-01-23 01:43:20.876957: step: 508/527, loss: 0.057517051696777344 2023-01-23 01:43:21.972778: step: 512/527, loss: 0.009934711270034313 2023-01-23 01:43:23.086894: step: 516/527, loss: 0.01191701926290989 2023-01-23 01:43:24.242144: step: 520/527, loss: 0.06930327415466309 2023-01-23 01:43:25.340970: step: 524/527, loss: 0.09046545624732971 2023-01-23 01:43:26.467384: step: 528/527, loss: 0.12750712037086487 2023-01-23 01:43:27.572691: step: 532/527, loss: 0.023652076721191406 2023-01-23 01:43:28.691158: step: 536/527, loss: 0.023132897913455963 2023-01-23 01:43:29.808512: step: 540/527, loss: 0.03845147788524628 2023-01-23 01:43:30.912462: step: 544/527, loss: 0.0004531860467977822 2023-01-23 01:43:32.039912: step: 548/527, loss: 0.014815902337431908 2023-01-23 01:43:33.182200: step: 552/527, loss: 0.057401467114686966 2023-01-23 01:43:34.267217: step: 556/527, loss: 0.0022819519508630037 2023-01-23 01:43:35.399069: step: 560/527, loss: 0.000146770486026071 2023-01-23 01:43:36.514766: step: 564/527, loss: 0.004998969845473766 2023-01-23 01:43:37.655154: step: 568/527, loss: 0.006308412179350853 2023-01-23 01:43:38.762016: step: 572/527, loss: 0.007084369659423828 2023-01-23 01:43:39.871793: step: 576/527, loss: 0.004778003320097923 2023-01-23 01:43:40.981270: step: 580/527, loss: 0.016982747241854668 2023-01-23 01:43:42.102167: step: 584/527, loss: 0.0042404173873364925 2023-01-23 01:43:43.263484: step: 588/527, loss: 0.00701332138851285 2023-01-23 01:43:44.400571: step: 592/527, loss: 0.03866443783044815 2023-01-23 01:43:45.501139: step: 596/527, loss: 0.02811145968735218 2023-01-23 01:43:46.639088: step: 600/527, loss: 0.07745261490345001 2023-01-23 01:43:47.726573: step: 604/527, loss: 0.08996114879846573 2023-01-23 01:43:48.851695: step: 608/527, loss: 0.003554916474968195 2023-01-23 01:43:50.008492: step: 612/527, loss: 0.036653995513916016 2023-01-23 01:43:51.107795: step: 616/527, loss: 0.00877599697560072 2023-01-23 01:43:52.228147: step: 620/527, loss: 0.03270683437585831 2023-01-23 01:43:53.336051: step: 624/527, loss: 0.10150251537561417 2023-01-23 01:43:54.475806: step: 628/527, loss: 0.018178559839725494 2023-01-23 01:43:55.561813: step: 632/527, loss: 0.014668917283415794 2023-01-23 01:43:56.697582: step: 636/527, loss: 0.0009556770673952997 2023-01-23 01:43:57.802102: step: 640/527, loss: 0.034028053283691406 2023-01-23 01:43:58.909141: step: 644/527, loss: 0.01180257834494114 2023-01-23 01:44:00.009751: step: 648/527, loss: 0.04315795749425888 2023-01-23 01:44:01.120778: step: 652/527, loss: 0.04329271614551544 2023-01-23 01:44:02.227066: step: 656/527, loss: 0.0748906135559082 2023-01-23 01:44:03.330907: step: 660/527, loss: 0.7598684430122375 2023-01-23 01:44:04.431547: step: 664/527, loss: 0.03290081024169922 2023-01-23 01:44:05.546374: step: 668/527, loss: 0.003654098603874445 2023-01-23 01:44:06.669289: step: 672/527, loss: 0.019790077582001686 2023-01-23 01:44:07.780665: step: 676/527, loss: 0.042586613446474075 2023-01-23 01:44:08.898768: step: 680/527, loss: 0.01973877102136612 2023-01-23 01:44:10.007518: step: 684/527, loss: 0.0228181853890419 2023-01-23 01:44:11.095827: step: 688/527, loss: 0.05579328536987305 2023-01-23 01:44:12.247207: step: 692/527, loss: 0.04766101762652397 2023-01-23 01:44:13.400975: step: 696/527, loss: 0.02511434629559517 2023-01-23 01:44:14.513334: step: 700/527, loss: 0.023438608273863792 2023-01-23 01:44:15.628228: step: 704/527, loss: 0.08914432674646378 2023-01-23 01:44:16.706531: step: 708/527, loss: 0.03259143978357315 2023-01-23 01:44:17.828837: step: 712/527, loss: 0.0074783326126635075 2023-01-23 01:44:18.927361: step: 716/527, loss: 0.038152217864990234 2023-01-23 01:44:20.065015: step: 720/527, loss: 0.022386979311704636 2023-01-23 01:44:21.156814: step: 724/527, loss: 0.0526885986328125 2023-01-23 01:44:22.269343: step: 728/527, loss: 0.09961071610450745 2023-01-23 01:44:23.389806: step: 732/527, loss: 0.0810524970293045 2023-01-23 01:44:24.531349: step: 736/527, loss: 0.01377859152853489 2023-01-23 01:44:25.626683: step: 740/527, loss: 0.011016559787094593 2023-01-23 01:44:26.783125: step: 744/527, loss: 0.06735400855541229 2023-01-23 01:44:27.903478: step: 748/527, loss: 0.01777200773358345 2023-01-23 01:44:29.013341: step: 752/527, loss: 0.0016444266075268388 2023-01-23 01:44:30.160200: step: 756/527, loss: 0.04064963012933731 2023-01-23 01:44:31.290708: step: 760/527, loss: 0.0959063470363617 2023-01-23 01:44:32.431444: step: 764/527, loss: 0.05010666698217392 2023-01-23 01:44:33.547323: step: 768/527, loss: 0.007413243874907494 2023-01-23 01:44:34.638078: step: 772/527, loss: 0.03229045867919922 2023-01-23 01:44:35.761028: step: 776/527, loss: 0.0047776224091649055 2023-01-23 01:44:36.863306: step: 780/527, loss: 0.02356776036322117 2023-01-23 01:44:37.956554: step: 784/527, loss: 0.0036592960823327303 2023-01-23 01:44:39.059154: step: 788/527, loss: 0.06131782382726669 2023-01-23 01:44:40.209025: step: 792/527, loss: 0.022019004449248314 2023-01-23 01:44:41.340286: step: 796/527, loss: 0.006955373100936413 2023-01-23 01:44:42.434629: step: 800/527, loss: 0.0364038422703743 2023-01-23 01:44:43.542303: step: 804/527, loss: 0.07245779037475586 2023-01-23 01:44:44.662053: step: 808/527, loss: 0.039669133722782135 2023-01-23 01:44:45.754617: step: 812/527, loss: 0.018953992053866386 2023-01-23 01:44:46.858636: step: 816/527, loss: 0.060761354863643646 2023-01-23 01:44:47.971226: step: 820/527, loss: 0.02682756446301937 2023-01-23 01:44:49.071953: step: 824/527, loss: 0.011416817083954811 2023-01-23 01:44:50.186562: step: 828/527, loss: 0.029276657849550247 2023-01-23 01:44:51.320809: step: 832/527, loss: 0.06891937553882599 2023-01-23 01:44:52.429893: step: 836/527, loss: 0.011512089520692825 2023-01-23 01:44:53.526508: step: 840/527, loss: 0.022316837683320045 2023-01-23 01:44:54.606421: step: 844/527, loss: 0.0008707523229531944 2023-01-23 01:44:55.727469: step: 848/527, loss: 0.04833660274744034 2023-01-23 01:44:56.851955: step: 852/527, loss: 0.009812736883759499 2023-01-23 01:44:57.956120: step: 856/527, loss: 0.07840891182422638 2023-01-23 01:44:59.094488: step: 860/527, loss: 0.09594688564538956 2023-01-23 01:45:00.213712: step: 864/527, loss: 0.015311798080801964 2023-01-23 01:45:01.335689: step: 868/527, loss: 0.1030879020690918 2023-01-23 01:45:02.437775: step: 872/527, loss: 0.031411126255989075 2023-01-23 01:45:03.545933: step: 876/527, loss: 0.025075819343328476 2023-01-23 01:45:04.702329: step: 880/527, loss: 0.09888773411512375 2023-01-23 01:45:05.814317: step: 884/527, loss: 0.01148681715130806 2023-01-23 01:45:06.908617: step: 888/527, loss: 0.0035987854935228825 2023-01-23 01:45:08.025078: step: 892/527, loss: 0.012640666216611862 2023-01-23 01:45:09.149697: step: 896/527, loss: 0.09748916327953339 2023-01-23 01:45:10.235445: step: 900/527, loss: 0.02198486216366291 2023-01-23 01:45:11.331139: step: 904/527, loss: 0.04246368631720543 2023-01-23 01:45:12.459466: step: 908/527, loss: 0.03166789934039116 2023-01-23 01:45:13.592096: step: 912/527, loss: 0.03403482213616371 2023-01-23 01:45:14.703213: step: 916/527, loss: 0.0325227752327919 2023-01-23 01:45:15.861035: step: 920/527, loss: 0.04322462156414986 2023-01-23 01:45:16.983895: step: 924/527, loss: 0.01076965406537056 2023-01-23 01:45:18.113042: step: 928/527, loss: 0.057614900171756744 2023-01-23 01:45:19.252433: step: 932/527, loss: 0.21050433814525604 2023-01-23 01:45:20.334596: step: 936/527, loss: 0.014586257748305798 2023-01-23 01:45:21.452383: step: 940/527, loss: 0.06573820114135742 2023-01-23 01:45:22.572962: step: 944/527, loss: 0.28446221351623535 2023-01-23 01:45:23.684567: step: 948/527, loss: 0.035993292927742004 2023-01-23 01:45:24.798538: step: 952/527, loss: 0.08791609108448029 2023-01-23 01:45:25.923644: step: 956/527, loss: 0.023986244574189186 2023-01-23 01:45:27.039299: step: 960/527, loss: 0.05573015660047531 2023-01-23 01:45:28.133806: step: 964/527, loss: 0.11212072521448135 2023-01-23 01:45:29.264153: step: 968/527, loss: 0.08363381028175354 2023-01-23 01:45:30.386619: step: 972/527, loss: 0.009811019524931908 2023-01-23 01:45:31.505941: step: 976/527, loss: 0.03224515914916992 2023-01-23 01:45:32.600608: step: 980/527, loss: 0.018446924164891243 2023-01-23 01:45:33.721656: step: 984/527, loss: 0.06971893459558487 2023-01-23 01:45:34.865360: step: 988/527, loss: 0.04734306409955025 2023-01-23 01:45:35.985475: step: 992/527, loss: 0.020210934802889824 2023-01-23 01:45:37.100213: step: 996/527, loss: 0.08323478698730469 2023-01-23 01:45:38.214412: step: 1000/527, loss: 0.03024921379983425 2023-01-23 01:45:39.316026: step: 1004/527, loss: 0.017646122723817825 2023-01-23 01:45:40.442909: step: 1008/527, loss: 0.6664426922798157 2023-01-23 01:45:41.555319: step: 1012/527, loss: 0.019197940826416016 2023-01-23 01:45:42.680896: step: 1016/527, loss: 0.0016107559204101562 2023-01-23 01:45:43.774605: step: 1020/527, loss: 0.01998577080667019 2023-01-23 01:45:44.880927: step: 1024/527, loss: 0.049959614872932434 2023-01-23 01:45:46.022961: step: 1028/527, loss: 0.19796809554100037 2023-01-23 01:45:47.118533: step: 1032/527, loss: 0.03255829960107803 2023-01-23 01:45:48.262561: step: 1036/527, loss: 0.06451807171106339 2023-01-23 01:45:49.379882: step: 1040/527, loss: 0.003612697124481201 2023-01-23 01:45:50.485018: step: 1044/527, loss: 0.00577092170715332 2023-01-23 01:45:51.600133: step: 1048/527, loss: 0.07512908428907394 2023-01-23 01:45:52.712760: step: 1052/527, loss: 0.00082230573752895 2023-01-23 01:45:53.830720: step: 1056/527, loss: 0.06961727142333984 2023-01-23 01:45:54.953958: step: 1060/527, loss: 0.724550724029541 2023-01-23 01:45:56.079339: step: 1064/527, loss: 0.04136085510253906 2023-01-23 01:45:57.193888: step: 1068/527, loss: 0.07345481216907501 2023-01-23 01:45:58.300745: step: 1072/527, loss: 0.008295250125229359 2023-01-23 01:45:59.411006: step: 1076/527, loss: 0.04828685149550438 2023-01-23 01:46:00.559608: step: 1080/527, loss: 0.007529068272560835 2023-01-23 01:46:01.675796: step: 1084/527, loss: 0.004834223072975874 2023-01-23 01:46:02.771954: step: 1088/527, loss: 0.0325281135737896 2023-01-23 01:46:03.880148: step: 1092/527, loss: 0.1525605171918869 2023-01-23 01:46:05.049251: step: 1096/527, loss: 0.07212257385253906 2023-01-23 01:46:06.173106: step: 1100/527, loss: 0.05296153947710991 2023-01-23 01:46:07.293183: step: 1104/527, loss: 0.05182475969195366 2023-01-23 01:46:08.414891: step: 1108/527, loss: 0.00960607547312975 2023-01-23 01:46:09.538436: step: 1112/527, loss: 0.003126526018604636 2023-01-23 01:46:10.643074: step: 1116/527, loss: 0.03246298059821129 2023-01-23 01:46:11.773541: step: 1120/527, loss: 0.015102053061127663 2023-01-23 01:46:12.871801: step: 1124/527, loss: 0.009367561899125576 2023-01-23 01:46:13.965836: step: 1128/527, loss: 0.07105594873428345 2023-01-23 01:46:15.100389: step: 1132/527, loss: 0.03155851364135742 2023-01-23 01:46:16.185190: step: 1136/527, loss: 0.013150978833436966 2023-01-23 01:46:17.290574: step: 1140/527, loss: 0.035373687744140625 2023-01-23 01:46:18.393837: step: 1144/527, loss: 0.03545961529016495 2023-01-23 01:46:19.503943: step: 1148/527, loss: 0.020163822919130325 2023-01-23 01:46:20.638477: step: 1152/527, loss: 0.008792400360107422 2023-01-23 01:46:21.746288: step: 1156/527, loss: 0.19859322905540466 2023-01-23 01:46:22.861068: step: 1160/527, loss: 0.009354878216981888 2023-01-23 01:46:23.976915: step: 1164/527, loss: 0.020555878058075905 2023-01-23 01:46:25.126001: step: 1168/527, loss: 0.03493805229663849 2023-01-23 01:46:26.239528: step: 1172/527, loss: 0.20771950483322144 2023-01-23 01:46:27.368295: step: 1176/527, loss: 0.007491398137062788 2023-01-23 01:46:28.470739: step: 1180/527, loss: 0.008032703772187233 2023-01-23 01:46:29.570171: step: 1184/527, loss: 0.09722200036048889 2023-01-23 01:46:30.720075: step: 1188/527, loss: 0.5823045969009399 2023-01-23 01:46:31.806042: step: 1192/527, loss: 0.03454260900616646 2023-01-23 01:46:32.939096: step: 1196/527, loss: 0.01978473737835884 2023-01-23 01:46:34.066151: step: 1200/527, loss: 0.015386868268251419 2023-01-23 01:46:35.154008: step: 1204/527, loss: 0.005093097686767578 2023-01-23 01:46:36.284967: step: 1208/527, loss: 0.03135328367352486 2023-01-23 01:46:37.382087: step: 1212/527, loss: 0.032625965774059296 2023-01-23 01:46:38.499527: step: 1216/527, loss: 0.03395795822143555 2023-01-23 01:46:39.629614: step: 1220/527, loss: 0.005505943670868874 2023-01-23 01:46:40.767626: step: 1224/527, loss: 0.05205840989947319 2023-01-23 01:46:41.895709: step: 1228/527, loss: 0.0622289665043354 2023-01-23 01:46:43.013431: step: 1232/527, loss: 0.041649818420410156 2023-01-23 01:46:44.120788: step: 1236/527, loss: 0.013238049112260342 2023-01-23 01:46:45.286491: step: 1240/527, loss: 0.057599641382694244 2023-01-23 01:46:46.414170: step: 1244/527, loss: 0.012153577990829945 2023-01-23 01:46:47.520176: step: 1248/527, loss: 0.005779457278549671 2023-01-23 01:46:48.615445: step: 1252/527, loss: 0.04761715233325958 2023-01-23 01:46:49.733955: step: 1256/527, loss: 0.004703808110207319 2023-01-23 01:46:50.841573: step: 1260/527, loss: 0.057230666279792786 2023-01-23 01:46:51.958067: step: 1264/527, loss: 0.12317228317260742 2023-01-23 01:46:53.069727: step: 1268/527, loss: 0.02155895158648491 2023-01-23 01:46:54.162195: step: 1272/527, loss: 0.01656317710876465 2023-01-23 01:46:55.266001: step: 1276/527, loss: 0.2647598385810852 2023-01-23 01:46:56.388621: step: 1280/527, loss: 0.003936290740966797 2023-01-23 01:46:57.498726: step: 1284/527, loss: 0.01997528038918972 2023-01-23 01:46:58.623945: step: 1288/527, loss: 0.15362955629825592 2023-01-23 01:46:59.723637: step: 1292/527, loss: 0.013242244720458984 2023-01-23 01:47:00.841540: step: 1296/527, loss: 0.022014617919921875 2023-01-23 01:47:01.948802: step: 1300/527, loss: 0.038667868822813034 2023-01-23 01:47:03.079776: step: 1304/527, loss: 0.23070096969604492 2023-01-23 01:47:04.207157: step: 1308/527, loss: 0.08120041340589523 2023-01-23 01:47:05.337044: step: 1312/527, loss: 0.0051898956298828125 2023-01-23 01:47:06.467504: step: 1316/527, loss: 0.07891617715358734 2023-01-23 01:47:07.577422: step: 1320/527, loss: 0.0029237749986350536 2023-01-23 01:47:08.704192: step: 1324/527, loss: 0.02219226583838463 2023-01-23 01:47:09.823275: step: 1328/527, loss: 0.004751873202621937 2023-01-23 01:47:10.968714: step: 1332/527, loss: 0.009826470166444778 2023-01-23 01:47:12.122783: step: 1336/527, loss: 0.0006240367656573653 2023-01-23 01:47:13.257587: step: 1340/527, loss: 0.02756500244140625 2023-01-23 01:47:14.366652: step: 1344/527, loss: 0.03516406938433647 2023-01-23 01:47:15.474706: step: 1348/527, loss: 0.00695037841796875 2023-01-23 01:47:16.625905: step: 1352/527, loss: 0.024383973330259323 2023-01-23 01:47:17.727075: step: 1356/527, loss: 0.002441930817440152 2023-01-23 01:47:18.830888: step: 1360/527, loss: 0.0034097672905772924 2023-01-23 01:47:20.014980: step: 1364/527, loss: 0.0023772239219397306 2023-01-23 01:47:21.090113: step: 1368/527, loss: 0.03637723997235298 2023-01-23 01:47:22.205428: step: 1372/527, loss: 0.04784221947193146 2023-01-23 01:47:23.336460: step: 1376/527, loss: 0.0019015312427654862 2023-01-23 01:47:24.441131: step: 1380/527, loss: 0.27636557817459106 2023-01-23 01:47:25.550834: step: 1384/527, loss: 0.016938496381044388 2023-01-23 01:47:26.653521: step: 1388/527, loss: 0.0158888828009367 2023-01-23 01:47:27.753296: step: 1392/527, loss: 0.002195119857788086 2023-01-23 01:47:28.865612: step: 1396/527, loss: 0.006830978207290173 2023-01-23 01:47:29.997767: step: 1400/527, loss: 0.018581580370664597 2023-01-23 01:47:31.120615: step: 1404/527, loss: 0.016751576215028763 2023-01-23 01:47:32.241692: step: 1408/527, loss: 0.04961090162396431 2023-01-23 01:47:33.380996: step: 1412/527, loss: 0.06777743995189667 2023-01-23 01:47:34.513524: step: 1416/527, loss: 0.021691275760531425 2023-01-23 01:47:35.624435: step: 1420/527, loss: 0.29374438524246216 2023-01-23 01:47:36.702907: step: 1424/527, loss: 0.014352274127304554 2023-01-23 01:47:37.820692: step: 1428/527, loss: 0.03940172120928764 2023-01-23 01:47:38.901094: step: 1432/527, loss: 0.010601520538330078 2023-01-23 01:47:40.000359: step: 1436/527, loss: 0.6946795582771301 2023-01-23 01:47:41.151431: step: 1440/527, loss: 0.051271721720695496 2023-01-23 01:47:42.253241: step: 1444/527, loss: 0.049335479736328125 2023-01-23 01:47:43.365648: step: 1448/527, loss: 0.006586647126823664 2023-01-23 01:47:44.546167: step: 1452/527, loss: 0.11700229346752167 2023-01-23 01:47:45.675517: step: 1456/527, loss: 0.0926612839102745 2023-01-23 01:47:46.814039: step: 1460/527, loss: 0.07294356822967529 2023-01-23 01:47:47.953254: step: 1464/527, loss: 0.028957556933164597 2023-01-23 01:47:49.031315: step: 1468/527, loss: 0.02010526694357395 2023-01-23 01:47:50.177642: step: 1472/527, loss: 0.007172584533691406 2023-01-23 01:47:51.309471: step: 1476/527, loss: 0.04345288500189781 2023-01-23 01:47:52.425414: step: 1480/527, loss: 0.04587268829345703 2023-01-23 01:47:53.531229: step: 1484/527, loss: 0.005192121956497431 2023-01-23 01:47:54.630341: step: 1488/527, loss: 0.029194356873631477 2023-01-23 01:47:55.759167: step: 1492/527, loss: 0.022762108594179153 2023-01-23 01:47:56.887701: step: 1496/527, loss: 0.0031383514869958162 2023-01-23 01:47:57.998448: step: 1500/527, loss: 0.06720896065235138 2023-01-23 01:47:59.103448: step: 1504/527, loss: 0.03612027317285538 2023-01-23 01:48:00.212144: step: 1508/527, loss: 0.035645853728055954 2023-01-23 01:48:01.327039: step: 1512/527, loss: 0.06742215156555176 2023-01-23 01:48:02.428336: step: 1516/527, loss: 0.0030162811744958162 2023-01-23 01:48:03.532875: step: 1520/527, loss: 0.006982946302741766 2023-01-23 01:48:04.674557: step: 1524/527, loss: 0.10296420753002167 2023-01-23 01:48:05.777812: step: 1528/527, loss: 0.015073109418153763 2023-01-23 01:48:06.912918: step: 1532/527, loss: 0.007985735312104225 2023-01-23 01:48:08.026087: step: 1536/527, loss: 0.022669600322842598 2023-01-23 01:48:09.134123: step: 1540/527, loss: 0.004206657409667969 2023-01-23 01:48:10.247973: step: 1544/527, loss: 0.004276275634765625 2023-01-23 01:48:11.392745: step: 1548/527, loss: 0.015130805782973766 2023-01-23 01:48:12.529654: step: 1552/527, loss: 0.0010839462047442794 2023-01-23 01:48:13.651311: step: 1556/527, loss: 0.0053723338060081005 2023-01-23 01:48:14.773273: step: 1560/527, loss: 0.06706143170595169 2023-01-23 01:48:15.898857: step: 1564/527, loss: 0.03491344675421715 2023-01-23 01:48:17.000495: step: 1568/527, loss: 0.025592470541596413 2023-01-23 01:48:18.122374: step: 1572/527, loss: 0.005748748779296875 2023-01-23 01:48:19.252459: step: 1576/527, loss: 0.005154228303581476 2023-01-23 01:48:20.370487: step: 1580/527, loss: 0.01882152445614338 2023-01-23 01:48:21.464821: step: 1584/527, loss: 0.05542793497443199 2023-01-23 01:48:22.590336: step: 1588/527, loss: 0.013289166614413261 2023-01-23 01:48:23.713679: step: 1592/527, loss: 0.009035682305693626 2023-01-23 01:48:24.829487: step: 1596/527, loss: 0.005931854248046875 2023-01-23 01:48:25.938754: step: 1600/527, loss: 0.0595739409327507 2023-01-23 01:48:27.091960: step: 1604/527, loss: 0.05165109783411026 2023-01-23 01:48:28.208209: step: 1608/527, loss: 0.004200268071144819 2023-01-23 01:48:29.351762: step: 1612/527, loss: 0.08597545325756073 2023-01-23 01:48:30.458366: step: 1616/527, loss: 0.05741174519062042 2023-01-23 01:48:31.594040: step: 1620/527, loss: 0.0024640082847326994 2023-01-23 01:48:32.677776: step: 1624/527, loss: 0.01656198501586914 2023-01-23 01:48:33.798637: step: 1628/527, loss: 0.07062244415283203 2023-01-23 01:48:34.920260: step: 1632/527, loss: 0.0074443817138671875 2023-01-23 01:48:36.041056: step: 1636/527, loss: 0.16391582787036896 2023-01-23 01:48:37.168852: step: 1640/527, loss: 0.011956358328461647 2023-01-23 01:48:38.292199: step: 1644/527, loss: 0.2724836468696594 2023-01-23 01:48:39.398850: step: 1648/527, loss: 0.009928036481142044 2023-01-23 01:48:40.504829: step: 1652/527, loss: 0.04166774824261665 2023-01-23 01:48:41.648548: step: 1656/527, loss: 0.04453125223517418 2023-01-23 01:48:42.774429: step: 1660/527, loss: 0.00957473460584879 2023-01-23 01:48:43.873408: step: 1664/527, loss: 0.0003952026308979839 2023-01-23 01:48:44.973995: step: 1668/527, loss: 0.1094079464673996 2023-01-23 01:48:46.086175: step: 1672/527, loss: 0.004000854678452015 2023-01-23 01:48:47.236306: step: 1676/527, loss: 0.046807195991277695 2023-01-23 01:48:48.316673: step: 1680/527, loss: 0.0013566971756517887 2023-01-23 01:48:49.453858: step: 1684/527, loss: 0.04736337810754776 2023-01-23 01:48:50.594036: step: 1688/527, loss: 0.05189934000372887 2023-01-23 01:48:51.726899: step: 1692/527, loss: 0.00387496966868639 2023-01-23 01:48:52.850325: step: 1696/527, loss: 0.036879539489746094 2023-01-23 01:48:53.956854: step: 1700/527, loss: 0.0012104511260986328 2023-01-23 01:48:55.132065: step: 1704/527, loss: 0.061257075518369675 2023-01-23 01:48:56.253437: step: 1708/527, loss: 0.015995407477021217 2023-01-23 01:48:57.376488: step: 1712/527, loss: 0.03021831624209881 2023-01-23 01:48:58.465564: step: 1716/527, loss: 0.051409244537353516 2023-01-23 01:48:59.631707: step: 1720/527, loss: 0.021474361419677734 2023-01-23 01:49:00.758259: step: 1724/527, loss: 0.012036371044814587 2023-01-23 01:49:01.866447: step: 1728/527, loss: 0.007368040271103382 2023-01-23 01:49:02.940673: step: 1732/527, loss: 0.09078263491392136 2023-01-23 01:49:04.025179: step: 1736/527, loss: 0.0004385948122944683 2023-01-23 01:49:05.206111: step: 1740/527, loss: 0.028565645217895508 2023-01-23 01:49:06.320181: step: 1744/527, loss: 0.02421112172305584 2023-01-23 01:49:07.456809: step: 1748/527, loss: 0.07216129451990128 2023-01-23 01:49:08.588204: step: 1752/527, loss: 0.08621430397033691 2023-01-23 01:49:09.709625: step: 1756/527, loss: 0.017140865325927734 2023-01-23 01:49:10.845917: step: 1760/527, loss: 0.003023004624992609 2023-01-23 01:49:12.003605: step: 1764/527, loss: 0.06977224349975586 2023-01-23 01:49:13.113626: step: 1768/527, loss: 0.0037202835083007812 2023-01-23 01:49:14.255574: step: 1772/527, loss: 0.04111452028155327 2023-01-23 01:49:15.401718: step: 1776/527, loss: 0.008955001831054688 2023-01-23 01:49:16.503224: step: 1780/527, loss: 0.018445873633027077 2023-01-23 01:49:17.609804: step: 1784/527, loss: 0.06114606931805611 2023-01-23 01:49:18.728642: step: 1788/527, loss: 0.004582786466926336 2023-01-23 01:49:19.851251: step: 1792/527, loss: 0.05067186802625656 2023-01-23 01:49:20.969481: step: 1796/527, loss: 0.007205224130302668 2023-01-23 01:49:22.083762: step: 1800/527, loss: 0.05734921991825104 2023-01-23 01:49:23.188467: step: 1804/527, loss: 0.025539016351103783 2023-01-23 01:49:24.289568: step: 1808/527, loss: 0.046971406787633896 2023-01-23 01:49:25.411401: step: 1812/527, loss: 0.6912172436714172 2023-01-23 01:49:26.525150: step: 1816/527, loss: 0.06328163295984268 2023-01-23 01:49:27.634698: step: 1820/527, loss: 0.2096502184867859 2023-01-23 01:49:28.765602: step: 1824/527, loss: 0.009600304998457432 2023-01-23 01:49:29.877592: step: 1828/527, loss: 0.0025440691970288754 2023-01-23 01:49:31.007534: step: 1832/527, loss: 0.06278133392333984 2023-01-23 01:49:32.132362: step: 1836/527, loss: 0.012899207882583141 2023-01-23 01:49:33.268734: step: 1840/527, loss: 0.032385826110839844 2023-01-23 01:49:34.358033: step: 1844/527, loss: 0.036482714116573334 2023-01-23 01:49:35.483094: step: 1848/527, loss: 0.002086067106574774 2023-01-23 01:49:36.592729: step: 1852/527, loss: 0.041127681732177734 2023-01-23 01:49:37.720964: step: 1856/527, loss: 0.13921909034252167 2023-01-23 01:49:38.847971: step: 1860/527, loss: 0.04081106185913086 2023-01-23 01:49:39.988492: step: 1864/527, loss: 0.04165401682257652 2023-01-23 01:49:41.112901: step: 1868/527, loss: 0.0629599541425705 2023-01-23 01:49:42.221935: step: 1872/527, loss: 0.01438918150961399 2023-01-23 01:49:43.302818: step: 1876/527, loss: 0.02493257448077202 2023-01-23 01:49:44.426993: step: 1880/527, loss: 0.0367732048034668 2023-01-23 01:49:45.518337: step: 1884/527, loss: 0.0055550173856318 2023-01-23 01:49:46.628552: step: 1888/527, loss: 0.0018085002666339278 2023-01-23 01:49:47.714783: step: 1892/527, loss: 0.02495403401553631 2023-01-23 01:49:48.869617: step: 1896/527, loss: 0.046590615063905716 2023-01-23 01:49:49.998533: step: 1900/527, loss: 0.00687255896627903 2023-01-23 01:49:51.111510: step: 1904/527, loss: 0.6215629577636719 2023-01-23 01:49:52.226790: step: 1908/527, loss: 0.039576247334480286 2023-01-23 01:49:53.307455: step: 1912/527, loss: 0.003080749651417136 2023-01-23 01:49:54.431152: step: 1916/527, loss: 0.012486744672060013 2023-01-23 01:49:55.563289: step: 1920/527, loss: 0.041993334889411926 2023-01-23 01:49:56.661434: step: 1924/527, loss: 0.006340312771499157 2023-01-23 01:49:57.792319: step: 1928/527, loss: 0.09931173175573349 2023-01-23 01:49:58.905881: step: 1932/527, loss: 0.1064685806632042 2023-01-23 01:50:00.009424: step: 1936/527, loss: 0.07896146178245544 2023-01-23 01:50:01.133247: step: 1940/527, loss: 0.05091896280646324 2023-01-23 01:50:02.250792: step: 1944/527, loss: 0.02133970335125923 2023-01-23 01:50:03.349624: step: 1948/527, loss: 0.02213573455810547 2023-01-23 01:50:04.502868: step: 1952/527, loss: 0.031040765345096588 2023-01-23 01:50:05.573735: step: 1956/527, loss: 0.014957714825868607 2023-01-23 01:50:06.677915: step: 1960/527, loss: 0.03968362882733345 2023-01-23 01:50:07.798435: step: 1964/527, loss: 0.03171835094690323 2023-01-23 01:50:08.896712: step: 1968/527, loss: 0.006890201475471258 2023-01-23 01:50:10.006398: step: 1972/527, loss: 0.015164995566010475 2023-01-23 01:50:11.116081: step: 1976/527, loss: 0.01812286488711834 2023-01-23 01:50:12.228104: step: 1980/527, loss: 0.017806529998779297 2023-01-23 01:50:13.354935: step: 1984/527, loss: 0.058237459510564804 2023-01-23 01:50:14.436020: step: 1988/527, loss: 0.01932344399392605 2023-01-23 01:50:15.582032: step: 1992/527, loss: 0.009842300787568092 2023-01-23 01:50:16.703382: step: 1996/527, loss: 0.025014685466885567 2023-01-23 01:50:17.808180: step: 2000/527, loss: 0.012192345224320889 2023-01-23 01:50:18.912048: step: 2004/527, loss: 0.02413005754351616 2023-01-23 01:50:20.032410: step: 2008/527, loss: 0.0463964082300663 2023-01-23 01:50:21.122837: step: 2012/527, loss: 0.06092128902673721 2023-01-23 01:50:22.221636: step: 2016/527, loss: 0.03946990892291069 2023-01-23 01:50:23.383076: step: 2020/527, loss: 0.0009015083778649569 2023-01-23 01:50:24.467369: step: 2024/527, loss: 0.009719084948301315 2023-01-23 01:50:25.577776: step: 2028/527, loss: 0.06445427238941193 2023-01-23 01:50:26.683160: step: 2032/527, loss: 0.08793125301599503 2023-01-23 01:50:27.794265: step: 2036/527, loss: 0.10137882083654404 2023-01-23 01:50:28.936325: step: 2040/527, loss: 0.05763416364789009 2023-01-23 01:50:30.029330: step: 2044/527, loss: 0.5947321653366089 2023-01-23 01:50:31.127791: step: 2048/527, loss: 0.05582108721137047 2023-01-23 01:50:32.245804: step: 2052/527, loss: 0.0015582561027258635 2023-01-23 01:50:33.355078: step: 2056/527, loss: 0.007390642538666725 2023-01-23 01:50:34.454750: step: 2060/527, loss: 0.01080179214477539 2023-01-23 01:50:35.589877: step: 2064/527, loss: 0.008870887570083141 2023-01-23 01:50:36.710478: step: 2068/527, loss: 0.010660935193300247 2023-01-23 01:50:37.820862: step: 2072/527, loss: 0.0019012928241863847 2023-01-23 01:50:38.948320: step: 2076/527, loss: 0.02033062092959881 2023-01-23 01:50:40.070083: step: 2080/527, loss: 0.04858198016881943 2023-01-23 01:50:41.162831: step: 2084/527, loss: 0.006595945917069912 2023-01-23 01:50:42.281236: step: 2088/527, loss: 0.06519432365894318 2023-01-23 01:50:43.413236: step: 2092/527, loss: 0.07375431060791016 2023-01-23 01:50:44.516273: step: 2096/527, loss: 0.024370575323700905 2023-01-23 01:50:45.586020: step: 2100/527, loss: 0.0045320987701416016 2023-01-23 01:50:46.686638: step: 2104/527, loss: 0.03608770668506622 2023-01-23 01:50:47.794401: step: 2108/527, loss: 0.007403564173728228 ================================================== Loss: 0.052 -------------------- Dev: {'event': {'p': 0.6024716786817713, 'r': 0.7789613848202397, 'f1': 0.6794425087108013}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Test: {'event': {'p': 0.6284801460520311, 'r': 0.7868571428571428, 'f1': 0.6988074092869829}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Chinese: {'event': {'p': 0.5454545454545454, 'r': 0.8888888888888888, 'f1': 0.676056338028169}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Korean: {'event': {'p': 0.6071428571428571, 'r': 0.5396825396825397, 'f1': 0.5714285714285714}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Russian: {'event': {'p': 0.43902439024390244, 'r': 0.5, 'f1': 0.4675324675324676}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Eng Test for Korean: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 14 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 01:51:29.236667: step: 4/527, loss: 0.025342464447021484 2023-01-23 01:51:30.363044: step: 8/527, loss: 0.015662766993045807 2023-01-23 01:51:31.488168: step: 12/527, loss: 0.02370300330221653 2023-01-23 01:51:32.603376: step: 16/527, loss: 0.03798742592334747 2023-01-23 01:51:33.727334: step: 20/527, loss: 0.011620378121733665 2023-01-23 01:51:34.847737: step: 24/527, loss: 0.04960274696350098 2023-01-23 01:51:35.921183: step: 28/527, loss: 0.0037850383669137955 2023-01-23 01:51:37.041031: step: 32/527, loss: 0.03542933613061905 2023-01-23 01:51:38.171828: step: 36/527, loss: 0.04210076108574867 2023-01-23 01:51:39.284838: step: 40/527, loss: 0.03690643236041069 2023-01-23 01:51:40.412641: step: 44/527, loss: 0.0018833160866051912 2023-01-23 01:51:41.517984: step: 48/527, loss: 0.001009273575618863 2023-01-23 01:51:42.621056: step: 52/527, loss: 0.034002162516117096 2023-01-23 01:51:43.731891: step: 56/527, loss: 0.008019638247787952 2023-01-23 01:51:44.839030: step: 60/527, loss: 0.0033050538040697575 2023-01-23 01:51:45.947622: step: 64/527, loss: 0.05381298065185547 2023-01-23 01:51:47.052714: step: 68/527, loss: 0.0032682418823242188 2023-01-23 01:51:48.148443: step: 72/527, loss: 0.034348104149103165 2023-01-23 01:51:49.287544: step: 76/527, loss: 0.018347548320889473 2023-01-23 01:51:50.393573: step: 80/527, loss: 0.007658672519028187 2023-01-23 01:51:51.510024: step: 84/527, loss: 0.012154294177889824 2023-01-23 01:51:52.645180: step: 88/527, loss: 0.005300331395119429 2023-01-23 01:51:53.750962: step: 92/527, loss: 0.004478359594941139 2023-01-23 01:51:54.866851: step: 96/527, loss: 0.005043506622314453 2023-01-23 01:51:55.992296: step: 100/527, loss: 0.006303215399384499 2023-01-23 01:51:57.081290: step: 104/527, loss: 0.2584305703639984 2023-01-23 01:51:58.203118: step: 108/527, loss: 0.01438894309103489 2023-01-23 01:51:59.288724: step: 112/527, loss: 0.012206220999360085 2023-01-23 01:52:00.395764: step: 116/527, loss: 0.009972000494599342 2023-01-23 01:52:01.522212: step: 120/527, loss: 0.059607695788145065 2023-01-23 01:52:02.612579: step: 124/527, loss: 0.04517402499914169 2023-01-23 01:52:03.719760: step: 128/527, loss: 0.010956143960356712 2023-01-23 01:52:04.833628: step: 132/527, loss: 0.05373172461986542 2023-01-23 01:52:05.938133: step: 136/527, loss: 0.05832987278699875 2023-01-23 01:52:07.041169: step: 140/527, loss: 0.03638887405395508 2023-01-23 01:52:08.127133: step: 144/527, loss: 0.026297666132450104 2023-01-23 01:52:09.244707: step: 148/527, loss: 0.005062866490334272 2023-01-23 01:52:10.353262: step: 152/527, loss: 0.026729965582489967 2023-01-23 01:52:11.466479: step: 156/527, loss: 0.0020202635787427425 2023-01-23 01:52:12.587936: step: 160/527, loss: 0.00012168884131824598 2023-01-23 01:52:13.728205: step: 164/527, loss: 0.02196211740374565 2023-01-23 01:52:14.822588: step: 168/527, loss: 0.0033535019028931856 2023-01-23 01:52:15.924470: step: 172/527, loss: 0.0831810012459755 2023-01-23 01:52:17.057315: step: 176/527, loss: 0.06321487575769424 2023-01-23 01:52:18.170171: step: 180/527, loss: 0.008753872476518154 2023-01-23 01:52:19.304448: step: 184/527, loss: 0.023058701306581497 2023-01-23 01:52:20.419555: step: 188/527, loss: 0.1346489042043686 2023-01-23 01:52:21.534803: step: 192/527, loss: 0.0027726651169359684 2023-01-23 01:52:22.657989: step: 196/527, loss: 0.07787355780601501 2023-01-23 01:52:23.776913: step: 200/527, loss: 0.029454898089170456 2023-01-23 01:52:24.948017: step: 204/527, loss: 0.010012245737016201 2023-01-23 01:52:26.086367: step: 208/527, loss: 0.1282539665699005 2023-01-23 01:52:27.197424: step: 212/527, loss: 0.04250774532556534 2023-01-23 01:52:28.325594: step: 216/527, loss: 0.00432662945240736 2023-01-23 01:52:29.440183: step: 220/527, loss: 0.004070854280143976 2023-01-23 01:52:30.548895: step: 224/527, loss: 0.0023965835571289062 2023-01-23 01:52:31.659692: step: 228/527, loss: 0.050461579114198685 2023-01-23 01:52:32.781885: step: 232/527, loss: 0.6652324795722961 2023-01-23 01:52:33.896404: step: 236/527, loss: 0.10006442666053772 2023-01-23 01:52:35.026475: step: 240/527, loss: 0.011378193274140358 2023-01-23 01:52:36.145513: step: 244/527, loss: 0.011311912909150124 2023-01-23 01:52:37.302882: step: 248/527, loss: 0.003432846162468195 2023-01-23 01:52:38.423455: step: 252/527, loss: 0.6147485375404358 2023-01-23 01:52:39.566351: step: 256/527, loss: 0.004846381954848766 2023-01-23 01:52:40.688189: step: 260/527, loss: 0.004418564029037952 2023-01-23 01:52:41.818848: step: 264/527, loss: 0.4266657531261444 2023-01-23 01:52:42.909718: step: 268/527, loss: 0.04324007034301758 2023-01-23 01:52:44.008891: step: 272/527, loss: 0.001559400581754744 2023-01-23 01:52:45.131674: step: 276/527, loss: 0.030055999755859375 2023-01-23 01:52:46.268727: step: 280/527, loss: 0.46589404344558716 2023-01-23 01:52:47.385746: step: 284/527, loss: 0.018753718584775925 2023-01-23 01:52:48.519326: step: 288/527, loss: 0.038478851318359375 2023-01-23 01:52:49.626058: step: 292/527, loss: 0.015268707647919655 2023-01-23 01:52:50.743688: step: 296/527, loss: 0.028193378821015358 2023-01-23 01:52:51.862724: step: 300/527, loss: 0.019567109644412994 2023-01-23 01:52:52.987093: step: 304/527, loss: 0.04838895797729492 2023-01-23 01:52:54.130430: step: 308/527, loss: 0.03289680555462837 2023-01-23 01:52:55.265542: step: 312/527, loss: 0.005839347839355469 2023-01-23 01:52:56.384961: step: 316/527, loss: 0.005610847845673561 2023-01-23 01:52:57.498355: step: 320/527, loss: 0.02583608590066433 2023-01-23 01:52:58.596634: step: 324/527, loss: 7.62939453125e-05 2023-01-23 01:52:59.742611: step: 328/527, loss: 0.03259086608886719 2023-01-23 01:53:00.859437: step: 332/527, loss: 0.07112522423267365 2023-01-23 01:53:01.970077: step: 336/527, loss: 0.0004938125493936241 2023-01-23 01:53:03.176494: step: 340/527, loss: 0.0396239273250103 2023-01-23 01:53:04.296671: step: 344/527, loss: 0.03540802001953125 2023-01-23 01:53:05.435243: step: 348/527, loss: 0.014509011059999466 2023-01-23 01:53:06.549992: step: 352/527, loss: 0.004350089933723211 2023-01-23 01:53:07.661794: step: 356/527, loss: 0.006381416693329811 2023-01-23 01:53:08.791716: step: 360/527, loss: 0.1478293389081955 2023-01-23 01:53:09.914667: step: 364/527, loss: 0.05781584233045578 2023-01-23 01:53:11.048539: step: 368/527, loss: 0.015013408847153187 2023-01-23 01:53:12.165119: step: 372/527, loss: 0.014759588986635208 2023-01-23 01:53:13.266453: step: 376/527, loss: 0.012675666250288486 2023-01-23 01:53:14.369903: step: 380/527, loss: 0.3324183225631714 2023-01-23 01:53:15.501944: step: 384/527, loss: 0.006377982906997204 2023-01-23 01:53:16.596456: step: 388/527, loss: 0.017656756564974785 2023-01-23 01:53:17.715934: step: 392/527, loss: 0.11679382622241974 2023-01-23 01:53:18.830526: step: 396/527, loss: 0.03275261074304581 2023-01-23 01:53:19.981132: step: 400/527, loss: 6.4849853515625e-05 2023-01-23 01:53:21.107479: step: 404/527, loss: 0.00860381219536066 2023-01-23 01:53:22.216227: step: 408/527, loss: 0.07595701515674591 2023-01-23 01:53:23.352307: step: 412/527, loss: 0.010304831899702549 2023-01-23 01:53:24.450978: step: 416/527, loss: 1.087188684323337e-05 2023-01-23 01:53:25.556573: step: 420/527, loss: 0.12250528484582901 2023-01-23 01:53:26.663481: step: 424/527, loss: 0.006567221600562334 2023-01-23 01:53:27.770730: step: 428/527, loss: 0.0013615608913823962 2023-01-23 01:53:28.866579: step: 432/527, loss: 0.00019998550124000758 2023-01-23 01:53:30.025504: step: 436/527, loss: 0.018865682184696198 2023-01-23 01:53:31.147301: step: 440/527, loss: 0.04039287567138672 2023-01-23 01:53:32.277119: step: 444/527, loss: 0.03710479661822319 2023-01-23 01:53:33.406219: step: 448/527, loss: 0.30073174834251404 2023-01-23 01:53:34.501787: step: 452/527, loss: 0.009966278448700905 2023-01-23 01:53:35.601951: step: 456/527, loss: 0.005640125367790461 2023-01-23 01:53:36.686641: step: 460/527, loss: 0.04480600357055664 2023-01-23 01:53:37.813381: step: 464/527, loss: 0.00991830788552761 2023-01-23 01:53:38.954348: step: 468/527, loss: 0.020785141736268997 2023-01-23 01:53:40.060535: step: 472/527, loss: 0.016439056023955345 2023-01-23 01:53:41.196721: step: 476/527, loss: 0.06115417554974556 2023-01-23 01:53:42.307669: step: 480/527, loss: 0.0018594504799693823 2023-01-23 01:53:43.409531: step: 484/527, loss: 0.015018082223832607 2023-01-23 01:53:44.537492: step: 488/527, loss: 0.13724756240844727 2023-01-23 01:53:45.653871: step: 492/527, loss: 0.049356844276189804 2023-01-23 01:53:46.755109: step: 496/527, loss: 0.001153802964836359 2023-01-23 01:53:47.851105: step: 500/527, loss: 0.014649583026766777 2023-01-23 01:53:48.954514: step: 504/527, loss: 0.059430789202451706 2023-01-23 01:53:50.084440: step: 508/527, loss: 0.012227917090058327 2023-01-23 01:53:51.178834: step: 512/527, loss: 0.03984313830733299 2023-01-23 01:53:52.306642: step: 516/527, loss: 0.015184975229203701 2023-01-23 01:53:53.442626: step: 520/527, loss: 0.0008680344326421618 2023-01-23 01:53:54.559914: step: 524/527, loss: 0.047830674797296524 2023-01-23 01:53:55.674156: step: 528/527, loss: 0.029259586706757545 2023-01-23 01:53:56.812205: step: 532/527, loss: 0.004869365599006414 2023-01-23 01:53:57.958443: step: 536/527, loss: 0.0010447502136230469 2023-01-23 01:53:59.045676: step: 540/527, loss: 0.004124260041862726 2023-01-23 01:54:00.165024: step: 544/527, loss: 0.11318397521972656 2023-01-23 01:54:01.270803: step: 548/527, loss: 0.03995170816779137 2023-01-23 01:54:02.397207: step: 552/527, loss: 0.01352317351847887 2023-01-23 01:54:03.515728: step: 556/527, loss: 0.22084417939186096 2023-01-23 01:54:04.630859: step: 560/527, loss: 0.07673053443431854 2023-01-23 01:54:05.736006: step: 564/527, loss: 0.00282459263689816 2023-01-23 01:54:06.862004: step: 568/527, loss: 0.01804647594690323 2023-01-23 01:54:07.983511: step: 572/527, loss: 0.005524539854377508 2023-01-23 01:54:09.068858: step: 576/527, loss: 0.0006015778053551912 2023-01-23 01:54:10.206879: step: 580/527, loss: 0.08562207967042923 2023-01-23 01:54:11.338692: step: 584/527, loss: 0.026229478418827057 2023-01-23 01:54:12.463418: step: 588/527, loss: 0.09473800659179688 2023-01-23 01:54:13.565197: step: 592/527, loss: 0.048650264739990234 2023-01-23 01:54:14.673591: step: 596/527, loss: 0.0312257781624794 2023-01-23 01:54:15.793122: step: 600/527, loss: 0.009479904547333717 2023-01-23 01:54:16.926720: step: 604/527, loss: 0.05222473293542862 2023-01-23 01:54:18.037362: step: 608/527, loss: 0.003637027693912387 2023-01-23 01:54:19.147432: step: 612/527, loss: 0.009443949908018112 2023-01-23 01:54:20.265111: step: 616/527, loss: 0.003423309186473489 2023-01-23 01:54:21.444169: step: 620/527, loss: 0.016902543604373932 2023-01-23 01:54:22.531205: step: 624/527, loss: 0.06863898783922195 2023-01-23 01:54:23.671714: step: 628/527, loss: 0.44794130325317383 2023-01-23 01:54:24.776665: step: 632/527, loss: 0.006907748989760876 2023-01-23 01:54:25.902681: step: 636/527, loss: 0.030873488634824753 2023-01-23 01:54:27.000929: step: 640/527, loss: 0.03454799950122833 2023-01-23 01:54:28.107705: step: 644/527, loss: 0.010816765949130058 2023-01-23 01:54:29.245198: step: 648/527, loss: 0.02709946781396866 2023-01-23 01:54:30.352585: step: 652/527, loss: 0.007910347543656826 2023-01-23 01:54:31.494609: step: 656/527, loss: 0.10797033458948135 2023-01-23 01:54:32.581100: step: 660/527, loss: 0.01495513878762722 2023-01-23 01:54:33.702212: step: 664/527, loss: 0.014375876635313034 2023-01-23 01:54:34.817999: step: 668/527, loss: 0.00589065533131361 2023-01-23 01:54:35.939986: step: 672/527, loss: 0.03317613527178764 2023-01-23 01:54:37.073751: step: 676/527, loss: 0.15463848412036896 2023-01-23 01:54:38.198661: step: 680/527, loss: 0.013688469305634499 2023-01-23 01:54:39.339434: step: 684/527, loss: 0.0020911216270178556 2023-01-23 01:54:40.422666: step: 688/527, loss: 0.016767119988799095 2023-01-23 01:54:41.501581: step: 692/527, loss: 0.022665690630674362 2023-01-23 01:54:42.646598: step: 696/527, loss: 0.025562833994627 2023-01-23 01:54:43.788334: step: 700/527, loss: 0.026140881702303886 2023-01-23 01:54:44.885537: step: 704/527, loss: 0.05078735202550888 2023-01-23 01:54:46.003765: step: 708/527, loss: 0.42712095379829407 2023-01-23 01:54:47.124112: step: 712/527, loss: 0.0013141154777258635 2023-01-23 01:54:48.242868: step: 716/527, loss: 0.020473767071962357 2023-01-23 01:54:49.347794: step: 720/527, loss: 0.022878218442201614 2023-01-23 01:54:50.459485: step: 724/527, loss: 0.012124061584472656 2023-01-23 01:54:51.579830: step: 728/527, loss: 0.07249081879854202 2023-01-23 01:54:52.695041: step: 732/527, loss: 0.07187262177467346 2023-01-23 01:54:53.818275: step: 736/527, loss: 0.16689807176589966 2023-01-23 01:54:54.926602: step: 740/527, loss: 0.011294281110167503 2023-01-23 01:54:56.043108: step: 744/527, loss: 0.02014617994427681 2023-01-23 01:54:57.147338: step: 748/527, loss: 0.009103154763579369 2023-01-23 01:54:58.259297: step: 752/527, loss: 0.020002175122499466 2023-01-23 01:54:59.393011: step: 756/527, loss: 0.042470645159482956 2023-01-23 01:55:00.495401: step: 760/527, loss: 0.004442119505256414 2023-01-23 01:55:01.669388: step: 764/527, loss: 0.009454727172851562 2023-01-23 01:55:02.770204: step: 768/527, loss: 0.01426792237907648 2023-01-23 01:55:03.897864: step: 772/527, loss: 0.018658161163330078 2023-01-23 01:55:05.009791: step: 776/527, loss: 0.0053310394287109375 2023-01-23 01:55:06.098379: step: 780/527, loss: 0.10296133160591125 2023-01-23 01:55:07.262067: step: 784/527, loss: 0.04624900966882706 2023-01-23 01:55:08.396405: step: 788/527, loss: 0.021212387830018997 2023-01-23 01:55:09.493826: step: 792/527, loss: 0.02985220029950142 2023-01-23 01:55:10.598816: step: 796/527, loss: 0.06775122135877609 2023-01-23 01:55:11.736128: step: 800/527, loss: 0.01259927824139595 2023-01-23 01:55:12.855462: step: 804/527, loss: 0.004502058029174805 2023-01-23 01:55:13.970193: step: 808/527, loss: 0.017753221094608307 2023-01-23 01:55:15.073103: step: 812/527, loss: 0.025107765570282936 2023-01-23 01:55:16.220120: step: 816/527, loss: 0.005241298582404852 2023-01-23 01:55:17.340330: step: 820/527, loss: 0.03649468347430229 2023-01-23 01:55:18.452256: step: 824/527, loss: 0.0032578466925770044 2023-01-23 01:55:19.592675: step: 828/527, loss: 0.021381378173828125 2023-01-23 01:55:20.720023: step: 832/527, loss: 0.03470878675580025 2023-01-23 01:55:21.850319: step: 836/527, loss: 0.007831478491425514 2023-01-23 01:55:22.970717: step: 840/527, loss: 0.030228901654481888 2023-01-23 01:55:24.092051: step: 844/527, loss: 0.03634333983063698 2023-01-23 01:55:25.208354: step: 848/527, loss: 0.024999428540468216 2023-01-23 01:55:26.316574: step: 852/527, loss: 0.02039337158203125 2023-01-23 01:55:27.434640: step: 856/527, loss: 0.014800739474594593 2023-01-23 01:55:28.543870: step: 860/527, loss: 0.05066204071044922 2023-01-23 01:55:29.657772: step: 864/527, loss: 0.00866842269897461 2023-01-23 01:55:30.763772: step: 868/527, loss: 0.011360406875610352 2023-01-23 01:55:31.879447: step: 872/527, loss: 0.028253793716430664 2023-01-23 01:55:32.967698: step: 876/527, loss: 0.00018615722365211695 2023-01-23 01:55:34.082769: step: 880/527, loss: 0.08463191986083984 2023-01-23 01:55:35.209793: step: 884/527, loss: 0.0015666962135583162 2023-01-23 01:55:36.321300: step: 888/527, loss: 0.00656938599422574 2023-01-23 01:55:37.419755: step: 892/527, loss: 0.009884834289550781 2023-01-23 01:55:38.531690: step: 896/527, loss: 0.006913280580192804 2023-01-23 01:55:39.648747: step: 900/527, loss: 0.0542207732796669 2023-01-23 01:55:40.773721: step: 904/527, loss: 0.013064814731478691 2023-01-23 01:55:41.877044: step: 908/527, loss: 0.01010751724243164 2023-01-23 01:55:42.979095: step: 912/527, loss: 0.05079488828778267 2023-01-23 01:55:44.134469: step: 916/527, loss: 0.0058464049361646175 2023-01-23 01:55:45.262617: step: 920/527, loss: 0.01996450498700142 2023-01-23 01:55:46.392780: step: 924/527, loss: 0.07865677028894424 2023-01-23 01:55:47.479216: step: 928/527, loss: 0.021787548437714577 2023-01-23 01:55:48.600887: step: 932/527, loss: 0.015448665246367455 2023-01-23 01:55:49.731253: step: 936/527, loss: 0.0018733978504315019 2023-01-23 01:55:50.841062: step: 940/527, loss: 0.05511321872472763 2023-01-23 01:55:51.962539: step: 944/527, loss: 0.0708896666765213 2023-01-23 01:55:53.067802: step: 948/527, loss: 0.05402517318725586 2023-01-23 01:55:54.195783: step: 952/527, loss: 0.03214244917035103 2023-01-23 01:55:55.318716: step: 956/527, loss: 0.05144014582037926 2023-01-23 01:55:56.420946: step: 960/527, loss: 0.004785824101418257 2023-01-23 01:55:57.520898: step: 964/527, loss: 0.014788723550736904 2023-01-23 01:55:58.655404: step: 968/527, loss: 0.019421331584453583 2023-01-23 01:55:59.775081: step: 972/527, loss: 0.01750476472079754 2023-01-23 01:56:00.913961: step: 976/527, loss: 0.007056832779198885 2023-01-23 01:56:02.009946: step: 980/527, loss: 0.012920666486024857 2023-01-23 01:56:03.124488: step: 984/527, loss: 0.0067899227142333984 2023-01-23 01:56:04.279604: step: 988/527, loss: 0.0632476806640625 2023-01-23 01:56:05.431318: step: 992/527, loss: 0.02522573620080948 2023-01-23 01:56:06.528812: step: 996/527, loss: 0.0188446044921875 2023-01-23 01:56:07.682180: step: 1000/527, loss: 0.045894622802734375 2023-01-23 01:56:08.789414: step: 1004/527, loss: 0.0014340400230139494 2023-01-23 01:56:09.904597: step: 1008/527, loss: 0.007226801011711359 2023-01-23 01:56:11.037830: step: 1012/527, loss: 0.05158586800098419 2023-01-23 01:56:12.130923: step: 1016/527, loss: 0.0014204978942871094 2023-01-23 01:56:13.245616: step: 1020/527, loss: 0.0084984777495265 2023-01-23 01:56:14.358223: step: 1024/527, loss: 0.01755690574645996 2023-01-23 01:56:15.473524: step: 1028/527, loss: 0.07794008404016495 2023-01-23 01:56:16.602645: step: 1032/527, loss: 0.029554082080721855 2023-01-23 01:56:17.719731: step: 1036/527, loss: 0.029317475855350494 2023-01-23 01:56:18.847648: step: 1040/527, loss: 0.0023838996421545744 2023-01-23 01:56:19.978866: step: 1044/527, loss: 0.03493490070104599 2023-01-23 01:56:21.086196: step: 1048/527, loss: 0.0038268091157078743 2023-01-23 01:56:22.164893: step: 1052/527, loss: 0.009655284695327282 2023-01-23 01:56:23.287370: step: 1056/527, loss: 0.0029845237731933594 2023-01-23 01:56:24.435246: step: 1060/527, loss: 0.013245295733213425 2023-01-23 01:56:25.560468: step: 1064/527, loss: 0.00010099411883857101 2023-01-23 01:56:26.684826: step: 1068/527, loss: 0.021912433207035065 2023-01-23 01:56:27.789269: step: 1072/527, loss: 0.00048253536806441844 2023-01-23 01:56:28.944450: step: 1076/527, loss: 0.10121440887451172 2023-01-23 01:56:30.035009: step: 1080/527, loss: 0.05427970737218857 2023-01-23 01:56:31.152721: step: 1084/527, loss: 0.006516552530229092 2023-01-23 01:56:32.263905: step: 1088/527, loss: 0.016476059332489967 2023-01-23 01:56:33.379311: step: 1092/527, loss: 0.004337024874985218 2023-01-23 01:56:34.487974: step: 1096/527, loss: 0.030361175537109375 2023-01-23 01:56:35.615309: step: 1100/527, loss: 0.018692590296268463 2023-01-23 01:56:36.744790: step: 1104/527, loss: 0.000209808349609375 2023-01-23 01:56:37.862497: step: 1108/527, loss: 0.038996122777462006 2023-01-23 01:56:38.983083: step: 1112/527, loss: 0.25211867690086365 2023-01-23 01:56:40.124465: step: 1116/527, loss: 0.02198343351483345 2023-01-23 01:56:41.249148: step: 1120/527, loss: 0.05424303933978081 2023-01-23 01:56:42.370244: step: 1124/527, loss: 0.19440126419067383 2023-01-23 01:56:43.477389: step: 1128/527, loss: 0.02487773820757866 2023-01-23 01:56:44.592603: step: 1132/527, loss: 0.15693818032741547 2023-01-23 01:56:45.693175: step: 1136/527, loss: 0.0010755539406090975 2023-01-23 01:56:46.799505: step: 1140/527, loss: 0.01673154905438423 2023-01-23 01:56:47.900759: step: 1144/527, loss: 0.07730689644813538 2023-01-23 01:56:49.033276: step: 1148/527, loss: 0.0026268004439771175 2023-01-23 01:56:50.172965: step: 1152/527, loss: 0.12117500603199005 2023-01-23 01:56:51.291107: step: 1156/527, loss: 0.0006269455188885331 2023-01-23 01:56:52.403511: step: 1160/527, loss: 0.0002209663507528603 2023-01-23 01:56:53.495347: step: 1164/527, loss: 0.15881307423114777 2023-01-23 01:56:54.622071: step: 1168/527, loss: 0.021887589246034622 2023-01-23 01:56:55.748015: step: 1172/527, loss: 0.0023250579833984375 2023-01-23 01:56:56.874345: step: 1176/527, loss: 0.03418560326099396 2023-01-23 01:56:57.987407: step: 1180/527, loss: 0.0031715393997728825 2023-01-23 01:56:59.121338: step: 1184/527, loss: 0.0027289390563964844 2023-01-23 01:57:00.246481: step: 1188/527, loss: 0.019376277923583984 2023-01-23 01:57:01.378000: step: 1192/527, loss: 0.07493028044700623 2023-01-23 01:57:02.493026: step: 1196/527, loss: 0.0033516883850097656 2023-01-23 01:57:03.618423: step: 1200/527, loss: 0.0548737533390522 2023-01-23 01:57:04.773863: step: 1204/527, loss: 0.008812451735138893 2023-01-23 01:57:05.863935: step: 1208/527, loss: 0.12735100090503693 2023-01-23 01:57:07.015705: step: 1212/527, loss: 0.06548700481653214 2023-01-23 01:57:08.158619: step: 1216/527, loss: 0.010318947024643421 2023-01-23 01:57:09.301764: step: 1220/527, loss: 0.024636270478367805 2023-01-23 01:57:10.410012: step: 1224/527, loss: 0.04885859787464142 2023-01-23 01:57:11.506810: step: 1228/527, loss: 0.021091079339385033 2023-01-23 01:57:12.612918: step: 1232/527, loss: 0.054725077003240585 2023-01-23 01:57:13.706131: step: 1236/527, loss: 0.019669819623231888 2023-01-23 01:57:14.804328: step: 1240/527, loss: 0.06914804130792618 2023-01-23 01:57:15.913611: step: 1244/527, loss: 0.018675709143280983 2023-01-23 01:57:17.027980: step: 1248/527, loss: 0.0005324840312823653 2023-01-23 01:57:18.142489: step: 1252/527, loss: 0.02186889573931694 2023-01-23 01:57:19.252495: step: 1256/527, loss: 0.000992825604043901 2023-01-23 01:57:20.393739: step: 1260/527, loss: 0.019094567745923996 2023-01-23 01:57:21.533493: step: 1264/527, loss: 0.03783464431762695 2023-01-23 01:57:22.622548: step: 1268/527, loss: 0.0125885009765625 2023-01-23 01:57:23.719073: step: 1272/527, loss: 0.002647781278938055 2023-01-23 01:57:24.796237: step: 1276/527, loss: 0.07914400845766068 2023-01-23 01:57:25.896536: step: 1280/527, loss: 0.014637947082519531 2023-01-23 01:57:27.020119: step: 1284/527, loss: 0.016568852588534355 2023-01-23 01:57:28.130597: step: 1288/527, loss: 0.037230875343084335 2023-01-23 01:57:29.247798: step: 1292/527, loss: 0.01924166828393936 2023-01-23 01:57:30.362258: step: 1296/527, loss: 0.02688770368695259 2023-01-23 01:57:31.489672: step: 1300/527, loss: 0.33169764280319214 2023-01-23 01:57:32.612763: step: 1304/527, loss: 0.05361328274011612 2023-01-23 01:57:33.739444: step: 1308/527, loss: 0.0058762077242136 2023-01-23 01:57:34.854046: step: 1312/527, loss: 0.05802622064948082 2023-01-23 01:57:35.972343: step: 1316/527, loss: 0.011061620898544788 2023-01-23 01:57:37.076880: step: 1320/527, loss: 0.06392412632703781 2023-01-23 01:57:38.192555: step: 1324/527, loss: 0.06148626282811165 2023-01-23 01:57:39.318356: step: 1328/527, loss: 0.0288877971470356 2023-01-23 01:57:40.455561: step: 1332/527, loss: 0.03205270692706108 2023-01-23 01:57:41.550086: step: 1336/527, loss: 0.00723114050924778 2023-01-23 01:57:42.666797: step: 1340/527, loss: 0.031041037291288376 2023-01-23 01:57:43.763119: step: 1344/527, loss: 0.03764476999640465 2023-01-23 01:57:44.899287: step: 1348/527, loss: 0.018552685156464577 2023-01-23 01:57:46.037167: step: 1352/527, loss: 0.0029336928855627775 2023-01-23 01:57:47.186060: step: 1356/527, loss: 0.04499192163348198 2023-01-23 01:57:48.305132: step: 1360/527, loss: 0.012230491265654564 2023-01-23 01:57:49.443357: step: 1364/527, loss: 0.06836624443531036 2023-01-23 01:57:50.562020: step: 1368/527, loss: 0.04246025159955025 2023-01-23 01:57:51.672391: step: 1372/527, loss: 0.07367686927318573 2023-01-23 01:57:52.782957: step: 1376/527, loss: 0.03567257151007652 2023-01-23 01:57:53.913265: step: 1380/527, loss: 0.0007926941034384072 2023-01-23 01:57:55.028010: step: 1384/527, loss: 0.009362125769257545 2023-01-23 01:57:56.148742: step: 1388/527, loss: 0.04927559196949005 2023-01-23 01:57:57.270291: step: 1392/527, loss: 0.013686568476259708 2023-01-23 01:57:58.377122: step: 1396/527, loss: 0.006678963080048561 2023-01-23 01:57:59.482544: step: 1400/527, loss: 0.022643184289336205 2023-01-23 01:58:00.631105: step: 1404/527, loss: 0.005195808596909046 2023-01-23 01:58:01.735485: step: 1408/527, loss: 0.06345569342374802 2023-01-23 01:58:02.835720: step: 1412/527, loss: 0.007413959130644798 2023-01-23 01:58:03.992457: step: 1416/527, loss: 0.0896310806274414 2023-01-23 01:58:05.087452: step: 1420/527, loss: 0.40943050384521484 2023-01-23 01:58:06.202437: step: 1424/527, loss: 0.015190697275102139 2023-01-23 01:58:07.307399: step: 1428/527, loss: 0.02352161519229412 2023-01-23 01:58:08.405726: step: 1432/527, loss: 0.022275350987911224 2023-01-23 01:58:09.509959: step: 1436/527, loss: 0.03592429310083389 2023-01-23 01:58:10.645129: step: 1440/527, loss: 0.0030696869362145662 2023-01-23 01:58:11.764416: step: 1444/527, loss: 0.01106948871165514 2023-01-23 01:58:12.885543: step: 1448/527, loss: 0.0014821052318438888 2023-01-23 01:58:13.981820: step: 1452/527, loss: 0.010616493411362171 2023-01-23 01:58:15.060693: step: 1456/527, loss: 0.049814701080322266 2023-01-23 01:58:16.170934: step: 1460/527, loss: 0.40736040472984314 2023-01-23 01:58:17.283492: step: 1464/527, loss: 0.7366933822631836 2023-01-23 01:58:18.403082: step: 1468/527, loss: 0.03532399982213974 2023-01-23 01:58:19.527043: step: 1472/527, loss: 0.01801624335348606 2023-01-23 01:58:20.628323: step: 1476/527, loss: 0.019459152594208717 2023-01-23 01:58:21.739055: step: 1480/527, loss: 0.024257086217403412 2023-01-23 01:58:22.872574: step: 1484/527, loss: 0.02486143261194229 2023-01-23 01:58:23.997490: step: 1488/527, loss: 0.003638839814811945 2023-01-23 01:58:25.074708: step: 1492/527, loss: 0.009184408001601696 2023-01-23 01:58:26.176775: step: 1496/527, loss: 0.02962617762386799 2023-01-23 01:58:27.302934: step: 1500/527, loss: 0.023097610101103783 2023-01-23 01:58:28.425848: step: 1504/527, loss: 0.011059141717851162 2023-01-23 01:58:29.536151: step: 1508/527, loss: 0.017860984429717064 2023-01-23 01:58:30.629343: step: 1512/527, loss: 0.04872875660657883 2023-01-23 01:58:31.736632: step: 1516/527, loss: 0.13870486617088318 2023-01-23 01:58:32.819390: step: 1520/527, loss: 0.008144950494170189 2023-01-23 01:58:33.922718: step: 1524/527, loss: 0.009200001135468483 2023-01-23 01:58:35.036048: step: 1528/527, loss: 0.01956958696246147 2023-01-23 01:58:36.132723: step: 1532/527, loss: 0.007416916079819202 2023-01-23 01:58:37.253392: step: 1536/527, loss: 0.010880470275878906 2023-01-23 01:58:38.344395: step: 1540/527, loss: 0.007791805546730757 2023-01-23 01:58:39.441385: step: 1544/527, loss: 0.00788569450378418 2023-01-23 01:58:40.536488: step: 1548/527, loss: 0.012665462680161 2023-01-23 01:58:41.670172: step: 1552/527, loss: 0.05111370235681534 2023-01-23 01:58:42.807265: step: 1556/527, loss: 0.09236469864845276 2023-01-23 01:58:43.925551: step: 1560/527, loss: 0.0020856859628111124 2023-01-23 01:58:45.008567: step: 1564/527, loss: 0.007236766628921032 2023-01-23 01:58:46.115046: step: 1568/527, loss: 0.0023843764793127775 2023-01-23 01:58:47.244294: step: 1572/527, loss: 0.014798736199736595 2023-01-23 01:58:48.362661: step: 1576/527, loss: 0.002789783524349332 2023-01-23 01:58:49.486561: step: 1580/527, loss: 0.07930745929479599 2023-01-23 01:58:50.596323: step: 1584/527, loss: 0.19496233761310577 2023-01-23 01:58:51.675731: step: 1588/527, loss: 0.02212362363934517 2023-01-23 01:58:52.789784: step: 1592/527, loss: 0.0036204815842211246 2023-01-23 01:58:53.907016: step: 1596/527, loss: 0.002599430037662387 2023-01-23 01:58:55.025509: step: 1600/527, loss: 0.00913381576538086 2023-01-23 01:58:56.104252: step: 1604/527, loss: 0.05161609873175621 2023-01-23 01:58:57.227864: step: 1608/527, loss: 0.006849098484963179 2023-01-23 01:58:58.357645: step: 1612/527, loss: 0.00035114289494231343 2023-01-23 01:58:59.456969: step: 1616/527, loss: 0.026199722662568092 2023-01-23 01:59:00.564674: step: 1620/527, loss: 0.00015335083298850805 2023-01-23 01:59:01.714832: step: 1624/527, loss: 0.003810882568359375 2023-01-23 01:59:02.821426: step: 1628/527, loss: 0.010427093133330345 2023-01-23 01:59:03.937488: step: 1632/527, loss: 0.024481844156980515 2023-01-23 01:59:05.053732: step: 1636/527, loss: 0.022313212975859642 2023-01-23 01:59:06.151415: step: 1640/527, loss: 0.00387744908221066 2023-01-23 01:59:07.244461: step: 1644/527, loss: 0.23459607362747192 2023-01-23 01:59:08.362098: step: 1648/527, loss: 0.009972477331757545 2023-01-23 01:59:09.492828: step: 1652/527, loss: 0.038657426834106445 2023-01-23 01:59:10.617033: step: 1656/527, loss: 0.0717834010720253 2023-01-23 01:59:11.732268: step: 1660/527, loss: 0.008960723876953125 2023-01-23 01:59:12.847996: step: 1664/527, loss: 0.018883515149354935 2023-01-23 01:59:13.962701: step: 1668/527, loss: 0.07325001060962677 2023-01-23 01:59:15.048024: step: 1672/527, loss: 0.007762718480080366 2023-01-23 01:59:16.162553: step: 1676/527, loss: 0.01847999170422554 2023-01-23 01:59:17.293122: step: 1680/527, loss: 0.0014606475597247481 2023-01-23 01:59:18.402560: step: 1684/527, loss: 0.00630222586914897 2023-01-23 01:59:19.561899: step: 1688/527, loss: 0.024070357903838158 2023-01-23 01:59:20.687555: step: 1692/527, loss: 0.08104170858860016 2023-01-23 01:59:21.785259: step: 1696/527, loss: 0.016492461785674095 2023-01-23 01:59:22.954534: step: 1700/527, loss: 0.0006495476118288934 2023-01-23 01:59:24.062824: step: 1704/527, loss: 0.0002934455987997353 2023-01-23 01:59:25.215661: step: 1708/527, loss: 0.036316778510808945 2023-01-23 01:59:26.363686: step: 1712/527, loss: 0.03200960159301758 2023-01-23 01:59:27.491120: step: 1716/527, loss: 0.05724811553955078 2023-01-23 01:59:28.618098: step: 1720/527, loss: 0.0046852584928274155 2023-01-23 01:59:29.749797: step: 1724/527, loss: 0.009562206454575062 2023-01-23 01:59:30.853445: step: 1728/527, loss: 0.0020021439995616674 2023-01-23 01:59:31.968908: step: 1732/527, loss: 0.002619171282276511 2023-01-23 01:59:33.105106: step: 1736/527, loss: 0.14416447281837463 2023-01-23 01:59:34.225142: step: 1740/527, loss: 0.13138779997825623 2023-01-23 01:59:35.327225: step: 1744/527, loss: 0.036812592297792435 2023-01-23 01:59:36.441047: step: 1748/527, loss: 0.0043649678118526936 2023-01-23 01:59:37.553815: step: 1752/527, loss: 0.022001124918460846 2023-01-23 01:59:38.674936: step: 1756/527, loss: 0.07706394046545029 2023-01-23 01:59:39.796112: step: 1760/527, loss: 0.009550285525619984 2023-01-23 01:59:40.946362: step: 1764/527, loss: 0.020464133471250534 2023-01-23 01:59:42.093880: step: 1768/527, loss: 0.04759788513183594 2023-01-23 01:59:43.218142: step: 1772/527, loss: 0.005223751533776522 2023-01-23 01:59:44.344970: step: 1776/527, loss: 0.03288135677576065 2023-01-23 01:59:45.477822: step: 1780/527, loss: 0.055021099746227264 2023-01-23 01:59:46.577503: step: 1784/527, loss: 0.008260106667876244 2023-01-23 01:59:47.696419: step: 1788/527, loss: 0.05603409186005592 2023-01-23 01:59:48.804149: step: 1792/527, loss: 0.02227792702615261 2023-01-23 01:59:49.895718: step: 1796/527, loss: 0.00020914076594635844 2023-01-23 01:59:51.056080: step: 1800/527, loss: 0.012109661474823952 2023-01-23 01:59:52.193491: step: 1804/527, loss: 0.016881417483091354 2023-01-23 01:59:53.308115: step: 1808/527, loss: 0.025268077850341797 2023-01-23 01:59:54.425857: step: 1812/527, loss: 0.0006565094226971269 2023-01-23 01:59:55.583047: step: 1816/527, loss: 0.26937222480773926 2023-01-23 01:59:56.705088: step: 1820/527, loss: 0.0032444957178086042 2023-01-23 01:59:57.818704: step: 1824/527, loss: 0.0413576140999794 2023-01-23 01:59:58.955434: step: 1828/527, loss: 0.23703232407569885 2023-01-23 02:00:00.061019: step: 1832/527, loss: 0.017943954095244408 2023-01-23 02:00:01.163763: step: 1836/527, loss: 0.0021665573585778475 2023-01-23 02:00:02.304838: step: 1840/527, loss: 0.025844955816864967 2023-01-23 02:00:03.436451: step: 1844/527, loss: 0.027740098536014557 2023-01-23 02:00:04.585640: step: 1848/527, loss: 0.08496513962745667 2023-01-23 02:00:05.679300: step: 1852/527, loss: 0.01747722551226616 2023-01-23 02:00:06.800803: step: 1856/527, loss: 0.00048122406587935984 2023-01-23 02:00:07.937878: step: 1860/527, loss: 0.07278356701135635 2023-01-23 02:00:09.061084: step: 1864/527, loss: 0.062247514724731445 2023-01-23 02:00:10.199861: step: 1868/527, loss: 0.019217777997255325 2023-01-23 02:00:11.317702: step: 1872/527, loss: 0.08824024349451065 2023-01-23 02:00:12.426442: step: 1876/527, loss: 0.13959059119224548 2023-01-23 02:00:13.544363: step: 1880/527, loss: 0.01808653026819229 2023-01-23 02:00:14.653369: step: 1884/527, loss: 0.0028315545059740543 2023-01-23 02:00:15.748352: step: 1888/527, loss: 0.02536029927432537 2023-01-23 02:00:16.884958: step: 1892/527, loss: 0.10564308613538742 2023-01-23 02:00:18.000262: step: 1896/527, loss: 0.060558609664440155 2023-01-23 02:00:19.117853: step: 1900/527, loss: 0.004756450653076172 2023-01-23 02:00:20.250452: step: 1904/527, loss: 0.04950676113367081 2023-01-23 02:00:21.404124: step: 1908/527, loss: 0.019383717328310013 2023-01-23 02:00:22.505907: step: 1912/527, loss: 0.09795437753200531 2023-01-23 02:00:23.626010: step: 1916/527, loss: 0.04067039489746094 2023-01-23 02:00:24.723057: step: 1920/527, loss: 0.2678912878036499 2023-01-23 02:00:25.830912: step: 1924/527, loss: 0.019498825073242188 2023-01-23 02:00:26.938496: step: 1928/527, loss: 0.13204097747802734 2023-01-23 02:00:28.046430: step: 1932/527, loss: 0.05231637880206108 2023-01-23 02:00:29.195768: step: 1936/527, loss: 0.050676919519901276 2023-01-23 02:00:30.265376: step: 1940/527, loss: 0.026834391057491302 2023-01-23 02:00:31.390481: step: 1944/527, loss: 0.003731632139533758 2023-01-23 02:00:32.504955: step: 1948/527, loss: 0.012117576785385609 2023-01-23 02:00:33.666199: step: 1952/527, loss: 0.045914556831121445 2023-01-23 02:00:34.776023: step: 1956/527, loss: 0.026911545544862747 2023-01-23 02:00:35.903496: step: 1960/527, loss: 0.010535622015595436 2023-01-23 02:00:37.010676: step: 1964/527, loss: 0.0117524154484272 2023-01-23 02:00:38.131455: step: 1968/527, loss: 0.002340126084163785 2023-01-23 02:00:39.269833: step: 1972/527, loss: 0.00016446113295387477 2023-01-23 02:00:40.387176: step: 1976/527, loss: 0.04923286288976669 2023-01-23 02:00:41.503662: step: 1980/527, loss: 0.04620952531695366 2023-01-23 02:00:42.600298: step: 1984/527, loss: 0.004095935728400946 2023-01-23 02:00:43.743398: step: 1988/527, loss: 0.006879043765366077 2023-01-23 02:00:44.871938: step: 1992/527, loss: 0.046857260167598724 2023-01-23 02:00:45.979287: step: 1996/527, loss: 0.3629687428474426 2023-01-23 02:00:47.097175: step: 2000/527, loss: 0.07240734249353409 2023-01-23 02:00:48.215615: step: 2004/527, loss: 0.012729418464004993 2023-01-23 02:00:49.350258: step: 2008/527, loss: 0.02142629772424698 2023-01-23 02:00:50.484384: step: 2012/527, loss: 0.07787895202636719 2023-01-23 02:00:51.612488: step: 2016/527, loss: 0.008347177878022194 2023-01-23 02:00:52.711336: step: 2020/527, loss: 0.010721921920776367 2023-01-23 02:00:53.815184: step: 2024/527, loss: 0.008940315805375576 2023-01-23 02:00:54.915637: step: 2028/527, loss: 0.01839742809534073 2023-01-23 02:00:56.040215: step: 2032/527, loss: 0.008983040228486061 2023-01-23 02:00:57.164212: step: 2036/527, loss: 0.2618073523044586 2023-01-23 02:00:58.287403: step: 2040/527, loss: 0.10872535407543182 2023-01-23 02:00:59.404158: step: 2044/527, loss: 0.004775047302246094 2023-01-23 02:01:00.535968: step: 2048/527, loss: 0.0004932403680868447 2023-01-23 02:01:01.657276: step: 2052/527, loss: 0.07493095844984055 2023-01-23 02:01:02.759772: step: 2056/527, loss: 0.012756919488310814 2023-01-23 02:01:03.864702: step: 2060/527, loss: 0.10483141243457794 2023-01-23 02:01:04.974788: step: 2064/527, loss: 0.07885026931762695 2023-01-23 02:01:06.113810: step: 2068/527, loss: 0.008841801434755325 2023-01-23 02:01:07.258967: step: 2072/527, loss: 0.14572273194789886 2023-01-23 02:01:08.378215: step: 2076/527, loss: 0.011713081039488316 2023-01-23 02:01:09.492317: step: 2080/527, loss: 0.020029496401548386 2023-01-23 02:01:10.581396: step: 2084/527, loss: 0.013035583309829235 2023-01-23 02:01:11.701863: step: 2088/527, loss: 0.043305281549692154 2023-01-23 02:01:12.840579: step: 2092/527, loss: 0.0033678056206554174 2023-01-23 02:01:13.934709: step: 2096/527, loss: 0.03669004514813423 2023-01-23 02:01:15.054727: step: 2100/527, loss: 0.06149006262421608 2023-01-23 02:01:16.173903: step: 2104/527, loss: 0.02208099514245987 2023-01-23 02:01:17.299607: step: 2108/527, loss: 0.0014815330505371094 ================================================== Loss: 0.044 -------------------- Dev: {'event': {'p': 0.556390977443609, 'r': 0.7882822902796272, 'f1': 0.6523415977961432}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Test: {'event': {'p': 0.5934856175972927, 'r': 0.8017142857142857, 'f1': 0.6820612542537676}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Chinese: {'event': {'p': 0.4725274725274725, 'r': 0.7962962962962963, 'f1': 0.593103448275862}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Korean: {'event': {'p': 0.5633802816901409, 'r': 0.6349206349206349, 'f1': 0.5970149253731343}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Russian: {'event': {'p': 0.43478260869565216, 'r': 0.5555555555555556, 'f1': 0.4878048780487805}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Eng Test for Korean: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 15 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:01:59.565172: step: 4/527, loss: 0.22581176459789276 2023-01-23 02:02:00.654000: step: 8/527, loss: 0.02170557901263237 2023-01-23 02:02:01.761018: step: 12/527, loss: 0.007999849505722523 2023-01-23 02:02:02.889571: step: 16/527, loss: 0.22186845541000366 2023-01-23 02:02:04.030752: step: 20/527, loss: 0.016269873827695847 2023-01-23 02:02:05.126391: step: 24/527, loss: 0.011776065453886986 2023-01-23 02:02:06.259765: step: 28/527, loss: 0.02494525909423828 2023-01-23 02:02:07.379550: step: 32/527, loss: 0.0640285462141037 2023-01-23 02:02:08.509612: step: 36/527, loss: 0.04752330482006073 2023-01-23 02:02:09.630429: step: 40/527, loss: 0.153310164809227 2023-01-23 02:02:10.782475: step: 44/527, loss: 0.0013426780933514237 2023-01-23 02:02:11.917813: step: 48/527, loss: 0.027942562475800514 2023-01-23 02:02:13.032662: step: 52/527, loss: 0.030162621289491653 2023-01-23 02:02:14.148191: step: 56/527, loss: 0.04598961025476456 2023-01-23 02:02:15.275676: step: 60/527, loss: 0.035779476165771484 2023-01-23 02:02:16.370146: step: 64/527, loss: 0.028416920453310013 2023-01-23 02:02:17.474016: step: 68/527, loss: 0.23880743980407715 2023-01-23 02:02:18.559470: step: 72/527, loss: 0.04108009114861488 2023-01-23 02:02:19.669772: step: 76/527, loss: 0.001728153321892023 2023-01-23 02:02:20.773640: step: 80/527, loss: 0.011955452151596546 2023-01-23 02:02:21.918062: step: 84/527, loss: 0.03989720344543457 2023-01-23 02:02:23.053285: step: 88/527, loss: 0.4120454788208008 2023-01-23 02:02:24.158959: step: 92/527, loss: 0.006381893530488014 2023-01-23 02:02:25.257460: step: 96/527, loss: 0.010520553216338158 2023-01-23 02:02:26.379835: step: 100/527, loss: 0.05761122703552246 2023-01-23 02:02:27.481889: step: 104/527, loss: 0.029200460761785507 2023-01-23 02:02:28.577036: step: 108/527, loss: 0.01035852450877428 2023-01-23 02:02:29.675008: step: 112/527, loss: 0.006377363111823797 2023-01-23 02:02:30.777187: step: 116/527, loss: 0.31469297409057617 2023-01-23 02:02:31.890943: step: 120/527, loss: 0.15226240456104279 2023-01-23 02:02:33.016100: step: 124/527, loss: 0.031447507441043854 2023-01-23 02:02:34.156555: step: 128/527, loss: 0.015896225348114967 2023-01-23 02:02:35.288834: step: 132/527, loss: 0.00046215057955123484 2023-01-23 02:02:36.374213: step: 136/527, loss: 0.035303495824337006 2023-01-23 02:02:37.491848: step: 140/527, loss: 0.0394381508231163 2023-01-23 02:02:38.597393: step: 144/527, loss: 0.03318657726049423 2023-01-23 02:02:39.755767: step: 148/527, loss: 0.009058475494384766 2023-01-23 02:02:40.880484: step: 152/527, loss: 0.0007450580596923828 2023-01-23 02:02:41.990561: step: 156/527, loss: 0.0013151168823242188 2023-01-23 02:02:43.106141: step: 160/527, loss: 0.025045014917850494 2023-01-23 02:02:44.210448: step: 164/527, loss: 0.0030714033637195826 2023-01-23 02:02:45.316050: step: 168/527, loss: 0.0380069725215435 2023-01-23 02:02:46.423498: step: 172/527, loss: 0.017401695251464844 2023-01-23 02:02:47.555695: step: 176/527, loss: 0.0007852554554119706 2023-01-23 02:02:48.662903: step: 180/527, loss: 0.007587719243019819 2023-01-23 02:02:49.783211: step: 184/527, loss: 0.01526489295065403 2023-01-23 02:02:50.921018: step: 188/527, loss: 0.020406054332852364 2023-01-23 02:02:52.051755: step: 192/527, loss: 0.00583915738388896 2023-01-23 02:02:53.174132: step: 196/527, loss: 0.002165126847103238 2023-01-23 02:02:54.290626: step: 200/527, loss: 0.030858324840664864 2023-01-23 02:02:55.394389: step: 204/527, loss: 0.00779070844873786 2023-01-23 02:02:56.497977: step: 208/527, loss: 0.025826454162597656 2023-01-23 02:02:57.611556: step: 212/527, loss: 0.0029317857697606087 2023-01-23 02:02:58.742760: step: 216/527, loss: 0.01831207238137722 2023-01-23 02:02:59.829618: step: 220/527, loss: 0.0293342936784029 2023-01-23 02:03:00.965288: step: 224/527, loss: 0.008865833282470703 2023-01-23 02:03:02.063980: step: 228/527, loss: 0.0002617835998535156 2023-01-23 02:03:03.181938: step: 232/527, loss: 0.02391691319644451 2023-01-23 02:03:04.300961: step: 236/527, loss: 0.0868292823433876 2023-01-23 02:03:05.421545: step: 240/527, loss: 0.041964102536439896 2023-01-23 02:03:06.565209: step: 244/527, loss: 0.017502497881650925 2023-01-23 02:03:07.686451: step: 248/527, loss: 0.0001388549862895161 2023-01-23 02:03:08.808973: step: 252/527, loss: 0.02922849729657173 2023-01-23 02:03:09.921524: step: 256/527, loss: 0.0016942977672442794 2023-01-23 02:03:11.017005: step: 260/527, loss: 0.0062961578369140625 2023-01-23 02:03:12.132203: step: 264/527, loss: 0.0023731710389256477 2023-01-23 02:03:13.232439: step: 268/527, loss: 0.004225730895996094 2023-01-23 02:03:14.332878: step: 272/527, loss: 0.02932577021420002 2023-01-23 02:03:15.456059: step: 276/527, loss: 0.01079249382019043 2023-01-23 02:03:16.573073: step: 280/527, loss: 0.0006466865306720138 2023-01-23 02:03:17.704651: step: 284/527, loss: 0.013340759091079235 2023-01-23 02:03:18.839872: step: 288/527, loss: 0.046209193766117096 2023-01-23 02:03:19.946012: step: 292/527, loss: 0.0004779815790243447 2023-01-23 02:03:21.056436: step: 296/527, loss: 0.004534339997917414 2023-01-23 02:03:22.180031: step: 300/527, loss: 0.004288864322006702 2023-01-23 02:03:23.304917: step: 304/527, loss: 0.040387917309999466 2023-01-23 02:03:24.481720: step: 308/527, loss: 2.307891918462701e-05 2023-01-23 02:03:25.574986: step: 312/527, loss: 0.004644202999770641 2023-01-23 02:03:26.681108: step: 316/527, loss: 0.07197723537683487 2023-01-23 02:03:27.788212: step: 320/527, loss: 0.00131816859357059 2023-01-23 02:03:28.904678: step: 324/527, loss: 0.0008647919166833162 2023-01-23 02:03:30.032525: step: 328/527, loss: 0.0026178359985351562 2023-01-23 02:03:31.193240: step: 332/527, loss: 0.03224804252386093 2023-01-23 02:03:32.282553: step: 336/527, loss: 0.07398414611816406 2023-01-23 02:03:33.382675: step: 340/527, loss: 0.008480453863739967 2023-01-23 02:03:34.488994: step: 344/527, loss: 0.02752704545855522 2023-01-23 02:03:35.671749: step: 348/527, loss: 0.028559494763612747 2023-01-23 02:03:36.785410: step: 352/527, loss: 0.014121342450380325 2023-01-23 02:03:37.899731: step: 356/527, loss: 0.014368820935487747 2023-01-23 02:03:39.018523: step: 360/527, loss: 0.07667160034179688 2023-01-23 02:03:40.130656: step: 364/527, loss: 0.021698379889130592 2023-01-23 02:03:41.236904: step: 368/527, loss: 0.0033740997314453125 2023-01-23 02:03:42.328389: step: 372/527, loss: 0.0021216394379734993 2023-01-23 02:03:43.435925: step: 376/527, loss: 0.0009344101417809725 2023-01-23 02:03:44.595588: step: 380/527, loss: 0.08845214545726776 2023-01-23 02:03:45.730243: step: 384/527, loss: 0.06588821858167648 2023-01-23 02:03:46.845475: step: 388/527, loss: 0.0014370918506756425 2023-01-23 02:03:47.935608: step: 392/527, loss: 0.19271382689476013 2023-01-23 02:03:49.077459: step: 396/527, loss: 0.01190652884542942 2023-01-23 02:03:50.191493: step: 400/527, loss: 0.0011715888977050781 2023-01-23 02:03:51.300917: step: 404/527, loss: 0.0005519866826944053 2023-01-23 02:03:52.425990: step: 408/527, loss: 0.009817123413085938 2023-01-23 02:03:53.545758: step: 412/527, loss: 0.2533043920993805 2023-01-23 02:03:54.652888: step: 416/527, loss: 6.67572021484375e-05 2023-01-23 02:03:55.768801: step: 420/527, loss: 0.06502113491296768 2023-01-23 02:03:56.911553: step: 424/527, loss: 0.020995713770389557 2023-01-23 02:03:58.023185: step: 428/527, loss: 0.01771850697696209 2023-01-23 02:03:59.143736: step: 432/527, loss: 0.009616279974579811 2023-01-23 02:04:00.279149: step: 436/527, loss: 0.002470398088917136 2023-01-23 02:04:01.447041: step: 440/527, loss: 0.02363281324505806 2023-01-23 02:04:02.575705: step: 444/527, loss: 0.07266445457935333 2023-01-23 02:04:03.724739: step: 448/527, loss: 0.009250020608305931 2023-01-23 02:04:04.847214: step: 452/527, loss: 0.0042115929536521435 2023-01-23 02:04:05.990757: step: 456/527, loss: 0.0006821156130172312 2023-01-23 02:04:07.102640: step: 460/527, loss: 0.028563881292939186 2023-01-23 02:04:08.230751: step: 464/527, loss: 0.032236672937870026 2023-01-23 02:04:09.346750: step: 468/527, loss: 0.013486957177519798 2023-01-23 02:04:10.451338: step: 472/527, loss: 0.028739070519804955 2023-01-23 02:04:11.570379: step: 476/527, loss: 0.029852628707885742 2023-01-23 02:04:12.709513: step: 480/527, loss: 0.003395938780158758 2023-01-23 02:04:13.817785: step: 484/527, loss: 0.029524995014071465 2023-01-23 02:04:14.964547: step: 488/527, loss: 0.03154907375574112 2023-01-23 02:04:16.079709: step: 492/527, loss: 0.001433944795280695 2023-01-23 02:04:17.182054: step: 496/527, loss: 0.01949033886194229 2023-01-23 02:04:18.291331: step: 500/527, loss: 0.23448696732521057 2023-01-23 02:04:19.396254: step: 504/527, loss: 0.004198408219963312 2023-01-23 02:04:20.508377: step: 508/527, loss: 0.00396652240306139 2023-01-23 02:04:21.632207: step: 512/527, loss: 0.026943014934659004 2023-01-23 02:04:22.752146: step: 516/527, loss: 0.005537987221032381 2023-01-23 02:04:23.886966: step: 520/527, loss: 0.1422453075647354 2023-01-23 02:04:24.968363: step: 524/527, loss: 0.0011471748584881425 2023-01-23 02:04:26.089645: step: 528/527, loss: 0.01975860632956028 2023-01-23 02:04:27.185784: step: 532/527, loss: 0.014313125051558018 2023-01-23 02:04:28.298306: step: 536/527, loss: 0.03721790388226509 2023-01-23 02:04:29.407985: step: 540/527, loss: 0.025241805240511894 2023-01-23 02:04:30.514177: step: 544/527, loss: 0.006464958656579256 2023-01-23 02:04:31.617328: step: 548/527, loss: 0.003733062883839011 2023-01-23 02:04:32.741593: step: 552/527, loss: 0.010371970944106579 2023-01-23 02:04:33.837122: step: 556/527, loss: 0.04620685800909996 2023-01-23 02:04:34.962537: step: 560/527, loss: 0.2306041717529297 2023-01-23 02:04:36.089500: step: 564/527, loss: 0.005190277472138405 2023-01-23 02:04:37.210751: step: 568/527, loss: 0.010545730590820312 2023-01-23 02:04:38.341359: step: 572/527, loss: 0.027575969696044922 2023-01-23 02:04:39.458197: step: 576/527, loss: 0.014630794525146484 2023-01-23 02:04:40.559290: step: 580/527, loss: 0.004957115743309259 2023-01-23 02:04:41.674773: step: 584/527, loss: 0.008568287827074528 2023-01-23 02:04:42.802696: step: 588/527, loss: 0.0028623580001294613 2023-01-23 02:04:43.953694: step: 592/527, loss: 0.11433592438697815 2023-01-23 02:04:45.070102: step: 596/527, loss: 0.016902972012758255 2023-01-23 02:04:46.241720: step: 600/527, loss: 0.038194846361875534 2023-01-23 02:04:47.357666: step: 604/527, loss: 0.018544532358646393 2023-01-23 02:04:48.444751: step: 608/527, loss: 0.004984951112419367 2023-01-23 02:04:49.555209: step: 612/527, loss: 0.002406501676887274 2023-01-23 02:04:50.657482: step: 616/527, loss: 0.055287934839725494 2023-01-23 02:04:51.734795: step: 620/527, loss: 0.0013731956714764237 2023-01-23 02:04:52.847204: step: 624/527, loss: 0.004456761293113232 2023-01-23 02:04:53.976248: step: 628/527, loss: 0.08327207714319229 2023-01-23 02:04:55.099363: step: 632/527, loss: 0.049032118171453476 2023-01-23 02:04:56.230370: step: 636/527, loss: 0.02480325661599636 2023-01-23 02:04:57.333491: step: 640/527, loss: 0.03411727026104927 2023-01-23 02:04:58.424310: step: 644/527, loss: 0.0045403484255075455 2023-01-23 02:04:59.546928: step: 648/527, loss: 0.028308678418397903 2023-01-23 02:05:00.674851: step: 652/527, loss: 0.0013042927021160722 2023-01-23 02:05:01.777720: step: 656/527, loss: 0.0010877609020099044 2023-01-23 02:05:02.890373: step: 660/527, loss: 0.00903244037181139 2023-01-23 02:05:04.026843: step: 664/527, loss: 0.3747520446777344 2023-01-23 02:05:05.156087: step: 668/527, loss: 0.09205179661512375 2023-01-23 02:05:06.284410: step: 672/527, loss: 0.00017471313185524195 2023-01-23 02:05:07.400550: step: 676/527, loss: 0.04350528493523598 2023-01-23 02:05:08.503144: step: 680/527, loss: 0.006512451451271772 2023-01-23 02:05:09.631322: step: 684/527, loss: 0.002341025974601507 2023-01-23 02:05:10.735021: step: 688/527, loss: 0.019007539376616478 2023-01-23 02:05:11.818301: step: 692/527, loss: 0.05936779826879501 2023-01-23 02:05:12.964227: step: 696/527, loss: 0.002118778182193637 2023-01-23 02:05:14.108738: step: 700/527, loss: 0.04339752346277237 2023-01-23 02:05:15.221286: step: 704/527, loss: 0.011522864922881126 2023-01-23 02:05:16.393121: step: 708/527, loss: 0.09353247284889221 2023-01-23 02:05:17.469126: step: 712/527, loss: 0.0065042972564697266 2023-01-23 02:05:18.607697: step: 716/527, loss: 0.09559492766857147 2023-01-23 02:05:19.742735: step: 720/527, loss: 0.009997940622270107 2023-01-23 02:05:20.872938: step: 724/527, loss: 0.005124092102050781 2023-01-23 02:05:22.024973: step: 728/527, loss: 0.0015773654449731112 2023-01-23 02:05:23.120241: step: 732/527, loss: 0.0029996871016919613 2023-01-23 02:05:24.243834: step: 736/527, loss: 0.037795402109622955 2023-01-23 02:05:25.330482: step: 740/527, loss: 0.01293411199003458 2023-01-23 02:05:26.463102: step: 744/527, loss: 0.010936308652162552 2023-01-23 02:05:27.572534: step: 748/527, loss: 0.05385913699865341 2023-01-23 02:05:28.710750: step: 752/527, loss: 0.04314403608441353 2023-01-23 02:05:29.803876: step: 756/527, loss: 0.045433998107910156 2023-01-23 02:05:30.902284: step: 760/527, loss: 0.0537446066737175 2023-01-23 02:05:32.022231: step: 764/527, loss: 0.03394460678100586 2023-01-23 02:05:33.144697: step: 768/527, loss: 0.005544853396713734 2023-01-23 02:05:34.251412: step: 772/527, loss: 0.0033130645751953125 2023-01-23 02:05:35.372078: step: 776/527, loss: 0.04084720462560654 2023-01-23 02:05:36.480067: step: 780/527, loss: 0.04300842434167862 2023-01-23 02:05:37.626794: step: 784/527, loss: 0.010472392663359642 2023-01-23 02:05:38.774961: step: 788/527, loss: 0.004683780949562788 2023-01-23 02:05:39.890112: step: 792/527, loss: 0.02362346649169922 2023-01-23 02:05:41.015430: step: 796/527, loss: 0.019580459222197533 2023-01-23 02:05:42.098356: step: 800/527, loss: 0.0004859924374613911 2023-01-23 02:05:43.206223: step: 804/527, loss: 0.008272933773696423 2023-01-23 02:05:44.286379: step: 808/527, loss: 0.00017070770263671875 2023-01-23 02:05:45.428595: step: 812/527, loss: 0.03326749801635742 2023-01-23 02:05:46.544118: step: 816/527, loss: 0.07345166057348251 2023-01-23 02:05:47.664418: step: 820/527, loss: 0.012792587280273438 2023-01-23 02:05:48.791356: step: 824/527, loss: 0.0047397613525390625 2023-01-23 02:05:49.883509: step: 828/527, loss: 0.1512743979692459 2023-01-23 02:05:51.023236: step: 832/527, loss: 0.06097078323364258 2023-01-23 02:05:52.123431: step: 836/527, loss: 0.00021858215040992945 2023-01-23 02:05:53.230398: step: 840/527, loss: 0.0123291015625 2023-01-23 02:05:54.333798: step: 844/527, loss: 0.02930602990090847 2023-01-23 02:05:55.454191: step: 848/527, loss: 0.09752263873815536 2023-01-23 02:05:56.605935: step: 852/527, loss: 0.002418327145278454 2023-01-23 02:05:57.733838: step: 856/527, loss: 0.020143888890743256 2023-01-23 02:05:58.813370: step: 860/527, loss: 0.004431247711181641 2023-01-23 02:05:59.930130: step: 864/527, loss: 0.0016469955444335938 2023-01-23 02:06:01.031827: step: 868/527, loss: 0.08239050209522247 2023-01-23 02:06:02.178849: step: 872/527, loss: 0.05172882229089737 2023-01-23 02:06:03.296155: step: 876/527, loss: 0.030521774664521217 2023-01-23 02:06:04.423217: step: 880/527, loss: 0.11122265458106995 2023-01-23 02:06:05.553853: step: 884/527, loss: 0.0822354331612587 2023-01-23 02:06:06.678713: step: 888/527, loss: 0.029909705743193626 2023-01-23 02:06:07.778780: step: 892/527, loss: 0.0001676559477346018 2023-01-23 02:06:08.916293: step: 896/527, loss: 0.00345611572265625 2023-01-23 02:06:10.035646: step: 900/527, loss: 0.025514651089906693 2023-01-23 02:06:11.160632: step: 904/527, loss: 0.3412491977214813 2023-01-23 02:06:12.281816: step: 908/527, loss: 0.002812004182487726 2023-01-23 02:06:13.414020: step: 912/527, loss: 0.00018939972505904734 2023-01-23 02:06:14.537378: step: 916/527, loss: 0.006989955902099609 2023-01-23 02:06:15.646220: step: 920/527, loss: 0.038678836077451706 2023-01-23 02:06:16.737423: step: 924/527, loss: 0.0037471773102879524 2023-01-23 02:06:17.874640: step: 928/527, loss: 0.011445618234574795 2023-01-23 02:06:18.984064: step: 932/527, loss: 0.018006229773163795 2023-01-23 02:06:20.115786: step: 936/527, loss: 0.013768625445663929 2023-01-23 02:06:21.242303: step: 940/527, loss: 0.00183448800817132 2023-01-23 02:06:22.346851: step: 944/527, loss: 0.011353719048202038 2023-01-23 02:06:23.467984: step: 948/527, loss: 0.01029129046946764 2023-01-23 02:06:24.605543: step: 952/527, loss: 0.3560040593147278 2023-01-23 02:06:25.737035: step: 956/527, loss: 0.008546257391571999 2023-01-23 02:06:26.837009: step: 960/527, loss: 0.0031757354736328125 2023-01-23 02:06:27.933167: step: 964/527, loss: 0.003930854611098766 2023-01-23 02:06:29.046794: step: 968/527, loss: 0.008129405789077282 2023-01-23 02:06:30.156057: step: 972/527, loss: 0.06368846446275711 2023-01-23 02:06:31.264964: step: 976/527, loss: 0.021250534802675247 2023-01-23 02:06:32.385661: step: 980/527, loss: 0.10567241162061691 2023-01-23 02:06:33.494458: step: 984/527, loss: 0.04129352420568466 2023-01-23 02:06:34.602362: step: 988/527, loss: 0.04807844012975693 2023-01-23 02:06:35.708216: step: 992/527, loss: 0.030477095395326614 2023-01-23 02:06:36.822462: step: 996/527, loss: 0.005741024389863014 2023-01-23 02:06:37.927077: step: 1000/527, loss: 0.0018512726528570056 2023-01-23 02:06:39.019884: step: 1004/527, loss: 0.037592507898807526 2023-01-23 02:06:40.155485: step: 1008/527, loss: 0.013914298266172409 2023-01-23 02:06:41.313116: step: 1012/527, loss: 0.06752672046422958 2023-01-23 02:06:42.417717: step: 1016/527, loss: 0.007949447259306908 2023-01-23 02:06:43.543175: step: 1020/527, loss: 0.003915214445441961 2023-01-23 02:06:44.631116: step: 1024/527, loss: 0.01251897867769003 2023-01-23 02:06:45.740257: step: 1028/527, loss: 0.005850601010024548 2023-01-23 02:06:46.863150: step: 1032/527, loss: 0.0817023292183876 2023-01-23 02:06:47.949072: step: 1036/527, loss: 0.008228874765336514 2023-01-23 02:06:49.051265: step: 1040/527, loss: 0.009776497259736061 2023-01-23 02:06:50.180642: step: 1044/527, loss: 0.0006307124858722091 2023-01-23 02:06:51.308719: step: 1048/527, loss: 0.00175647740252316 2023-01-23 02:06:52.444641: step: 1052/527, loss: 0.017972279340028763 2023-01-23 02:06:53.564527: step: 1056/527, loss: 0.008592414669692516 2023-01-23 02:06:54.669749: step: 1060/527, loss: 0.03538760915398598 2023-01-23 02:06:55.790437: step: 1064/527, loss: 0.004697036929428577 2023-01-23 02:06:56.924421: step: 1068/527, loss: 0.13068370521068573 2023-01-23 02:06:58.067065: step: 1072/527, loss: 0.0009929656516760588 2023-01-23 02:06:59.186670: step: 1076/527, loss: 0.0016904830699786544 2023-01-23 02:07:00.299661: step: 1080/527, loss: 0.009094525128602982 2023-01-23 02:07:01.400934: step: 1084/527, loss: 0.02324857749044895 2023-01-23 02:07:02.515871: step: 1088/527, loss: 0.035799790173769 2023-01-23 02:07:03.621865: step: 1092/527, loss: 0.07102776318788528 2023-01-23 02:07:04.742315: step: 1096/527, loss: 0.005430793855339289 2023-01-23 02:07:05.851245: step: 1100/527, loss: 0.0266552921384573 2023-01-23 02:07:06.953732: step: 1104/527, loss: 0.006526744458824396 2023-01-23 02:07:08.062384: step: 1108/527, loss: 0.019536161795258522 2023-01-23 02:07:09.180608: step: 1112/527, loss: 0.035013582557439804 2023-01-23 02:07:10.304766: step: 1116/527, loss: 0.011952400207519531 2023-01-23 02:07:11.441878: step: 1120/527, loss: 0.02453918568789959 2023-01-23 02:07:12.569864: step: 1124/527, loss: 0.013444328680634499 2023-01-23 02:07:13.707935: step: 1128/527, loss: 0.0037784576416015625 2023-01-23 02:07:14.830220: step: 1132/527, loss: 0.06779623031616211 2023-01-23 02:07:15.930316: step: 1136/527, loss: 0.005590343382209539 2023-01-23 02:07:17.020503: step: 1140/527, loss: 0.051160432398319244 2023-01-23 02:07:18.150358: step: 1144/527, loss: 0.02247796021401882 2023-01-23 02:07:19.260369: step: 1148/527, loss: 0.003013419918715954 2023-01-23 02:07:20.372950: step: 1152/527, loss: 0.00958948116749525 2023-01-23 02:07:21.471615: step: 1156/527, loss: 0.01073455810546875 2023-01-23 02:07:22.552391: step: 1160/527, loss: 0.0077984812669456005 2023-01-23 02:07:23.631714: step: 1164/527, loss: 0.005529975984245539 2023-01-23 02:07:24.767432: step: 1168/527, loss: 0.003546428633853793 2023-01-23 02:07:25.872955: step: 1172/527, loss: 0.014021635986864567 2023-01-23 02:07:26.993805: step: 1176/527, loss: 0.014738941565155983 2023-01-23 02:07:28.141006: step: 1180/527, loss: 0.009393453598022461 2023-01-23 02:07:29.283134: step: 1184/527, loss: 0.0003514289855957031 2023-01-23 02:07:30.399617: step: 1188/527, loss: 0.0018528937362134457 2023-01-23 02:07:31.510652: step: 1192/527, loss: 0.008082438260316849 2023-01-23 02:07:32.587101: step: 1196/527, loss: 0.0009001732105389237 2023-01-23 02:07:33.692119: step: 1200/527, loss: 0.004994392395019531 2023-01-23 02:07:34.817674: step: 1204/527, loss: 0.08577661216259003 2023-01-23 02:07:35.963684: step: 1208/527, loss: 0.012539064511656761 2023-01-23 02:07:37.084159: step: 1212/527, loss: 0.023921776562929153 2023-01-23 02:07:38.187943: step: 1216/527, loss: 0.04786815866827965 2023-01-23 02:07:39.321061: step: 1220/527, loss: 0.02133164368569851 2023-01-23 02:07:40.449060: step: 1224/527, loss: 0.028213834390044212 2023-01-23 02:07:41.540539: step: 1228/527, loss: 0.041399385780096054 2023-01-23 02:07:42.644919: step: 1232/527, loss: 0.023358918726444244 2023-01-23 02:07:43.759236: step: 1236/527, loss: 0.02531290054321289 2023-01-23 02:07:44.858332: step: 1240/527, loss: 0.06339474022388458 2023-01-23 02:07:45.964634: step: 1244/527, loss: 3.576399087905884 2023-01-23 02:07:47.102082: step: 1248/527, loss: 0.004817867651581764 2023-01-23 02:07:48.244568: step: 1252/527, loss: 0.08507785946130753 2023-01-23 02:07:49.351713: step: 1256/527, loss: 0.015553951263427734 2023-01-23 02:07:50.452741: step: 1260/527, loss: 0.0004887580871582031 2023-01-23 02:07:51.570230: step: 1264/527, loss: 0.0008686066139489412 2023-01-23 02:07:52.664454: step: 1268/527, loss: 0.022221755236387253 2023-01-23 02:07:53.765729: step: 1272/527, loss: 0.0034784318413585424 2023-01-23 02:07:54.860110: step: 1276/527, loss: 0.0016757011180743575 2023-01-23 02:07:55.987071: step: 1280/527, loss: 0.025454092770814896 2023-01-23 02:07:57.102082: step: 1284/527, loss: 0.003937912173569202 2023-01-23 02:07:58.215532: step: 1288/527, loss: 0.011016273871064186 2023-01-23 02:07:59.336415: step: 1292/527, loss: 0.02684783935546875 2023-01-23 02:08:00.446813: step: 1296/527, loss: 0.0024065019097179174 2023-01-23 02:08:01.554384: step: 1300/527, loss: 0.03681793436408043 2023-01-23 02:08:02.659616: step: 1304/527, loss: 0.017932891845703125 2023-01-23 02:08:03.778690: step: 1308/527, loss: 0.0014888762962073088 2023-01-23 02:08:04.894476: step: 1312/527, loss: 0.02482910081744194 2023-01-23 02:08:06.003145: step: 1316/527, loss: 0.020549679175019264 2023-01-23 02:08:07.099856: step: 1320/527, loss: 0.023158837109804153 2023-01-23 02:08:08.228363: step: 1324/527, loss: 0.060376547276973724 2023-01-23 02:08:09.366454: step: 1328/527, loss: 0.00930476188659668 2023-01-23 02:08:10.462502: step: 1332/527, loss: 0.021881485357880592 2023-01-23 02:08:11.590081: step: 1336/527, loss: 0.009923458099365234 2023-01-23 02:08:12.703712: step: 1340/527, loss: 0.005238723941147327 2023-01-23 02:08:13.821212: step: 1344/527, loss: 0.6213347315788269 2023-01-23 02:08:14.940589: step: 1348/527, loss: 0.028046799823641777 2023-01-23 02:08:16.044294: step: 1352/527, loss: 0.00892176665365696 2023-01-23 02:08:17.159446: step: 1356/527, loss: 0.170710027217865 2023-01-23 02:08:18.294374: step: 1360/527, loss: 0.025066375732421875 2023-01-23 02:08:19.408381: step: 1364/527, loss: 0.0086218835785985 2023-01-23 02:08:20.530844: step: 1368/527, loss: 0.09632397443056107 2023-01-23 02:08:21.658718: step: 1372/527, loss: 0.05324440076947212 2023-01-23 02:08:22.771226: step: 1376/527, loss: 0.0177596565335989 2023-01-23 02:08:23.872469: step: 1380/527, loss: 0.006068420596420765 2023-01-23 02:08:24.991648: step: 1384/527, loss: 0.042371559888124466 2023-01-23 02:08:26.108340: step: 1388/527, loss: 0.0009443283197470009 2023-01-23 02:08:27.224104: step: 1392/527, loss: 0.00021743775869254023 2023-01-23 02:08:28.356672: step: 1396/527, loss: 0.0001392364501953125 2023-01-23 02:08:29.476744: step: 1400/527, loss: 0.019303608685731888 2023-01-23 02:08:30.575935: step: 1404/527, loss: 0.00343151087872684 2023-01-23 02:08:31.730825: step: 1408/527, loss: 0.028829192742705345 2023-01-23 02:08:32.842827: step: 1412/527, loss: 0.01144785899668932 2023-01-23 02:08:33.944029: step: 1416/527, loss: 0.015566635876893997 2023-01-23 02:08:35.105801: step: 1420/527, loss: 0.0021302225068211555 2023-01-23 02:08:36.239970: step: 1424/527, loss: 0.0002058029203908518 2023-01-23 02:08:37.354965: step: 1428/527, loss: 0.0196699146181345 2023-01-23 02:08:38.496686: step: 1432/527, loss: 0.02204723283648491 2023-01-23 02:08:39.597987: step: 1436/527, loss: 0.003985119052231312 2023-01-23 02:08:40.735455: step: 1440/527, loss: 0.08211441338062286 2023-01-23 02:08:41.835082: step: 1444/527, loss: 0.026621095836162567 2023-01-23 02:08:42.976775: step: 1448/527, loss: 0.11128316074609756 2023-01-23 02:08:44.082181: step: 1452/527, loss: 0.023581314831972122 2023-01-23 02:08:45.204427: step: 1456/527, loss: 0.0001388549862895161 2023-01-23 02:08:46.322791: step: 1460/527, loss: 0.00271263113245368 2023-01-23 02:08:47.430325: step: 1464/527, loss: 0.04239244386553764 2023-01-23 02:08:48.554391: step: 1468/527, loss: 0.02157735824584961 2023-01-23 02:08:49.661763: step: 1472/527, loss: 0.0339265838265419 2023-01-23 02:08:50.783644: step: 1476/527, loss: 0.0025482177734375 2023-01-23 02:08:51.863916: step: 1480/527, loss: 0.003231144044548273 2023-01-23 02:08:52.990321: step: 1484/527, loss: 0.026413630694150925 2023-01-23 02:08:54.100038: step: 1488/527, loss: 0.013781165704131126 2023-01-23 02:08:55.213933: step: 1492/527, loss: 0.00566444406285882 2023-01-23 02:08:56.354507: step: 1496/527, loss: 0.007185888476669788 2023-01-23 02:08:57.468751: step: 1500/527, loss: 0.050115205347537994 2023-01-23 02:08:58.586107: step: 1504/527, loss: 0.021830463781952858 2023-01-23 02:08:59.701389: step: 1508/527, loss: 0.014794539660215378 2023-01-23 02:09:00.805053: step: 1512/527, loss: 0.0015491485828533769 2023-01-23 02:09:01.963651: step: 1516/527, loss: 0.04486217349767685 2023-01-23 02:09:03.066858: step: 1520/527, loss: 0.04860267788171768 2023-01-23 02:09:04.188719: step: 1524/527, loss: 0.04024339094758034 2023-01-23 02:09:05.326077: step: 1528/527, loss: 0.00019292831711936742 2023-01-23 02:09:06.426411: step: 1532/527, loss: 0.011592579074203968 2023-01-23 02:09:07.537430: step: 1536/527, loss: 0.006886291783303022 2023-01-23 02:09:08.651920: step: 1540/527, loss: 0.0837029442191124 2023-01-23 02:09:09.768186: step: 1544/527, loss: 0.05651798099279404 2023-01-23 02:09:10.872368: step: 1548/527, loss: 0.019631672650575638 2023-01-23 02:09:11.989000: step: 1552/527, loss: 0.021854113787412643 2023-01-23 02:09:13.092079: step: 1556/527, loss: 0.018078995868563652 2023-01-23 02:09:14.198309: step: 1560/527, loss: 0.016287613660097122 2023-01-23 02:09:15.320105: step: 1564/527, loss: 0.00038185121957212687 2023-01-23 02:09:16.468754: step: 1568/527, loss: 0.004917717073112726 2023-01-23 02:09:17.576627: step: 1572/527, loss: 0.028669262304902077 2023-01-23 02:09:18.685888: step: 1576/527, loss: 0.019704438745975494 2023-01-23 02:09:19.815659: step: 1580/527, loss: 0.007667827419936657 2023-01-23 02:09:20.925476: step: 1584/527, loss: 0.02697582356631756 2023-01-23 02:09:22.034337: step: 1588/527, loss: 0.02042541466653347 2023-01-23 02:09:23.123706: step: 1592/527, loss: 0.030007362365722656 2023-01-23 02:09:24.233334: step: 1596/527, loss: 0.001234936760738492 2023-01-23 02:09:25.363160: step: 1600/527, loss: 0.021441077813506126 2023-01-23 02:09:26.498063: step: 1604/527, loss: 0.05410599708557129 2023-01-23 02:09:27.631423: step: 1608/527, loss: 0.007120895199477673 2023-01-23 02:09:28.753017: step: 1612/527, loss: 0.014181185513734818 2023-01-23 02:09:29.892255: step: 1616/527, loss: 0.016245556995272636 2023-01-23 02:09:31.030413: step: 1620/527, loss: 0.26022282242774963 2023-01-23 02:09:32.186670: step: 1624/527, loss: 0.015086937695741653 2023-01-23 02:09:33.320993: step: 1628/527, loss: 0.04329581931233406 2023-01-23 02:09:34.438665: step: 1632/527, loss: 0.0022755623795092106 2023-01-23 02:09:35.565836: step: 1636/527, loss: 0.0029495239723473787 2023-01-23 02:09:36.696751: step: 1640/527, loss: 0.16014838218688965 2023-01-23 02:09:37.822351: step: 1644/527, loss: 0.10472507774829865 2023-01-23 02:09:38.921144: step: 1648/527, loss: 0.004257449880242348 2023-01-23 02:09:40.022169: step: 1652/527, loss: 0.009605741128325462 2023-01-23 02:09:41.146152: step: 1656/527, loss: 0.014307642355561256 2023-01-23 02:09:42.256550: step: 1660/527, loss: 0.0010386466747149825 2023-01-23 02:09:43.368481: step: 1664/527, loss: 0.012788581661880016 2023-01-23 02:09:44.493854: step: 1668/527, loss: 0.004460525698959827 2023-01-23 02:09:45.607388: step: 1672/527, loss: 0.06954727321863174 2023-01-23 02:09:46.730577: step: 1676/527, loss: 0.0713268294930458 2023-01-23 02:09:47.841968: step: 1680/527, loss: 0.2839442491531372 2023-01-23 02:09:48.945908: step: 1684/527, loss: 0.004411220550537109 2023-01-23 02:09:50.066254: step: 1688/527, loss: 0.016005326062440872 2023-01-23 02:09:51.147363: step: 1692/527, loss: 0.007894039154052734 2023-01-23 02:09:52.260323: step: 1696/527, loss: 0.004533099941909313 2023-01-23 02:09:53.362181: step: 1700/527, loss: 0.004893636330962181 2023-01-23 02:09:54.456707: step: 1704/527, loss: 0.001990080112591386 2023-01-23 02:09:55.584746: step: 1708/527, loss: 0.0029388905968517065 2023-01-23 02:09:56.689432: step: 1712/527, loss: 0.000920200371183455 2023-01-23 02:09:57.789527: step: 1716/527, loss: 0.11655457317829132 2023-01-23 02:09:58.900915: step: 1720/527, loss: 0.0239060390740633 2023-01-23 02:10:00.032832: step: 1724/527, loss: 0.0016626358265057206 2023-01-23 02:10:01.142676: step: 1728/527, loss: 0.012367535382509232 2023-01-23 02:10:02.259640: step: 1732/527, loss: 0.00020959675021003932 2023-01-23 02:10:03.377746: step: 1736/527, loss: 0.002056598663330078 2023-01-23 02:10:04.499607: step: 1740/527, loss: 0.14610633254051208 2023-01-23 02:10:05.605203: step: 1744/527, loss: 0.06456060707569122 2023-01-23 02:10:06.735020: step: 1748/527, loss: 0.08133984357118607 2023-01-23 02:10:07.836169: step: 1752/527, loss: 0.08122367411851883 2023-01-23 02:10:08.989826: step: 1756/527, loss: 0.0542144775390625 2023-01-23 02:10:10.094556: step: 1760/527, loss: 0.0044806962832808495 2023-01-23 02:10:11.218894: step: 1764/527, loss: 0.14074520766735077 2023-01-23 02:10:12.337692: step: 1768/527, loss: 0.03765411674976349 2023-01-23 02:10:13.449958: step: 1772/527, loss: 0.006587505340576172 2023-01-23 02:10:14.560569: step: 1776/527, loss: 0.0027670860290527344 2023-01-23 02:10:15.675215: step: 1780/527, loss: 0.003584956983104348 2023-01-23 02:10:16.800348: step: 1784/527, loss: 0.010698128491640091 2023-01-23 02:10:17.979850: step: 1788/527, loss: 0.03421001508831978 2023-01-23 02:10:19.107460: step: 1792/527, loss: 0.009135819040238857 2023-01-23 02:10:20.234618: step: 1796/527, loss: 0.03324737399816513 2023-01-23 02:10:21.342510: step: 1800/527, loss: 0.0672907829284668 2023-01-23 02:10:22.443425: step: 1804/527, loss: 0.12344705313444138 2023-01-23 02:10:23.564063: step: 1808/527, loss: 0.09816322475671768 2023-01-23 02:10:24.657837: step: 1812/527, loss: 0.0033520699944347143 2023-01-23 02:10:25.772576: step: 1816/527, loss: 0.0016025543445721269 2023-01-23 02:10:26.917962: step: 1820/527, loss: 0.024089908227324486 2023-01-23 02:10:28.032338: step: 1824/527, loss: 0.00564842252060771 2023-01-23 02:10:29.114401: step: 1828/527, loss: 0.03233394771814346 2023-01-23 02:10:30.228630: step: 1832/527, loss: 0.019582130014896393 2023-01-23 02:10:31.342843: step: 1836/527, loss: 0.0007551193702965975 2023-01-23 02:10:32.466351: step: 1840/527, loss: 0.04893064498901367 2023-01-23 02:10:33.559566: step: 1844/527, loss: 0.08800573647022247 2023-01-23 02:10:34.678923: step: 1848/527, loss: 0.03629150241613388 2023-01-23 02:10:35.788585: step: 1852/527, loss: 0.005641841795295477 2023-01-23 02:10:36.907575: step: 1856/527, loss: 0.022507095709443092 2023-01-23 02:10:38.054635: step: 1860/527, loss: 0.6415468454360962 2023-01-23 02:10:39.185453: step: 1864/527, loss: 0.05641660839319229 2023-01-23 02:10:40.285582: step: 1868/527, loss: 0.08890552818775177 2023-01-23 02:10:41.397783: step: 1872/527, loss: 0.0191650390625 2023-01-23 02:10:42.519167: step: 1876/527, loss: 0.1533823013305664 2023-01-23 02:10:43.653624: step: 1880/527, loss: 0.011720657348632812 2023-01-23 02:10:44.750659: step: 1884/527, loss: 0.02922229655086994 2023-01-23 02:10:45.857598: step: 1888/527, loss: 0.0018682957161217928 2023-01-23 02:10:46.980006: step: 1892/527, loss: 0.08601666241884232 2023-01-23 02:10:48.128790: step: 1896/527, loss: 0.026267433539032936 2023-01-23 02:10:49.252591: step: 1900/527, loss: 0.0032966614235192537 2023-01-23 02:10:50.359045: step: 1904/527, loss: 0.0005238056182861328 2023-01-23 02:10:51.436820: step: 1908/527, loss: 0.0026113989297300577 2023-01-23 02:10:52.542609: step: 1912/527, loss: 0.006095409393310547 2023-01-23 02:10:53.654522: step: 1916/527, loss: 0.001840448472648859 2023-01-23 02:10:54.813283: step: 1920/527, loss: 0.0058341980911791325 2023-01-23 02:10:55.955159: step: 1924/527, loss: 0.24067649245262146 2023-01-23 02:10:57.077915: step: 1928/527, loss: 0.05374440923333168 2023-01-23 02:10:58.214231: step: 1932/527, loss: 0.008095169439911842 2023-01-23 02:10:59.317356: step: 1936/527, loss: 0.027173995971679688 2023-01-23 02:11:00.415459: step: 1940/527, loss: 0.009208775125443935 2023-01-23 02:11:01.525991: step: 1944/527, loss: 0.07383685559034348 2023-01-23 02:11:02.625571: step: 1948/527, loss: 0.0012336254585534334 2023-01-23 02:11:03.708393: step: 1952/527, loss: 0.013745879754424095 2023-01-23 02:11:04.821549: step: 1956/527, loss: 0.0004837036249227822 2023-01-23 02:11:05.941929: step: 1960/527, loss: 0.003995132632553577 2023-01-23 02:11:07.064101: step: 1964/527, loss: 0.0010470390552654862 2023-01-23 02:11:08.183347: step: 1968/527, loss: 0.0015604018699377775 2023-01-23 02:11:09.300600: step: 1972/527, loss: 0.027089644223451614 2023-01-23 02:11:10.414356: step: 1976/527, loss: 0.03243446722626686 2023-01-23 02:11:11.563809: step: 1980/527, loss: 0.028348732739686966 2023-01-23 02:11:12.646133: step: 1984/527, loss: 0.006929552648216486 2023-01-23 02:11:13.772839: step: 1988/527, loss: 0.00692596472799778 2023-01-23 02:11:14.875047: step: 1992/527, loss: 0.02545328438282013 2023-01-23 02:11:15.992962: step: 1996/527, loss: 0.007973099127411842 2023-01-23 02:11:17.135138: step: 2000/527, loss: 0.009815883822739124 2023-01-23 02:11:18.266583: step: 2004/527, loss: 0.01253671757876873 2023-01-23 02:11:19.406381: step: 2008/527, loss: 0.008331346325576305 2023-01-23 02:11:20.571118: step: 2012/527, loss: 0.10944052040576935 2023-01-23 02:11:21.667668: step: 2016/527, loss: 0.15852084755897522 2023-01-23 02:11:22.781170: step: 2020/527, loss: 0.000957489013671875 2023-01-23 02:11:23.935389: step: 2024/527, loss: 0.03190651163458824 2023-01-23 02:11:25.092297: step: 2028/527, loss: 0.021916961297392845 2023-01-23 02:11:26.173581: step: 2032/527, loss: 0.14196662604808807 2023-01-23 02:11:27.316689: step: 2036/527, loss: 0.0024091722443699837 2023-01-23 02:11:28.462947: step: 2040/527, loss: 0.07789897918701172 2023-01-23 02:11:29.567294: step: 2044/527, loss: 0.048813819885253906 2023-01-23 02:11:30.683421: step: 2048/527, loss: 0.02327709272503853 2023-01-23 02:11:31.786459: step: 2052/527, loss: 0.018462779000401497 2023-01-23 02:11:32.933256: step: 2056/527, loss: 0.09607505798339844 2023-01-23 02:11:34.053275: step: 2060/527, loss: 0.027627278119325638 2023-01-23 02:11:35.168250: step: 2064/527, loss: 0.002675390336662531 2023-01-23 02:11:36.271792: step: 2068/527, loss: 0.0028102875221520662 2023-01-23 02:11:37.383259: step: 2072/527, loss: 0.0029817582108080387 2023-01-23 02:11:38.505747: step: 2076/527, loss: 0.03514309227466583 2023-01-23 02:11:39.617373: step: 2080/527, loss: 0.02349252812564373 2023-01-23 02:11:40.724218: step: 2084/527, loss: 0.04308328405022621 2023-01-23 02:11:41.842183: step: 2088/527, loss: 0.3347829282283783 2023-01-23 02:11:42.947174: step: 2092/527, loss: 0.03971891477704048 2023-01-23 02:11:44.040216: step: 2096/527, loss: 0.03657674789428711 2023-01-23 02:11:45.134083: step: 2100/527, loss: 0.012680244632065296 2023-01-23 02:11:46.270570: step: 2104/527, loss: 0.007027816958725452 2023-01-23 02:11:47.399187: step: 2108/527, loss: 0.12496252357959747 ================================================== Loss: 0.043 -------------------- Dev: {'event': {'p': 0.579, 'r': 0.7709720372836218, 'f1': 0.6613363792118789}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Test: {'event': {'p': 0.6074235807860262, 'r': 0.7948571428571428, 'f1': 0.6886138613861386}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Chinese: {'event': {'p': 0.5348837209302325, 'r': 0.8518518518518519, 'f1': 0.6571428571428571}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Korean: {'event': {'p': 0.5606060606060606, 'r': 0.5873015873015873, 'f1': 0.5736434108527131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Russian: {'event': {'p': 0.4146341463414634, 'r': 0.4722222222222222, 'f1': 0.4415584415584415}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6232044198895028, 'r': 0.7509986684420772, 'f1': 0.6811594202898552}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Eng Test for Korean: {'event': {'p': 0.614123006833713, 'r': 0.7702857142857142, 'f1': 0.6833967046894803}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Korean: {'event': {'p': 0.6808510638297872, 'r': 0.5079365079365079, 'f1': 0.5818181818181817}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 16 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:12:29.149021: step: 4/527, loss: 0.01796550862491131 2023-01-23 02:12:30.287000: step: 8/527, loss: 0.003760433290153742 2023-01-23 02:12:31.398125: step: 12/527, loss: 0.0010778427822515368 2023-01-23 02:12:32.490073: step: 16/527, loss: 0.00014400482177734375 2023-01-23 02:12:33.629520: step: 20/527, loss: 0.02779836766421795 2023-01-23 02:12:34.781679: step: 24/527, loss: 0.02455463446676731 2023-01-23 02:12:35.902188: step: 28/527, loss: 0.0013832091353833675 2023-01-23 02:12:37.006821: step: 32/527, loss: 0.0024348259903490543 2023-01-23 02:12:38.127162: step: 36/527, loss: 0.03339195251464844 2023-01-23 02:12:39.265797: step: 40/527, loss: 0.09266510605812073 2023-01-23 02:12:40.377237: step: 44/527, loss: 0.002629089169204235 2023-01-23 02:12:41.488170: step: 48/527, loss: 0.0015424728626385331 2023-01-23 02:12:42.596044: step: 52/527, loss: 0.0314350388944149 2023-01-23 02:12:43.699875: step: 56/527, loss: 0.029792023822665215 2023-01-23 02:12:44.798906: step: 60/527, loss: 0.03655251860618591 2023-01-23 02:12:45.928744: step: 64/527, loss: 0.018351269885897636 2023-01-23 02:12:47.011757: step: 68/527, loss: 0.018922902643680573 2023-01-23 02:12:48.126132: step: 72/527, loss: 0.010276054963469505 2023-01-23 02:12:49.294068: step: 76/527, loss: 0.010428142733871937 2023-01-23 02:12:50.409056: step: 80/527, loss: 0.0011554717784747481 2023-01-23 02:12:51.507845: step: 84/527, loss: 0.061025045812129974 2023-01-23 02:12:52.597297: step: 88/527, loss: 0.014474892988801003 2023-01-23 02:12:53.717917: step: 92/527, loss: 7.530748553108424e-05 2023-01-23 02:12:54.832343: step: 96/527, loss: 0.014089679345488548 2023-01-23 02:12:55.958553: step: 100/527, loss: 0.032923128455877304 2023-01-23 02:12:57.078160: step: 104/527, loss: 0.004480648320168257 2023-01-23 02:12:58.179961: step: 108/527, loss: 0.0306764617562294 2023-01-23 02:12:59.270017: step: 112/527, loss: 0.003929805941879749 2023-01-23 02:13:00.403873: step: 116/527, loss: 0.03920435905456543 2023-01-23 02:13:01.488036: step: 120/527, loss: 0.005666923709213734 2023-01-23 02:13:02.596550: step: 124/527, loss: 0.0018841744167730212 2023-01-23 02:13:03.701183: step: 128/527, loss: 0.004389571957290173 2023-01-23 02:13:04.820272: step: 132/527, loss: 0.0018618584144860506 2023-01-23 02:13:05.969153: step: 136/527, loss: 0.022548485547304153 2023-01-23 02:13:07.085971: step: 140/527, loss: 0.07386932522058487 2023-01-23 02:13:08.208394: step: 144/527, loss: 0.036187365651130676 2023-01-23 02:13:09.283283: step: 148/527, loss: 0.00187263498082757 2023-01-23 02:13:10.387569: step: 152/527, loss: 0.007227516267448664 2023-01-23 02:13:11.505784: step: 156/527, loss: 0.02659015730023384 2023-01-23 02:13:12.639711: step: 160/527, loss: 0.08658161014318466 2023-01-23 02:13:13.765145: step: 164/527, loss: 0.003027343889698386 2023-01-23 02:13:14.901030: step: 168/527, loss: 0.041294097900390625 2023-01-23 02:13:16.026820: step: 172/527, loss: 0.01939373090863228 2023-01-23 02:13:17.149413: step: 176/527, loss: 0.003173828125 2023-01-23 02:13:18.271614: step: 180/527, loss: 0.045679762959480286 2023-01-23 02:13:19.369648: step: 184/527, loss: 0.017844010144472122 2023-01-23 02:13:20.482569: step: 188/527, loss: 0.0351131409406662 2023-01-23 02:13:21.600329: step: 192/527, loss: 0.0023085596039891243 2023-01-23 02:13:22.714768: step: 196/527, loss: 0.02093084529042244 2023-01-23 02:13:23.846218: step: 200/527, loss: 0.007523871026933193 2023-01-23 02:13:24.946189: step: 204/527, loss: 0.013370228931307793 2023-01-23 02:13:26.052006: step: 208/527, loss: 0.0009019851568154991 2023-01-23 02:13:27.143558: step: 212/527, loss: 0.0714532881975174 2023-01-23 02:13:28.289195: step: 216/527, loss: 0.06932735443115234 2023-01-23 02:13:29.403867: step: 220/527, loss: 0.004470443818718195 2023-01-23 02:13:30.495030: step: 224/527, loss: 0.01931924745440483 2023-01-23 02:13:31.651750: step: 228/527, loss: 0.0025319100823253393 2023-01-23 02:13:32.766455: step: 232/527, loss: 0.0019164086552336812 2023-01-23 02:13:33.862196: step: 236/527, loss: 0.052451133728027344 2023-01-23 02:13:34.978373: step: 240/527, loss: 0.05847921222448349 2023-01-23 02:13:36.078287: step: 244/527, loss: 0.0020477562211453915 2023-01-23 02:13:37.157276: step: 248/527, loss: 0.023990154266357422 2023-01-23 02:13:38.284063: step: 252/527, loss: 0.00993657112121582 2023-01-23 02:13:39.414894: step: 256/527, loss: 0.009394359774887562 2023-01-23 02:13:40.541713: step: 260/527, loss: 0.007433701306581497 2023-01-23 02:13:41.679680: step: 264/527, loss: 0.028497029095888138 2023-01-23 02:13:42.814805: step: 268/527, loss: 0.019512366503477097 2023-01-23 02:13:43.919671: step: 272/527, loss: 0.00151910784188658 2023-01-23 02:13:45.016963: step: 276/527, loss: 0.009914685040712357 2023-01-23 02:13:46.144883: step: 280/527, loss: 0.0026863098610192537 2023-01-23 02:13:47.262407: step: 284/527, loss: 0.02004718966782093 2023-01-23 02:13:48.377465: step: 288/527, loss: 0.003618431044742465 2023-01-23 02:13:49.493048: step: 292/527, loss: 0.022746752947568893 2023-01-23 02:13:50.602656: step: 296/527, loss: 0.024396896362304688 2023-01-23 02:13:51.733005: step: 300/527, loss: 0.03436164930462837 2023-01-23 02:13:52.878160: step: 304/527, loss: 0.004638195037841797 2023-01-23 02:13:53.961878: step: 308/527, loss: 0.013498115353286266 2023-01-23 02:13:55.075226: step: 312/527, loss: 0.07237797230482101 2023-01-23 02:13:56.193165: step: 316/527, loss: 0.022986816242337227 2023-01-23 02:13:57.359542: step: 320/527, loss: 0.009342384524643421 2023-01-23 02:13:58.453685: step: 324/527, loss: 0.0014060974353924394 2023-01-23 02:13:59.565625: step: 328/527, loss: 0.007357978727668524 2023-01-23 02:14:00.691823: step: 332/527, loss: 0.016836928203701973 2023-01-23 02:14:01.794767: step: 336/527, loss: 0.007280445192009211 2023-01-23 02:14:02.904684: step: 340/527, loss: 0.013801097869873047 2023-01-23 02:14:04.009591: step: 344/527, loss: 0.004681110382080078 2023-01-23 02:14:05.120233: step: 348/527, loss: 0.0008754730224609375 2023-01-23 02:14:06.263950: step: 352/527, loss: 0.01829557493329048 2023-01-23 02:14:07.379075: step: 356/527, loss: 0.03895444795489311 2023-01-23 02:14:08.501155: step: 360/527, loss: 0.033132076263427734 2023-01-23 02:14:09.607000: step: 364/527, loss: 0.002926349639892578 2023-01-23 02:14:10.746342: step: 368/527, loss: 0.03482809290289879 2023-01-23 02:14:11.847420: step: 372/527, loss: 0.025053169578313828 2023-01-23 02:14:12.956469: step: 376/527, loss: 0.03913097456097603 2023-01-23 02:14:14.076878: step: 380/527, loss: 0.0005289077525958419 2023-01-23 02:14:15.181358: step: 384/527, loss: 0.00032129290048033 2023-01-23 02:14:16.322515: step: 388/527, loss: 0.0016092300647869706 2023-01-23 02:14:17.397444: step: 392/527, loss: 0.002812719438225031 2023-01-23 02:14:18.480384: step: 396/527, loss: 0.012699365615844727 2023-01-23 02:14:19.586541: step: 400/527, loss: 0.01876373402774334 2023-01-23 02:14:20.691578: step: 404/527, loss: 0.00021696090698242188 2023-01-23 02:14:21.802509: step: 408/527, loss: 0.004645347595214844 2023-01-23 02:14:22.895857: step: 412/527, loss: 0.013503074645996094 2023-01-23 02:14:24.006128: step: 416/527, loss: 0.0683530792593956 2023-01-23 02:14:25.104321: step: 420/527, loss: 0.0010607719887048006 2023-01-23 02:14:26.224037: step: 424/527, loss: 0.015596198849380016 2023-01-23 02:14:27.339228: step: 428/527, loss: 0.004405117128044367 2023-01-23 02:14:28.451237: step: 432/527, loss: 0.013647508807480335 2023-01-23 02:14:29.584008: step: 436/527, loss: 0.01462634839117527 2023-01-23 02:14:30.696246: step: 440/527, loss: 0.004271316342055798 2023-01-23 02:14:31.831960: step: 444/527, loss: 0.002661037491634488 2023-01-23 02:14:32.966493: step: 448/527, loss: 0.13009434938430786 2023-01-23 02:14:34.089747: step: 452/527, loss: 0.02267303504049778 2023-01-23 02:14:35.186278: step: 456/527, loss: 0.04173927754163742 2023-01-23 02:14:36.339454: step: 460/527, loss: 0.012711978517472744 2023-01-23 02:14:37.494160: step: 464/527, loss: 0.012140464968979359 2023-01-23 02:14:38.663540: step: 468/527, loss: 0.00994110107421875 2023-01-23 02:14:39.786314: step: 472/527, loss: 0.035683441907167435 2023-01-23 02:14:40.934206: step: 476/527, loss: 0.10478676855564117 2023-01-23 02:14:42.053852: step: 480/527, loss: 0.0024392367340624332 2023-01-23 02:14:43.153522: step: 484/527, loss: 0.003881168318912387 2023-01-23 02:14:44.337391: step: 488/527, loss: 0.03218808025121689 2023-01-23 02:14:45.432107: step: 492/527, loss: 0.027152251452207565 2023-01-23 02:14:46.545628: step: 496/527, loss: 0.0036808967124670744 2023-01-23 02:14:47.640732: step: 500/527, loss: 0.005822372622787952 2023-01-23 02:14:48.749240: step: 504/527, loss: 0.022550487890839577 2023-01-23 02:14:49.861818: step: 508/527, loss: 0.03886609151959419 2023-01-23 02:14:50.981566: step: 512/527, loss: 0.0007715702522546053 2023-01-23 02:14:52.108627: step: 516/527, loss: 0.021832657977938652 2023-01-23 02:14:53.209749: step: 520/527, loss: 0.03423614427447319 2023-01-23 02:14:54.326415: step: 524/527, loss: 0.02031841315329075 2023-01-23 02:14:55.461763: step: 528/527, loss: 0.0026806830428540707 2023-01-23 02:14:56.565345: step: 532/527, loss: 0.005516243167221546 2023-01-23 02:14:57.680853: step: 536/527, loss: 0.00011639595322776586 2023-01-23 02:14:58.778982: step: 540/527, loss: 0.0018218994373455644 2023-01-23 02:14:59.881736: step: 544/527, loss: 0.0581180565059185 2023-01-23 02:15:01.010334: step: 548/527, loss: 0.00033597947913222015 2023-01-23 02:15:02.126047: step: 552/527, loss: 0.345529168844223 2023-01-23 02:15:03.254816: step: 556/527, loss: 0.010593319311738014 2023-01-23 02:15:04.367020: step: 560/527, loss: 0.013450169004499912 2023-01-23 02:15:05.496901: step: 564/527, loss: 0.004492378327995539 2023-01-23 02:15:06.608303: step: 568/527, loss: 0.0871075689792633 2023-01-23 02:15:07.717320: step: 572/527, loss: 0.00222606654278934 2023-01-23 02:15:08.811265: step: 576/527, loss: 0.0002362251398153603 2023-01-23 02:15:09.949358: step: 580/527, loss: 0.02419462241232395 2023-01-23 02:15:11.112966: step: 584/527, loss: 0.04224071651697159 2023-01-23 02:15:12.230494: step: 588/527, loss: 0.0037647245917469263 2023-01-23 02:15:13.345053: step: 592/527, loss: 0.029642868787050247 2023-01-23 02:15:14.449605: step: 596/527, loss: 0.001161861466243863 2023-01-23 02:15:15.557277: step: 600/527, loss: 0.0027507306076586246 2023-01-23 02:15:16.670141: step: 604/527, loss: 0.013688409700989723 2023-01-23 02:15:17.779864: step: 608/527, loss: 0.04600029066205025 2023-01-23 02:15:18.894411: step: 612/527, loss: 0.015257549472153187 2023-01-23 02:15:20.012240: step: 616/527, loss: 0.0034721374977380037 2023-01-23 02:15:21.130381: step: 620/527, loss: 0.0031791210640221834 2023-01-23 02:15:22.216935: step: 624/527, loss: 0.0018707276321947575 2023-01-23 02:15:23.305088: step: 628/527, loss: 0.08830833435058594 2023-01-23 02:15:24.429735: step: 632/527, loss: 0.003943443298339844 2023-01-23 02:15:25.561429: step: 636/527, loss: 0.047086525708436966 2023-01-23 02:15:26.668689: step: 640/527, loss: 0.017644787207245827 2023-01-23 02:15:27.768766: step: 644/527, loss: 0.012405682355165482 2023-01-23 02:15:28.891864: step: 648/527, loss: 0.00797119177877903 2023-01-23 02:15:30.035030: step: 652/527, loss: 0.049462128430604935 2023-01-23 02:15:31.137467: step: 656/527, loss: 0.08826261013746262 2023-01-23 02:15:32.262713: step: 660/527, loss: 0.023029519245028496 2023-01-23 02:15:33.356081: step: 664/527, loss: 0.05416812747716904 2023-01-23 02:15:34.491696: step: 668/527, loss: 0.04210786521434784 2023-01-23 02:15:35.600816: step: 672/527, loss: 0.49136239290237427 2023-01-23 02:15:36.746832: step: 676/527, loss: 0.033941127359867096 2023-01-23 02:15:37.858992: step: 680/527, loss: 0.00445899972692132 2023-01-23 02:15:38.973531: step: 684/527, loss: 0.05203769728541374 2023-01-23 02:15:40.093011: step: 688/527, loss: 0.0015582084888592362 2023-01-23 02:15:41.185694: step: 692/527, loss: 0.023854637518525124 2023-01-23 02:15:42.332141: step: 696/527, loss: 0.0023053172044456005 2023-01-23 02:15:43.481227: step: 700/527, loss: 0.009936237707734108 2023-01-23 02:15:44.615127: step: 704/527, loss: 0.010214841924607754 2023-01-23 02:15:45.715130: step: 708/527, loss: 0.010755729861557484 2023-01-23 02:15:46.830161: step: 712/527, loss: 0.018483353778719902 2023-01-23 02:15:47.923785: step: 716/527, loss: 0.002727699466049671 2023-01-23 02:15:49.022084: step: 720/527, loss: 0.009651947766542435 2023-01-23 02:15:50.159729: step: 724/527, loss: 0.003670978592708707 2023-01-23 02:15:51.244939: step: 728/527, loss: 0.021419240161776543 2023-01-23 02:15:52.370877: step: 732/527, loss: 0.005337810609489679 2023-01-23 02:15:53.494745: step: 736/527, loss: 0.0007686137687414885 2023-01-23 02:15:54.624867: step: 740/527, loss: 0.016780495643615723 2023-01-23 02:15:55.733129: step: 744/527, loss: 0.00103168492205441 2023-01-23 02:15:56.833298: step: 748/527, loss: 0.01735544204711914 2023-01-23 02:15:57.941893: step: 752/527, loss: 0.011169195175170898 2023-01-23 02:15:59.043409: step: 756/527, loss: 0.04162712022662163 2023-01-23 02:16:00.149295: step: 760/527, loss: 0.003524875734001398 2023-01-23 02:16:01.269132: step: 764/527, loss: 0.008033180609345436 2023-01-23 02:16:02.367035: step: 768/527, loss: 0.006381560117006302 2023-01-23 02:16:03.489820: step: 772/527, loss: 0.007855224423110485 2023-01-23 02:16:04.625537: step: 776/527, loss: 0.36501750349998474 2023-01-23 02:16:05.737062: step: 780/527, loss: 0.08331408351659775 2023-01-23 02:16:06.852069: step: 784/527, loss: 0.012855243869125843 2023-01-23 02:16:07.951388: step: 788/527, loss: 0.000392723100958392 2023-01-23 02:16:09.079983: step: 792/527, loss: 0.023824501782655716 2023-01-23 02:16:10.188731: step: 796/527, loss: 0.10647717118263245 2023-01-23 02:16:11.312031: step: 800/527, loss: 0.03512544557452202 2023-01-23 02:16:12.404207: step: 804/527, loss: 0.011204028502106667 2023-01-23 02:16:13.512220: step: 808/527, loss: 0.0025238990783691406 2023-01-23 02:16:14.608770: step: 812/527, loss: 0.007000732235610485 2023-01-23 02:16:15.745987: step: 816/527, loss: 0.02518615871667862 2023-01-23 02:16:16.849605: step: 820/527, loss: 0.0374029166996479 2023-01-23 02:16:17.989124: step: 824/527, loss: 0.054720211774110794 2023-01-23 02:16:19.104856: step: 828/527, loss: 0.0014748573303222656 2023-01-23 02:16:20.198744: step: 832/527, loss: 0.0024270296562463045 2023-01-23 02:16:21.273133: step: 836/527, loss: 0.02218623086810112 2023-01-23 02:16:22.394098: step: 840/527, loss: 0.07157979160547256 2023-01-23 02:16:23.493731: step: 844/527, loss: 0.040227461606264114 2023-01-23 02:16:24.614680: step: 848/527, loss: 0.0009369850158691406 2023-01-23 02:16:25.751539: step: 852/527, loss: 0.0375455841422081 2023-01-23 02:16:26.900382: step: 856/527, loss: 0.03953418880701065 2023-01-23 02:16:28.014901: step: 860/527, loss: 0.03898897394537926 2023-01-23 02:16:29.118516: step: 864/527, loss: 0.007123851682990789 2023-01-23 02:16:30.213668: step: 868/527, loss: 0.033642008900642395 2023-01-23 02:16:31.317110: step: 872/527, loss: 0.03359575197100639 2023-01-23 02:16:32.442561: step: 876/527, loss: 5.254745337879285e-05 2023-01-23 02:16:33.543006: step: 880/527, loss: 0.02740020863711834 2023-01-23 02:16:34.647589: step: 884/527, loss: 0.03701953962445259 2023-01-23 02:16:35.777397: step: 888/527, loss: 0.030324744060635567 2023-01-23 02:16:36.906000: step: 892/527, loss: 0.0009420395363122225 2023-01-23 02:16:38.047213: step: 896/527, loss: 0.024372100830078125 2023-01-23 02:16:39.129827: step: 900/527, loss: 0.06078539043664932 2023-01-23 02:16:40.242668: step: 904/527, loss: 0.017445659264922142 2023-01-23 02:16:41.360733: step: 908/527, loss: 0.06255665421485901 2023-01-23 02:16:42.472887: step: 912/527, loss: 0.07086696475744247 2023-01-23 02:16:43.590981: step: 916/527, loss: 0.030018115416169167 2023-01-23 02:16:44.701928: step: 920/527, loss: 0.01457139104604721 2023-01-23 02:16:45.860538: step: 924/527, loss: 0.004891872406005859 2023-01-23 02:16:47.002242: step: 928/527, loss: 0.07119999080896378 2023-01-23 02:16:48.147192: step: 932/527, loss: 0.0032543183770030737 2023-01-23 02:16:49.304287: step: 936/527, loss: 0.03839254379272461 2023-01-23 02:16:50.416564: step: 940/527, loss: 0.008921097964048386 2023-01-23 02:16:51.543451: step: 944/527, loss: 0.0021088600624352694 2023-01-23 02:16:52.667905: step: 948/527, loss: 0.036472320556640625 2023-01-23 02:16:53.766947: step: 952/527, loss: 0.0025005340576171875 2023-01-23 02:16:54.887816: step: 956/527, loss: 0.011091423220932484 2023-01-23 02:16:56.011866: step: 960/527, loss: 0.107201486825943 2023-01-23 02:16:57.114136: step: 964/527, loss: 0.014519404619932175 2023-01-23 02:16:58.222599: step: 968/527, loss: 0.005887222476303577 2023-01-23 02:16:59.365456: step: 972/527, loss: 0.010346031747758389 2023-01-23 02:17:00.477463: step: 976/527, loss: 0.03791503980755806 2023-01-23 02:17:01.625873: step: 980/527, loss: 0.04982910305261612 2023-01-23 02:17:02.741856: step: 984/527, loss: 0.016743946820497513 2023-01-23 02:17:03.869634: step: 988/527, loss: 0.01588287390768528 2023-01-23 02:17:04.976856: step: 992/527, loss: 0.008346558548510075 2023-01-23 02:17:06.092285: step: 996/527, loss: 0.012001896277070045 2023-01-23 02:17:07.230988: step: 1000/527, loss: 0.003459155559539795 2023-01-23 02:17:08.365198: step: 1004/527, loss: 0.004527187906205654 2023-01-23 02:17:09.486525: step: 1008/527, loss: 0.002144432161003351 2023-01-23 02:17:10.569356: step: 1012/527, loss: 0.01035156287252903 2023-01-23 02:17:11.687634: step: 1016/527, loss: 0.060842517763376236 2023-01-23 02:17:12.812252: step: 1020/527, loss: 0.01167592965066433 2023-01-23 02:17:13.931946: step: 1024/527, loss: 0.02686777152121067 2023-01-23 02:17:15.052408: step: 1028/527, loss: 0.06627483665943146 2023-01-23 02:17:16.153701: step: 1032/527, loss: 0.0034605979453772306 2023-01-23 02:17:17.265011: step: 1036/527, loss: 0.031024957075715065 2023-01-23 02:17:18.406632: step: 1040/527, loss: 0.10779333114624023 2023-01-23 02:17:19.505272: step: 1044/527, loss: 0.011530781164765358 2023-01-23 02:17:20.609974: step: 1048/527, loss: 0.0011577607365325093 2023-01-23 02:17:21.708207: step: 1052/527, loss: 0.06679125130176544 2023-01-23 02:17:22.811575: step: 1056/527, loss: 0.00014677047147415578 2023-01-23 02:17:23.911120: step: 1060/527, loss: 0.0007278919219970703 2023-01-23 02:17:25.037582: step: 1064/527, loss: 0.022234534844756126 2023-01-23 02:17:26.152249: step: 1068/527, loss: 0.011813163757324219 2023-01-23 02:17:27.292483: step: 1072/527, loss: 0.04335355758666992 2023-01-23 02:17:28.414744: step: 1076/527, loss: 0.0291106216609478 2023-01-23 02:17:29.537837: step: 1080/527, loss: 0.0002305984526174143 2023-01-23 02:17:30.652689: step: 1084/527, loss: 0.022954082116484642 2023-01-23 02:17:31.765000: step: 1088/527, loss: 0.014372778125107288 2023-01-23 02:17:32.898762: step: 1092/527, loss: 0.025388337671756744 2023-01-23 02:17:34.002446: step: 1096/527, loss: 0.004535865969955921 2023-01-23 02:17:35.088300: step: 1100/527, loss: 0.015138912945985794 2023-01-23 02:17:36.198301: step: 1104/527, loss: 0.004838848020881414 2023-01-23 02:17:37.311837: step: 1108/527, loss: 0.006296730134636164 2023-01-23 02:17:38.408133: step: 1112/527, loss: 0.0045442585833370686 2023-01-23 02:17:39.529108: step: 1116/527, loss: 0.0018225193489342928 2023-01-23 02:17:40.640763: step: 1120/527, loss: 5.817413693876006e-05 2023-01-23 02:17:41.760554: step: 1124/527, loss: 0.051497459411621094 2023-01-23 02:17:42.869225: step: 1128/527, loss: 0.035170771181583405 2023-01-23 02:17:44.002200: step: 1132/527, loss: 0.046622373163700104 2023-01-23 02:17:45.112747: step: 1136/527, loss: 0.02722950093448162 2023-01-23 02:17:46.254369: step: 1140/527, loss: -1.9073468138230965e-07 2023-01-23 02:17:47.337714: step: 1144/527, loss: 0.00045289992704056203 2023-01-23 02:17:48.448952: step: 1148/527, loss: 0.013576650060713291 2023-01-23 02:17:49.577326: step: 1152/527, loss: 0.030684662982821465 2023-01-23 02:17:50.725894: step: 1156/527, loss: 0.03546333312988281 2023-01-23 02:17:51.864242: step: 1160/527, loss: 0.0027689458802342415 2023-01-23 02:17:52.967615: step: 1164/527, loss: 0.0054517751559615135 2023-01-23 02:17:54.081448: step: 1168/527, loss: 0.008447457104921341 2023-01-23 02:17:55.171531: step: 1172/527, loss: 0.026525402441620827 2023-01-23 02:17:56.277601: step: 1176/527, loss: 0.004005050752311945 2023-01-23 02:17:57.393711: step: 1180/527, loss: 0.001748037408106029 2023-01-23 02:17:58.501238: step: 1184/527, loss: 0.037444498389959335 2023-01-23 02:17:59.632614: step: 1188/527, loss: 0.06391939520835876 2023-01-23 02:18:00.765797: step: 1192/527, loss: 0.01869945600628853 2023-01-23 02:18:01.898811: step: 1196/527, loss: 0.00645866384729743 2023-01-23 02:18:03.015910: step: 1200/527, loss: 0.022257041186094284 2023-01-23 02:18:04.157207: step: 1204/527, loss: 0.07166080921888351 2023-01-23 02:18:05.257930: step: 1208/527, loss: 0.018420221284031868 2023-01-23 02:18:06.372169: step: 1212/527, loss: 0.0022628784645348787 2023-01-23 02:18:07.488495: step: 1216/527, loss: 0.0005194902187213302 2023-01-23 02:18:08.589511: step: 1220/527, loss: 0.00015821456327103078 2023-01-23 02:18:09.729549: step: 1224/527, loss: 0.008310413919389248 2023-01-23 02:18:10.858280: step: 1228/527, loss: 0.006076240912079811 2023-01-23 02:18:11.965937: step: 1232/527, loss: 0.11685733497142792 2023-01-23 02:18:13.065542: step: 1236/527, loss: 0.006088638212531805 2023-01-23 02:18:14.183911: step: 1240/527, loss: 0.0011143683223053813 2023-01-23 02:18:15.299641: step: 1244/527, loss: 0.0017307281959801912 2023-01-23 02:18:16.413008: step: 1248/527, loss: 0.00015778541273903102 2023-01-23 02:18:17.510431: step: 1252/527, loss: 0.09545579552650452 2023-01-23 02:18:18.636074: step: 1256/527, loss: 0.014463711529970169 2023-01-23 02:18:19.758672: step: 1260/527, loss: 0.008971309289336205 2023-01-23 02:18:20.847017: step: 1264/527, loss: 0.007992362603545189 2023-01-23 02:18:21.972084: step: 1268/527, loss: 0.020517636090517044 2023-01-23 02:18:23.100406: step: 1272/527, loss: 0.042435456067323685 2023-01-23 02:18:24.231634: step: 1276/527, loss: 0.03025665320456028 2023-01-23 02:18:25.341921: step: 1280/527, loss: 0.0012463569873943925 2023-01-23 02:18:26.458098: step: 1284/527, loss: 0.009975815191864967 2023-01-23 02:18:27.575309: step: 1288/527, loss: 0.016057778149843216 2023-01-23 02:18:28.682354: step: 1292/527, loss: 0.05933056026697159 2023-01-23 02:18:29.805580: step: 1296/527, loss: 0.012752151116728783 2023-01-23 02:18:30.930746: step: 1300/527, loss: 0.022022247314453125 2023-01-23 02:18:32.035890: step: 1304/527, loss: 0.0022297382820397615 2023-01-23 02:18:33.158918: step: 1308/527, loss: 0.0024038313422352076 2023-01-23 02:18:34.293106: step: 1312/527, loss: 0.0033733369782567024 2023-01-23 02:18:35.420937: step: 1316/527, loss: 0.03678445890545845 2023-01-23 02:18:36.499439: step: 1320/527, loss: 0.0043600560165941715 2023-01-23 02:18:37.606981: step: 1324/527, loss: 0.01308136060833931 2023-01-23 02:18:38.740260: step: 1328/527, loss: 0.015399932861328125 2023-01-23 02:18:39.906849: step: 1332/527, loss: 0.0035995482467114925 2023-01-23 02:18:41.021537: step: 1336/527, loss: 0.005376053042709827 2023-01-23 02:18:42.121959: step: 1340/527, loss: 0.31654825806617737 2023-01-23 02:18:43.255120: step: 1344/527, loss: 0.0012459754943847656 2023-01-23 02:18:44.395247: step: 1348/527, loss: 0.0011630058288574219 2023-01-23 02:18:45.550568: step: 1352/527, loss: 0.022261619567871094 2023-01-23 02:18:46.638084: step: 1356/527, loss: 0.004092406947165728 2023-01-23 02:18:47.749266: step: 1360/527, loss: 0.0007270813221111894 2023-01-23 02:18:48.911624: step: 1364/527, loss: 0.00796198844909668 2023-01-23 02:18:50.050398: step: 1368/527, loss: 0.009603118523955345 2023-01-23 02:18:51.146417: step: 1372/527, loss: 0.0005840301746502519 2023-01-23 02:18:52.239770: step: 1376/527, loss: 0.921379804611206 2023-01-23 02:18:53.339680: step: 1380/527, loss: 0.007695198059082031 2023-01-23 02:18:54.464403: step: 1384/527, loss: 0.010226774029433727 2023-01-23 02:18:55.578982: step: 1388/527, loss: 0.020283127203583717 2023-01-23 02:18:56.694297: step: 1392/527, loss: 0.043895721435546875 2023-01-23 02:18:57.811980: step: 1396/527, loss: 0.060689929872751236 2023-01-23 02:18:58.925129: step: 1400/527, loss: 0.014291572384536266 2023-01-23 02:19:00.015106: step: 1404/527, loss: 0.03801288455724716 2023-01-23 02:19:01.111426: step: 1408/527, loss: 0.010149812325835228 2023-01-23 02:19:02.249473: step: 1412/527, loss: 0.001574784517288208 2023-01-23 02:19:03.356534: step: 1416/527, loss: 0.0871967077255249 2023-01-23 02:19:04.473929: step: 1420/527, loss: 0.00023174285888671875 2023-01-23 02:19:05.587124: step: 1424/527, loss: 0.04286012426018715 2023-01-23 02:19:06.686327: step: 1428/527, loss: 0.007236289791762829 2023-01-23 02:19:07.812017: step: 1432/527, loss: 0.0009522438049316406 2023-01-23 02:19:08.974892: step: 1436/527, loss: 0.008500671945512295 2023-01-23 02:19:10.092638: step: 1440/527, loss: 0.0034437179565429688 2023-01-23 02:19:11.211175: step: 1444/527, loss: 0.07065653800964355 2023-01-23 02:19:12.355272: step: 1448/527, loss: 0.011742210015654564 2023-01-23 02:19:13.471905: step: 1452/527, loss: 0.00027484894962981343 2023-01-23 02:19:14.575344: step: 1456/527, loss: 0.003152942517772317 2023-01-23 02:19:15.689062: step: 1460/527, loss: 0.06954765319824219 2023-01-23 02:19:16.806253: step: 1464/527, loss: 0.006107473745942116 2023-01-23 02:19:17.941208: step: 1468/527, loss: 0.004796219058334827 2023-01-23 02:19:19.019010: step: 1472/527, loss: 0.007786941714584827 2023-01-23 02:19:20.139904: step: 1476/527, loss: 0.007374858949333429 2023-01-23 02:19:21.264077: step: 1480/527, loss: 0.021729031577706337 2023-01-23 02:19:22.375299: step: 1484/527, loss: 0.06575126200914383 2023-01-23 02:19:23.466923: step: 1488/527, loss: 0.06252014636993408 2023-01-23 02:19:24.564697: step: 1492/527, loss: 0.0047200205735862255 2023-01-23 02:19:25.647575: step: 1496/527, loss: 0.0017782404320314527 2023-01-23 02:19:26.740521: step: 1500/527, loss: 0.00934600830078125 2023-01-23 02:19:27.832184: step: 1504/527, loss: 0.0001566886785440147 2023-01-23 02:19:28.974315: step: 1508/527, loss: 0.032178688794374466 2023-01-23 02:19:30.099531: step: 1512/527, loss: 0.0026976587250828743 2023-01-23 02:19:31.189514: step: 1516/527, loss: 0.01969432830810547 2023-01-23 02:19:32.305420: step: 1520/527, loss: 0.016445541754364967 2023-01-23 02:19:33.435750: step: 1524/527, loss: 0.057711124420166016 2023-01-23 02:19:34.539814: step: 1528/527, loss: 0.00487098703160882 2023-01-23 02:19:35.672877: step: 1532/527, loss: 0.005397987086325884 2023-01-23 02:19:36.802353: step: 1536/527, loss: 0.005328846164047718 2023-01-23 02:19:37.930003: step: 1540/527, loss: 0.022876929491758347 2023-01-23 02:19:39.034800: step: 1544/527, loss: 0.0001852035493357107 2023-01-23 02:19:40.143860: step: 1548/527, loss: 0.0034211156889796257 2023-01-23 02:19:41.246531: step: 1552/527, loss: 0.014150619506835938 2023-01-23 02:19:42.364946: step: 1556/527, loss: 0.0007873058784753084 2023-01-23 02:19:43.461981: step: 1560/527, loss: 0.004472828004509211 2023-01-23 02:19:44.581204: step: 1564/527, loss: 0.013537311926484108 2023-01-23 02:19:45.683370: step: 1568/527, loss: 0.08655796945095062 2023-01-23 02:19:46.803528: step: 1572/527, loss: 0.019203854724764824 2023-01-23 02:19:47.925812: step: 1576/527, loss: 0.007057666778564453 2023-01-23 02:19:49.036224: step: 1580/527, loss: 0.06712310016155243 2023-01-23 02:19:50.157166: step: 1584/527, loss: 0.0003762722190003842 2023-01-23 02:19:51.321282: step: 1588/527, loss: 0.09232282638549805 2023-01-23 02:19:52.432951: step: 1592/527, loss: 0.002775049302726984 2023-01-23 02:19:53.551721: step: 1596/527, loss: 0.011726761236786842 2023-01-23 02:19:54.657795: step: 1600/527, loss: 0.015575027093291283 2023-01-23 02:19:55.786740: step: 1604/527, loss: 0.02930011786520481 2023-01-23 02:19:56.918353: step: 1608/527, loss: 0.012111186981201172 2023-01-23 02:19:58.007073: step: 1612/527, loss: 0.00037741661071777344 2023-01-23 02:19:59.128667: step: 1616/527, loss: 0.0036823274567723274 2023-01-23 02:20:00.280573: step: 1620/527, loss: 0.045983076095581055 2023-01-23 02:20:01.381309: step: 1624/527, loss: 0.0010075569152832031 2023-01-23 02:20:02.474049: step: 1628/527, loss: 0.015353393740952015 2023-01-23 02:20:03.591377: step: 1632/527, loss: 0.017975617200136185 2023-01-23 02:20:04.712519: step: 1636/527, loss: 0.06711041927337646 2023-01-23 02:20:05.822610: step: 1640/527, loss: 0.0044731139205396175 2023-01-23 02:20:06.915069: step: 1644/527, loss: 0.005176353268325329 2023-01-23 02:20:08.029348: step: 1648/527, loss: 0.019369127228856087 2023-01-23 02:20:09.135839: step: 1652/527, loss: 0.00023555755615234375 2023-01-23 02:20:10.254421: step: 1656/527, loss: 0.0156721118837595 2023-01-23 02:20:11.367282: step: 1660/527, loss: 0.04265708848834038 2023-01-23 02:20:12.500144: step: 1664/527, loss: 0.016286659985780716 2023-01-23 02:20:13.588206: step: 1668/527, loss: 0.00028324127197265625 2023-01-23 02:20:14.731562: step: 1672/527, loss: 0.14527744054794312 2023-01-23 02:20:15.842565: step: 1676/527, loss: 0.010168267413973808 2023-01-23 02:20:16.959524: step: 1680/527, loss: 0.00461611757054925 2023-01-23 02:20:18.069597: step: 1684/527, loss: 0.0010302544105798006 2023-01-23 02:20:19.197444: step: 1688/527, loss: 0.004906869027763605 2023-01-23 02:20:20.327129: step: 1692/527, loss: 0.009611368179321289 2023-01-23 02:20:21.440170: step: 1696/527, loss: 0.0039535523392260075 2023-01-23 02:20:22.541295: step: 1700/527, loss: 0.05470981448888779 2023-01-23 02:20:23.689541: step: 1704/527, loss: 0.01680011674761772 2023-01-23 02:20:24.801325: step: 1708/527, loss: 0.10050592571496964 2023-01-23 02:20:25.940434: step: 1712/527, loss: 0.0409025177359581 2023-01-23 02:20:27.081873: step: 1716/527, loss: 0.0009572983253747225 2023-01-23 02:20:28.187171: step: 1720/527, loss: 0.02409229427576065 2023-01-23 02:20:29.306356: step: 1724/527, loss: 0.00488967914134264 2023-01-23 02:20:30.406392: step: 1728/527, loss: 0.014126110821962357 2023-01-23 02:20:31.567377: step: 1732/527, loss: 0.02013702504336834 2023-01-23 02:20:32.676215: step: 1736/527, loss: 0.04024486616253853 2023-01-23 02:20:33.789917: step: 1740/527, loss: 0.0014992713695392013 2023-01-23 02:20:34.891810: step: 1744/527, loss: 0.007378387730568647 2023-01-23 02:20:35.992481: step: 1748/527, loss: 0.1129918098449707 2023-01-23 02:20:37.095495: step: 1752/527, loss: 0.024541517719626427 2023-01-23 02:20:38.204881: step: 1756/527, loss: 0.03152618557214737 2023-01-23 02:20:39.330787: step: 1760/527, loss: 0.0646120086312294 2023-01-23 02:20:40.444646: step: 1764/527, loss: 0.006183242425322533 2023-01-23 02:20:41.604154: step: 1768/527, loss: 0.025460053235292435 2023-01-23 02:20:42.738276: step: 1772/527, loss: 0.002890634583309293 2023-01-23 02:20:43.907967: step: 1776/527, loss: 0.0033100128639489412 2023-01-23 02:20:44.995038: step: 1780/527, loss: 0.41713735461235046 2023-01-23 02:20:46.088658: step: 1784/527, loss: 0.00674018869176507 2023-01-23 02:20:47.206129: step: 1788/527, loss: 0.011173821054399014 2023-01-23 02:20:48.312584: step: 1792/527, loss: 1.888275073724799e-05 2023-01-23 02:20:49.421149: step: 1796/527, loss: 0.11146698147058487 2023-01-23 02:20:50.565645: step: 1800/527, loss: 0.06440496444702148 2023-01-23 02:20:51.698766: step: 1804/527, loss: 0.06115322187542915 2023-01-23 02:20:52.838849: step: 1808/527, loss: 0.01685056835412979 2023-01-23 02:20:53.961336: step: 1812/527, loss: 0.026769066229462624 2023-01-23 02:20:55.075470: step: 1816/527, loss: 0.006432008929550648 2023-01-23 02:20:56.199554: step: 1820/527, loss: 0.03898449242115021 2023-01-23 02:20:57.332649: step: 1824/527, loss: 0.02519378624856472 2023-01-23 02:20:58.466629: step: 1828/527, loss: 0.01451888121664524 2023-01-23 02:20:59.597685: step: 1832/527, loss: 0.015449142083525658 2023-01-23 02:21:00.753212: step: 1836/527, loss: 0.002057361649349332 2023-01-23 02:21:01.853489: step: 1840/527, loss: 0.0005893707275390625 2023-01-23 02:21:02.976416: step: 1844/527, loss: 0.007954454980790615 2023-01-23 02:21:04.072502: step: 1848/527, loss: 0.0884731262922287 2023-01-23 02:21:05.207098: step: 1852/527, loss: 0.034000396728515625 2023-01-23 02:21:06.336117: step: 1856/527, loss: 0.683038055896759 2023-01-23 02:21:07.454216: step: 1860/527, loss: 0.00017032623873092234 2023-01-23 02:21:08.533601: step: 1864/527, loss: 4.692077709478326e-05 2023-01-23 02:21:09.661022: step: 1868/527, loss: 0.03025531768798828 2023-01-23 02:21:10.797214: step: 1872/527, loss: 0.012592792510986328 2023-01-23 02:21:11.940413: step: 1876/527, loss: 0.0018447877373546362 2023-01-23 02:21:13.061528: step: 1880/527, loss: 0.0005530357593670487 2023-01-23 02:21:14.183850: step: 1884/527, loss: 0.09721268713474274 2023-01-23 02:21:15.269051: step: 1888/527, loss: 0.03811340406537056 2023-01-23 02:21:16.387381: step: 1892/527, loss: 0.003135299775749445 2023-01-23 02:21:17.520596: step: 1896/527, loss: 0.0025246620643883944 2023-01-23 02:21:18.630419: step: 1900/527, loss: 0.41839560866355896 2023-01-23 02:21:19.751331: step: 1904/527, loss: 0.00585174560546875 2023-01-23 02:21:20.836621: step: 1908/527, loss: 0.04412689432501793 2023-01-23 02:21:21.956247: step: 1912/527, loss: 0.007361793890595436 2023-01-23 02:21:23.084805: step: 1916/527, loss: 0.0014139175182208419 2023-01-23 02:21:24.199266: step: 1920/527, loss: 0.058358386158943176 2023-01-23 02:21:25.307287: step: 1924/527, loss: 0.0028151513542979956 2023-01-23 02:21:26.406082: step: 1928/527, loss: 0.13248196244239807 2023-01-23 02:21:27.529248: step: 1932/527, loss: 0.011483192443847656 2023-01-23 02:21:28.622451: step: 1936/527, loss: 0.02354288101196289 2023-01-23 02:21:29.727885: step: 1940/527, loss: 0.28597211837768555 2023-01-23 02:21:30.815653: step: 1944/527, loss: 0.022793864831328392 2023-01-23 02:21:31.921551: step: 1948/527, loss: 0.05590400844812393 2023-01-23 02:21:33.077555: step: 1952/527, loss: 0.019139863550662994 2023-01-23 02:21:34.175028: step: 1956/527, loss: 0.029625702649354935 2023-01-23 02:21:35.285307: step: 1960/527, loss: 0.069280244410038 2023-01-23 02:21:36.422079: step: 1964/527, loss: 0.007741022389382124 2023-01-23 02:21:37.544177: step: 1968/527, loss: 0.023952770978212357 2023-01-23 02:21:38.677625: step: 1972/527, loss: 0.009385023266077042 2023-01-23 02:21:39.839002: step: 1976/527, loss: 0.004924774169921875 2023-01-23 02:21:40.953723: step: 1980/527, loss: 0.008701229467988014 2023-01-23 02:21:42.074942: step: 1984/527, loss: 0.025692177936434746 2023-01-23 02:21:43.155927: step: 1988/527, loss: 0.1431722640991211 2023-01-23 02:21:44.284353: step: 1992/527, loss: 0.0931774154305458 2023-01-23 02:21:45.420374: step: 1996/527, loss: 0.003478527069091797 2023-01-23 02:21:46.521565: step: 2000/527, loss: 0.015098334290087223 2023-01-23 02:21:47.654475: step: 2004/527, loss: 0.010980415157973766 2023-01-23 02:21:48.793455: step: 2008/527, loss: 0.049023061990737915 2023-01-23 02:21:49.896923: step: 2012/527, loss: 0.015363835729658604 2023-01-23 02:21:51.030662: step: 2016/527, loss: 0.03264818340539932 2023-01-23 02:21:52.113721: step: 2020/527, loss: 0.0023262263275682926 2023-01-23 02:21:53.241320: step: 2024/527, loss: 0.018448637798428535 2023-01-23 02:21:54.346793: step: 2028/527, loss: 0.5469331741333008 2023-01-23 02:21:55.465130: step: 2032/527, loss: 0.42796844244003296 2023-01-23 02:21:56.570793: step: 2036/527, loss: 0.005140590947121382 2023-01-23 02:21:57.708370: step: 2040/527, loss: 0.024213504046201706 2023-01-23 02:21:58.818563: step: 2044/527, loss: 0.03206014633178711 2023-01-23 02:21:59.916676: step: 2048/527, loss: 0.03523874282836914 2023-01-23 02:22:01.036133: step: 2052/527, loss: 0.06481237709522247 2023-01-23 02:22:02.177882: step: 2056/527, loss: 0.001087188720703125 2023-01-23 02:22:03.312985: step: 2060/527, loss: 0.0037787912879139185 2023-01-23 02:22:04.439733: step: 2064/527, loss: 0.06499803066253662 2023-01-23 02:22:05.536310: step: 2068/527, loss: 0.033544253557920456 2023-01-23 02:22:06.651769: step: 2072/527, loss: 0.0015295982593670487 2023-01-23 02:22:07.743379: step: 2076/527, loss: 0.008977080695331097 2023-01-23 02:22:08.880782: step: 2080/527, loss: 0.02858905866742134 2023-01-23 02:22:09.975653: step: 2084/527, loss: 0.01874542236328125 2023-01-23 02:22:11.075863: step: 2088/527, loss: 0.036957550793886185 2023-01-23 02:22:12.167031: step: 2092/527, loss: 0.0003108978271484375 2023-01-23 02:22:13.279702: step: 2096/527, loss: 0.025326158851385117 2023-01-23 02:22:14.389601: step: 2100/527, loss: 0.008788109757006168 2023-01-23 02:22:15.500412: step: 2104/527, loss: 0.024109981954097748 2023-01-23 02:22:16.675949: step: 2108/527, loss: 0.05748730152845383 ================================================== Loss: 0.032 -------------------- Dev: {'event': {'p': 0.6066252587991718, 'r': 0.7802929427430093, 'f1': 0.6825859056493885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Test: {'event': {'p': 0.62580054894785, 'r': 0.7817142857142857, 'f1': 0.6951219512195121}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Chinese: {'event': {'p': 0.5581395348837209, 'r': 0.8888888888888888, 'f1': 0.6857142857142857}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Russian: {'event': {'p': 0.5135135135135135, 'r': 0.5277777777777778, 'f1': 0.5205479452054794}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} New best korean model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066252587991718, 'r': 0.7802929427430093, 'f1': 0.6825859056493885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Eng Test for Korean: {'event': {'p': 0.62580054894785, 'r': 0.7817142857142857, 'f1': 0.6951219512195121}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 17 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:23:06.887418: step: 4/527, loss: 0.012023543938994408 2023-01-23 02:23:07.991548: step: 8/527, loss: 0.04094543308019638 2023-01-23 02:23:09.078948: step: 12/527, loss: 0.0006013870588503778 2023-01-23 02:23:10.175605: step: 16/527, loss: 0.011975479312241077 2023-01-23 02:23:11.275640: step: 20/527, loss: 0.04260425642132759 2023-01-23 02:23:12.399911: step: 24/527, loss: 0.010041999630630016 2023-01-23 02:23:13.484212: step: 28/527, loss: 0.01732778549194336 2023-01-23 02:23:14.664149: step: 32/527, loss: 0.017427541315555573 2023-01-23 02:23:15.744982: step: 36/527, loss: 0.0002480506955180317 2023-01-23 02:23:16.842450: step: 40/527, loss: 0.0013387680519372225 2023-01-23 02:23:17.940692: step: 44/527, loss: 0.0036637308076024055 2023-01-23 02:23:19.091207: step: 48/527, loss: 0.009945297613739967 2023-01-23 02:23:20.172447: step: 52/527, loss: 0.002210330916568637 2023-01-23 02:23:21.286775: step: 56/527, loss: 0.00011782647197833285 2023-01-23 02:23:22.402697: step: 60/527, loss: 0.010311508551239967 2023-01-23 02:23:23.515797: step: 64/527, loss: 0.02317028120160103 2023-01-23 02:23:24.602638: step: 68/527, loss: 0.00042667388333939016 2023-01-23 02:23:25.719737: step: 72/527, loss: 0.0027159214951097965 2023-01-23 02:23:26.828954: step: 76/527, loss: 0.01687854528427124 2023-01-23 02:23:27.957452: step: 80/527, loss: 0.006239509675651789 2023-01-23 02:23:29.070152: step: 84/527, loss: 0.00021395683870650828 2023-01-23 02:23:30.198728: step: 88/527, loss: 0.00017232896061614156 2023-01-23 02:23:31.299934: step: 92/527, loss: 0.007473564241081476 2023-01-23 02:23:32.411334: step: 96/527, loss: 0.0033813477493822575 2023-01-23 02:23:33.515527: step: 100/527, loss: 0.011180972680449486 2023-01-23 02:23:34.614961: step: 104/527, loss: 0.017066096886992455 2023-01-23 02:23:35.721444: step: 108/527, loss: 0.13654617965221405 2023-01-23 02:23:36.853699: step: 112/527, loss: 0.011764908209443092 2023-01-23 02:23:37.966321: step: 116/527, loss: 0.030293095856904984 2023-01-23 02:23:39.070533: step: 120/527, loss: 0.0072259907610714436 2023-01-23 02:23:40.178473: step: 124/527, loss: 0.0020763396751135588 2023-01-23 02:23:41.277112: step: 128/527, loss: 0.0016427993541583419 2023-01-23 02:23:42.394502: step: 132/527, loss: 0.012667465023696423 2023-01-23 02:23:43.488361: step: 136/527, loss: 0.00536799430847168 2023-01-23 02:23:44.587922: step: 140/527, loss: 0.02067899890244007 2023-01-23 02:23:45.680119: step: 144/527, loss: 0.04240426793694496 2023-01-23 02:23:46.792519: step: 148/527, loss: 0.006385612301528454 2023-01-23 02:23:47.905691: step: 152/527, loss: 0.0005168914794921875 2023-01-23 02:23:48.999855: step: 156/527, loss: 0.05644655600190163 2023-01-23 02:23:50.120610: step: 160/527, loss: 0.015594673343002796 2023-01-23 02:23:51.243289: step: 164/527, loss: 0.0006369590992107987 2023-01-23 02:23:52.358115: step: 168/527, loss: 0.018337726593017578 2023-01-23 02:23:53.501506: step: 172/527, loss: 0.043580248951911926 2023-01-23 02:23:54.583692: step: 176/527, loss: 0.00038948061410337687 2023-01-23 02:23:55.687935: step: 180/527, loss: 0.053795814514160156 2023-01-23 02:23:56.836114: step: 184/527, loss: 0.0003503799671307206 2023-01-23 02:23:57.940746: step: 188/527, loss: 0.0020420073997229338 2023-01-23 02:23:59.019031: step: 192/527, loss: 0.014146232977509499 2023-01-23 02:24:00.130953: step: 196/527, loss: 0.03922691196203232 2023-01-23 02:24:01.240361: step: 200/527, loss: 0.0014178275596350431 2023-01-23 02:24:02.368649: step: 204/527, loss: 0.023692702874541283 2023-01-23 02:24:03.468340: step: 208/527, loss: 0.0002446174621582031 2023-01-23 02:24:04.604232: step: 212/527, loss: 0.00988307036459446 2023-01-23 02:24:05.730528: step: 216/527, loss: 0.2236575186252594 2023-01-23 02:24:06.898710: step: 220/527, loss: 0.02660999447107315 2023-01-23 02:24:08.029148: step: 224/527, loss: 0.03299436718225479 2023-01-23 02:24:09.140946: step: 228/527, loss: 0.0026733397971838713 2023-01-23 02:24:10.269865: step: 232/527, loss: 0.4415349066257477 2023-01-23 02:24:11.366485: step: 236/527, loss: 0.014021254144608974 2023-01-23 02:24:12.450020: step: 240/527, loss: 0.011941814795136452 2023-01-23 02:24:13.582096: step: 244/527, loss: 0.03391771391034126 2023-01-23 02:24:14.688573: step: 248/527, loss: 0.008073044009506702 2023-01-23 02:24:15.800827: step: 252/527, loss: 0.002171135041862726 2023-01-23 02:24:16.917689: step: 256/527, loss: 0.026909636333584785 2023-01-23 02:24:18.053448: step: 260/527, loss: 0.012856770306825638 2023-01-23 02:24:19.166268: step: 264/527, loss: 0.005203152075409889 2023-01-23 02:24:20.254996: step: 268/527, loss: 0.0035745620261877775 2023-01-23 02:24:21.359415: step: 272/527, loss: 0.00018396376981399953 2023-01-23 02:24:22.459870: step: 276/527, loss: 0.5419387817382812 2023-01-23 02:24:23.589413: step: 280/527, loss: 0.04282200336456299 2023-01-23 02:24:24.682093: step: 284/527, loss: -1.4305115314527939e-07 2023-01-23 02:24:25.795468: step: 288/527, loss: 0.01764402538537979 2023-01-23 02:24:26.922337: step: 292/527, loss: 0.00885772705078125 2023-01-23 02:24:28.029344: step: 296/527, loss: 0.04000279679894447 2023-01-23 02:24:29.143881: step: 300/527, loss: 0.00015449525380972773 2023-01-23 02:24:30.281427: step: 304/527, loss: 0.00456314068287611 2023-01-23 02:24:31.384479: step: 308/527, loss: 0.02991771697998047 2023-01-23 02:24:32.497074: step: 312/527, loss: 0.03943290933966637 2023-01-23 02:24:33.574421: step: 316/527, loss: 0.006877040956169367 2023-01-23 02:24:34.762231: step: 320/527, loss: 0.026555204764008522 2023-01-23 02:24:35.864406: step: 324/527, loss: 3.44276413670741e-05 2023-01-23 02:24:37.005240: step: 328/527, loss: 0.009870767593383789 2023-01-23 02:24:38.129677: step: 332/527, loss: 0.0012714386684820056 2023-01-23 02:24:39.235639: step: 336/527, loss: 0.006802749820053577 2023-01-23 02:24:40.391800: step: 340/527, loss: 0.011634444817900658 2023-01-23 02:24:41.501727: step: 344/527, loss: 0.0029994011856615543 2023-01-23 02:24:42.604243: step: 348/527, loss: 0.015510296449065208 2023-01-23 02:24:43.736507: step: 352/527, loss: 0.017762470990419388 2023-01-23 02:24:44.835479: step: 356/527, loss: 0.0029795647133141756 2023-01-23 02:24:45.942101: step: 360/527, loss: 0.004479885566979647 2023-01-23 02:24:47.059949: step: 364/527, loss: 0.03564911335706711 2023-01-23 02:24:48.187766: step: 368/527, loss: 0.018178272992372513 2023-01-23 02:24:49.297614: step: 372/527, loss: 0.004526329226791859 2023-01-23 02:24:50.404950: step: 376/527, loss: 0.0023242949973791838 2023-01-23 02:24:51.541736: step: 380/527, loss: 0.01954975165426731 2023-01-23 02:24:52.718201: step: 384/527, loss: 0.0011478423839434981 2023-01-23 02:24:53.846330: step: 388/527, loss: 0.03871011734008789 2023-01-23 02:24:54.992271: step: 392/527, loss: 0.03529510647058487 2023-01-23 02:24:56.087931: step: 396/527, loss: 0.005182838533073664 2023-01-23 02:24:57.251677: step: 400/527, loss: 0.001241493271663785 2023-01-23 02:24:58.349305: step: 404/527, loss: 0.006157398223876953 2023-01-23 02:24:59.460925: step: 408/527, loss: 0.011606884188950062 2023-01-23 02:25:00.540917: step: 412/527, loss: 0.0034851073287427425 2023-01-23 02:25:01.670863: step: 416/527, loss: 0.00500755337998271 2023-01-23 02:25:02.786895: step: 420/527, loss: 0.05559854954481125 2023-01-23 02:25:03.904970: step: 424/527, loss: 0.0035361291375011206 2023-01-23 02:25:05.020619: step: 428/527, loss: 0.0012041092850267887 2023-01-23 02:25:06.121817: step: 432/527, loss: 0.03157329931855202 2023-01-23 02:25:07.256663: step: 436/527, loss: 0.008916949853301048 2023-01-23 02:25:08.353390: step: 440/527, loss: 0.0005478858947753906 2023-01-23 02:25:09.469466: step: 444/527, loss: 0.0008813858148641884 2023-01-23 02:25:10.593826: step: 448/527, loss: 0.0001125335693359375 2023-01-23 02:25:11.729866: step: 452/527, loss: 0.0183684341609478 2023-01-23 02:25:12.824495: step: 456/527, loss: 0.038498688489198685 2023-01-23 02:25:13.938576: step: 460/527, loss: 0.0067369937896728516 2023-01-23 02:25:15.068917: step: 464/527, loss: 0.008778715506196022 2023-01-23 02:25:16.190453: step: 468/527, loss: 0.008963823318481445 2023-01-23 02:25:17.328916: step: 472/527, loss: 0.00019350051297806203 2023-01-23 02:25:18.436380: step: 476/527, loss: 0.04838857799768448 2023-01-23 02:25:19.566365: step: 480/527, loss: 0.017499923706054688 2023-01-23 02:25:20.710376: step: 484/527, loss: 0.0019974708557128906 2023-01-23 02:25:21.817483: step: 488/527, loss: 7.62939453125e-05 2023-01-23 02:25:22.911641: step: 492/527, loss: 2.040863000729587e-05 2023-01-23 02:25:24.016949: step: 496/527, loss: 0.04352226108312607 2023-01-23 02:25:25.144434: step: 500/527, loss: 0.013031196780502796 2023-01-23 02:25:26.232646: step: 504/527, loss: 0.017792606726288795 2023-01-23 02:25:27.332582: step: 508/527, loss: 0.041040610522031784 2023-01-23 02:25:28.455487: step: 512/527, loss: 0.007233142852783203 2023-01-23 02:25:29.556176: step: 516/527, loss: 0.005320454016327858 2023-01-23 02:25:30.647753: step: 520/527, loss: 0.019151879474520683 2023-01-23 02:25:31.809601: step: 524/527, loss: 0.0002973705413751304 2023-01-23 02:25:32.931661: step: 528/527, loss: 0.02302999421954155 2023-01-23 02:25:34.052917: step: 532/527, loss: 0.00029754641582258046 2023-01-23 02:25:35.187551: step: 536/527, loss: 0.006839322857558727 2023-01-23 02:25:36.310033: step: 540/527, loss: 0.03465862572193146 2023-01-23 02:25:37.434190: step: 544/527, loss: 0.04239311069250107 2023-01-23 02:25:38.540453: step: 548/527, loss: 0.009869957342743874 2023-01-23 02:25:39.674336: step: 552/527, loss: 0.03346814960241318 2023-01-23 02:25:40.781540: step: 556/527, loss: 0.006892109289765358 2023-01-23 02:25:41.910684: step: 560/527, loss: 0.012940024957060814 2023-01-23 02:25:42.992407: step: 564/527, loss: 0.03335418552160263 2023-01-23 02:25:44.140188: step: 568/527, loss: 0.009668446145951748 2023-01-23 02:25:45.252314: step: 572/527, loss: 0.008803081698715687 2023-01-23 02:25:46.355399: step: 576/527, loss: 0.0006324768182821572 2023-01-23 02:25:47.476603: step: 580/527, loss: 0.008044051937758923 2023-01-23 02:25:48.594583: step: 584/527, loss: 0.012132071889936924 2023-01-23 02:25:49.726011: step: 588/527, loss: 0.003170299343764782 2023-01-23 02:25:50.834724: step: 592/527, loss: 0.0005458832019940019 2023-01-23 02:25:51.928130: step: 596/527, loss: 0.044548988342285156 2023-01-23 02:25:53.050139: step: 600/527, loss: 0.031710244715213776 2023-01-23 02:25:54.138171: step: 604/527, loss: 0.046830371022224426 2023-01-23 02:25:55.258725: step: 608/527, loss: 0.0058609009720385075 2023-01-23 02:25:56.379321: step: 612/527, loss: 0.003025627229362726 2023-01-23 02:25:57.507876: step: 616/527, loss: 0.015439033508300781 2023-01-23 02:25:58.613324: step: 620/527, loss: 0.00023517609224654734 2023-01-23 02:25:59.718226: step: 624/527, loss: 0.017304515466094017 2023-01-23 02:26:00.861031: step: 628/527, loss: 0.00124187464825809 2023-01-23 02:26:01.959570: step: 632/527, loss: 0.6089746356010437 2023-01-23 02:26:03.080428: step: 636/527, loss: 0.0002651214599609375 2023-01-23 02:26:04.191608: step: 640/527, loss: 0.12038660049438477 2023-01-23 02:26:05.324590: step: 644/527, loss: 0.04125823825597763 2023-01-23 02:26:06.460890: step: 648/527, loss: 0.002989357803016901 2023-01-23 02:26:07.589809: step: 652/527, loss: 0.0663488358259201 2023-01-23 02:26:08.734403: step: 656/527, loss: 0.021756362169981003 2023-01-23 02:26:09.854734: step: 660/527, loss: 0.050377894192934036 2023-01-23 02:26:10.980861: step: 664/527, loss: 0.0016487122047692537 2023-01-23 02:26:12.071651: step: 668/527, loss: 5.831718590343371e-05 2023-01-23 02:26:13.192522: step: 672/527, loss: 0.010119056329131126 2023-01-23 02:26:14.296790: step: 676/527, loss: 0.00011010170419467613 2023-01-23 02:26:15.396305: step: 680/527, loss: 0.01700625568628311 2023-01-23 02:26:16.523269: step: 684/527, loss: 0.008041572757065296 2023-01-23 02:26:17.654469: step: 688/527, loss: 0.0005945205921307206 2023-01-23 02:26:18.735268: step: 692/527, loss: 0.0014999390114098787 2023-01-23 02:26:19.851292: step: 696/527, loss: 0.006946563720703125 2023-01-23 02:26:20.982413: step: 700/527, loss: 0.0009490966331213713 2023-01-23 02:26:22.108739: step: 704/527, loss: 0.0010449886322021484 2023-01-23 02:26:23.219268: step: 708/527, loss: 0.0012478828430175781 2023-01-23 02:26:24.361048: step: 712/527, loss: 0.008593942038714886 2023-01-23 02:26:25.464049: step: 716/527, loss: 0.028827382251620293 2023-01-23 02:26:26.592175: step: 720/527, loss: 0.003876304719597101 2023-01-23 02:26:27.708685: step: 724/527, loss: 0.1774301528930664 2023-01-23 02:26:28.820659: step: 728/527, loss: 0.0007627487066201866 2023-01-23 02:26:29.946144: step: 732/527, loss: 0.0036920548882335424 2023-01-23 02:26:31.060041: step: 736/527, loss: 0.014010143466293812 2023-01-23 02:26:32.172622: step: 740/527, loss: 0.016585636883974075 2023-01-23 02:26:33.286053: step: 744/527, loss: 0.002349674701690674 2023-01-23 02:26:34.391641: step: 748/527, loss: 0.018917512148618698 2023-01-23 02:26:35.482334: step: 752/527, loss: 0.020033836364746094 2023-01-23 02:26:36.577547: step: 756/527, loss: 0.0008847237331792712 2023-01-23 02:26:37.702380: step: 760/527, loss: 0.04032916948199272 2023-01-23 02:26:38.827487: step: 764/527, loss: 0.00039119721623137593 2023-01-23 02:26:39.926732: step: 768/527, loss: 0.009390830993652344 2023-01-23 02:26:41.067938: step: 772/527, loss: 0.0029379846528172493 2023-01-23 02:26:42.186661: step: 776/527, loss: 0.0011009216541424394 2023-01-23 02:26:43.304363: step: 780/527, loss: 0.0897190049290657 2023-01-23 02:26:44.401895: step: 784/527, loss: 0.014131307601928711 2023-01-23 02:26:45.516119: step: 788/527, loss: 0.012793255038559437 2023-01-23 02:26:46.617222: step: 792/527, loss: 0.010584450326859951 2023-01-23 02:26:47.723320: step: 796/527, loss: 0.012411785311996937 2023-01-23 02:26:48.845929: step: 800/527, loss: 0.01278076134622097 2023-01-23 02:26:49.962406: step: 804/527, loss: 0.01890554465353489 2023-01-23 02:26:51.072079: step: 808/527, loss: 0.016850853338837624 2023-01-23 02:26:52.202875: step: 812/527, loss: 0.026802444830536842 2023-01-23 02:26:53.327798: step: 816/527, loss: 0.011115193367004395 2023-01-23 02:26:54.447629: step: 820/527, loss: 0.0012163162464275956 2023-01-23 02:26:55.561920: step: 824/527, loss: 0.012557792477309704 2023-01-23 02:26:56.676805: step: 828/527, loss: 0.017565656453371048 2023-01-23 02:26:57.777376: step: 832/527, loss: 0.0028841018211096525 2023-01-23 02:26:58.896472: step: 836/527, loss: 0.013046360574662685 2023-01-23 02:27:00.037858: step: 840/527, loss: 0.012713813222944736 2023-01-23 02:27:01.168209: step: 844/527, loss: 0.04854559898376465 2023-01-23 02:27:02.270781: step: 848/527, loss: 0.006130218971520662 2023-01-23 02:27:03.394564: step: 852/527, loss: 0.010632324032485485 2023-01-23 02:27:04.494004: step: 856/527, loss: 0.007375145331025124 2023-01-23 02:27:05.633952: step: 860/527, loss: 0.009763908572494984 2023-01-23 02:27:06.758043: step: 864/527, loss: 0.032332804054021835 2023-01-23 02:27:07.879324: step: 868/527, loss: 0.02527923695743084 2023-01-23 02:27:08.990789: step: 872/527, loss: 0.03210477903485298 2023-01-23 02:27:10.093106: step: 876/527, loss: 0.0006395339732989669 2023-01-23 02:27:11.231790: step: 880/527, loss: 0.09718990325927734 2023-01-23 02:27:12.333713: step: 884/527, loss: 0.00027589796809479594 2023-01-23 02:27:13.415319: step: 888/527, loss: 0.002987766172736883 2023-01-23 02:27:14.515602: step: 892/527, loss: 0.012757696211338043 2023-01-23 02:27:15.639161: step: 896/527, loss: 0.12178345024585724 2023-01-23 02:27:16.798920: step: 900/527, loss: 0.06779947876930237 2023-01-23 02:27:17.909916: step: 904/527, loss: 0.010059070773422718 2023-01-23 02:27:19.045717: step: 908/527, loss: 0.0022483826614916325 2023-01-23 02:27:20.154852: step: 912/527, loss: 0.017208267003297806 2023-01-23 02:27:21.272859: step: 916/527, loss: 0.011341189965605736 2023-01-23 02:27:22.375418: step: 920/527, loss: 0.008183193393051624 2023-01-23 02:27:23.483107: step: 924/527, loss: 0.0021278380881994963 2023-01-23 02:27:24.585968: step: 928/527, loss: 0.014651966281235218 2023-01-23 02:27:25.715100: step: 932/527, loss: 0.09289512783288956 2023-01-23 02:27:26.847679: step: 936/527, loss: 0.025646591559052467 2023-01-23 02:27:27.956252: step: 940/527, loss: 0.15113377571105957 2023-01-23 02:27:29.090280: step: 944/527, loss: 0.01959419436752796 2023-01-23 02:27:30.254810: step: 948/527, loss: 0.04614443704485893 2023-01-23 02:27:31.348636: step: 952/527, loss: 0.0031536102760583162 2023-01-23 02:27:32.454102: step: 956/527, loss: 0.03664245456457138 2023-01-23 02:27:33.570534: step: 960/527, loss: 0.0006259441724978387 2023-01-23 02:27:34.716906: step: 964/527, loss: 0.00119953160174191 2023-01-23 02:27:35.862451: step: 968/527, loss: 0.00019273758516646922 2023-01-23 02:27:36.966792: step: 972/527, loss: 0.013826752081513405 2023-01-23 02:27:38.113890: step: 976/527, loss: 6.198883056640625e-05 2023-01-23 02:27:39.241791: step: 980/527, loss: 0.0074058775790035725 2023-01-23 02:27:40.346669: step: 984/527, loss: 0.0036906241439282894 2023-01-23 02:27:41.477874: step: 988/527, loss: 0.02922978438436985 2023-01-23 02:27:42.563303: step: 992/527, loss: 0.0187088493257761 2023-01-23 02:27:43.701790: step: 996/527, loss: 0.002597617916762829 2023-01-23 02:27:44.851198: step: 1000/527, loss: 0.19203153252601624 2023-01-23 02:27:45.994944: step: 1004/527, loss: 0.0067153931595385075 2023-01-23 02:27:47.098262: step: 1008/527, loss: 0.0017892837058752775 2023-01-23 02:27:48.226476: step: 1012/527, loss: 0.0003285408020019531 2023-01-23 02:27:49.389408: step: 1016/527, loss: 0.004096603486686945 2023-01-23 02:27:50.558003: step: 1020/527, loss: 0.013352966867387295 2023-01-23 02:27:51.632449: step: 1024/527, loss: 0.0005763053777627647 2023-01-23 02:27:52.739680: step: 1028/527, loss: 0.03412031754851341 2023-01-23 02:27:53.872394: step: 1032/527, loss: 0.00015039443678688258 2023-01-23 02:27:54.999227: step: 1036/527, loss: 0.007493781857192516 2023-01-23 02:27:56.130375: step: 1040/527, loss: 0.008114052005112171 2023-01-23 02:27:57.263939: step: 1044/527, loss: 0.04449958726763725 2023-01-23 02:27:58.365238: step: 1048/527, loss: 0.0006418228149414062 2023-01-23 02:27:59.500285: step: 1052/527, loss: 0.030861472710967064 2023-01-23 02:28:00.600438: step: 1056/527, loss: 0.017779922112822533 2023-01-23 02:28:01.700533: step: 1060/527, loss: 0.004881381988525391 2023-01-23 02:28:02.794369: step: 1064/527, loss: 0.01088571548461914 2023-01-23 02:28:03.889842: step: 1068/527, loss: 0.007139015477150679 2023-01-23 02:28:04.981771: step: 1072/527, loss: 0.004183827433735132 2023-01-23 02:28:06.079780: step: 1076/527, loss: 0.028364038094878197 2023-01-23 02:28:07.207439: step: 1080/527, loss: 0.00625190744176507 2023-01-23 02:28:08.361601: step: 1084/527, loss: 0.022876977920532227 2023-01-23 02:28:09.487374: step: 1088/527, loss: 0.1670890897512436 2023-01-23 02:28:10.619903: step: 1092/527, loss: 0.005206489935517311 2023-01-23 02:28:11.730185: step: 1096/527, loss: 0.025931548327207565 2023-01-23 02:28:12.861832: step: 1100/527, loss: 0.0007472038269042969 2023-01-23 02:28:13.991349: step: 1104/527, loss: 0.0023030161391943693 2023-01-23 02:28:15.106730: step: 1108/527, loss: 0.007352924905717373 2023-01-23 02:28:16.238820: step: 1112/527, loss: 0.02576141245663166 2023-01-23 02:28:17.342869: step: 1116/527, loss: 0.017031479626893997 2023-01-23 02:28:18.447226: step: 1120/527, loss: 0.014708328992128372 2023-01-23 02:28:19.560724: step: 1124/527, loss: 0.08752937614917755 2023-01-23 02:28:20.703816: step: 1128/527, loss: 0.04369544982910156 2023-01-23 02:28:21.811273: step: 1132/527, loss: 0.027330685406923294 2023-01-23 02:28:22.925497: step: 1136/527, loss: 6.36577678960748e-05 2023-01-23 02:28:24.061296: step: 1140/527, loss: 0.0549774169921875 2023-01-23 02:28:25.195079: step: 1144/527, loss: 0.0032377243041992188 2023-01-23 02:28:26.334189: step: 1148/527, loss: 0.045659683644771576 2023-01-23 02:28:27.427138: step: 1152/527, loss: 0.01936493068933487 2023-01-23 02:28:28.539422: step: 1156/527, loss: 0.0019378185970708728 2023-01-23 02:28:29.663945: step: 1160/527, loss: 0.002151107881218195 2023-01-23 02:28:30.771391: step: 1164/527, loss: 0.002830600831657648 2023-01-23 02:28:31.867872: step: 1168/527, loss: 0.0023395537864416838 2023-01-23 02:28:33.008591: step: 1172/527, loss: 0.6695175170898438 2023-01-23 02:28:34.103047: step: 1176/527, loss: 0.048874858766794205 2023-01-23 02:28:35.229689: step: 1180/527, loss: 0.0002659797901287675 2023-01-23 02:28:36.347383: step: 1184/527, loss: 0.0029108047019690275 2023-01-23 02:28:37.474778: step: 1188/527, loss: 0.09607252478599548 2023-01-23 02:28:38.604005: step: 1192/527, loss: 0.02913341484963894 2023-01-23 02:28:39.743020: step: 1196/527, loss: 0.0024205208756029606 2023-01-23 02:28:40.868909: step: 1200/527, loss: 0.006854248233139515 2023-01-23 02:28:41.986532: step: 1204/527, loss: 0.00104522705078125 2023-01-23 02:28:43.160408: step: 1208/527, loss: 0.00017652512178756297 2023-01-23 02:28:44.301301: step: 1212/527, loss: 0.00683021591976285 2023-01-23 02:28:45.402872: step: 1216/527, loss: 0.0012537003494799137 2023-01-23 02:28:46.521891: step: 1220/527, loss: 0.005291557405143976 2023-01-23 02:28:47.634537: step: 1224/527, loss: 0.001508522080257535 2023-01-23 02:28:48.750849: step: 1228/527, loss: 0.0008301734924316406 2023-01-23 02:28:49.880222: step: 1232/527, loss: 0.009391403757035732 2023-01-23 02:28:50.979382: step: 1236/527, loss: 0.0007136345375329256 2023-01-23 02:28:52.091519: step: 1240/527, loss: 2.1266936528263614e-05 2023-01-23 02:28:53.192717: step: 1244/527, loss: 0.003770160721614957 2023-01-23 02:28:54.326440: step: 1248/527, loss: 0.0062957764603197575 2023-01-23 02:28:55.465569: step: 1252/527, loss: 0.003944778349250555 2023-01-23 02:28:56.578383: step: 1256/527, loss: 0.04713840410113335 2023-01-23 02:28:57.688877: step: 1260/527, loss: 0.0015480995643883944 2023-01-23 02:28:58.800842: step: 1264/527, loss: 9.126662916969508e-05 2023-01-23 02:28:59.899258: step: 1268/527, loss: 0.021884823217988014 2023-01-23 02:29:01.047376: step: 1272/527, loss: 0.04333152994513512 2023-01-23 02:29:02.177869: step: 1276/527, loss: 0.029973983764648438 2023-01-23 02:29:03.332124: step: 1280/527, loss: 0.016769981011748314 2023-01-23 02:29:04.441655: step: 1284/527, loss: 0.008774567395448685 2023-01-23 02:29:05.549657: step: 1288/527, loss: 0.007098579313606024 2023-01-23 02:29:06.667238: step: 1292/527, loss: 0.025838637724518776 2023-01-23 02:29:07.756919: step: 1296/527, loss: 0.01569361612200737 2023-01-23 02:29:08.862577: step: 1300/527, loss: 0.002582645509392023 2023-01-23 02:29:09.970945: step: 1304/527, loss: 0.0036819458473473787 2023-01-23 02:29:11.074945: step: 1308/527, loss: 0.0008226395002566278 2023-01-23 02:29:12.203675: step: 1312/527, loss: 0.009534453973174095 2023-01-23 02:29:13.323520: step: 1316/527, loss: 0.001239776611328125 2023-01-23 02:29:14.419354: step: 1320/527, loss: 0.025084828957915306 2023-01-23 02:29:15.548128: step: 1324/527, loss: 0.033122241497039795 2023-01-23 02:29:16.644046: step: 1328/527, loss: 0.005719661712646484 2023-01-23 02:29:17.732405: step: 1332/527, loss: 0.0009204388479702175 2023-01-23 02:29:18.841414: step: 1336/527, loss: 0.0003449440118856728 2023-01-23 02:29:19.971025: step: 1340/527, loss: 0.0166518222540617 2023-01-23 02:29:21.094901: step: 1344/527, loss: 0.01487827394157648 2023-01-23 02:29:22.193082: step: 1348/527, loss: 0.004667234607040882 2023-01-23 02:29:23.282916: step: 1352/527, loss: 0.003722286317497492 2023-01-23 02:29:24.360320: step: 1356/527, loss: 0.018856525421142578 2023-01-23 02:29:25.453317: step: 1360/527, loss: 0.00654869107529521 2023-01-23 02:29:26.585963: step: 1364/527, loss: 0.008455371484160423 2023-01-23 02:29:27.719639: step: 1368/527, loss: 0.007340908050537109 2023-01-23 02:29:28.814608: step: 1372/527, loss: 0.010366343893110752 2023-01-23 02:29:29.976527: step: 1376/527, loss: 0.03050079569220543 2023-01-23 02:29:31.089568: step: 1380/527, loss: 0.0019420147873461246 2023-01-23 02:29:32.205374: step: 1384/527, loss: 0.01141667366027832 2023-01-23 02:29:33.332661: step: 1388/527, loss: 0.0011735915904864669 2023-01-23 02:29:34.476634: step: 1392/527, loss: 0.0193634033203125 2023-01-23 02:29:35.581540: step: 1396/527, loss: 2.822876012942288e-05 2023-01-23 02:29:36.722747: step: 1400/527, loss: 0.026920510455965996 2023-01-23 02:29:37.836407: step: 1404/527, loss: 0.0067053320817649364 2023-01-23 02:29:38.933620: step: 1408/527, loss: 0.01545019168406725 2023-01-23 02:29:40.052602: step: 1412/527, loss: 0.07587843388319016 2023-01-23 02:29:41.170487: step: 1416/527, loss: 0.026473617181181908 2023-01-23 02:29:42.267023: step: 1420/527, loss: 0.0024975778069347143 2023-01-23 02:29:43.383005: step: 1424/527, loss: 0.03621387854218483 2023-01-23 02:29:44.485781: step: 1428/527, loss: 0.003635978791862726 2023-01-23 02:29:45.607344: step: 1432/527, loss: 0.022147178649902344 2023-01-23 02:29:46.714345: step: 1436/527, loss: 0.0013586044078692794 2023-01-23 02:29:47.828001: step: 1440/527, loss: 0.014281844720244408 2023-01-23 02:29:48.932626: step: 1444/527, loss: 0.01669750176370144 2023-01-23 02:29:50.078599: step: 1448/527, loss: 0.0029286385979503393 2023-01-23 02:29:51.211200: step: 1452/527, loss: 0.08057241141796112 2023-01-23 02:29:52.327942: step: 1456/527, loss: 0.0029115676879882812 2023-01-23 02:29:53.465333: step: 1460/527, loss: 0.012459207326173782 2023-01-23 02:29:54.582615: step: 1464/527, loss: 0.01417398452758789 2023-01-23 02:29:55.687700: step: 1468/527, loss: 0.04093170166015625 2023-01-23 02:29:56.818249: step: 1472/527, loss: 0.026829909533262253 2023-01-23 02:29:57.943976: step: 1476/527, loss: 0.00031766892061568797 2023-01-23 02:29:59.044658: step: 1480/527, loss: 0.00047388076200149953 2023-01-23 02:30:00.163158: step: 1484/527, loss: 0.0019450187683105469 2023-01-23 02:30:01.300596: step: 1488/527, loss: 0.01916656456887722 2023-01-23 02:30:02.432618: step: 1492/527, loss: 0.04605579748749733 2023-01-23 02:30:03.560188: step: 1496/527, loss: 0.002811384154483676 2023-01-23 02:30:04.659854: step: 1500/527, loss: 0.018135739490389824 2023-01-23 02:30:05.767388: step: 1504/527, loss: 0.0009221077198162675 2023-01-23 02:30:06.881647: step: 1508/527, loss: 0.00060272216796875 2023-01-23 02:30:08.035467: step: 1512/527, loss: 0.05436153709888458 2023-01-23 02:30:09.174144: step: 1516/527, loss: 0.0025850296951830387 2023-01-23 02:30:10.295085: step: 1520/527, loss: 0.03289975970983505 2023-01-23 02:30:11.414573: step: 1524/527, loss: 0.11478567868471146 2023-01-23 02:30:12.523202: step: 1528/527, loss: 0.00229644775390625 2023-01-23 02:30:13.655231: step: 1532/527, loss: 0.11428437381982803 2023-01-23 02:30:14.759168: step: 1536/527, loss: 0.03006310574710369 2023-01-23 02:30:15.848569: step: 1540/527, loss: 0.0005461692926473916 2023-01-23 02:30:16.993466: step: 1544/527, loss: 0.019603919237852097 2023-01-23 02:30:18.130314: step: 1548/527, loss: 0.03254871442914009 2023-01-23 02:30:19.252742: step: 1552/527, loss: 0.008860398083925247 2023-01-23 02:30:20.343138: step: 1556/527, loss: 0.010446262545883656 2023-01-23 02:30:21.476750: step: 1560/527, loss: 0.1184714287519455 2023-01-23 02:30:22.590323: step: 1564/527, loss: 0.05498543009161949 2023-01-23 02:30:23.687820: step: 1568/527, loss: 0.005354690831154585 2023-01-23 02:30:24.819307: step: 1572/527, loss: 0.0021903037559241056 2023-01-23 02:30:25.904681: step: 1576/527, loss: 2.9850007194909267e-05 2023-01-23 02:30:27.037624: step: 1580/527, loss: 0.00227947230450809 2023-01-23 02:30:28.163189: step: 1584/527, loss: 0.002041339874267578 2023-01-23 02:30:29.284020: step: 1588/527, loss: 0.08831587433815002 2023-01-23 02:30:30.385993: step: 1592/527, loss: 0.019071388989686966 2023-01-23 02:30:31.500546: step: 1596/527, loss: 0.013457775115966797 2023-01-23 02:30:32.640005: step: 1600/527, loss: 0.013419151306152344 2023-01-23 02:30:33.757070: step: 1604/527, loss: 0.004733848385512829 2023-01-23 02:30:34.897208: step: 1608/527, loss: 0.022649575024843216 2023-01-23 02:30:36.015070: step: 1612/527, loss: 0.032288696616888046 2023-01-23 02:30:37.112955: step: 1616/527, loss: 0.0032791136763989925 2023-01-23 02:30:38.264976: step: 1620/527, loss: 0.004955387208610773 2023-01-23 02:30:39.368749: step: 1624/527, loss: 0.027863217517733574 2023-01-23 02:30:40.470714: step: 1628/527, loss: 0.010656165890395641 2023-01-23 02:30:41.592662: step: 1632/527, loss: 0.03336024284362793 2023-01-23 02:30:42.739403: step: 1636/527, loss: 0.019741438329219818 2023-01-23 02:30:43.865029: step: 1640/527, loss: 0.022695159539580345 2023-01-23 02:30:44.971588: step: 1644/527, loss: 0.00014290810213424265 2023-01-23 02:30:46.069234: step: 1648/527, loss: 0.00019950867863371968 2023-01-23 02:30:47.195453: step: 1652/527, loss: 0.0030302046798169613 2023-01-23 02:30:48.291541: step: 1656/527, loss: 0.00017089844914153218 2023-01-23 02:30:49.463773: step: 1660/527, loss: 0.014669609256088734 2023-01-23 02:30:50.553877: step: 1664/527, loss: 0.08985739201307297 2023-01-23 02:30:51.696036: step: 1668/527, loss: 0.03051757998764515 2023-01-23 02:30:52.855576: step: 1672/527, loss: 0.06256332993507385 2023-01-23 02:30:53.967382: step: 1676/527, loss: 0.03703415393829346 2023-01-23 02:30:55.073118: step: 1680/527, loss: 0.08514900505542755 2023-01-23 02:30:56.172854: step: 1684/527, loss: 0.04565849527716637 2023-01-23 02:30:57.280073: step: 1688/527, loss: 0.006635952275246382 2023-01-23 02:30:58.361735: step: 1692/527, loss: 0.008678436279296875 2023-01-23 02:30:59.464849: step: 1696/527, loss: 0.027866745367646217 2023-01-23 02:31:00.572062: step: 1700/527, loss: 0.042009733617305756 2023-01-23 02:31:01.689309: step: 1704/527, loss: 0.0009476661798544228 2023-01-23 02:31:02.791638: step: 1708/527, loss: 0.025545883923768997 2023-01-23 02:31:03.909104: step: 1712/527, loss: 0.014864349737763405 2023-01-23 02:31:05.002944: step: 1716/527, loss: 0.0021449089981615543 2023-01-23 02:31:06.128938: step: 1720/527, loss: 0.023329783231019974 2023-01-23 02:31:07.284123: step: 1724/527, loss: 0.045185379683971405 2023-01-23 02:31:08.403040: step: 1728/527, loss: 0.00017251967801712453 2023-01-23 02:31:09.482676: step: 1732/527, loss: 0.015349388122558594 2023-01-23 02:31:10.600485: step: 1736/527, loss: 0.013393688946962357 2023-01-23 02:31:11.710981: step: 1740/527, loss: 0.001576328300870955 2023-01-23 02:31:12.870219: step: 1744/527, loss: 0.006050777621567249 2023-01-23 02:31:13.960559: step: 1748/527, loss: 0.0046484945341944695 2023-01-23 02:31:15.086339: step: 1752/527, loss: 0.022418595850467682 2023-01-23 02:31:16.208227: step: 1756/527, loss: 0.015752125531435013 2023-01-23 02:31:17.299514: step: 1760/527, loss: 0.0025797844864428043 2023-01-23 02:31:18.429595: step: 1764/527, loss: 0.05885200574994087 2023-01-23 02:31:19.559668: step: 1768/527, loss: 0.006206226535141468 2023-01-23 02:31:20.684941: step: 1772/527, loss: 0.002802944276481867 2023-01-23 02:31:21.808383: step: 1776/527, loss: 0.01677875593304634 2023-01-23 02:31:22.908367: step: 1780/527, loss: 0.00016489028348587453 2023-01-23 02:31:23.978922: step: 1784/527, loss: 0.0174605380743742 2023-01-23 02:31:25.127638: step: 1788/527, loss: 0.007315254304558039 2023-01-23 02:31:26.240945: step: 1792/527, loss: 0.006932068150490522 2023-01-23 02:31:27.319185: step: 1796/527, loss: 0.008213234134018421 2023-01-23 02:31:28.456710: step: 1800/527, loss: 0.00031769275665283203 2023-01-23 02:31:29.573479: step: 1804/527, loss: 0.0651666596531868 2023-01-23 02:31:30.707382: step: 1808/527, loss: 0.022185945883393288 2023-01-23 02:31:31.811930: step: 1812/527, loss: 0.01569500006735325 2023-01-23 02:31:32.915085: step: 1816/527, loss: 0.018458176404237747 2023-01-23 02:31:34.029965: step: 1820/527, loss: 0.051366619765758514 2023-01-23 02:31:35.162217: step: 1824/527, loss: 0.007671976462006569 2023-01-23 02:31:36.259737: step: 1828/527, loss: 0.0042264461517333984 2023-01-23 02:31:37.381171: step: 1832/527, loss: 0.008039474487304688 2023-01-23 02:31:38.487295: step: 1836/527, loss: 0.0223586093634367 2023-01-23 02:31:39.582979: step: 1840/527, loss: 0.014232253655791283 2023-01-23 02:31:40.712180: step: 1844/527, loss: 0.036349013447761536 2023-01-23 02:31:41.813985: step: 1848/527, loss: 0.007012367248535156 2023-01-23 02:31:42.914286: step: 1852/527, loss: 4.1484832763671875e-05 2023-01-23 02:31:44.044517: step: 1856/527, loss: 0.017586613073945045 2023-01-23 02:31:45.147565: step: 1860/527, loss: 0.009233379736542702 2023-01-23 02:31:46.267081: step: 1864/527, loss: 9.050369408214465e-05 2023-01-23 02:31:47.404317: step: 1868/527, loss: 0.0008281707996502519 2023-01-23 02:31:48.578427: step: 1872/527, loss: 0.045211028307676315 2023-01-23 02:31:49.734380: step: 1876/527, loss: 0.0004749298095703125 2023-01-23 02:31:50.834556: step: 1880/527, loss: 0.011823082342743874 2023-01-23 02:31:51.983441: step: 1884/527, loss: 0.0005674362182617188 2023-01-23 02:31:53.101010: step: 1888/527, loss: 0.04866781085729599 2023-01-23 02:31:54.214540: step: 1892/527, loss: 0.0034399032592773438 2023-01-23 02:31:55.362318: step: 1896/527, loss: 0.004403114318847656 2023-01-23 02:31:56.523215: step: 1900/527, loss: 0.029995013028383255 2023-01-23 02:31:57.654105: step: 1904/527, loss: 0.005824279971420765 2023-01-23 02:31:58.754278: step: 1908/527, loss: 0.001561021781526506 2023-01-23 02:31:59.856481: step: 1912/527, loss: 0.006012153811752796 2023-01-23 02:32:00.972291: step: 1916/527, loss: 0.001210117363370955 2023-01-23 02:32:02.084094: step: 1920/527, loss: 0.015359211713075638 2023-01-23 02:32:03.202067: step: 1924/527, loss: 0.00070018763653934 2023-01-23 02:32:04.293654: step: 1928/527, loss: 0.012458897195756435 2023-01-23 02:32:05.400544: step: 1932/527, loss: 0.008224201388657093 2023-01-23 02:32:06.496441: step: 1936/527, loss: 0.002225685166195035 2023-01-23 02:32:07.613708: step: 1940/527, loss: 0.04883261024951935 2023-01-23 02:32:08.738855: step: 1944/527, loss: 0.025039149448275566 2023-01-23 02:32:09.867505: step: 1948/527, loss: 0.0023357393220067024 2023-01-23 02:32:10.985419: step: 1952/527, loss: 0.011423682793974876 2023-01-23 02:32:12.072925: step: 1956/527, loss: 0.00042676928569562733 2023-01-23 02:32:13.189185: step: 1960/527, loss: 0.00040721893310546875 2023-01-23 02:32:14.288055: step: 1964/527, loss: 0.03636159747838974 2023-01-23 02:32:15.406035: step: 1968/527, loss: 0.001659393310546875 2023-01-23 02:32:16.488110: step: 1972/527, loss: 0.03954753652215004 2023-01-23 02:32:17.604681: step: 1976/527, loss: 0.0014842988457530737 2023-01-23 02:32:18.696612: step: 1980/527, loss: 0.01186308916658163 2023-01-23 02:32:19.835249: step: 1984/527, loss: 0.06276760250329971 2023-01-23 02:32:20.935942: step: 1988/527, loss: 0.008978081867098808 2023-01-23 02:32:22.032254: step: 1992/527, loss: 0.0027392387855798006 2023-01-23 02:32:23.141436: step: 1996/527, loss: 0.00047826769878156483 2023-01-23 02:32:24.263222: step: 2000/527, loss: 0.0005186080816201866 2023-01-23 02:32:25.361417: step: 2004/527, loss: 0.0012513160472735763 2023-01-23 02:32:26.463118: step: 2008/527, loss: 0.0024038313422352076 2023-01-23 02:32:27.576651: step: 2012/527, loss: 0.011249733157455921 2023-01-23 02:32:28.700453: step: 2016/527, loss: 0.0002080917329294607 2023-01-23 02:32:29.818435: step: 2020/527, loss: 0.05853691324591637 2023-01-23 02:32:30.932011: step: 2024/527, loss: 0.10398578643798828 2023-01-23 02:32:32.041419: step: 2028/527, loss: 0.016141893342137337 2023-01-23 02:32:33.152553: step: 2032/527, loss: 7.305145118152723e-05 2023-01-23 02:32:34.287688: step: 2036/527, loss: 0.0618865042924881 2023-01-23 02:32:35.384952: step: 2040/527, loss: 0.004568052478134632 2023-01-23 02:32:36.494212: step: 2044/527, loss: 0.050662897527217865 2023-01-23 02:32:37.627635: step: 2048/527, loss: 0.009787654504179955 2023-01-23 02:32:38.740659: step: 2052/527, loss: 0.20280705392360687 2023-01-23 02:32:39.875300: step: 2056/527, loss: 0.1688835173845291 2023-01-23 02:32:40.981776: step: 2060/527, loss: 0.008786487393081188 2023-01-23 02:32:42.139590: step: 2064/527, loss: 0.0009190559503622353 2023-01-23 02:32:43.281135: step: 2068/527, loss: 0.00034971238346770406 2023-01-23 02:32:44.410741: step: 2072/527, loss: 0.006160736549645662 2023-01-23 02:32:45.525180: step: 2076/527, loss: 0.018799399957060814 2023-01-23 02:32:46.622761: step: 2080/527, loss: 0.05475263670086861 2023-01-23 02:32:47.727781: step: 2084/527, loss: 0.0007116794586181641 2023-01-23 02:32:48.822600: step: 2088/527, loss: 0.0013433456188067794 2023-01-23 02:32:49.968516: step: 2092/527, loss: 0.048093464225530624 2023-01-23 02:32:51.066458: step: 2096/527, loss: 0.07046828418970108 2023-01-23 02:32:52.171561: step: 2100/527, loss: 0.002475023502483964 2023-01-23 02:32:53.313842: step: 2104/527, loss: 0.2958931028842926 2023-01-23 02:32:54.453110: step: 2108/527, loss: 0.11183682084083557 ================================================== Loss: 0.024 -------------------- Dev: {'event': {'p': 0.584, 'r': 0.7776298268974701, 'f1': 0.6670474014848657}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Test: {'event': {'p': 0.6253430924062214, 'r': 0.7811428571428571, 'f1': 0.6946138211382115}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Chinese: {'event': {'p': 0.5609756097560976, 'r': 0.8518518518518519, 'f1': 0.6764705882352942}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Korean: {'event': {'p': 0.559322033898305, 'r': 0.5238095238095238, 'f1': 0.5409836065573771}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Russian: {'event': {'p': 0.42857142857142855, 'r': 0.5, 'f1': 0.4615384615384615}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066252587991718, 'r': 0.7802929427430093, 'f1': 0.6825859056493885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Eng Test for Korean: {'event': {'p': 0.62580054894785, 'r': 0.7817142857142857, 'f1': 0.6951219512195121}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 18 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:33:36.256674: step: 4/527, loss: 0.040186502039432526 2023-01-23 02:33:37.365109: step: 8/527, loss: 0.0672091469168663 2023-01-23 02:33:38.486749: step: 12/527, loss: 0.027982331812381744 2023-01-23 02:33:39.624658: step: 16/527, loss: 0.0005046845180913806 2023-01-23 02:33:40.741651: step: 20/527, loss: 0.022781657055020332 2023-01-23 02:33:41.893499: step: 24/527, loss: 0.024577999487519264 2023-01-23 02:33:42.977533: step: 28/527, loss: 0.001166784786619246 2023-01-23 02:33:44.097205: step: 32/527, loss: 0.006028938107192516 2023-01-23 02:33:45.212249: step: 36/527, loss: 0.02263965643942356 2023-01-23 02:33:46.319501: step: 40/527, loss: 0.021024418994784355 2023-01-23 02:33:47.414330: step: 44/527, loss: 0.013406133279204369 2023-01-23 02:33:48.504977: step: 48/527, loss: 3.4761429560603574e-05 2023-01-23 02:33:49.645154: step: 52/527, loss: 0.016150474548339844 2023-01-23 02:33:50.765172: step: 56/527, loss: 0.02635030634701252 2023-01-23 02:33:51.871273: step: 60/527, loss: 0.017991255968809128 2023-01-23 02:33:52.977893: step: 64/527, loss: 0.0016645431751385331 2023-01-23 02:33:54.068166: step: 68/527, loss: 0.0035830498673021793 2023-01-23 02:33:55.175259: step: 72/527, loss: 0.03319501876831055 2023-01-23 02:33:56.270562: step: 76/527, loss: 0.0007450103294104338 2023-01-23 02:33:57.384160: step: 80/527, loss: 0.04156055673956871 2023-01-23 02:33:58.570159: step: 84/527, loss: 0.011931514367461205 2023-01-23 02:33:59.697200: step: 88/527, loss: 0.011863517574965954 2023-01-23 02:34:00.836392: step: 92/527, loss: 0.003681659698486328 2023-01-23 02:34:01.927093: step: 96/527, loss: 0.001239872071892023 2023-01-23 02:34:03.017636: step: 100/527, loss: 0.0032170298509299755 2023-01-23 02:34:04.129192: step: 104/527, loss: 0.010618782602250576 2023-01-23 02:34:05.250924: step: 108/527, loss: 0.002019691513851285 2023-01-23 02:34:06.406979: step: 112/527, loss: 0.030876733362674713 2023-01-23 02:34:07.516411: step: 116/527, loss: 0.009692382998764515 2023-01-23 02:34:08.637051: step: 120/527, loss: 0.03450927883386612 2023-01-23 02:34:09.751132: step: 124/527, loss: 0.0016699791885912418 2023-01-23 02:34:10.857157: step: 128/527, loss: 0.0008918762323446572 2023-01-23 02:34:11.941132: step: 132/527, loss: 0.013242721557617188 2023-01-23 02:34:13.060442: step: 136/527, loss: 0.007526874542236328 2023-01-23 02:34:14.195117: step: 140/527, loss: 0.008176612667739391 2023-01-23 02:34:15.302256: step: 144/527, loss: 0.007441902533173561 2023-01-23 02:34:16.391493: step: 148/527, loss: 6.408691115211695e-05 2023-01-23 02:34:17.485773: step: 152/527, loss: 0.04285097122192383 2023-01-23 02:34:18.615828: step: 156/527, loss: 0.0005171775701455772 2023-01-23 02:34:19.753735: step: 160/527, loss: 0.0024738311767578125 2023-01-23 02:34:20.847208: step: 164/527, loss: 0.01603221893310547 2023-01-23 02:34:21.899921: step: 168/527, loss: 0.006485557649284601 2023-01-23 02:34:23.020896: step: 172/527, loss: 0.001798486802726984 2023-01-23 02:34:24.126759: step: 176/527, loss: 0.008454036898911 2023-01-23 02:34:25.232156: step: 180/527, loss: 0.0202668197453022 2023-01-23 02:34:26.391159: step: 184/527, loss: 0.011136150918900967 2023-01-23 02:34:27.501768: step: 188/527, loss: 0.00013980866060592234 2023-01-23 02:34:28.635728: step: 192/527, loss: 0.0009034157264977694 2023-01-23 02:34:29.737038: step: 196/527, loss: 0.0030145645141601562 2023-01-23 02:34:30.841294: step: 200/527, loss: 0.0327632911503315 2023-01-23 02:34:31.968096: step: 204/527, loss: 0.0033899308182299137 2023-01-23 02:34:33.061520: step: 208/527, loss: 0.1989058405160904 2023-01-23 02:34:34.167652: step: 212/527, loss: 0.0010187149746343493 2023-01-23 02:34:35.301352: step: 216/527, loss: 0.0012699126964434981 2023-01-23 02:34:36.440850: step: 220/527, loss: 0.0005788326379843056 2023-01-23 02:34:37.560320: step: 224/527, loss: 0.004528141114860773 2023-01-23 02:34:38.650605: step: 228/527, loss: 0.0027210235130041838 2023-01-23 02:34:39.768302: step: 232/527, loss: 0.010525750927627087 2023-01-23 02:34:40.903530: step: 236/527, loss: 0.062098123133182526 2023-01-23 02:34:42.002010: step: 240/527, loss: 0.007528495974838734 2023-01-23 02:34:43.121247: step: 244/527, loss: 0.021370315924286842 2023-01-23 02:34:44.213209: step: 248/527, loss: 0.01905994489789009 2023-01-23 02:34:45.344581: step: 252/527, loss: 0.0005049228784628212 2023-01-23 02:34:46.484497: step: 256/527, loss: 0.0009790421463549137 2023-01-23 02:34:47.598736: step: 260/527, loss: 0.00026769639225676656 2023-01-23 02:34:48.724865: step: 264/527, loss: 0.0027523040771484375 2023-01-23 02:34:49.836289: step: 268/527, loss: 0.0002803802490234375 2023-01-23 02:34:50.978074: step: 272/527, loss: 0.10391931235790253 2023-01-23 02:34:52.083999: step: 276/527, loss: 0.00026702880859375 2023-01-23 02:34:53.205560: step: 280/527, loss: 0.00011491775512695312 2023-01-23 02:34:54.356250: step: 284/527, loss: 0.0007890701526775956 2023-01-23 02:34:55.438072: step: 288/527, loss: 0.004887866787612438 2023-01-23 02:34:56.539263: step: 292/527, loss: 0.005933189298957586 2023-01-23 02:34:57.647457: step: 296/527, loss: 0.0014652431709691882 2023-01-23 02:34:58.761203: step: 300/527, loss: 0.03130292892456055 2023-01-23 02:34:59.850279: step: 304/527, loss: 0.015362311154603958 2023-01-23 02:35:00.944242: step: 308/527, loss: 0.011118436232209206 2023-01-23 02:35:02.040656: step: 312/527, loss: 0.04197206348180771 2023-01-23 02:35:03.176554: step: 316/527, loss: 0.0005729199037887156 2023-01-23 02:35:04.255900: step: 320/527, loss: 0.020746994763612747 2023-01-23 02:35:05.355289: step: 324/527, loss: 0.012500714510679245 2023-01-23 02:35:06.447398: step: 328/527, loss: 0.02304520644247532 2023-01-23 02:35:07.526021: step: 332/527, loss: 0.0002158164861612022 2023-01-23 02:35:08.638048: step: 336/527, loss: 0.4049331843852997 2023-01-23 02:35:09.737563: step: 340/527, loss: 0.05718555673956871 2023-01-23 02:35:10.850574: step: 344/527, loss: 0.01338043250143528 2023-01-23 02:35:11.960235: step: 348/527, loss: 0.014764976687729359 2023-01-23 02:35:13.075119: step: 352/527, loss: 0.0810842514038086 2023-01-23 02:35:14.188017: step: 356/527, loss: 0.03073091432452202 2023-01-23 02:35:15.315456: step: 360/527, loss: 0.0013738631969317794 2023-01-23 02:35:16.448581: step: 364/527, loss: 0.0003941059112548828 2023-01-23 02:35:17.592888: step: 368/527, loss: 0.06422261893749237 2023-01-23 02:35:18.702986: step: 372/527, loss: 0.04136824607849121 2023-01-23 02:35:19.812698: step: 376/527, loss: 0.07424090802669525 2023-01-23 02:35:20.932178: step: 380/527, loss: 0.0432492271065712 2023-01-23 02:35:22.137191: step: 384/527, loss: 0.05633983761072159 2023-01-23 02:35:23.240851: step: 388/527, loss: 0.03162508085370064 2023-01-23 02:35:24.348840: step: 392/527, loss: 0.04622936621308327 2023-01-23 02:35:25.471110: step: 396/527, loss: 6.642937660217285e-05 2023-01-23 02:35:26.571611: step: 400/527, loss: 0.022319983690977097 2023-01-23 02:35:27.722233: step: 404/527, loss: 0.014006996527314186 2023-01-23 02:35:28.873637: step: 408/527, loss: 0.04410495609045029 2023-01-23 02:35:29.978203: step: 412/527, loss: 0.029336167499423027 2023-01-23 02:35:31.094629: step: 416/527, loss: 0.003387451171875 2023-01-23 02:35:32.242958: step: 420/527, loss: 0.02224436029791832 2023-01-23 02:35:33.350758: step: 424/527, loss: 0.027769852429628372 2023-01-23 02:35:34.458982: step: 428/527, loss: 0.017683982849121094 2023-01-23 02:35:35.581938: step: 432/527, loss: 0.015288162976503372 2023-01-23 02:35:36.706268: step: 436/527, loss: 0.030890941619873047 2023-01-23 02:35:37.848061: step: 440/527, loss: 0.33327770233154297 2023-01-23 02:35:38.976357: step: 444/527, loss: 0.07016811519861221 2023-01-23 02:35:40.095629: step: 448/527, loss: 0.04353253170847893 2023-01-23 02:35:41.236261: step: 452/527, loss: 3.261566234868951e-05 2023-01-23 02:35:42.397027: step: 456/527, loss: 0.3369169235229492 2023-01-23 02:35:43.497582: step: 460/527, loss: 0.002392006106674671 2023-01-23 02:35:44.588816: step: 464/527, loss: 0.006875419523566961 2023-01-23 02:35:45.737675: step: 468/527, loss: 0.03046569786965847 2023-01-23 02:35:46.856729: step: 472/527, loss: 0.006514167878776789 2023-01-23 02:35:47.991364: step: 476/527, loss: 0.008658219128847122 2023-01-23 02:35:49.118042: step: 480/527, loss: 0.0027048110496252775 2023-01-23 02:35:50.259970: step: 484/527, loss: 0.0037185668479651213 2023-01-23 02:35:51.392400: step: 488/527, loss: 0.01454925537109375 2023-01-23 02:35:52.527542: step: 492/527, loss: 0.010749674402177334 2023-01-23 02:35:53.632106: step: 496/527, loss: 0.00014638900756835938 2023-01-23 02:35:54.786034: step: 500/527, loss: 0.02398385852575302 2023-01-23 02:35:55.879013: step: 504/527, loss: 0.00013920068158768117 2023-01-23 02:35:56.998088: step: 508/527, loss: 0.0033648491371423006 2023-01-23 02:35:58.109932: step: 512/527, loss: 0.017268039286136627 2023-01-23 02:35:59.221823: step: 516/527, loss: 0.0032089711166918278 2023-01-23 02:36:00.335530: step: 520/527, loss: 0.014353180304169655 2023-01-23 02:36:01.474562: step: 524/527, loss: 0.020438384264707565 2023-01-23 02:36:02.629915: step: 528/527, loss: 0.02605152130126953 2023-01-23 02:36:03.796734: step: 532/527, loss: 0.004709052853286266 2023-01-23 02:36:04.892635: step: 536/527, loss: 0.0019268036121502519 2023-01-23 02:36:05.993963: step: 540/527, loss: 0.0007682800642214715 2023-01-23 02:36:07.112788: step: 544/527, loss: 0.025227930396795273 2023-01-23 02:36:08.235072: step: 548/527, loss: 8.37326078908518e-05 2023-01-23 02:36:09.327059: step: 552/527, loss: 0.013734627515077591 2023-01-23 02:36:10.448765: step: 556/527, loss: 0.05985736846923828 2023-01-23 02:36:11.580631: step: 560/527, loss: 0.0016315460670739412 2023-01-23 02:36:12.682271: step: 564/527, loss: 0.0042414190247654915 2023-01-23 02:36:13.783680: step: 568/527, loss: 0.06443500518798828 2023-01-23 02:36:14.914794: step: 572/527, loss: 0.012848282232880592 2023-01-23 02:36:16.016675: step: 576/527, loss: 0.025479793548583984 2023-01-23 02:36:17.135851: step: 580/527, loss: 0.00021419525728560984 2023-01-23 02:36:18.246109: step: 584/527, loss: 0.0008415222400799394 2023-01-23 02:36:19.359204: step: 588/527, loss: 0.02218952216207981 2023-01-23 02:36:20.479988: step: 592/527, loss: 0.004956436343491077 2023-01-23 02:36:21.633641: step: 596/527, loss: 0.0010934829479083419 2023-01-23 02:36:22.765350: step: 600/527, loss: 0.016013814136385918 2023-01-23 02:36:23.873139: step: 604/527, loss: 6.909370858920738e-05 2023-01-23 02:36:25.026573: step: 608/527, loss: 0.0004241943533997983 2023-01-23 02:36:26.106727: step: 612/527, loss: 0.040265657007694244 2023-01-23 02:36:27.259517: step: 616/527, loss: 0.03331737592816353 2023-01-23 02:36:28.342357: step: 620/527, loss: 0.004127550404518843 2023-01-23 02:36:29.455702: step: 624/527, loss: 0.011080646887421608 2023-01-23 02:36:30.555121: step: 628/527, loss: 0.004027175717055798 2023-01-23 02:36:31.663522: step: 632/527, loss: 0.003848457243293524 2023-01-23 02:36:32.778804: step: 636/527, loss: 0.027270127087831497 2023-01-23 02:36:33.881708: step: 640/527, loss: 0.022963905707001686 2023-01-23 02:36:35.014564: step: 644/527, loss: 0.0001430511474609375 2023-01-23 02:36:36.126388: step: 648/527, loss: 0.3130761981010437 2023-01-23 02:36:37.254135: step: 652/527, loss: 0.004669570829719305 2023-01-23 02:36:38.378336: step: 656/527, loss: 0.0062469481490552425 2023-01-23 02:36:39.466707: step: 660/527, loss: 0.0010677337413653731 2023-01-23 02:36:40.605862: step: 664/527, loss: 0.014051247388124466 2023-01-23 02:36:41.718571: step: 668/527, loss: 3.1948089599609375e-05 2023-01-23 02:36:42.840995: step: 672/527, loss: 0.0029112815391272306 2023-01-23 02:36:43.926122: step: 676/527, loss: 0.002524852752685547 2023-01-23 02:36:45.036656: step: 680/527, loss: 0.0013834952842444181 2023-01-23 02:36:46.138301: step: 684/527, loss: 0.009224272333085537 2023-01-23 02:36:47.276442: step: 688/527, loss: 0.0004795074346475303 2023-01-23 02:36:48.378401: step: 692/527, loss: 0.04675617441534996 2023-01-23 02:36:49.490126: step: 696/527, loss: 0.018195820972323418 2023-01-23 02:36:50.657889: step: 700/527, loss: 0.03568840026855469 2023-01-23 02:36:51.762249: step: 704/527, loss: 0.007313728332519531 2023-01-23 02:36:52.854558: step: 708/527, loss: 0.018531514331698418 2023-01-23 02:36:53.952473: step: 712/527, loss: 0.0020042420364916325 2023-01-23 02:36:55.065232: step: 716/527, loss: 0.005865669343620539 2023-01-23 02:36:56.176403: step: 720/527, loss: 0.005946349818259478 2023-01-23 02:36:57.274614: step: 724/527, loss: 0.46851426362991333 2023-01-23 02:36:58.396936: step: 728/527, loss: 0.026128005236387253 2023-01-23 02:36:59.501874: step: 732/527, loss: 0.060364533215761185 2023-01-23 02:37:00.606519: step: 736/527, loss: 0.006414318457245827 2023-01-23 02:37:01.721699: step: 740/527, loss: 0.004105186555534601 2023-01-23 02:37:02.882038: step: 744/527, loss: 0.035622406750917435 2023-01-23 02:37:03.996044: step: 748/527, loss: 0.01057128980755806 2023-01-23 02:37:05.098216: step: 752/527, loss: 0.020526790991425514 2023-01-23 02:37:06.207648: step: 756/527, loss: 0.009049797430634499 2023-01-23 02:37:07.320285: step: 760/527, loss: 0.0011502265697345138 2023-01-23 02:37:08.411992: step: 764/527, loss: 0.03863391652703285 2023-01-23 02:37:09.519909: step: 768/527, loss: 0.0030647278763353825 2023-01-23 02:37:10.662517: step: 772/527, loss: 0.03179154545068741 2023-01-23 02:37:11.753964: step: 776/527, loss: 0.0024461746215820312 2023-01-23 02:37:12.840051: step: 780/527, loss: 0.0003772735653910786 2023-01-23 02:37:13.955912: step: 784/527, loss: 0.02175169065594673 2023-01-23 02:37:15.076601: step: 788/527, loss: 0.04328460991382599 2023-01-23 02:37:16.180578: step: 792/527, loss: 0.023350238800048828 2023-01-23 02:37:17.306353: step: 796/527, loss: 0.007850075140595436 2023-01-23 02:37:18.437426: step: 800/527, loss: 0.0018194199074059725 2023-01-23 02:37:19.563880: step: 804/527, loss: 0.0034483911003917456 2023-01-23 02:37:20.673261: step: 808/527, loss: 0.011926270090043545 2023-01-23 02:37:21.775218: step: 812/527, loss: 0.0006452560191974044 2023-01-23 02:37:22.929790: step: 816/527, loss: 0.016267968341708183 2023-01-23 02:37:24.052589: step: 820/527, loss: 0.018735790625214577 2023-01-23 02:37:25.147125: step: 824/527, loss: 0.020946310833096504 2023-01-23 02:37:26.279953: step: 828/527, loss: 0.05528726428747177 2023-01-23 02:37:27.406977: step: 832/527, loss: 0.01360416505485773 2023-01-23 02:37:28.494718: step: 836/527, loss: 0.0006442070007324219 2023-01-23 02:37:29.593286: step: 840/527, loss: 0.06638355553150177 2023-01-23 02:37:30.764450: step: 844/527, loss: 0.00012216568575240672 2023-01-23 02:37:31.897039: step: 848/527, loss: 0.027976226061582565 2023-01-23 02:37:33.032923: step: 852/527, loss: 0.0016086578834801912 2023-01-23 02:37:34.125566: step: 856/527, loss: 0.014592933468520641 2023-01-23 02:37:35.250577: step: 860/527, loss: 0.022863317281007767 2023-01-23 02:37:36.370881: step: 864/527, loss: 0.004327702801674604 2023-01-23 02:37:37.487511: step: 868/527, loss: 0.009133911691606045 2023-01-23 02:37:38.616668: step: 872/527, loss: 0.6425380706787109 2023-01-23 02:37:39.741016: step: 876/527, loss: 0.00011920928955078125 2023-01-23 02:37:40.845369: step: 880/527, loss: 0.08990326523780823 2023-01-23 02:37:41.976136: step: 884/527, loss: 0.00058155064471066 2023-01-23 02:37:43.085735: step: 888/527, loss: 0.0012882233131676912 2023-01-23 02:37:44.206789: step: 892/527, loss: 0.08394451439380646 2023-01-23 02:37:45.327969: step: 896/527, loss: 0.003975295927375555 2023-01-23 02:37:46.454058: step: 900/527, loss: 0.0011583161540329456 2023-01-23 02:37:47.578212: step: 904/527, loss: 0.003781271167099476 2023-01-23 02:37:48.670441: step: 908/527, loss: 0.002442479133605957 2023-01-23 02:37:49.816648: step: 912/527, loss: 0.03841390460729599 2023-01-23 02:37:50.949739: step: 916/527, loss: 0.04734230041503906 2023-01-23 02:37:52.067145: step: 920/527, loss: 0.006579494569450617 2023-01-23 02:37:53.165423: step: 924/527, loss: 0.0032499313820153475 2023-01-23 02:37:54.276552: step: 928/527, loss: 0.04964857175946236 2023-01-23 02:37:55.436081: step: 932/527, loss: 0.006509637925773859 2023-01-23 02:37:56.529326: step: 936/527, loss: 0.0014746190281584859 2023-01-23 02:37:57.640749: step: 940/527, loss: 0.019540976732969284 2023-01-23 02:37:58.737925: step: 944/527, loss: 0.019904911518096924 2023-01-23 02:37:59.832608: step: 948/527, loss: 0.018244266510009766 2023-01-23 02:38:00.929159: step: 952/527, loss: 0.0028182982932776213 2023-01-23 02:38:02.019815: step: 956/527, loss: 0.008939648047089577 2023-01-23 02:38:03.127589: step: 960/527, loss: 0.0893101766705513 2023-01-23 02:38:04.252542: step: 964/527, loss: 0.19413211941719055 2023-01-23 02:38:05.363115: step: 968/527, loss: 0.0056614927016198635 2023-01-23 02:38:06.472999: step: 972/527, loss: 0.0016385079361498356 2023-01-23 02:38:07.563964: step: 976/527, loss: 0.012868690304458141 2023-01-23 02:38:08.680568: step: 980/527, loss: 0.004455185029655695 2023-01-23 02:38:09.827246: step: 984/527, loss: 0.0041336058638989925 2023-01-23 02:38:10.924328: step: 988/527, loss: 0.001928138779476285 2023-01-23 02:38:12.018241: step: 992/527, loss: 0.05878520384430885 2023-01-23 02:38:13.170614: step: 996/527, loss: 0.011075782589614391 2023-01-23 02:38:14.245447: step: 1000/527, loss: 0.0019528151024132967 2023-01-23 02:38:15.320389: step: 1004/527, loss: 0.0164534579962492 2023-01-23 02:38:16.412499: step: 1008/527, loss: 0.0019310475327074528 2023-01-23 02:38:17.517043: step: 1012/527, loss: 0.019474124535918236 2023-01-23 02:38:18.630281: step: 1016/527, loss: 0.005499840248376131 2023-01-23 02:38:19.727881: step: 1020/527, loss: 0.009505176916718483 2023-01-23 02:38:20.854523: step: 1024/527, loss: 0.000293731689453125 2023-01-23 02:38:21.934819: step: 1028/527, loss: 0.029192542657256126 2023-01-23 02:38:23.089381: step: 1032/527, loss: 0.0027622224297374487 2023-01-23 02:38:24.239846: step: 1036/527, loss: 0.03670082241296768 2023-01-23 02:38:25.364643: step: 1040/527, loss: 0.00013669728650711477 2023-01-23 02:38:26.495693: step: 1044/527, loss: 0.002657604170963168 2023-01-23 02:38:27.651474: step: 1048/527, loss: 0.007655620574951172 2023-01-23 02:38:28.829160: step: 1052/527, loss: 0.009197616018354893 2023-01-23 02:38:29.941549: step: 1056/527, loss: 0.6586270928382874 2023-01-23 02:38:31.042125: step: 1060/527, loss: 0.0012735367054119706 2023-01-23 02:38:32.142489: step: 1064/527, loss: 0.035904884338378906 2023-01-23 02:38:33.265915: step: 1068/527, loss: 0.05943875387310982 2023-01-23 02:38:34.376903: step: 1072/527, loss: 0.07975101470947266 2023-01-23 02:38:35.503865: step: 1076/527, loss: 0.009791184216737747 2023-01-23 02:38:36.619926: step: 1080/527, loss: 0.0922277420759201 2023-01-23 02:38:37.693005: step: 1084/527, loss: 0.007860183715820312 2023-01-23 02:38:38.789201: step: 1088/527, loss: 0.018537236377596855 2023-01-23 02:38:39.887015: step: 1092/527, loss: 0.02582836151123047 2023-01-23 02:38:40.971540: step: 1096/527, loss: 0.0018868923652917147 2023-01-23 02:38:42.101424: step: 1100/527, loss: 0.008661841973662376 2023-01-23 02:38:43.227945: step: 1104/527, loss: 0.041791535913944244 2023-01-23 02:38:44.350105: step: 1108/527, loss: 0.025666045024991035 2023-01-23 02:38:45.467331: step: 1112/527, loss: 0.0021966935601085424 2023-01-23 02:38:46.583136: step: 1116/527, loss: 0.10806134343147278 2023-01-23 02:38:47.703275: step: 1120/527, loss: 0.0062334537506103516 2023-01-23 02:38:48.803330: step: 1124/527, loss: 0.0002914428769145161 2023-01-23 02:38:49.923420: step: 1128/527, loss: 0.024120425805449486 2023-01-23 02:38:51.050187: step: 1132/527, loss: 0.0005186080816201866 2023-01-23 02:38:52.191551: step: 1136/527, loss: 0.007250594906508923 2023-01-23 02:38:53.269681: step: 1140/527, loss: 0.0031056401785463095 2023-01-23 02:38:54.401664: step: 1144/527, loss: 0.01995544508099556 2023-01-23 02:38:55.527878: step: 1148/527, loss: 0.13898760080337524 2023-01-23 02:38:56.629752: step: 1152/527, loss: 0.029405975714325905 2023-01-23 02:38:57.735147: step: 1156/527, loss: 0.2844974398612976 2023-01-23 02:38:58.878414: step: 1160/527, loss: 0.0012763501144945621 2023-01-23 02:39:00.026396: step: 1164/527, loss: 0.019233321771025658 2023-01-23 02:39:01.162197: step: 1168/527, loss: 0.045714668929576874 2023-01-23 02:39:02.272354: step: 1172/527, loss: 0.0005618572467938066 2023-01-23 02:39:03.375306: step: 1176/527, loss: 0.021721458062529564 2023-01-23 02:39:04.528205: step: 1180/527, loss: 0.01964702643454075 2023-01-23 02:39:05.678206: step: 1184/527, loss: 0.004763889592140913 2023-01-23 02:39:06.801397: step: 1188/527, loss: 0.04556236043572426 2023-01-23 02:39:07.938272: step: 1192/527, loss: 0.007729625795036554 2023-01-23 02:39:09.060078: step: 1196/527, loss: 0.004032897762954235 2023-01-23 02:39:10.206913: step: 1200/527, loss: 0.044917963445186615 2023-01-23 02:39:11.291932: step: 1204/527, loss: 0.003471946809440851 2023-01-23 02:39:12.440252: step: 1208/527, loss: 0.010382939130067825 2023-01-23 02:39:13.568827: step: 1212/527, loss: 0.007255256175994873 2023-01-23 02:39:14.688609: step: 1216/527, loss: 0.018157387152314186 2023-01-23 02:39:15.784316: step: 1220/527, loss: 0.00022411346435546875 2023-01-23 02:39:16.881984: step: 1224/527, loss: 0.00933299120515585 2023-01-23 02:39:17.963401: step: 1228/527, loss: 0.26342129707336426 2023-01-23 02:39:19.051127: step: 1232/527, loss: 0.0018136025173589587 2023-01-23 02:39:20.183707: step: 1236/527, loss: 0.0006340026739053428 2023-01-23 02:39:21.292066: step: 1240/527, loss: 0.018594931811094284 2023-01-23 02:39:22.405629: step: 1244/527, loss: 0.0012929915683344007 2023-01-23 02:39:23.521131: step: 1248/527, loss: 0.025081824511289597 2023-01-23 02:39:24.667311: step: 1252/527, loss: 0.006076335906982422 2023-01-23 02:39:25.775730: step: 1256/527, loss: 0.0028447150252759457 2023-01-23 02:39:26.886433: step: 1260/527, loss: 0.04791984334588051 2023-01-23 02:39:28.009019: step: 1264/527, loss: 0.007804680150002241 2023-01-23 02:39:29.147145: step: 1268/527, loss: 0.10680066049098969 2023-01-23 02:39:30.262560: step: 1272/527, loss: 0.3834855258464813 2023-01-23 02:39:31.365184: step: 1276/527, loss: 0.010625124908983707 2023-01-23 02:39:32.471819: step: 1280/527, loss: 0.05281725153326988 2023-01-23 02:39:33.606857: step: 1284/527, loss: 0.007564067840576172 2023-01-23 02:39:34.722791: step: 1288/527, loss: 0.005911350250244141 2023-01-23 02:39:35.827109: step: 1292/527, loss: 0.0004930496215820312 2023-01-23 02:39:36.928095: step: 1296/527, loss: 0.0060134888626635075 2023-01-23 02:39:38.054179: step: 1300/527, loss: 0.0027416229713708162 2023-01-23 02:39:39.165432: step: 1304/527, loss: 0.013891208916902542 2023-01-23 02:39:40.256568: step: 1308/527, loss: 0.004548025317490101 2023-01-23 02:39:41.360326: step: 1312/527, loss: 0.03413820266723633 2023-01-23 02:39:42.460443: step: 1316/527, loss: 0.005631828214973211 2023-01-23 02:39:43.552277: step: 1320/527, loss: 0.0016065121162682772 2023-01-23 02:39:44.664802: step: 1324/527, loss: 0.003700304077938199 2023-01-23 02:39:45.814558: step: 1328/527, loss: 0.004315471742302179 2023-01-23 02:39:46.927397: step: 1332/527, loss: -3.814697265625e-06 2023-01-23 02:39:48.037266: step: 1336/527, loss: 0.043228913098573685 2023-01-23 02:39:49.151644: step: 1340/527, loss: 4.375918388366699 2023-01-23 02:39:50.259191: step: 1344/527, loss: 0.03010549396276474 2023-01-23 02:39:51.367929: step: 1348/527, loss: 0.004519939422607422 2023-01-23 02:39:52.451801: step: 1352/527, loss: 0.0009884834289550781 2023-01-23 02:39:53.558626: step: 1356/527, loss: 0.031243612989783287 2023-01-23 02:39:54.690508: step: 1360/527, loss: 3.14712519866589e-06 2023-01-23 02:39:55.785431: step: 1364/527, loss: 0.00040941237239167094 2023-01-23 02:39:56.907545: step: 1368/527, loss: 0.006172371096909046 2023-01-23 02:39:58.036386: step: 1372/527, loss: 0.0034801485016942024 2023-01-23 02:39:59.153917: step: 1376/527, loss: 0.07715263217687607 2023-01-23 02:40:00.290695: step: 1380/527, loss: 0.01746826246380806 2023-01-23 02:40:01.384559: step: 1384/527, loss: 0.09030818939208984 2023-01-23 02:40:02.482532: step: 1388/527, loss: 0.007637977600097656 2023-01-23 02:40:03.611076: step: 1392/527, loss: 0.007068062201142311 2023-01-23 02:40:04.718205: step: 1396/527, loss: 0.03342771530151367 2023-01-23 02:40:05.813707: step: 1400/527, loss: 0.014216804876923561 2023-01-23 02:40:06.897728: step: 1404/527, loss: 0.0035140991676598787 2023-01-23 02:40:08.034007: step: 1408/527, loss: 0.007841682061553001 2023-01-23 02:40:09.129110: step: 1412/527, loss: 0.12781424820423126 2023-01-23 02:40:10.241173: step: 1416/527, loss: 0.026134967803955078 2023-01-23 02:40:11.353835: step: 1420/527, loss: 0.016472624614834785 2023-01-23 02:40:12.462838: step: 1424/527, loss: 0.010421419516205788 2023-01-23 02:40:13.589776: step: 1428/527, loss: 0.018986130133271217 2023-01-23 02:40:14.701235: step: 1432/527, loss: 0.005397701170295477 2023-01-23 02:40:15.831349: step: 1436/527, loss: 0.0007076740730553865 2023-01-23 02:40:16.932016: step: 1440/527, loss: 0.011234093457460403 2023-01-23 02:40:18.029451: step: 1444/527, loss: 0.00044460297795012593 2023-01-23 02:40:19.170279: step: 1448/527, loss: 0.06940049678087234 2023-01-23 02:40:20.272019: step: 1452/527, loss: 0.02171926572918892 2023-01-23 02:40:21.400280: step: 1456/527, loss: 0.02141146920621395 2023-01-23 02:40:22.508460: step: 1460/527, loss: 0.004588508978486061 2023-01-23 02:40:23.616632: step: 1464/527, loss: 0.01808171346783638 2023-01-23 02:40:24.696835: step: 1468/527, loss: 0.020713090896606445 2023-01-23 02:40:25.783454: step: 1472/527, loss: 0.014095068909227848 2023-01-23 02:40:26.904815: step: 1476/527, loss: 0.0026751516852527857 2023-01-23 02:40:28.017956: step: 1480/527, loss: 4.301071021473035e-05 2023-01-23 02:40:29.145491: step: 1484/527, loss: 0.022134114056825638 2023-01-23 02:40:30.266626: step: 1488/527, loss: 0.08616063743829727 2023-01-23 02:40:31.406904: step: 1492/527, loss: 0.03980650752782822 2023-01-23 02:40:32.522461: step: 1496/527, loss: 0.006954193580895662 2023-01-23 02:40:33.632438: step: 1500/527, loss: 0.03614349663257599 2023-01-23 02:40:34.741532: step: 1504/527, loss: 0.0035955430939793587 2023-01-23 02:40:35.870213: step: 1508/527, loss: 0.0005060196272097528 2023-01-23 02:40:37.008508: step: 1512/527, loss: 0.009517860598862171 2023-01-23 02:40:38.124356: step: 1516/527, loss: 0.004303359892219305 2023-01-23 02:40:39.248716: step: 1520/527, loss: 0.05650768429040909 2023-01-23 02:40:40.357282: step: 1524/527, loss: 0.06997299194335938 2023-01-23 02:40:41.484858: step: 1528/527, loss: 0.00019397735013626516 2023-01-23 02:40:42.614032: step: 1532/527, loss: 0.0017027854919433594 2023-01-23 02:40:43.734861: step: 1536/527, loss: 0.011641979217529297 2023-01-23 02:40:44.838191: step: 1540/527, loss: 0.055008694529533386 2023-01-23 02:40:45.964694: step: 1544/527, loss: 0.024526499211788177 2023-01-23 02:40:47.065449: step: 1548/527, loss: 0.001004982041195035 2023-01-23 02:40:48.196530: step: 1552/527, loss: 0.010524654760956764 2023-01-23 02:40:49.305909: step: 1556/527, loss: 0.0004533767933025956 2023-01-23 02:40:50.419536: step: 1560/527, loss: 0.018765592947602272 2023-01-23 02:40:51.527838: step: 1564/527, loss: 0.004154527094215155 2023-01-23 02:40:52.640872: step: 1568/527, loss: 0.03361377865076065 2023-01-23 02:40:53.739777: step: 1572/527, loss: 0.01734332926571369 2023-01-23 02:40:54.836998: step: 1576/527, loss: 0.0003952026308979839 2023-01-23 02:40:55.948421: step: 1580/527, loss: 0.008944297209382057 2023-01-23 02:40:57.098618: step: 1584/527, loss: 0.04268951341509819 2023-01-23 02:40:58.218673: step: 1588/527, loss: 0.008399104699492455 2023-01-23 02:40:59.332576: step: 1592/527, loss: 0.008795356377959251 2023-01-23 02:41:00.439502: step: 1596/527, loss: 0.012418795377016068 2023-01-23 02:41:01.580466: step: 1600/527, loss: 0.0019072532886639237 2023-01-23 02:41:02.687721: step: 1604/527, loss: 8.267239172710106e-05 2023-01-23 02:41:03.807576: step: 1608/527, loss: 0.0005527496105059981 2023-01-23 02:41:04.928180: step: 1612/527, loss: 0.010962486267089844 2023-01-23 02:41:06.040361: step: 1616/527, loss: 0.0006602287758141756 2023-01-23 02:41:07.147590: step: 1620/527, loss: 0.031920626759529114 2023-01-23 02:41:08.274689: step: 1624/527, loss: 0.011629152111709118 2023-01-23 02:41:09.407248: step: 1628/527, loss: 0.0025787353515625 2023-01-23 02:41:10.504994: step: 1632/527, loss: 0.01799631118774414 2023-01-23 02:41:11.630813: step: 1636/527, loss: 0.007521533872932196 2023-01-23 02:41:12.741830: step: 1640/527, loss: 0.12186841666698456 2023-01-23 02:41:13.868799: step: 1644/527, loss: 0.011512279510498047 2023-01-23 02:41:14.993946: step: 1648/527, loss: 0.12494973838329315 2023-01-23 02:41:16.116441: step: 1652/527, loss: 0.011986018158495426 2023-01-23 02:41:17.242321: step: 1656/527, loss: 0.005015564151108265 2023-01-23 02:41:18.379986: step: 1660/527, loss: 0.007798528298735619 2023-01-23 02:41:19.514824: step: 1664/527, loss: 0.002794170519337058 2023-01-23 02:41:20.622139: step: 1668/527, loss: 0.02684185467660427 2023-01-23 02:41:21.733472: step: 1672/527, loss: 0.023714445531368256 2023-01-23 02:41:22.876801: step: 1676/527, loss: 0.013370160013437271 2023-01-23 02:41:23.987925: step: 1680/527, loss: 0.023885631933808327 2023-01-23 02:41:25.126790: step: 1684/527, loss: 0.00039606093196198344 2023-01-23 02:41:26.241042: step: 1688/527, loss: 0.0024572850670665503 2023-01-23 02:41:27.354509: step: 1692/527, loss: 0.003660488175228238 2023-01-23 02:41:28.469757: step: 1696/527, loss: 0.0020572184585034847 2023-01-23 02:41:29.557097: step: 1700/527, loss: 0.0004181861877441406 2023-01-23 02:41:30.666247: step: 1704/527, loss: 0.0007913351291790605 2023-01-23 02:41:31.804304: step: 1708/527, loss: 0.0002601623418740928 2023-01-23 02:41:32.975286: step: 1712/527, loss: 0.017992781475186348 2023-01-23 02:41:34.102689: step: 1716/527, loss: 0.009121155366301537 2023-01-23 02:41:35.200107: step: 1720/527, loss: 0.027771567925810814 2023-01-23 02:41:36.305327: step: 1724/527, loss: 0.005588436499238014 2023-01-23 02:41:37.414334: step: 1728/527, loss: 0.003928852267563343 2023-01-23 02:41:38.507226: step: 1732/527, loss: 0.005960989277809858 2023-01-23 02:41:39.637745: step: 1736/527, loss: 0.010779953561723232 2023-01-23 02:41:40.738132: step: 1740/527, loss: 0.0044099269434809685 2023-01-23 02:41:41.853615: step: 1744/527, loss: 0.0006672203307971358 2023-01-23 02:41:42.949719: step: 1748/527, loss: 0.0036901473067700863 2023-01-23 02:41:44.070952: step: 1752/527, loss: 0.027920342981815338 2023-01-23 02:41:45.194009: step: 1756/527, loss: 0.002324771834537387 2023-01-23 02:41:46.292681: step: 1760/527, loss: 0.10286064445972443 2023-01-23 02:41:47.397378: step: 1764/527, loss: 0.03215789794921875 2023-01-23 02:41:48.563600: step: 1768/527, loss: 0.015442848205566406 2023-01-23 02:41:49.679477: step: 1772/527, loss: 0.00025053025456145406 2023-01-23 02:41:50.775360: step: 1776/527, loss: 0.00043425560579635203 2023-01-23 02:41:51.889510: step: 1780/527, loss: 0.00130462646484375 2023-01-23 02:41:52.994480: step: 1784/527, loss: 0.0009635924943722785 2023-01-23 02:41:54.141440: step: 1788/527, loss: 0.01930699311196804 2023-01-23 02:41:55.303535: step: 1792/527, loss: 0.0031836749985814095 2023-01-23 02:41:56.443618: step: 1796/527, loss: 0.00045108795166015625 2023-01-23 02:41:57.581792: step: 1800/527, loss: 0.0006071091047488153 2023-01-23 02:41:58.670403: step: 1804/527, loss: 0.009622382931411266 2023-01-23 02:41:59.817998: step: 1808/527, loss: 0.10963650047779083 2023-01-23 02:42:00.951852: step: 1812/527, loss: 0.0015749931335449219 2023-01-23 02:42:02.063787: step: 1816/527, loss: 0.0002948761102743447 2023-01-23 02:42:03.190501: step: 1820/527, loss: 0.01230402011424303 2023-01-23 02:42:04.275379: step: 1824/527, loss: 0.022346878424286842 2023-01-23 02:42:05.385015: step: 1828/527, loss: 0.0017177582485601306 2023-01-23 02:42:06.496145: step: 1832/527, loss: 0.09004173427820206 2023-01-23 02:42:07.613921: step: 1836/527, loss: 0.043418265879154205 2023-01-23 02:42:08.749133: step: 1840/527, loss: 0.0003833770751953125 2023-01-23 02:42:09.864449: step: 1844/527, loss: 0.0007290840148925781 2023-01-23 02:42:10.994106: step: 1848/527, loss: 0.0024450302589684725 2023-01-23 02:42:12.076539: step: 1852/527, loss: 0.024298476055264473 2023-01-23 02:42:13.185031: step: 1856/527, loss: 0.007344054989516735 2023-01-23 02:42:14.306555: step: 1860/527, loss: 0.0016605377895757556 2023-01-23 02:42:15.435445: step: 1864/527, loss: 0.007188892923295498 2023-01-23 02:42:16.556312: step: 1868/527, loss: 0.040312767028808594 2023-01-23 02:42:17.687669: step: 1872/527, loss: 0.003297233721241355 2023-01-23 02:42:18.834115: step: 1876/527, loss: 0.018662549555301666 2023-01-23 02:42:20.022005: step: 1880/527, loss: 0.0031269071623682976 2023-01-23 02:42:21.134395: step: 1884/527, loss: 0.001056766603142023 2023-01-23 02:42:22.232012: step: 1888/527, loss: 0.09863471984863281 2023-01-23 02:42:23.356314: step: 1892/527, loss: 0.0003509760135784745 2023-01-23 02:42:24.494814: step: 1896/527, loss: 0.0010200501419603825 2023-01-23 02:42:25.611462: step: 1900/527, loss: 0.0005661010509356856 2023-01-23 02:42:26.715078: step: 1904/527, loss: 0.0011453628540039062 2023-01-23 02:42:27.798451: step: 1908/527, loss: 0.00019297302060294896 2023-01-23 02:42:28.897747: step: 1912/527, loss: 0.03646891564130783 2023-01-23 02:42:29.992961: step: 1916/527, loss: 0.019242573529481888 2023-01-23 02:42:31.115951: step: 1920/527, loss: 0.0015337944496423006 2023-01-23 02:42:32.241479: step: 1924/527, loss: 0.00052642822265625 2023-01-23 02:42:33.373448: step: 1928/527, loss: 0.0002960205019917339 2023-01-23 02:42:34.508462: step: 1932/527, loss: 0.00031070708064362407 2023-01-23 02:42:35.616391: step: 1936/527, loss: 0.00678596505895257 2023-01-23 02:42:36.728071: step: 1940/527, loss: 0.08375845104455948 2023-01-23 02:42:37.847175: step: 1944/527, loss: 0.014844894409179688 2023-01-23 02:42:38.967171: step: 1948/527, loss: 0.0428524985909462 2023-01-23 02:42:40.096767: step: 1952/527, loss: 0.050026893615722656 2023-01-23 02:42:41.210596: step: 1956/527, loss: 1.4208256006240845 2023-01-23 02:42:42.322667: step: 1960/527, loss: 0.005116653628647327 2023-01-23 02:42:43.447070: step: 1964/527, loss: 0.019698143005371094 2023-01-23 02:42:44.558631: step: 1968/527, loss: 0.0010331631638109684 2023-01-23 02:42:45.668290: step: 1972/527, loss: 0.02993779256939888 2023-01-23 02:42:46.777339: step: 1976/527, loss: 0.000225067138671875 2023-01-23 02:42:47.894375: step: 1980/527, loss: 0.08067512512207031 2023-01-23 02:42:48.999914: step: 1984/527, loss: 0.04687700420618057 2023-01-23 02:42:50.125694: step: 1988/527, loss: 0.018857955932617188 2023-01-23 02:42:51.253887: step: 1992/527, loss: 0.01280226744711399 2023-01-23 02:42:52.361023: step: 1996/527, loss: 0.02402694895863533 2023-01-23 02:42:53.495204: step: 2000/527, loss: 0.010155045427381992 2023-01-23 02:42:54.594400: step: 2004/527, loss: 0.08439507335424423 2023-01-23 02:42:55.713814: step: 2008/527, loss: 0.0087890625 2023-01-23 02:42:56.837450: step: 2012/527, loss: 0.003377723740413785 2023-01-23 02:42:57.969167: step: 2016/527, loss: 0.07241944968700409 2023-01-23 02:42:59.108411: step: 2020/527, loss: 0.042044639587402344 2023-01-23 02:43:00.237517: step: 2024/527, loss: 0.029285239055752754 2023-01-23 02:43:01.312019: step: 2028/527, loss: 0.058226823806762695 2023-01-23 02:43:02.442459: step: 2032/527, loss: 0.02170858345925808 2023-01-23 02:43:03.577588: step: 2036/527, loss: 0.0789773017168045 2023-01-23 02:43:04.672720: step: 2040/527, loss: 0.0006162643549032509 2023-01-23 02:43:05.763203: step: 2044/527, loss: 0.0015653610462322831 2023-01-23 02:43:06.882555: step: 2048/527, loss: 0.05936603620648384 2023-01-23 02:43:08.007921: step: 2052/527, loss: 0.6620147824287415 2023-01-23 02:43:09.168153: step: 2056/527, loss: 3.204345557605848e-05 2023-01-23 02:43:10.270046: step: 2060/527, loss: 0.0005727768293581903 2023-01-23 02:43:11.374684: step: 2064/527, loss: 0.0011310577392578125 2023-01-23 02:43:12.492569: step: 2068/527, loss: 0.0053021907806396484 2023-01-23 02:43:13.586966: step: 2072/527, loss: 0.006241464521735907 2023-01-23 02:43:14.720317: step: 2076/527, loss: 0.003083133604377508 2023-01-23 02:43:15.850344: step: 2080/527, loss: 0.0012887001503258944 2023-01-23 02:43:16.967663: step: 2084/527, loss: 9.527205838821828e-05 2023-01-23 02:43:18.100927: step: 2088/527, loss: 0.016990184783935547 2023-01-23 02:43:19.176872: step: 2092/527, loss: 0.01983466185629368 2023-01-23 02:43:20.289981: step: 2096/527, loss: 0.0018999100429937243 2023-01-23 02:43:21.375753: step: 2100/527, loss: 0.0005417823558673263 2023-01-23 02:43:22.508551: step: 2104/527, loss: 0.00032806396484375 2023-01-23 02:43:23.629142: step: 2108/527, loss: 0.021654987707734108 ================================================== Loss: 0.038 -------------------- Dev: {'event': {'p': 0.5776458951533135, 'r': 0.7776298268974701, 'f1': 0.662883087400681}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Test: {'event': {'p': 0.6356088560885609, 'r': 0.7874285714285715, 'f1': 0.703420112302195}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Korean: {'event': {'p': 0.5892857142857143, 'r': 0.5238095238095238, 'f1': 0.5546218487394958}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Russian: {'event': {'p': 0.5135135135135135, 'r': 0.5277777777777778, 'f1': 0.5205479452054794}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066252587991718, 'r': 0.7802929427430093, 'f1': 0.6825859056493885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Eng Test for Korean: {'event': {'p': 0.62580054894785, 'r': 0.7817142857142857, 'f1': 0.6951219512195121}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 19 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:44:05.100204: step: 4/527, loss: 0.0007443904760293663 2023-01-23 02:44:06.196935: step: 8/527, loss: 0.008268166333436966 2023-01-23 02:44:07.323502: step: 12/527, loss: 0.044470977038145065 2023-01-23 02:44:08.435576: step: 16/527, loss: 0.0016821861499920487 2023-01-23 02:44:09.505971: step: 20/527, loss: 0.0004517078632488847 2023-01-23 02:44:10.603686: step: 24/527, loss: 0.006455803290009499 2023-01-23 02:44:11.716321: step: 28/527, loss: 0.09223899990320206 2023-01-23 02:44:12.852887: step: 32/527, loss: 0.01903076097369194 2023-01-23 02:44:13.968610: step: 36/527, loss: 0.004425621125847101 2023-01-23 02:44:15.101732: step: 40/527, loss: 0.003958797547966242 2023-01-23 02:44:16.212083: step: 44/527, loss: 0.04101219400763512 2023-01-23 02:44:17.313004: step: 48/527, loss: 0.0017236709827557206 2023-01-23 02:44:18.432335: step: 52/527, loss: 0.02956390380859375 2023-01-23 02:44:19.567635: step: 56/527, loss: 0.01581869088113308 2023-01-23 02:44:20.687554: step: 60/527, loss: 0.07888869941234589 2023-01-23 02:44:21.776880: step: 64/527, loss: 0.0004093647003173828 2023-01-23 02:44:22.895820: step: 68/527, loss: 0.007456970401108265 2023-01-23 02:44:24.055722: step: 72/527, loss: 0.07659869641065598 2023-01-23 02:44:25.163950: step: 76/527, loss: 0.001312255859375 2023-01-23 02:44:26.284750: step: 80/527, loss: 0.0057531362399458885 2023-01-23 02:44:27.427565: step: 84/527, loss: 0.02678375318646431 2023-01-23 02:44:28.537339: step: 88/527, loss: 0.008371162228286266 2023-01-23 02:44:29.661879: step: 92/527, loss: 0.015362453646957874 2023-01-23 02:44:30.774479: step: 96/527, loss: 7.62939453125e-05 2023-01-23 02:44:31.899602: step: 100/527, loss: 0.06545820832252502 2023-01-23 02:44:32.990163: step: 104/527, loss: 0.007288122083991766 2023-01-23 02:44:34.114939: step: 108/527, loss: 0.03342590481042862 2023-01-23 02:44:35.246752: step: 112/527, loss: 0.0038074494805186987 2023-01-23 02:44:36.346925: step: 116/527, loss: 0.0034792902879416943 2023-01-23 02:44:37.470806: step: 120/527, loss: 0.1414480209350586 2023-01-23 02:44:38.604255: step: 124/527, loss: 0.009324932470917702 2023-01-23 02:44:39.727787: step: 128/527, loss: 0.024091817438602448 2023-01-23 02:44:40.856894: step: 132/527, loss: 0.0003219604550395161 2023-01-23 02:44:41.979894: step: 136/527, loss: 0.0018285751575604081 2023-01-23 02:44:43.075034: step: 140/527, loss: 0.028315259143710136 2023-01-23 02:44:44.214784: step: 144/527, loss: 0.007821083068847656 2023-01-23 02:44:45.346466: step: 148/527, loss: 0.002789115998893976 2023-01-23 02:44:46.460557: step: 152/527, loss: 0.0004796028370037675 2023-01-23 02:44:47.575300: step: 156/527, loss: 0.0440179817378521 2023-01-23 02:44:48.705780: step: 160/527, loss: 0.003673553466796875 2023-01-23 02:44:49.848210: step: 164/527, loss: 0.019334029406309128 2023-01-23 02:44:50.959818: step: 168/527, loss: 0.00721054058521986 2023-01-23 02:44:52.024288: step: 172/527, loss: 0.0033194541465491056 2023-01-23 02:44:53.143116: step: 176/527, loss: 2.37941749219317e-05 2023-01-23 02:44:54.232997: step: 180/527, loss: 0.0004592418554238975 2023-01-23 02:44:55.327191: step: 184/527, loss: 0.004555463790893555 2023-01-23 02:44:56.434715: step: 188/527, loss: 0.02675657346844673 2023-01-23 02:44:57.543469: step: 192/527, loss: 0.052603721618652344 2023-01-23 02:44:58.667443: step: 196/527, loss: 0.02210082858800888 2023-01-23 02:44:59.789924: step: 200/527, loss: 0.05299912020564079 2023-01-23 02:45:00.917670: step: 204/527, loss: 0.04600124433636665 2023-01-23 02:45:02.030962: step: 208/527, loss: 0.00979003868997097 2023-01-23 02:45:03.143126: step: 212/527, loss: 0.0033321380615234375 2023-01-23 02:45:04.255911: step: 216/527, loss: 0.00018072128295898438 2023-01-23 02:45:05.402335: step: 220/527, loss: 0.0023645402397960424 2023-01-23 02:45:06.487638: step: 224/527, loss: 0.0015435219975188375 2023-01-23 02:45:07.617847: step: 228/527, loss: 0.007020092569291592 2023-01-23 02:45:08.739748: step: 232/527, loss: 3.929138256353326e-05 2023-01-23 02:45:09.828021: step: 236/527, loss: 0.015948820859193802 2023-01-23 02:45:10.984253: step: 240/527, loss: 0.034228041768074036 2023-01-23 02:45:12.074209: step: 244/527, loss: 0.007886076346039772 2023-01-23 02:45:13.187097: step: 248/527, loss: 0.0001703262241790071 2023-01-23 02:45:14.295386: step: 252/527, loss: 0.01219644583761692 2023-01-23 02:45:15.385623: step: 256/527, loss: 0.0360623374581337 2023-01-23 02:45:16.514597: step: 260/527, loss: 0.00242443080060184 2023-01-23 02:45:17.627900: step: 264/527, loss: 0.005789470858871937 2023-01-23 02:45:18.690685: step: 268/527, loss: 0.05938892811536789 2023-01-23 02:45:19.817353: step: 272/527, loss: 0.000931215297896415 2023-01-23 02:45:20.912196: step: 276/527, loss: 0.0023778914473950863 2023-01-23 02:45:22.019497: step: 280/527, loss: 0.0001423835929017514 2023-01-23 02:45:23.141529: step: 284/527, loss: 0.0033958435524255037 2023-01-23 02:45:24.269298: step: 288/527, loss: 0.1252867877483368 2023-01-23 02:45:25.393188: step: 292/527, loss: 7.848739915061742e-05 2023-01-23 02:45:26.507177: step: 296/527, loss: 2.2266287803649902 2023-01-23 02:45:27.603136: step: 300/527, loss: 3.7956240703351796e-05 2023-01-23 02:45:28.732909: step: 304/527, loss: 0.0019287110771983862 2023-01-23 02:45:29.826896: step: 308/527, loss: 0.00761337298899889 2023-01-23 02:45:30.931663: step: 312/527, loss: 0.040021561086177826 2023-01-23 02:45:32.058384: step: 316/527, loss: 0.22668485343456268 2023-01-23 02:45:33.184675: step: 320/527, loss: 0.0023276328574866056 2023-01-23 02:45:34.309783: step: 324/527, loss: 0.0016707897884771228 2023-01-23 02:45:35.445758: step: 328/527, loss: 0.0007989883306436241 2023-01-23 02:45:36.565940: step: 332/527, loss: 0.016665171831846237 2023-01-23 02:45:37.720146: step: 336/527, loss: 0.022960377857089043 2023-01-23 02:45:38.827232: step: 340/527, loss: 0.0011849404545500875 2023-01-23 02:45:39.961312: step: 344/527, loss: 0.00054845807608217 2023-01-23 02:45:41.102473: step: 348/527, loss: 0.016692733392119408 2023-01-23 02:45:42.207412: step: 352/527, loss: 0.026497984305024147 2023-01-23 02:45:43.307919: step: 356/527, loss: 0.0018697739578783512 2023-01-23 02:45:44.440823: step: 360/527, loss: 0.02386179007589817 2023-01-23 02:45:45.536572: step: 364/527, loss: 0.011486626230180264 2023-01-23 02:45:46.654935: step: 368/527, loss: 0.02828803099691868 2023-01-23 02:45:47.774166: step: 372/527, loss: 0.0007494926685467362 2023-01-23 02:45:48.888522: step: 376/527, loss: 0.009190560318529606 2023-01-23 02:45:50.018979: step: 380/527, loss: 0.02438640594482422 2023-01-23 02:45:51.176208: step: 384/527, loss: 0.0008827209239825606 2023-01-23 02:45:52.288238: step: 388/527, loss: 0.0014810562133789062 2023-01-23 02:45:53.392551: step: 392/527, loss: 0.0005075454828329384 2023-01-23 02:45:54.515076: step: 396/527, loss: 0.002871942473575473 2023-01-23 02:45:55.644438: step: 400/527, loss: 0.003062725067138672 2023-01-23 02:45:56.791555: step: 404/527, loss: 0.013873100280761719 2023-01-23 02:45:57.881337: step: 408/527, loss: 0.00836792029440403 2023-01-23 02:45:58.983439: step: 412/527, loss: 0.0010294914245605469 2023-01-23 02:46:00.077934: step: 416/527, loss: 0.015017509460449219 2023-01-23 02:46:01.192841: step: 420/527, loss: 0.03229961544275284 2023-01-23 02:46:02.315794: step: 424/527, loss: 0.0203291904181242 2023-01-23 02:46:03.408018: step: 428/527, loss: 0.013685441575944424 2023-01-23 02:46:04.535823: step: 432/527, loss: 0.012811565771698952 2023-01-23 02:46:05.651978: step: 436/527, loss: 0.02293987385928631 2023-01-23 02:46:06.766297: step: 440/527, loss: 0.0366411916911602 2023-01-23 02:46:07.861043: step: 444/527, loss: 0.0037747861351817846 2023-01-23 02:46:08.987598: step: 448/527, loss: 0.03017454221844673 2023-01-23 02:46:10.098866: step: 452/527, loss: 0.023659897968173027 2023-01-23 02:46:11.193808: step: 456/527, loss: 0.0641777515411377 2023-01-23 02:46:12.305681: step: 460/527, loss: 0.0356692336499691 2023-01-23 02:46:13.423591: step: 464/527, loss: 0.007748937699943781 2023-01-23 02:46:14.524484: step: 468/527, loss: 0.00025978090707212687 2023-01-23 02:46:15.631974: step: 472/527, loss: 0.015533447265625 2023-01-23 02:46:16.782179: step: 476/527, loss: 0.0330720916390419 2023-01-23 02:46:17.883504: step: 480/527, loss: 0.011567306704819202 2023-01-23 02:46:19.034158: step: 484/527, loss: 0.014894485473632812 2023-01-23 02:46:20.160382: step: 488/527, loss: 0.008662509731948376 2023-01-23 02:46:21.273730: step: 492/527, loss: 0.00592122133821249 2023-01-23 02:46:22.384620: step: 496/527, loss: 0.6933574676513672 2023-01-23 02:46:23.510227: step: 500/527, loss: 0.029668331146240234 2023-01-23 02:46:24.623419: step: 504/527, loss: 0.036104775965213776 2023-01-23 02:46:25.751068: step: 508/527, loss: 0.008000469766557217 2023-01-23 02:46:26.837136: step: 512/527, loss: 0.0020822526421397924 2023-01-23 02:46:27.985846: step: 516/527, loss: 0.00012855530076194555 2023-01-23 02:46:29.111123: step: 520/527, loss: 0.11603926867246628 2023-01-23 02:46:30.255187: step: 524/527, loss: 0.019771242514252663 2023-01-23 02:46:31.431856: step: 528/527, loss: 0.008343124762177467 2023-01-23 02:46:32.555156: step: 532/527, loss: 0.004056167788803577 2023-01-23 02:46:33.666556: step: 536/527, loss: 0.005884981248527765 2023-01-23 02:46:34.809036: step: 540/527, loss: 0.002049064729362726 2023-01-23 02:46:35.905815: step: 544/527, loss: 0.0664072260260582 2023-01-23 02:46:37.006981: step: 548/527, loss: 0.010943364351987839 2023-01-23 02:46:38.110458: step: 552/527, loss: 0.0001542091486044228 2023-01-23 02:46:39.230559: step: 556/527, loss: 0.0034339905250817537 2023-01-23 02:46:40.313844: step: 560/527, loss: 0.005725670140236616 2023-01-23 02:46:41.433130: step: 564/527, loss: 0.0372682586312294 2023-01-23 02:46:42.539470: step: 568/527, loss: 9.71794142969884e-05 2023-01-23 02:46:43.655240: step: 572/527, loss: 0.24436703324317932 2023-01-23 02:46:44.782986: step: 576/527, loss: 0.00011959076800849289 2023-01-23 02:46:45.887136: step: 580/527, loss: 0.001471710274927318 2023-01-23 02:46:46.978769: step: 584/527, loss: 0.09455184638500214 2023-01-23 02:46:48.086672: step: 588/527, loss: 0.0020543099381029606 2023-01-23 02:46:49.186877: step: 592/527, loss: 0.015607167035341263 2023-01-23 02:46:50.281946: step: 596/527, loss: 0.00012907982454635203 2023-01-23 02:46:51.413939: step: 600/527, loss: 0.15989524126052856 2023-01-23 02:46:52.542909: step: 604/527, loss: 0.00011978149996139109 2023-01-23 02:46:53.656931: step: 608/527, loss: 0.003145408583804965 2023-01-23 02:46:54.763896: step: 612/527, loss: 0.18141384422779083 2023-01-23 02:46:55.898239: step: 616/527, loss: 0.04190473631024361 2023-01-23 02:46:57.028902: step: 620/527, loss: 0.003602695418521762 2023-01-23 02:46:58.154968: step: 624/527, loss: 0.06306210160255432 2023-01-23 02:46:59.277015: step: 628/527, loss: 0.03174161911010742 2023-01-23 02:47:00.363415: step: 632/527, loss: 0.0453639030456543 2023-01-23 02:47:01.457164: step: 636/527, loss: 0.015012543648481369 2023-01-23 02:47:02.573598: step: 640/527, loss: 0.07665643841028214 2023-01-23 02:47:03.671999: step: 644/527, loss: 0.008855295367538929 2023-01-23 02:47:04.769852: step: 648/527, loss: 0.015130424872040749 2023-01-23 02:47:05.876350: step: 652/527, loss: 0.004867649171501398 2023-01-23 02:47:06.982729: step: 656/527, loss: 0.036391355097293854 2023-01-23 02:47:08.120606: step: 660/527, loss: 0.026813887059688568 2023-01-23 02:47:09.221623: step: 664/527, loss: 0.00071887974627316 2023-01-23 02:47:10.341888: step: 668/527, loss: 0.023047685623168945 2023-01-23 02:47:11.481001: step: 672/527, loss: 0.006439399905502796 2023-01-23 02:47:12.612391: step: 676/527, loss: 0.04015917703509331 2023-01-23 02:47:13.711955: step: 680/527, loss: 0.012956619262695312 2023-01-23 02:47:14.815801: step: 684/527, loss: 0.004407978616654873 2023-01-23 02:47:15.920278: step: 688/527, loss: 0.06499719619750977 2023-01-23 02:47:17.060022: step: 692/527, loss: 0.0036669732071459293 2023-01-23 02:47:18.170952: step: 696/527, loss: 0.004702758975327015 2023-01-23 02:47:19.311150: step: 700/527, loss: 0.0019788744393736124 2023-01-23 02:47:20.465061: step: 704/527, loss: 0.0012704849941655993 2023-01-23 02:47:21.586393: step: 708/527, loss: 0.013256454840302467 2023-01-23 02:47:22.699542: step: 712/527, loss: 0.0035755636636167765 2023-01-23 02:47:23.815488: step: 716/527, loss: 0.0015518188010901213 2023-01-23 02:47:24.926614: step: 720/527, loss: 0.01841564290225506 2023-01-23 02:47:26.036894: step: 724/527, loss: 1.9073513612966053e-07 2023-01-23 02:47:27.168095: step: 728/527, loss: 0.002987384796142578 2023-01-23 02:47:28.281727: step: 732/527, loss: 0.0014947891468182206 2023-01-23 02:47:29.384085: step: 736/527, loss: 0.029780007898807526 2023-01-23 02:47:30.509541: step: 740/527, loss: 0.02634258382022381 2023-01-23 02:47:31.633732: step: 744/527, loss: 0.008105278015136719 2023-01-23 02:47:32.774956: step: 748/527, loss: 0.007285094819962978 2023-01-23 02:47:33.916227: step: 752/527, loss: 0.03876161575317383 2023-01-23 02:47:35.059484: step: 756/527, loss: 0.016056442633271217 2023-01-23 02:47:36.167780: step: 760/527, loss: 0.06254110485315323 2023-01-23 02:47:37.294602: step: 764/527, loss: 0.0197372455149889 2023-01-23 02:47:38.398729: step: 768/527, loss: 0.02198362536728382 2023-01-23 02:47:39.561229: step: 772/527, loss: 0.00655817985534668 2023-01-23 02:47:40.721878: step: 776/527, loss: 0.0031541825737804174 2023-01-23 02:47:41.835671: step: 780/527, loss: 0.02676544152200222 2023-01-23 02:47:42.950303: step: 784/527, loss: 0.07407913357019424 2023-01-23 02:47:44.087962: step: 788/527, loss: 0.006284618284553289 2023-01-23 02:47:45.189251: step: 792/527, loss: 0.02416963502764702 2023-01-23 02:47:46.337792: step: 796/527, loss: 0.004728412721306086 2023-01-23 02:47:47.444641: step: 800/527, loss: 0.0034944533836096525 2023-01-23 02:47:48.559670: step: 804/527, loss: 0.005403477232903242 2023-01-23 02:47:49.665253: step: 808/527, loss: 0.007008838467299938 2023-01-23 02:47:50.736598: step: 812/527, loss: 0.0005703926435671747 2023-01-23 02:47:51.812562: step: 816/527, loss: 0.00057306292001158 2023-01-23 02:47:52.943114: step: 820/527, loss: 5.3215029765851796e-05 2023-01-23 02:47:54.058885: step: 824/527, loss: 4.57763671875e-05 2023-01-23 02:47:55.150668: step: 828/527, loss: 0.016205692663788795 2023-01-23 02:47:56.246510: step: 832/527, loss: 0.00018529892258811742 2023-01-23 02:47:57.341702: step: 836/527, loss: 0.0020374299492686987 2023-01-23 02:47:58.462312: step: 840/527, loss: 0.005025672726333141 2023-01-23 02:47:59.591376: step: 844/527, loss: 0.008652115240693092 2023-01-23 02:48:00.697279: step: 848/527, loss: 0.20478478074073792 2023-01-23 02:48:01.836201: step: 852/527, loss: 0.018135596066713333 2023-01-23 02:48:02.960869: step: 856/527, loss: 0.000851535820402205 2023-01-23 02:48:04.056744: step: 860/527, loss: 0.04741621017456055 2023-01-23 02:48:05.176356: step: 864/527, loss: 0.009001445956528187 2023-01-23 02:48:06.294696: step: 868/527, loss: 0.0010059355990961194 2023-01-23 02:48:07.396903: step: 872/527, loss: 0.043395139276981354 2023-01-23 02:48:08.493201: step: 876/527, loss: 0.005222130101174116 2023-01-23 02:48:09.594432: step: 880/527, loss: 0.050980761647224426 2023-01-23 02:48:10.693967: step: 884/527, loss: 0.08708000928163528 2023-01-23 02:48:11.828135: step: 888/527, loss: 0.010383606888353825 2023-01-23 02:48:12.932690: step: 892/527, loss: 0.015842437744140625 2023-01-23 02:48:14.046034: step: 896/527, loss: 0.0002993583620991558 2023-01-23 02:48:15.180671: step: 900/527, loss: 0.016800498589873314 2023-01-23 02:48:16.308643: step: 904/527, loss: 0.015013694763183594 2023-01-23 02:48:17.461323: step: 908/527, loss: 0.004151248838752508 2023-01-23 02:48:18.585489: step: 912/527, loss: 0.6020898818969727 2023-01-23 02:48:19.729634: step: 916/527, loss: 0.013149452395737171 2023-01-23 02:48:20.841231: step: 920/527, loss: 0.005343389697372913 2023-01-23 02:48:21.954716: step: 924/527, loss: 0.12648582458496094 2023-01-23 02:48:23.085794: step: 928/527, loss: 0.0008691787952557206 2023-01-23 02:48:24.200857: step: 932/527, loss: 0.012345362454652786 2023-01-23 02:48:25.299785: step: 936/527, loss: 0.005781460087746382 2023-01-23 02:48:26.377469: step: 940/527, loss: 0.0277864933013916 2023-01-23 02:48:27.495839: step: 944/527, loss: 0.005160999018698931 2023-01-23 02:48:28.632589: step: 948/527, loss: 0.0010903358925133944 2023-01-23 02:48:29.720725: step: 952/527, loss: 0.0011754990555346012 2023-01-23 02:48:30.830153: step: 956/527, loss: 0.00010132789611816406 2023-01-23 02:48:31.925879: step: 960/527, loss: 0.00043659209040924907 2023-01-23 02:48:33.036900: step: 964/527, loss: 0.00053491594735533 2023-01-23 02:48:34.136061: step: 968/527, loss: 0.012289525009691715 2023-01-23 02:48:35.244494: step: 972/527, loss: 0.06495705246925354 2023-01-23 02:48:36.353248: step: 976/527, loss: 0.0006214663153514266 2023-01-23 02:48:37.487784: step: 980/527, loss: 0.0313752181828022 2023-01-23 02:48:38.594163: step: 984/527, loss: 0.004421997349709272 2023-01-23 02:48:39.717948: step: 988/527, loss: 0.0265058521181345 2023-01-23 02:48:40.814017: step: 992/527, loss: 0.004988765809684992 2023-01-23 02:48:41.891888: step: 996/527, loss: 0.0012411593925207853 2023-01-23 02:48:43.004302: step: 1000/527, loss: 1.573562803969253e-05 2023-01-23 02:48:44.110606: step: 1004/527, loss: 0.004400825593620539 2023-01-23 02:48:45.228410: step: 1008/527, loss: 0.02025900036096573 2023-01-23 02:48:46.351264: step: 1012/527, loss: 0.007733821868896484 2023-01-23 02:48:47.431552: step: 1016/527, loss: 0.00010827779624378309 2023-01-23 02:48:48.543218: step: 1020/527, loss: 0.02309245988726616 2023-01-23 02:48:49.667547: step: 1024/527, loss: 0.00011391640146030113 2023-01-23 02:48:50.775457: step: 1028/527, loss: 0.0014451027382165194 2023-01-23 02:48:51.916145: step: 1032/527, loss: 5.050436973571777 2023-01-23 02:48:53.031179: step: 1036/527, loss: 0.0002795219188556075 2023-01-23 02:48:54.135629: step: 1040/527, loss: 0.0002375125914113596 2023-01-23 02:48:55.274673: step: 1044/527, loss: 0.00471420306712389 2023-01-23 02:48:56.401522: step: 1048/527, loss: 0.04861316829919815 2023-01-23 02:48:57.524123: step: 1052/527, loss: 0.0007120132795535028 2023-01-23 02:48:58.623326: step: 1056/527, loss: 0.01694030873477459 2023-01-23 02:48:59.722045: step: 1060/527, loss: 0.06779942661523819 2023-01-23 02:49:00.826513: step: 1064/527, loss: 0.017715072259306908 2023-01-23 02:49:01.978137: step: 1068/527, loss: 0.0008481026161462069 2023-01-23 02:49:03.113403: step: 1072/527, loss: 0.0011031389003619552 2023-01-23 02:49:04.206891: step: 1076/527, loss: 0.0016887665260583162 2023-01-23 02:49:05.322707: step: 1080/527, loss: 0.006601429078727961 2023-01-23 02:49:06.433230: step: 1084/527, loss: 0.058020591735839844 2023-01-23 02:49:07.534951: step: 1088/527, loss: 0.013167820870876312 2023-01-23 02:49:08.677375: step: 1092/527, loss: 0.018180467188358307 2023-01-23 02:49:09.787624: step: 1096/527, loss: 0.022410202771425247 2023-01-23 02:49:10.875055: step: 1100/527, loss: 0.001178741455078125 2023-01-23 02:49:11.975273: step: 1104/527, loss: 0.00799331720918417 2023-01-23 02:49:13.071273: step: 1108/527, loss: 0.00014337897300720215 2023-01-23 02:49:14.206889: step: 1112/527, loss: 0.008269499987363815 2023-01-23 02:49:15.316023: step: 1116/527, loss: 0.009488487616181374 2023-01-23 02:49:16.419070: step: 1120/527, loss: 0.022758865728974342 2023-01-23 02:49:17.536578: step: 1124/527, loss: 0.0002480506955180317 2023-01-23 02:49:18.646666: step: 1128/527, loss: 0.007994461804628372 2023-01-23 02:49:19.768797: step: 1132/527, loss: 0.010111856274306774 2023-01-23 02:49:20.882416: step: 1136/527, loss: 0.33087652921676636 2023-01-23 02:49:21.992897: step: 1140/527, loss: 0.04574241489171982 2023-01-23 02:49:23.080344: step: 1144/527, loss: 0.002463054610416293 2023-01-23 02:49:24.180675: step: 1148/527, loss: 0.23794154822826385 2023-01-23 02:49:25.268635: step: 1152/527, loss: 0.0018032073276117444 2023-01-23 02:49:26.396448: step: 1156/527, loss: 0.013797283172607422 2023-01-23 02:49:27.532281: step: 1160/527, loss: 0.04929351806640625 2023-01-23 02:49:28.634934: step: 1164/527, loss: 0.003780269529670477 2023-01-23 02:49:29.780887: step: 1168/527, loss: 0.025474930182099342 2023-01-23 02:49:30.874793: step: 1172/527, loss: 0.026148319244384766 2023-01-23 02:49:32.000793: step: 1176/527, loss: 0.004789590835571289 2023-01-23 02:49:33.139405: step: 1180/527, loss: 0.06521129608154297 2023-01-23 02:49:34.237132: step: 1184/527, loss: 0.06087551265954971 2023-01-23 02:49:35.349702: step: 1188/527, loss: 0.006834506988525391 2023-01-23 02:49:36.473460: step: 1192/527, loss: 0.011268138885498047 2023-01-23 02:49:37.571845: step: 1196/527, loss: 0.03778543323278427 2023-01-23 02:49:38.724303: step: 1200/527, loss: 0.003999519627541304 2023-01-23 02:49:39.814956: step: 1204/527, loss: 0.001208496163599193 2023-01-23 02:49:40.933100: step: 1208/527, loss: 0.008374596014618874 2023-01-23 02:49:42.073908: step: 1212/527, loss: 0.03540964052081108 2023-01-23 02:49:43.185756: step: 1216/527, loss: 0.01873922348022461 2023-01-23 02:49:44.296758: step: 1220/527, loss: 0.0034141542855650187 2023-01-23 02:49:45.399835: step: 1224/527, loss: 0.005496692843735218 2023-01-23 02:49:46.513877: step: 1228/527, loss: 0.0016101838555186987 2023-01-23 02:49:47.668392: step: 1232/527, loss: 0.004622363951057196 2023-01-23 02:49:48.809732: step: 1236/527, loss: 0.0005783558008261025 2023-01-23 02:49:49.991881: step: 1240/527, loss: 0.006184959318488836 2023-01-23 02:49:51.126407: step: 1244/527, loss: 0.022483063861727715 2023-01-23 02:49:52.241790: step: 1248/527, loss: 1.0204315913142636e-05 2023-01-23 02:49:53.348041: step: 1252/527, loss: 0.00045022962149232626 2023-01-23 02:49:54.468418: step: 1256/527, loss: 0.015847396105527878 2023-01-23 02:49:55.549042: step: 1260/527, loss: 0.0023582458961755037 2023-01-23 02:49:56.646076: step: 1264/527, loss: 0.011016941629350185 2023-01-23 02:49:57.772239: step: 1268/527, loss: 0.015591287985444069 2023-01-23 02:49:58.880576: step: 1272/527, loss: 6.637573096668348e-05 2023-01-23 02:49:59.975924: step: 1276/527, loss: 4.6443943574558944e-05 2023-01-23 02:50:01.105951: step: 1280/527, loss: 0.00028514862060546875 2023-01-23 02:50:02.238857: step: 1284/527, loss: 0.03325338289141655 2023-01-23 02:50:03.384040: step: 1288/527, loss: 0.017682362347841263 2023-01-23 02:50:04.520192: step: 1292/527, loss: 0.0007347106584347785 2023-01-23 02:50:05.588242: step: 1296/527, loss: 0.00153436663094908 2023-01-23 02:50:06.719134: step: 1300/527, loss: 0.049478720873594284 2023-01-23 02:50:07.780624: step: 1304/527, loss: 0.0007441520574502647 2023-01-23 02:50:08.905605: step: 1308/527, loss: 8.449555025435984e-05 2023-01-23 02:50:10.030112: step: 1312/527, loss: 0.013600158505141735 2023-01-23 02:50:11.129522: step: 1316/527, loss: 0.004617118742316961 2023-01-23 02:50:12.243165: step: 1320/527, loss: 7.476806786144152e-05 2023-01-23 02:50:13.362105: step: 1324/527, loss: 0.02285308949649334 2023-01-23 02:50:14.500825: step: 1328/527, loss: 0.001990032382309437 2023-01-23 02:50:15.619464: step: 1332/527, loss: 0.002467632293701172 2023-01-23 02:50:16.732486: step: 1336/527, loss: 1.7833710444392636e-05 2023-01-23 02:50:17.864830: step: 1340/527, loss: 0.04860043525695801 2023-01-23 02:50:18.983709: step: 1344/527, loss: 0.5575911998748779 2023-01-23 02:50:20.108598: step: 1348/527, loss: 0.003898048307746649 2023-01-23 02:50:21.223928: step: 1352/527, loss: 0.0007877349853515625 2023-01-23 02:50:22.346256: step: 1356/527, loss: 0.0015155792934820056 2023-01-23 02:50:23.426730: step: 1360/527, loss: 0.06365413963794708 2023-01-23 02:50:24.539272: step: 1364/527, loss: 0.0220368392765522 2023-01-23 02:50:25.646490: step: 1368/527, loss: 0.00032845736132003367 2023-01-23 02:50:26.763255: step: 1372/527, loss: 0.050695229321718216 2023-01-23 02:50:27.854045: step: 1376/527, loss: 0.04951038211584091 2023-01-23 02:50:28.968684: step: 1380/527, loss: 8.392335075768642e-06 2023-01-23 02:50:30.065527: step: 1384/527, loss: 0.15241539478302002 2023-01-23 02:50:31.167860: step: 1388/527, loss: 0.0016799926524981856 2023-01-23 02:50:32.293012: step: 1392/527, loss: 0.007445049472153187 2023-01-23 02:50:33.405966: step: 1396/527, loss: 0.08291473984718323 2023-01-23 02:50:34.540442: step: 1400/527, loss: 8.468628220725805e-05 2023-01-23 02:50:35.684762: step: 1404/527, loss: 0.10357294231653214 2023-01-23 02:50:36.793284: step: 1408/527, loss: 0.0027973174583166838 2023-01-23 02:50:37.944644: step: 1412/527, loss: 0.007335376925766468 2023-01-23 02:50:39.105138: step: 1416/527, loss: 0.004297447390854359 2023-01-23 02:50:40.210375: step: 1420/527, loss: 0.0010617256630212069 2023-01-23 02:50:41.338029: step: 1424/527, loss: 0.010309696197509766 2023-01-23 02:50:42.451578: step: 1428/527, loss: 0.0004587173752952367 2023-01-23 02:50:43.566224: step: 1432/527, loss: 0.12448596954345703 2023-01-23 02:50:44.694229: step: 1436/527, loss: 0.014418245293200016 2023-01-23 02:50:45.803312: step: 1440/527, loss: 0.044605061411857605 2023-01-23 02:50:46.889329: step: 1444/527, loss: 5.3024294174974784e-05 2023-01-23 02:50:47.992529: step: 1448/527, loss: 0.10175151377916336 2023-01-23 02:50:49.084730: step: 1452/527, loss: 0.015194701962172985 2023-01-23 02:50:50.215354: step: 1456/527, loss: 0.0005909919855184853 2023-01-23 02:50:51.326500: step: 1460/527, loss: 0.04135628044605255 2023-01-23 02:50:52.434153: step: 1464/527, loss: 0.028210163116455078 2023-01-23 02:50:53.535132: step: 1468/527, loss: 0.0016727447509765625 2023-01-23 02:50:54.661395: step: 1472/527, loss: 0.0003479004080872983 2023-01-23 02:50:55.735738: step: 1476/527, loss: 0.00041565895662643015 2023-01-23 02:50:56.859038: step: 1480/527, loss: 0.001474666642025113 2023-01-23 02:50:57.955287: step: 1484/527, loss: 0.009755611419677734 2023-01-23 02:50:59.042543: step: 1488/527, loss: 0.016039704903960228 2023-01-23 02:51:00.153576: step: 1492/527, loss: 0.006434249691665173 2023-01-23 02:51:01.275576: step: 1496/527, loss: 0.016633987426757812 2023-01-23 02:51:02.390692: step: 1500/527, loss: 0.004258060362190008 2023-01-23 02:51:03.516867: step: 1504/527, loss: 0.006874752230942249 2023-01-23 02:51:04.624328: step: 1508/527, loss: 0.026272868737578392 2023-01-23 02:51:05.765590: step: 1512/527, loss: 0.000469398481072858 2023-01-23 02:51:06.892824: step: 1516/527, loss: 0.00040988920954987407 2023-01-23 02:51:08.002717: step: 1520/527, loss: 0.0004192829073872417 2023-01-23 02:51:09.149049: step: 1524/527, loss: 0.0008742331992834806 2023-01-23 02:51:10.248363: step: 1528/527, loss: 0.4072204530239105 2023-01-23 02:51:11.387484: step: 1532/527, loss: 0.020650483667850494 2023-01-23 02:51:12.501151: step: 1536/527, loss: 0.007707118988037109 2023-01-23 02:51:13.615491: step: 1540/527, loss: 0.006175232119858265 2023-01-23 02:51:14.752006: step: 1544/527, loss: 0.004504489712417126 2023-01-23 02:51:15.849085: step: 1548/527, loss: 0.0011019706726074219 2023-01-23 02:51:16.931260: step: 1552/527, loss: 0.009310722351074219 2023-01-23 02:51:18.081184: step: 1556/527, loss: 0.033133696764707565 2023-01-23 02:51:19.186820: step: 1560/527, loss: 0.0004373550764285028 2023-01-23 02:51:20.325425: step: 1564/527, loss: 0.0021636963356286287 2023-01-23 02:51:21.413583: step: 1568/527, loss: 0.005114078521728516 2023-01-23 02:51:22.523827: step: 1572/527, loss: 0.005603027530014515 2023-01-23 02:51:23.615937: step: 1576/527, loss: 0.0014803410740569234 2023-01-23 02:51:24.686702: step: 1580/527, loss: 0.00031299592228606343 2023-01-23 02:51:25.818941: step: 1584/527, loss: 0.004423046018928289 2023-01-23 02:51:26.925083: step: 1588/527, loss: 0.00604591378942132 2023-01-23 02:51:28.047909: step: 1592/527, loss: 0.06521540135145187 2023-01-23 02:51:29.169250: step: 1596/527, loss: 0.0016664505237713456 2023-01-23 02:51:30.282099: step: 1600/527, loss: 0.0035290243104100227 2023-01-23 02:51:31.413445: step: 1604/527, loss: 0.11249971389770508 2023-01-23 02:51:32.545509: step: 1608/527, loss: 0.03651590272784233 2023-01-23 02:51:33.661575: step: 1612/527, loss: 0.008687973022460938 2023-01-23 02:51:34.792692: step: 1616/527, loss: 0.053115081042051315 2023-01-23 02:51:35.903491: step: 1620/527, loss: 0.0007285118335857987 2023-01-23 02:51:36.994577: step: 1624/527, loss: 0.02037658728659153 2023-01-23 02:51:38.146311: step: 1628/527, loss: 0.002291774842888117 2023-01-23 02:51:39.238343: step: 1632/527, loss: 0.007607841398566961 2023-01-23 02:51:40.350029: step: 1636/527, loss: 0.024045085534453392 2023-01-23 02:51:41.448690: step: 1640/527, loss: 0.004744434729218483 2023-01-23 02:51:42.567924: step: 1644/527, loss: 0.00019474030705168843 2023-01-23 02:51:43.674188: step: 1648/527, loss: 0.07070579379796982 2023-01-23 02:51:44.817044: step: 1652/527, loss: 0.0008762359502725303 2023-01-23 02:51:45.920815: step: 1656/527, loss: 0.003662967821583152 2023-01-23 02:51:47.018799: step: 1660/527, loss: 0.0003383636358194053 2023-01-23 02:51:48.141272: step: 1664/527, loss: 6.895065598655492e-05 2023-01-23 02:51:49.295545: step: 1668/527, loss: 0.017535973340272903 2023-01-23 02:51:50.432731: step: 1672/527, loss: 0.10617122054100037 2023-01-23 02:51:51.565073: step: 1676/527, loss: 0.0004863739013671875 2023-01-23 02:51:52.691358: step: 1680/527, loss: 0.011040115728974342 2023-01-23 02:51:53.801592: step: 1684/527, loss: 0.00426063546910882 2023-01-23 02:51:54.990402: step: 1688/527, loss: 0.0628824234008789 2023-01-23 02:51:56.109219: step: 1692/527, loss: 0.01290226075798273 2023-01-23 02:51:57.238467: step: 1696/527, loss: 0.00955276470631361 2023-01-23 02:51:58.333192: step: 1700/527, loss: 0.00026407241239212453 2023-01-23 02:51:59.467181: step: 1704/527, loss: 0.05009784922003746 2023-01-23 02:52:00.576605: step: 1708/527, loss: 0.04191351309418678 2023-01-23 02:52:01.675324: step: 1712/527, loss: 0.0027766230050474405 2023-01-23 02:52:02.809673: step: 1716/527, loss: 0.01951727829873562 2023-01-23 02:52:03.918668: step: 1720/527, loss: 1.62124638336536e-06 2023-01-23 02:52:04.992586: step: 1724/527, loss: 0.0009394646040163934 2023-01-23 02:52:06.091704: step: 1728/527, loss: 0.0062885284423828125 2023-01-23 02:52:07.218759: step: 1732/527, loss: 0.0038815499283373356 2023-01-23 02:52:08.333810: step: 1736/527, loss: 4.6539309551008046e-05 2023-01-23 02:52:09.446322: step: 1740/527, loss: 0.0009376525995321572 2023-01-23 02:52:10.614564: step: 1744/527, loss: 0.000656509364489466 2023-01-23 02:52:11.731412: step: 1748/527, loss: 0.0020809650886803865 2023-01-23 02:52:12.842992: step: 1752/527, loss: 0.0075584412552416325 2023-01-23 02:52:13.938280: step: 1756/527, loss: 0.016242504119873047 2023-01-23 02:52:15.045760: step: 1760/527, loss: 0.015456486493349075 2023-01-23 02:52:16.178128: step: 1764/527, loss: 0.00047168732271529734 2023-01-23 02:52:17.307691: step: 1768/527, loss: 0.011204337701201439 2023-01-23 02:52:18.427126: step: 1772/527, loss: 0.026050280779600143 2023-01-23 02:52:19.555805: step: 1776/527, loss: 3.213882155250758e-05 2023-01-23 02:52:20.655574: step: 1780/527, loss: 0.07073793560266495 2023-01-23 02:52:21.790663: step: 1784/527, loss: 0.00127582554705441 2023-01-23 02:52:22.898328: step: 1788/527, loss: 0.01715879514813423 2023-01-23 02:52:24.006671: step: 1792/527, loss: 0.0037208558060228825 2023-01-23 02:52:25.139826: step: 1796/527, loss: 0.008762359619140625 2023-01-23 02:52:26.257606: step: 1800/527, loss: 0.02195739932358265 2023-01-23 02:52:27.408216: step: 1804/527, loss: 0.00017976760864257812 2023-01-23 02:52:28.534036: step: 1808/527, loss: 0.06419678032398224 2023-01-23 02:52:29.650044: step: 1812/527, loss: 0.0017028630245476961 2023-01-23 02:52:30.790231: step: 1816/527, loss: 0.008764171972870827 2023-01-23 02:52:31.910310: step: 1820/527, loss: 0.0010064125526696444 2023-01-23 02:52:33.035268: step: 1824/527, loss: 0.0009965896606445312 2023-01-23 02:52:34.154794: step: 1828/527, loss: 0.03653106838464737 2023-01-23 02:52:35.257497: step: 1832/527, loss: 0.0012649536365643144 2023-01-23 02:52:36.336996: step: 1836/527, loss: 0.04128437116742134 2023-01-23 02:52:37.439532: step: 1840/527, loss: 0.009892940521240234 2023-01-23 02:52:38.539022: step: 1844/527, loss: 0.005511665251106024 2023-01-23 02:52:39.658284: step: 1848/527, loss: 0.0699525699019432 2023-01-23 02:52:40.784778: step: 1852/527, loss: 0.04248318821191788 2023-01-23 02:52:41.903354: step: 1856/527, loss: 0.017084650695323944 2023-01-23 02:52:43.020845: step: 1860/527, loss: 0.010862540453672409 2023-01-23 02:52:44.138585: step: 1864/527, loss: 0.09968248009681702 2023-01-23 02:52:45.252675: step: 1868/527, loss: 0.024834442883729935 2023-01-23 02:52:46.328188: step: 1872/527, loss: 0.010555506683886051 2023-01-23 02:52:47.442076: step: 1876/527, loss: 0.0026895522605627775 2023-01-23 02:52:48.584304: step: 1880/527, loss: 0.012289523147046566 2023-01-23 02:52:49.738559: step: 1884/527, loss: 0.002155780792236328 2023-01-23 02:52:50.894331: step: 1888/527, loss: 0.005321407690644264 2023-01-23 02:52:52.024642: step: 1892/527, loss: 0.0006985664367675781 2023-01-23 02:52:53.150037: step: 1896/527, loss: 0.018883895128965378 2023-01-23 02:52:54.229039: step: 1900/527, loss: 1.3208389646024443e-05 2023-01-23 02:52:55.345157: step: 1904/527, loss: 0.004491257481276989 2023-01-23 02:52:56.448139: step: 1908/527, loss: 0.0019748688209801912 2023-01-23 02:52:57.542395: step: 1912/527, loss: 0.05485835298895836 2023-01-23 02:52:58.630791: step: 1916/527, loss: 0.00040683746919967234 2023-01-23 02:52:59.751394: step: 1920/527, loss: 0.014313507825136185 2023-01-23 02:53:00.872905: step: 1924/527, loss: 0.0024770735763013363 2023-01-23 02:53:01.994692: step: 1928/527, loss: 0.012394332326948643 2023-01-23 02:53:03.116014: step: 1932/527, loss: 0.016316033899784088 2023-01-23 02:53:04.240399: step: 1936/527, loss: 0.004023408982902765 2023-01-23 02:53:05.341675: step: 1940/527, loss: 2.8133392333984375e-05 2023-01-23 02:53:06.486704: step: 1944/527, loss: 0.0406283363699913 2023-01-23 02:53:07.571142: step: 1948/527, loss: 0.0002730369451455772 2023-01-23 02:53:08.678324: step: 1952/527, loss: 0.0045524598099291325 2023-01-23 02:53:09.823749: step: 1956/527, loss: 0.003428173018619418 2023-01-23 02:53:10.955767: step: 1960/527, loss: 0.0014935494400560856 2023-01-23 02:53:12.073362: step: 1964/527, loss: 0.0003504753112792969 2023-01-23 02:53:13.198110: step: 1968/527, loss: 0.03166026994585991 2023-01-23 02:53:14.294224: step: 1972/527, loss: 0.006565952207893133 2023-01-23 02:53:15.395778: step: 1976/527, loss: 0.0001829147367971018 2023-01-23 02:53:16.505780: step: 1980/527, loss: 0.0005499362596310675 2023-01-23 02:53:17.599949: step: 1984/527, loss: 0.00029096603975631297 2023-01-23 02:53:18.766002: step: 1988/527, loss: 0.0003496169811114669 2023-01-23 02:53:19.882094: step: 1992/527, loss: 0.004754447843879461 2023-01-23 02:53:21.017776: step: 1996/527, loss: 0.03133583068847656 2023-01-23 02:53:22.152409: step: 2000/527, loss: 0.0011091232299804688 2023-01-23 02:53:23.270822: step: 2004/527, loss: 0.021156834438443184 2023-01-23 02:53:24.394869: step: 2008/527, loss: 0.002025127410888672 2023-01-23 02:53:25.518333: step: 2012/527, loss: 0.5858330726623535 2023-01-23 02:53:26.616434: step: 2016/527, loss: 0.02097921445965767 2023-01-23 02:53:27.754788: step: 2020/527, loss: 0.007396125700324774 2023-01-23 02:53:28.841117: step: 2024/527, loss: 0.00014514924259856343 2023-01-23 02:53:29.965153: step: 2028/527, loss: 0.007003307342529297 2023-01-23 02:53:31.040966: step: 2032/527, loss: 0.0002628088113851845 2023-01-23 02:53:32.131166: step: 2036/527, loss: 0.00835494976490736 2023-01-23 02:53:33.238562: step: 2040/527, loss: 0.0005269050598144531 2023-01-23 02:53:34.379563: step: 2044/527, loss: 0.0018352508777752519 2023-01-23 02:53:35.548031: step: 2048/527, loss: 0.029383469372987747 2023-01-23 02:53:36.695997: step: 2052/527, loss: 0.0007335185655392706 2023-01-23 02:53:37.780618: step: 2056/527, loss: 3.585815284168348e-05 2023-01-23 02:53:38.912649: step: 2060/527, loss: 2.346038854739163e-05 2023-01-23 02:53:40.026313: step: 2064/527, loss: 0.006548976991325617 2023-01-23 02:53:41.145639: step: 2068/527, loss: 0.002814674284309149 2023-01-23 02:53:42.263644: step: 2072/527, loss: 0.001100254012271762 2023-01-23 02:53:43.383644: step: 2076/527, loss: 0.0017435074551030993 2023-01-23 02:53:44.484225: step: 2080/527, loss: 0.014860248193144798 2023-01-23 02:53:45.608059: step: 2084/527, loss: 0.02179727517068386 2023-01-23 02:53:46.693119: step: 2088/527, loss: 0.0015678404597565532 2023-01-23 02:53:47.811816: step: 2092/527, loss: 0.21712498366832733 2023-01-23 02:53:48.931655: step: 2096/527, loss: 0.004669570829719305 2023-01-23 02:53:50.030917: step: 2100/527, loss: 3.80516066798009e-05 2023-01-23 02:53:51.205479: step: 2104/527, loss: 0.055962562561035156 2023-01-23 02:53:52.318501: step: 2108/527, loss: 0.018947506323456764 ================================================== Loss: 0.039 -------------------- Dev: {'event': {'p': 0.626410835214447, 'r': 0.7390146471371505, 'f1': 0.678069639584606}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Test: {'event': {'p': 0.6311047889995258, 'r': 0.7605714285714286, 'f1': 0.6898160145115314}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Chinese: {'event': {'p': 0.5657894736842105, 'r': 0.7962962962962963, 'f1': 0.6615384615384615}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Korean: {'event': {'p': 0.6170212765957447, 'r': 0.4603174603174603, 'f1': 0.5272727272727272}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Russian: {'event': {'p': 0.4722222222222222, 'r': 0.4722222222222222, 'f1': 0.4722222222222222}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066252587991718, 'r': 0.7802929427430093, 'f1': 0.6825859056493885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Eng Test for Korean: {'event': {'p': 0.62580054894785, 'r': 0.7817142857142857, 'f1': 0.6951219512195121}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 20 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 02:54:33.502308: step: 4/527, loss: 3.4332275390625e-05 2023-01-23 02:54:34.583199: step: 8/527, loss: 3.547668529790826e-05 2023-01-23 02:54:35.704791: step: 12/527, loss: 0.0012718200450763106 2023-01-23 02:54:36.822238: step: 16/527, loss: 0.13088759779930115 2023-01-23 02:54:37.910121: step: 20/527, loss: 0.007852173410356045 2023-01-23 02:54:39.334972: step: 24/527, loss: 8.296966552734375e-05 2023-01-23 02:54:40.429318: step: 28/527, loss: 0.0007997513166628778 2023-01-23 02:54:41.550895: step: 32/527, loss: 0.00011363029625499621 2023-01-23 02:54:42.651485: step: 36/527, loss: 0.0019147873390465975 2023-01-23 02:54:43.747172: step: 40/527, loss: 7.109642319846898e-05 2023-01-23 02:54:44.874917: step: 44/527, loss: 0.008765220642089844 2023-01-23 02:54:46.009435: step: 48/527, loss: 0.09263362735509872 2023-01-23 02:54:47.159194: step: 52/527, loss: 0.04531841352581978 2023-01-23 02:54:48.259865: step: 56/527, loss: 2.927780406025704e-05 2023-01-23 02:54:49.401410: step: 60/527, loss: 0.0006204604869708419 2023-01-23 02:54:50.497583: step: 64/527, loss: 7.004738290561363e-05 2023-01-23 02:54:51.634970: step: 68/527, loss: 0.0672679916024208 2023-01-23 02:54:52.757590: step: 72/527, loss: 0.0907646119594574 2023-01-23 02:54:53.843253: step: 76/527, loss: 0.0064262389205396175 2023-01-23 02:54:54.959903: step: 80/527, loss: 0.015196133404970169 2023-01-23 02:54:56.080553: step: 84/527, loss: 0.0004684448358602822 2023-01-23 02:54:57.195564: step: 88/527, loss: 0.0026604891754686832 2023-01-23 02:54:58.304541: step: 92/527, loss: 0.00017480850510764867 2023-01-23 02:54:59.443585: step: 96/527, loss: 0.000293731689453125 2023-01-23 02:55:00.587362: step: 100/527, loss: 0.0004915237659588456 2023-01-23 02:55:01.669654: step: 104/527, loss: 0.0005902289994992316 2023-01-23 02:55:02.756366: step: 108/527, loss: 0.020103169605135918 2023-01-23 02:55:03.892074: step: 112/527, loss: 4.482269287109375e-05 2023-01-23 02:55:05.002250: step: 116/527, loss: 7.62939453125e-06 2023-01-23 02:55:06.115082: step: 120/527, loss: 5.426407005870715e-05 2023-01-23 02:55:07.220190: step: 124/527, loss: 0.17228764295578003 2023-01-23 02:55:08.317497: step: 128/527, loss: 0.0009261191007681191 2023-01-23 02:55:09.451469: step: 132/527, loss: 2.193450927734375e-05 2023-01-23 02:55:10.569666: step: 136/527, loss: 0.0014945983421057463 2023-01-23 02:55:11.692136: step: 140/527, loss: 0.0003391265927348286 2023-01-23 02:55:12.810763: step: 144/527, loss: 0.00045261383638717234 2023-01-23 02:55:13.908221: step: 148/527, loss: 1.5258790426742053e-06 2023-01-23 02:55:15.089780: step: 152/527, loss: 0.04457683488726616 2023-01-23 02:55:16.211869: step: 156/527, loss: 7.62939453125e-05 2023-01-23 02:55:17.333772: step: 160/527, loss: -1.8119810647476697e-06 2023-01-23 02:55:18.398465: step: 164/527, loss: 1.9454957509879023e-05 2023-01-23 02:55:19.512168: step: 168/527, loss: 0.05544090270996094 2023-01-23 02:55:20.604404: step: 172/527, loss: 0.003456211183220148 2023-01-23 02:55:21.738603: step: 176/527, loss: 0.008368110284209251 2023-01-23 02:55:22.858894: step: 180/527, loss: 0.0011278152232989669 2023-01-23 02:55:23.943801: step: 184/527, loss: 0.0002906799491029233 2023-01-23 02:55:25.020245: step: 188/527, loss: 0.03186435624957085 2023-01-23 02:55:26.119259: step: 192/527, loss: 0.011450577527284622 2023-01-23 02:55:27.232152: step: 196/527, loss: 0.0017066001892089844 2023-01-23 02:55:28.372632: step: 200/527, loss: 0.03494129329919815 2023-01-23 02:55:29.474728: step: 204/527, loss: 4.673004150390625e-05 2023-01-23 02:55:30.568006: step: 208/527, loss: 0.0001520156947663054 2023-01-23 02:55:31.682494: step: 212/527, loss: 0.010197353549301624 2023-01-23 02:55:32.791274: step: 216/527, loss: 0.00014171600923873484 2023-01-23 02:55:33.879672: step: 220/527, loss: 0.0008772850269451737 2023-01-23 02:55:35.022797: step: 224/527, loss: 0.0011756897438317537 2023-01-23 02:55:36.133265: step: 228/527, loss: 0.2603588104248047 2023-01-23 02:55:37.288024: step: 232/527, loss: 0.021329879760742188 2023-01-23 02:55:38.403661: step: 236/527, loss: 0.06982040405273438 2023-01-23 02:55:39.514550: step: 240/527, loss: 0.00021991730318404734 2023-01-23 02:55:40.636955: step: 244/527, loss: 0.04594488441944122 2023-01-23 02:55:41.765180: step: 248/527, loss: 0.0031266212463378906 2023-01-23 02:55:42.884370: step: 252/527, loss: 0.0005491256597451866 2023-01-23 02:55:44.023592: step: 256/527, loss: 0.00034198761568404734 2023-01-23 02:55:45.130854: step: 260/527, loss: 0.03650055080652237 2023-01-23 02:55:46.244394: step: 264/527, loss: 0.02214033715426922 2023-01-23 02:55:47.361177: step: 268/527, loss: 0.00029859543428756297 2023-01-23 02:55:48.469270: step: 272/527, loss: 9.002685692394152e-05 2023-01-23 02:55:49.584013: step: 276/527, loss: 0.0001253128139069304 2023-01-23 02:55:50.723340: step: 280/527, loss: 0.0002082824648823589 2023-01-23 02:55:51.818585: step: 284/527, loss: 0.009705162607133389 2023-01-23 02:55:52.942894: step: 288/527, loss: 1.144409225162235e-06 2023-01-23 02:55:54.085849: step: 292/527, loss: 0.010892581194639206 2023-01-23 02:55:55.181837: step: 296/527, loss: 0.025179386138916016 2023-01-23 02:55:56.317406: step: 300/527, loss: 0.0012948036892339587 2023-01-23 02:55:57.461806: step: 304/527, loss: 0.0010653496719896793 2023-01-23 02:55:58.585128: step: 308/527, loss: 0.00011682510375976562 2023-01-23 02:55:59.702948: step: 312/527, loss: 0.002443504286929965 2023-01-23 02:56:00.851189: step: 316/527, loss: 0.007702446077018976 2023-01-23 02:56:01.986424: step: 320/527, loss: 0.007098579779267311 2023-01-23 02:56:03.100920: step: 324/527, loss: 0.006360197439789772 2023-01-23 02:56:04.216505: step: 328/527, loss: 0.00254497560672462 2023-01-23 02:56:05.362393: step: 332/527, loss: 0.01922626607120037 2023-01-23 02:56:06.457768: step: 336/527, loss: 0.00019149779109284282 2023-01-23 02:56:07.571729: step: 340/527, loss: 0.024216175079345703 2023-01-23 02:56:08.752275: step: 344/527, loss: 0.027865981683135033 2023-01-23 02:56:09.872351: step: 348/527, loss: 0.007172203157097101 2023-01-23 02:56:10.999694: step: 352/527, loss: 0.05468282848596573 2023-01-23 02:56:12.110825: step: 356/527, loss: 0.00286350236274302 2023-01-23 02:56:13.240644: step: 360/527, loss: 0.026781558990478516 2023-01-23 02:56:14.381438: step: 364/527, loss: 0.0016329765785485506 2023-01-23 02:56:15.475154: step: 368/527, loss: 0.01660633087158203 2023-01-23 02:56:16.575531: step: 372/527, loss: 0.02549591101706028 2023-01-23 02:56:17.708106: step: 376/527, loss: 6.29425048828125e-05 2023-01-23 02:56:18.862811: step: 380/527, loss: 0.0035185813903808594 2023-01-23 02:56:20.000472: step: 384/527, loss: 0.002018308499827981 2023-01-23 02:56:21.156064: step: 388/527, loss: 0.0009639739873819053 2023-01-23 02:56:22.253489: step: 392/527, loss: 0.003463316010311246 2023-01-23 02:56:23.369984: step: 396/527, loss: 0.004067802801728249 2023-01-23 02:56:24.486995: step: 400/527, loss: 0.04203357547521591 2023-01-23 02:56:25.586824: step: 404/527, loss: 0.0005035400390625 2023-01-23 02:56:26.712650: step: 408/527, loss: 0.00013189316086936742 2023-01-23 02:56:27.845289: step: 412/527, loss: 0.0017994879744946957 2023-01-23 02:56:28.944718: step: 416/527, loss: 0.047010134905576706 2023-01-23 02:56:30.053909: step: 420/527, loss: 0.0003540039178915322 2023-01-23 02:56:31.130911: step: 424/527, loss: 0.0002872943878173828 2023-01-23 02:56:32.232614: step: 428/527, loss: 0.0030436518136411905 2023-01-23 02:56:33.356261: step: 432/527, loss: 0.0007802963373251259 2023-01-23 02:56:34.434516: step: 436/527, loss: 0.019963454455137253 2023-01-23 02:56:35.549886: step: 440/527, loss: 0.04179992899298668 2023-01-23 02:56:36.668680: step: 444/527, loss: 0.0018626212840899825 2023-01-23 02:56:37.758587: step: 448/527, loss: 0.02898721769452095 2023-01-23 02:56:38.851185: step: 452/527, loss: 6.0749058320652694e-05 2023-01-23 02:56:39.955667: step: 456/527, loss: 0.0012759207747876644 2023-01-23 02:56:41.083491: step: 460/527, loss: 0.0002582549932412803 2023-01-23 02:56:42.197530: step: 464/527, loss: 0.030570032075047493 2023-01-23 02:56:43.302765: step: 468/527, loss: 0.0023875238839536905 2023-01-23 02:56:44.435383: step: 472/527, loss: 0.011507606133818626 2023-01-23 02:56:45.555319: step: 476/527, loss: 0.004371261689811945 2023-01-23 02:56:46.663385: step: 480/527, loss: 0.007986831478774548 2023-01-23 02:56:47.792647: step: 484/527, loss: 0.0025652884505689144 2023-01-23 02:56:48.908912: step: 488/527, loss: 0.014095497317612171 2023-01-23 02:56:50.035265: step: 492/527, loss: 0.011564064770936966 2023-01-23 02:56:51.146487: step: 496/527, loss: 0.004274559207260609 2023-01-23 02:56:52.269148: step: 500/527, loss: 0.017993737012147903 2023-01-23 02:56:53.391305: step: 504/527, loss: 0.000107812877104152 2023-01-23 02:56:54.502201: step: 508/527, loss: 0.003637599991634488 2023-01-23 02:56:55.600310: step: 512/527, loss: 0.0009717941284179688 2023-01-23 02:56:56.735720: step: 516/527, loss: 0.037944599986076355 2023-01-23 02:56:57.865848: step: 520/527, loss: 0.0600128173828125 2023-01-23 02:56:58.979402: step: 524/527, loss: 0.01699652150273323 2023-01-23 02:57:00.066522: step: 528/527, loss: 0.008876705542206764 2023-01-23 02:57:01.153087: step: 532/527, loss: 0.00016136169142555445 2023-01-23 02:57:02.247719: step: 536/527, loss: 0.04047584533691406 2023-01-23 02:57:03.365792: step: 540/527, loss: 0.021314620971679688 2023-01-23 02:57:04.447317: step: 544/527, loss: 0.01648111455142498 2023-01-23 02:57:05.552882: step: 548/527, loss: 0.03600750118494034 2023-01-23 02:57:06.683916: step: 552/527, loss: 0.0039390563033521175 2023-01-23 02:57:07.792381: step: 556/527, loss: 3.471374657237902e-05 2023-01-23 02:57:08.957378: step: 560/527, loss: 0.029330160468816757 2023-01-23 02:57:10.068511: step: 564/527, loss: 0.04944801330566406 2023-01-23 02:57:11.199585: step: 568/527, loss: 0.008247566409409046 2023-01-23 02:57:12.359980: step: 572/527, loss: 0.011773395352065563 2023-01-23 02:57:13.456287: step: 576/527, loss: 7.591248140670359e-05 2023-01-23 02:57:14.569168: step: 580/527, loss: 0.04059505835175514 2023-01-23 02:57:15.676943: step: 584/527, loss: 3.337860107421875e-05 2023-01-23 02:57:16.812684: step: 588/527, loss: 0.0006385803571902215 2023-01-23 02:57:17.925841: step: 592/527, loss: 0.043839357793331146 2023-01-23 02:57:19.019661: step: 596/527, loss: 0.00633692741394043 2023-01-23 02:57:20.131222: step: 600/527, loss: 0.00019965172396041453 2023-01-23 02:57:21.228239: step: 604/527, loss: 0.00012164115469204262 2023-01-23 02:57:22.311300: step: 608/527, loss: 0.00040493012056685984 2023-01-23 02:57:23.416866: step: 612/527, loss: 0.05111751705408096 2023-01-23 02:57:24.525189: step: 616/527, loss: 0.01307830773293972 2023-01-23 02:57:25.635830: step: 620/527, loss: 0.00023560522822663188 2023-01-23 02:57:26.773609: step: 624/527, loss: 0.00023727417283225805 2023-01-23 02:57:27.864227: step: 628/527, loss: 0.0669642984867096 2023-01-23 02:57:28.990791: step: 632/527, loss: 0.05983276292681694 2023-01-23 02:57:30.115097: step: 636/527, loss: 0.000390899193007499 2023-01-23 02:57:31.240742: step: 640/527, loss: 0.04865474998950958 2023-01-23 02:57:32.333115: step: 644/527, loss: 0.0013248443137854338 2023-01-23 02:57:33.430655: step: 648/527, loss: 0.0006049156654626131 2023-01-23 02:57:34.566963: step: 652/527, loss: 0.0002872467157430947 2023-01-23 02:57:35.648853: step: 656/527, loss: 0.0272811409085989 2023-01-23 02:57:36.771854: step: 660/527, loss: 0.25415247678756714 2023-01-23 02:57:37.884353: step: 664/527, loss: 0.029935359954833984 2023-01-23 02:57:39.004844: step: 668/527, loss: 0.04670019447803497 2023-01-23 02:57:40.121226: step: 672/527, loss: 0.00045957567635923624 2023-01-23 02:57:41.235793: step: 676/527, loss: 0.0037940978072583675 2023-01-23 02:57:42.332713: step: 680/527, loss: 0.040196992456912994 2023-01-23 02:57:43.426075: step: 684/527, loss: 0.011786842718720436 2023-01-23 02:57:44.555449: step: 688/527, loss: 0.02827005460858345 2023-01-23 02:57:45.688224: step: 692/527, loss: 0.02662963978946209 2023-01-23 02:57:46.806461: step: 696/527, loss: 0.21253737807273865 2023-01-23 02:57:47.976941: step: 700/527, loss: 0.0035976411309093237 2023-01-23 02:57:49.093044: step: 704/527, loss: 0.0009593010181561112 2023-01-23 02:57:50.236924: step: 708/527, loss: 0.008925819769501686 2023-01-23 02:57:51.344953: step: 712/527, loss: 0.0007654189830645919 2023-01-23 02:57:52.481806: step: 716/527, loss: 0.006103515625 2023-01-23 02:57:53.603441: step: 720/527, loss: 0.03998818248510361 2023-01-23 02:57:54.727617: step: 724/527, loss: 0.0035259248688817024 2023-01-23 02:57:55.834938: step: 728/527, loss: 0.01492223795503378 2023-01-23 02:57:56.970742: step: 732/527, loss: 0.001525116036646068 2023-01-23 02:57:58.068241: step: 736/527, loss: 0.0023641586303710938 2023-01-23 02:57:59.168551: step: 740/527, loss: 0.00135297782253474 2023-01-23 02:58:00.247766: step: 744/527, loss: 0.006973076146095991 2023-01-23 02:58:01.382173: step: 748/527, loss: 0.0015350342728197575 2023-01-23 02:58:02.479918: step: 752/527, loss: 0.019895363599061966 2023-01-23 02:58:03.583235: step: 756/527, loss: 0.003822136204689741 2023-01-23 02:58:04.694667: step: 760/527, loss: 0.0054184915497899055 2023-01-23 02:58:05.826609: step: 764/527, loss: 0.02983722649514675 2023-01-23 02:58:06.914641: step: 768/527, loss: 0.0013895034790039062 2023-01-23 02:58:08.026059: step: 772/527, loss: 2.2029875253792852e-05 2023-01-23 02:58:09.148754: step: 776/527, loss: 0.00964050367474556 2023-01-23 02:58:10.271698: step: 780/527, loss: 0.0019660950638353825 2023-01-23 02:58:11.384823: step: 784/527, loss: 0.012362576089799404 2023-01-23 02:58:12.502936: step: 788/527, loss: 0.001563215279020369 2023-01-23 02:58:13.625853: step: 792/527, loss: 0.0016529083950445056 2023-01-23 02:58:14.736096: step: 796/527, loss: 0.013444995507597923 2023-01-23 02:58:15.842088: step: 800/527, loss: 0.03771228715777397 2023-01-23 02:58:16.993825: step: 804/527, loss: 0.02162233740091324 2023-01-23 02:58:18.127382: step: 808/527, loss: 0.00601959228515625 2023-01-23 02:58:19.247919: step: 812/527, loss: 0.08127345889806747 2023-01-23 02:58:20.368209: step: 816/527, loss: 0.00650787353515625 2023-01-23 02:58:21.484133: step: 820/527, loss: 0.0005459785461425781 2023-01-23 02:58:22.579827: step: 824/527, loss: 0.0197772029787302 2023-01-23 02:58:23.712497: step: 828/527, loss: 0.0019708634354174137 2023-01-23 02:58:24.818662: step: 832/527, loss: 0.0007402420160360634 2023-01-23 02:58:25.936333: step: 836/527, loss: 9.5367431640625e-07 2023-01-23 02:58:27.049855: step: 840/527, loss: 0.0016109467251226306 2023-01-23 02:58:28.187196: step: 844/527, loss: 0.020163822919130325 2023-01-23 02:58:29.300470: step: 848/527, loss: 0.010096645914018154 2023-01-23 02:58:30.372672: step: 852/527, loss: 0.02176341973245144 2023-01-23 02:58:31.507974: step: 856/527, loss: 0.16528044641017914 2023-01-23 02:58:32.613008: step: 860/527, loss: 0.0001447677641408518 2023-01-23 02:58:33.712256: step: 864/527, loss: 0.0037090301048010588 2023-01-23 02:58:34.830231: step: 868/527, loss: 0.001271915389224887 2023-01-23 02:58:35.947086: step: 872/527, loss: 0.0018568038940429688 2023-01-23 02:58:37.041662: step: 876/527, loss: 0.0020357132889330387 2023-01-23 02:58:38.149748: step: 880/527, loss: 0.02779207192361355 2023-01-23 02:58:39.275972: step: 884/527, loss: 0.008112144656479359 2023-01-23 02:58:40.409168: step: 888/527, loss: 0.006324195768684149 2023-01-23 02:58:41.522796: step: 892/527, loss: 4.9591064453125e-05 2023-01-23 02:58:42.667770: step: 896/527, loss: 0.05727434158325195 2023-01-23 02:58:43.771061: step: 900/527, loss: 5.3596493671648204e-05 2023-01-23 02:58:44.852361: step: 904/527, loss: 0.007550620939582586 2023-01-23 02:58:45.955729: step: 908/527, loss: 0.029595421627163887 2023-01-23 02:58:47.052847: step: 912/527, loss: 0.03451580926775932 2023-01-23 02:58:48.171312: step: 916/527, loss: 0.38295039534568787 2023-01-23 02:58:49.317530: step: 920/527, loss: 0.00825195387005806 2023-01-23 02:58:50.411768: step: 924/527, loss: 0.004049396608024836 2023-01-23 02:58:51.522769: step: 928/527, loss: 0.0031559946946799755 2023-01-23 02:58:52.642149: step: 932/527, loss: 0.02059326134622097 2023-01-23 02:58:53.791992: step: 936/527, loss: 0.7522391676902771 2023-01-23 02:58:54.923917: step: 940/527, loss: 0.015984343364834785 2023-01-23 02:58:56.041825: step: 944/527, loss: 0.0006873130332678556 2023-01-23 02:58:57.158569: step: 948/527, loss: 0.0054479604586958885 2023-01-23 02:58:58.277388: step: 952/527, loss: 0.001815700437873602 2023-01-23 02:58:59.375192: step: 956/527, loss: 0.0024441718123853207 2023-01-23 02:59:00.515169: step: 960/527, loss: 0.0016455650329589844 2023-01-23 02:59:01.633288: step: 964/527, loss: 0.00030040740966796875 2023-01-23 02:59:02.738066: step: 968/527, loss: 0.021976470947265625 2023-01-23 02:59:03.840287: step: 972/527, loss: 0.002544403076171875 2023-01-23 02:59:04.955474: step: 976/527, loss: 0.05015239864587784 2023-01-23 02:59:06.065075: step: 980/527, loss: 0.006952381227165461 2023-01-23 02:59:07.170828: step: 984/527, loss: 0.0010344506008550525 2023-01-23 02:59:08.291637: step: 988/527, loss: 0.002208113670349121 2023-01-23 02:59:09.366813: step: 992/527, loss: 0.00028774738893844187 2023-01-23 02:59:10.461317: step: 996/527, loss: 0.00961084384471178 2023-01-23 02:59:11.569592: step: 1000/527, loss: 0.0016227723099291325 2023-01-23 02:59:12.675683: step: 1004/527, loss: 0.007226848509162664 2023-01-23 02:59:13.787202: step: 1008/527, loss: 0.046210192143917084 2023-01-23 02:59:14.885446: step: 1012/527, loss: 0.000362634687917307 2023-01-23 02:59:16.034960: step: 1016/527, loss: 0.006450653076171875 2023-01-23 02:59:17.144738: step: 1020/527, loss: 0.017424391582608223 2023-01-23 02:59:18.251752: step: 1024/527, loss: 0.010802841745316982 2023-01-23 02:59:19.382300: step: 1028/527, loss: 0.03613724932074547 2023-01-23 02:59:20.504938: step: 1032/527, loss: 0.007034587673842907 2023-01-23 02:59:21.599841: step: 1036/527, loss: 0.0004828452947549522 2023-01-23 02:59:22.738916: step: 1040/527, loss: 0.0061883931048214436 2023-01-23 02:59:23.853749: step: 1044/527, loss: 0.006003951653838158 2023-01-23 02:59:24.955606: step: 1048/527, loss: 0.0011585236061364412 2023-01-23 02:59:26.066364: step: 1052/527, loss: 0.002670479007065296 2023-01-23 02:59:27.189380: step: 1056/527, loss: 0.002221822738647461 2023-01-23 02:59:28.283127: step: 1060/527, loss: 0.008598614484071732 2023-01-23 02:59:29.369655: step: 1064/527, loss: 0.029587937518954277 2023-01-23 02:59:30.461206: step: 1068/527, loss: 8.630752745375503e-06 2023-01-23 02:59:31.565943: step: 1072/527, loss: 0.023863792419433594 2023-01-23 02:59:32.678312: step: 1076/527, loss: 0.015695666894316673 2023-01-23 02:59:33.793195: step: 1080/527, loss: 0.005527401342988014 2023-01-23 02:59:34.922565: step: 1084/527, loss: 0.01373825129121542 2023-01-23 02:59:36.047607: step: 1088/527, loss: 0.016524458304047585 2023-01-23 02:59:37.192882: step: 1092/527, loss: 0.007532882504165173 2023-01-23 02:59:38.337191: step: 1096/527, loss: 0.02640705183148384 2023-01-23 02:59:39.458649: step: 1100/527, loss: 0.0006666183471679688 2023-01-23 02:59:40.575978: step: 1104/527, loss: 0.012154245749115944 2023-01-23 02:59:41.682191: step: 1108/527, loss: 0.0009609222179278731 2023-01-23 02:59:42.835269: step: 1112/527, loss: 0.00024280548677779734 2023-01-23 02:59:43.953180: step: 1116/527, loss: 0.007675552275031805 2023-01-23 02:59:45.074983: step: 1120/527, loss: 0.008370542898774147 2023-01-23 02:59:46.193347: step: 1124/527, loss: 0.00086722377454862 2023-01-23 02:59:47.305351: step: 1128/527, loss: 0.008390497416257858 2023-01-23 02:59:48.414080: step: 1132/527, loss: 0.026456832885742188 2023-01-23 02:59:49.510955: step: 1136/527, loss: 0.0008254528511315584 2023-01-23 02:59:50.623501: step: 1140/527, loss: 0.012426377274096012 2023-01-23 02:59:51.750466: step: 1144/527, loss: 0.014566803351044655 2023-01-23 02:59:52.885718: step: 1148/527, loss: 0.012318992987275124 2023-01-23 02:59:54.009695: step: 1152/527, loss: 0.0003888130304403603 2023-01-23 02:59:55.120348: step: 1156/527, loss: 0.013282394036650658 2023-01-23 02:59:56.219869: step: 1160/527, loss: 0.002064991043880582 2023-01-23 02:59:57.344391: step: 1164/527, loss: 0.009982109069824219 2023-01-23 02:59:58.472976: step: 1168/527, loss: 0.017261316999793053 2023-01-23 02:59:59.576909: step: 1172/527, loss: 0.011104965582489967 2023-01-23 03:00:00.662462: step: 1176/527, loss: 0.004796791356056929 2023-01-23 03:00:01.774578: step: 1180/527, loss: 2.0122528439969756e-05 2023-01-23 03:00:02.883717: step: 1184/527, loss: 0.02539539337158203 2023-01-23 03:00:04.016646: step: 1188/527, loss: 0.0001730919029796496 2023-01-23 03:00:05.112980: step: 1192/527, loss: 0.0024204254150390625 2023-01-23 03:00:06.183306: step: 1196/527, loss: 0.03469066694378853 2023-01-23 03:00:07.306933: step: 1200/527, loss: 0.009428691118955612 2023-01-23 03:00:08.411289: step: 1204/527, loss: 0.015006923116743565 2023-01-23 03:00:09.532112: step: 1208/527, loss: 0.0003141403431072831 2023-01-23 03:00:10.648357: step: 1212/527, loss: 0.003090381622314453 2023-01-23 03:00:11.756127: step: 1216/527, loss: 0.01769104041159153 2023-01-23 03:00:12.914513: step: 1220/527, loss: 0.0014437675708904862 2023-01-23 03:00:14.047442: step: 1224/527, loss: 0.03375072404742241 2023-01-23 03:00:15.130436: step: 1228/527, loss: 0.0007547378772869706 2023-01-23 03:00:16.251725: step: 1232/527, loss: 0.04166841506958008 2023-01-23 03:00:17.391473: step: 1236/527, loss: 0.00985939521342516 2023-01-23 03:00:18.478416: step: 1240/527, loss: 0.00032000543433241546 2023-01-23 03:00:19.623708: step: 1244/527, loss: 0.0033652307465672493 2023-01-23 03:00:20.754702: step: 1248/527, loss: 0.015625953674316406 2023-01-23 03:00:21.863191: step: 1252/527, loss: 0.010489463806152344 2023-01-23 03:00:22.985725: step: 1256/527, loss: 0.00011768341209972277 2023-01-23 03:00:24.094591: step: 1260/527, loss: 0.04278545454144478 2023-01-23 03:00:25.230575: step: 1264/527, loss: 7.123947580112144e-05 2023-01-23 03:00:26.364554: step: 1268/527, loss: 0.008060836233198643 2023-01-23 03:00:27.481692: step: 1272/527, loss: 0.011568451300263405 2023-01-23 03:00:28.607020: step: 1276/527, loss: 0.03861665725708008 2023-01-23 03:00:29.726806: step: 1280/527, loss: 0.0035602569114416838 2023-01-23 03:00:30.851427: step: 1284/527, loss: 0.018339728936553 2023-01-23 03:00:31.995353: step: 1288/527, loss: 0.00015249251737259328 2023-01-23 03:00:33.137077: step: 1292/527, loss: 0.009997749701142311 2023-01-23 03:00:34.251544: step: 1296/527, loss: 0.07165279984474182 2023-01-23 03:00:35.380093: step: 1300/527, loss: 0.007937287911772728 2023-01-23 03:00:36.543808: step: 1304/527, loss: 0.00026302336482331157 2023-01-23 03:00:37.649536: step: 1308/527, loss: 0.0006668090936727822 2023-01-23 03:00:38.770698: step: 1312/527, loss: 0.04505300521850586 2023-01-23 03:00:39.883238: step: 1316/527, loss: 0.016960715875029564 2023-01-23 03:00:41.026884: step: 1320/527, loss: 3.4618376957951114e-05 2023-01-23 03:00:42.158128: step: 1324/527, loss: 0.00042247772216796875 2023-01-23 03:00:43.264046: step: 1328/527, loss: 0.0004337310965638608 2023-01-23 03:00:44.397751: step: 1332/527, loss: 0.004719734191894531 2023-01-23 03:00:45.552474: step: 1336/527, loss: 0.026227571070194244 2023-01-23 03:00:46.722960: step: 1340/527, loss: 0.028843021020293236 2023-01-23 03:00:47.845126: step: 1344/527, loss: 0.009689902886748314 2023-01-23 03:00:48.963335: step: 1348/527, loss: 0.00145721435546875 2023-01-23 03:00:50.066106: step: 1352/527, loss: 0.009174251928925514 2023-01-23 03:00:51.181410: step: 1356/527, loss: 0.0005667686928063631 2023-01-23 03:00:52.283234: step: 1360/527, loss: 0.002259874250739813 2023-01-23 03:00:53.409731: step: 1364/527, loss: 0.030611135065555573 2023-01-23 03:00:54.515389: step: 1368/527, loss: 0.011901665478944778 2023-01-23 03:00:55.643328: step: 1372/527, loss: 0.004137611482292414 2023-01-23 03:00:56.784545: step: 1376/527, loss: 0.005522060673683882 2023-01-23 03:00:57.920630: step: 1380/527, loss: 0.001278781914152205 2023-01-23 03:00:59.040563: step: 1384/527, loss: 0.0007923126104287803 2023-01-23 03:01:00.150226: step: 1388/527, loss: 0.02739124186336994 2023-01-23 03:01:01.278115: step: 1392/527, loss: 0.0018606185913085938 2023-01-23 03:01:02.390601: step: 1396/527, loss: 0.014475155621767044 2023-01-23 03:01:03.494675: step: 1400/527, loss: 0.0003971099795307964 2023-01-23 03:01:04.635485: step: 1404/527, loss: 0.01198568381369114 2023-01-23 03:01:05.763796: step: 1408/527, loss: 0.004773902706801891 2023-01-23 03:01:06.876608: step: 1412/527, loss: 0.003279113909229636 2023-01-23 03:01:07.981304: step: 1416/527, loss: 0.022846031934022903 2023-01-23 03:01:09.089217: step: 1420/527, loss: 0.0006052017561160028 2023-01-23 03:01:10.210332: step: 1424/527, loss: 0.007365536410361528 2023-01-23 03:01:11.329382: step: 1428/527, loss: 0.0043697357177734375 2023-01-23 03:01:12.458139: step: 1432/527, loss: 0.002794933505356312 2023-01-23 03:01:13.630567: step: 1436/527, loss: 0.036139871925115585 2023-01-23 03:01:14.763465: step: 1440/527, loss: 0.028938675299286842 2023-01-23 03:01:15.903190: step: 1444/527, loss: 0.010556983761489391 2023-01-23 03:01:17.025939: step: 1448/527, loss: 0.011183071881532669 2023-01-23 03:01:18.125547: step: 1452/527, loss: 0.0015433788066729903 2023-01-23 03:01:19.252559: step: 1456/527, loss: 0.00016460419283248484 2023-01-23 03:01:20.362895: step: 1460/527, loss: 0.17852649092674255 2023-01-23 03:01:21.512691: step: 1464/527, loss: 0.014683246612548828 2023-01-23 03:01:22.669868: step: 1468/527, loss: 0.0011581419967114925 2023-01-23 03:01:23.795181: step: 1472/527, loss: 0.008943319320678711 2023-01-23 03:01:24.920974: step: 1476/527, loss: 0.036548420786857605 2023-01-23 03:01:26.027638: step: 1480/527, loss: 0.003646278288215399 2023-01-23 03:01:27.155366: step: 1484/527, loss: 0.00031147003755904734 2023-01-23 03:01:28.263955: step: 1488/527, loss: 0.0014142035506665707 2023-01-23 03:01:29.371612: step: 1492/527, loss: 0.0134903434664011 2023-01-23 03:01:30.497490: step: 1496/527, loss: 0.003542137099429965 2023-01-23 03:01:31.594098: step: 1500/527, loss: 0.0004935264587402344 2023-01-23 03:01:32.711109: step: 1504/527, loss: 0.0002878189261537045 2023-01-23 03:01:33.827608: step: 1508/527, loss: 0.025111103430390358 2023-01-23 03:01:34.933787: step: 1512/527, loss: 0.0046749114990234375 2023-01-23 03:01:36.032357: step: 1516/527, loss: 0.00018749237642623484 2023-01-23 03:01:37.141005: step: 1520/527, loss: 0.04882211610674858 2023-01-23 03:01:38.251196: step: 1524/527, loss: 0.00012826919555664062 2023-01-23 03:01:39.361747: step: 1528/527, loss: 0.005302906036376953 2023-01-23 03:01:40.517213: step: 1532/527, loss: 0.01220398023724556 2023-01-23 03:01:41.655623: step: 1536/527, loss: 0.07104186713695526 2023-01-23 03:01:42.765066: step: 1540/527, loss: 0.006728076841682196 2023-01-23 03:01:43.863198: step: 1544/527, loss: 0.016280079260468483 2023-01-23 03:01:44.978558: step: 1548/527, loss: 0.0017538070678710938 2023-01-23 03:01:46.072997: step: 1552/527, loss: 0.0673857256770134 2023-01-23 03:01:47.206673: step: 1556/527, loss: 0.03412008285522461 2023-01-23 03:01:48.291688: step: 1560/527, loss: 0.0036411285400390625 2023-01-23 03:01:49.390138: step: 1564/527, loss: 0.00609746016561985 2023-01-23 03:01:50.493254: step: 1568/527, loss: 0.01572742499411106 2023-01-23 03:01:51.640864: step: 1572/527, loss: 1.33514404296875e-05 2023-01-23 03:01:52.764076: step: 1576/527, loss: 0.01256332453340292 2023-01-23 03:01:53.897899: step: 1580/527, loss: 0.06875848770141602 2023-01-23 03:01:55.009856: step: 1584/527, loss: 0.024183178320527077 2023-01-23 03:01:56.108795: step: 1588/527, loss: 0.0007033824804238975 2023-01-23 03:01:57.224480: step: 1592/527, loss: 0.014115714468061924 2023-01-23 03:01:58.354077: step: 1596/527, loss: 0.007302188780158758 2023-01-23 03:01:59.453252: step: 1600/527, loss: 4.482269287109375e-05 2023-01-23 03:02:00.566799: step: 1604/527, loss: 0.0024428367614746094 2023-01-23 03:02:01.678357: step: 1608/527, loss: 0.00018796921358443797 2023-01-23 03:02:02.803852: step: 1612/527, loss: 0.0011894701747223735 2023-01-23 03:02:03.949434: step: 1616/527, loss: 0.0028945922385901213 2023-01-23 03:02:05.083987: step: 1620/527, loss: 0.0006848335615359247 2023-01-23 03:02:06.194657: step: 1624/527, loss: 0.005323886871337891 2023-01-23 03:02:07.302550: step: 1628/527, loss: 0.01225967425853014 2023-01-23 03:02:08.413035: step: 1632/527, loss: 0.0009273529867641628 2023-01-23 03:02:09.532318: step: 1636/527, loss: 0.002427005907520652 2023-01-23 03:02:10.650077: step: 1640/527, loss: 0.00592384347692132 2023-01-23 03:02:11.738604: step: 1644/527, loss: 0.003995371051132679 2023-01-23 03:02:12.870016: step: 1648/527, loss: 4.882812572759576e-05 2023-01-23 03:02:13.972192: step: 1652/527, loss: 0.0020814896561205387 2023-01-23 03:02:15.083204: step: 1656/527, loss: 0.0005338669288903475 2023-01-23 03:02:16.222175: step: 1660/527, loss: 0.025425149127840996 2023-01-23 03:02:17.328536: step: 1664/527, loss: 0.0006948232767172158 2023-01-23 03:02:18.461098: step: 1668/527, loss: 0.015040207654237747 2023-01-23 03:02:19.568774: step: 1672/527, loss: 0.05722980573773384 2023-01-23 03:02:20.701367: step: 1676/527, loss: 0.013215922750532627 2023-01-23 03:02:21.857833: step: 1680/527, loss: 0.0012277603382244706 2023-01-23 03:02:22.969555: step: 1684/527, loss: 0.009001731872558594 2023-01-23 03:02:24.086189: step: 1688/527, loss: 0.0006127357482910156 2023-01-23 03:02:25.208349: step: 1692/527, loss: 0.00022125244140625 2023-01-23 03:02:26.325211: step: 1696/527, loss: 0.02458496019244194 2023-01-23 03:02:27.410943: step: 1700/527, loss: 0.0021808624733239412 2023-01-23 03:02:28.515086: step: 1704/527, loss: 0.0029230117797851562 2023-01-23 03:02:29.649103: step: 1708/527, loss: 0.002620697021484375 2023-01-23 03:02:30.770884: step: 1712/527, loss: 0.00016393660916946828 2023-01-23 03:02:31.904236: step: 1716/527, loss: 0.0006011963123455644 2023-01-23 03:02:33.022343: step: 1720/527, loss: 0.005690097808837891 2023-01-23 03:02:34.125591: step: 1724/527, loss: 0.00026326178340241313 2023-01-23 03:02:35.255193: step: 1728/527, loss: 0.02092151716351509 2023-01-23 03:02:36.387964: step: 1732/527, loss: 0.011891174130141735 2023-01-23 03:02:37.498801: step: 1736/527, loss: 0.007113742642104626 2023-01-23 03:02:38.668916: step: 1740/527, loss: 0.04443969950079918 2023-01-23 03:02:39.747097: step: 1744/527, loss: 0.03164253383874893 2023-01-23 03:02:40.906258: step: 1748/527, loss: 0.020047379657626152 2023-01-23 03:02:42.022612: step: 1752/527, loss: 0.0007253646617755294 2023-01-23 03:02:43.153125: step: 1756/527, loss: 0.030295561999082565 2023-01-23 03:02:44.255525: step: 1760/527, loss: 0.013912391848862171 2023-01-23 03:02:45.376302: step: 1764/527, loss: 0.005179405212402344 2023-01-23 03:02:46.524413: step: 1768/527, loss: 0.002670574001967907 2023-01-23 03:02:47.644607: step: 1772/527, loss: 0.0011358261108398438 2023-01-23 03:02:48.763559: step: 1776/527, loss: 0.040836237370967865 2023-01-23 03:02:49.850980: step: 1780/527, loss: 0.00030236245947889984 2023-01-23 03:02:50.983034: step: 1784/527, loss: 0.030338477343320847 2023-01-23 03:02:52.084026: step: 1788/527, loss: 0.017296411097049713 2023-01-23 03:02:53.199602: step: 1792/527, loss: 0.0016822816105559468 2023-01-23 03:02:54.348710: step: 1796/527, loss: 9.174347360385582e-05 2023-01-23 03:02:55.490593: step: 1800/527, loss: 0.012796211056411266 2023-01-23 03:02:56.621694: step: 1804/527, loss: 0.0012359619140625 2023-01-23 03:02:57.729320: step: 1808/527, loss: 0.010367202572524548 2023-01-23 03:02:58.882766: step: 1812/527, loss: 0.02257404290139675 2023-01-23 03:03:00.034351: step: 1816/527, loss: 0.07814665138721466 2023-01-23 03:03:01.199029: step: 1820/527, loss: 0.021368028596043587 2023-01-23 03:03:02.337423: step: 1824/527, loss: 0.00029582978459075093 2023-01-23 03:03:03.431112: step: 1828/527, loss: 2.2554399038199335e-05 2023-01-23 03:03:04.558280: step: 1832/527, loss: 0.0027396203950047493 2023-01-23 03:03:05.662829: step: 1836/527, loss: 0.0002101898135151714 2023-01-23 03:03:06.820714: step: 1840/527, loss: 0.007755280006676912 2023-01-23 03:03:07.907911: step: 1844/527, loss: 0.055211640894412994 2023-01-23 03:03:08.989321: step: 1848/527, loss: 0.0020311353728175163 2023-01-23 03:03:10.087505: step: 1852/527, loss: 0.04140214994549751 2023-01-23 03:03:11.216856: step: 1856/527, loss: 7.877349707996473e-05 2023-01-23 03:03:12.340541: step: 1860/527, loss: 0.0020973205100744963 2023-01-23 03:03:13.454559: step: 1864/527, loss: 0.002594089601188898 2023-01-23 03:03:14.576913: step: 1868/527, loss: 0.0022974968887865543 2023-01-23 03:03:15.692443: step: 1872/527, loss: 6.966591172385961e-05 2023-01-23 03:03:16.788040: step: 1876/527, loss: 0.06346073746681213 2023-01-23 03:03:17.895015: step: 1880/527, loss: 0.020610475912690163 2023-01-23 03:03:19.023240: step: 1884/527, loss: 0.0008833885076455772 2023-01-23 03:03:20.147114: step: 1888/527, loss: 0.02894468419253826 2023-01-23 03:03:21.251457: step: 1892/527, loss: 0.012651252560317516 2023-01-23 03:03:22.386920: step: 1896/527, loss: 0.03854770585894585 2023-01-23 03:03:23.482988: step: 1900/527, loss: 0.0008977890247479081 2023-01-23 03:03:24.575990: step: 1904/527, loss: 0.0003326415899209678 2023-01-23 03:03:25.687573: step: 1908/527, loss: 0.0006975174183025956 2023-01-23 03:03:26.806638: step: 1912/527, loss: 0.00723800715059042 2023-01-23 03:03:27.945404: step: 1916/527, loss: 0.5513867139816284 2023-01-23 03:03:29.063424: step: 1920/527, loss: 0.14393559098243713 2023-01-23 03:03:30.178185: step: 1924/527, loss: 0.009127616882324219 2023-01-23 03:03:31.278955: step: 1928/527, loss: 0.000559902167879045 2023-01-23 03:03:32.387543: step: 1932/527, loss: 0.01709294319152832 2023-01-23 03:03:33.498935: step: 1936/527, loss: 0.007925224490463734 2023-01-23 03:03:34.615564: step: 1940/527, loss: 0.01940007321536541 2023-01-23 03:03:35.726321: step: 1944/527, loss: 0.004201698116958141 2023-01-23 03:03:36.865943: step: 1948/527, loss: 0.001144838286563754 2023-01-23 03:03:37.983894: step: 1952/527, loss: 0.003654670901596546 2023-01-23 03:03:39.097886: step: 1956/527, loss: 0.02370905876159668 2023-01-23 03:03:40.229064: step: 1960/527, loss: 0.001982021378353238 2023-01-23 03:03:41.361892: step: 1964/527, loss: 0.02887563779950142 2023-01-23 03:03:42.493210: step: 1968/527, loss: 0.0001663207949604839 2023-01-23 03:03:43.616446: step: 1972/527, loss: 0.0005970001220703125 2023-01-23 03:03:44.742497: step: 1976/527, loss: 0.0275744441896677 2023-01-23 03:03:45.833405: step: 1980/527, loss: 8.18252592580393e-05 2023-01-23 03:03:46.938610: step: 1984/527, loss: 0.004429054446518421 2023-01-23 03:03:48.060938: step: 1988/527, loss: 0.00226780166849494 2023-01-23 03:03:49.178326: step: 1992/527, loss: 0.0028839111328125 2023-01-23 03:03:50.266354: step: 1996/527, loss: 0.015825461596250534 2023-01-23 03:03:51.361526: step: 2000/527, loss: 0.00030078887357376516 2023-01-23 03:03:52.465434: step: 2004/527, loss: 0.0005528450128622353 2023-01-23 03:03:53.556753: step: 2008/527, loss: 0.0010038375621661544 2023-01-23 03:03:54.667391: step: 2012/527, loss: 0.011748886667191982 2023-01-23 03:03:55.771295: step: 2016/527, loss: 0.007998275570571423 2023-01-23 03:03:56.872184: step: 2020/527, loss: 0.04023732990026474 2023-01-23 03:03:57.960742: step: 2024/527, loss: 0.0004654884396586567 2023-01-23 03:03:59.091676: step: 2028/527, loss: 0.09385576099157333 2023-01-23 03:04:00.206702: step: 2032/527, loss: 0.0006269931909628212 2023-01-23 03:04:01.315346: step: 2036/527, loss: 0.014314842410385609 2023-01-23 03:04:02.462214: step: 2040/527, loss: 0.005443572998046875 2023-01-23 03:04:03.570746: step: 2044/527, loss: 0.023126699030399323 2023-01-23 03:04:04.650136: step: 2048/527, loss: 0.0008742331992834806 2023-01-23 03:04:05.783846: step: 2052/527, loss: 0.00200653076171875 2023-01-23 03:04:06.898760: step: 2056/527, loss: 0.01771564409136772 2023-01-23 03:04:07.996486: step: 2060/527, loss: 0.0006164073711261153 2023-01-23 03:04:09.114826: step: 2064/527, loss: 0.02081775665283203 2023-01-23 03:04:10.256014: step: 2068/527, loss: 0.00015945434279274195 2023-01-23 03:04:11.360851: step: 2072/527, loss: 0.004548454191535711 2023-01-23 03:04:12.483663: step: 2076/527, loss: 0.004913234617561102 2023-01-23 03:04:13.582521: step: 2080/527, loss: 0.05552806705236435 2023-01-23 03:04:14.699077: step: 2084/527, loss: 0.005438041873276234 2023-01-23 03:04:15.810275: step: 2088/527, loss: 0.025446033105254173 2023-01-23 03:04:16.932214: step: 2092/527, loss: 0.024950265884399414 2023-01-23 03:04:18.027617: step: 2096/527, loss: 0.28835418820381165 2023-01-23 03:04:19.155151: step: 2100/527, loss: 0.004086685366928577 2023-01-23 03:04:20.281901: step: 2104/527, loss: 0.07227544486522675 2023-01-23 03:04:21.378168: step: 2108/527, loss: 0.024315834045410156 ================================================== Loss: 0.019 -------------------- Dev: {'event': {'p': 0.5776892430278885, 'r': 0.7723035952063915, 'f1': 0.6609686609686609}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Test: {'event': {'p': 0.6184327693677649, 'r': 0.7937142857142857, 'f1': 0.6951951951951952}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Chinese: {'event': {'p': 0.5697674418604651, 'r': 0.9074074074074074, 'f1': 0.7}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Korean: {'event': {'p': 0.5740740740740741, 'r': 0.49206349206349204, 'f1': 0.5299145299145299}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Russian: {'event': {'p': 0.425, 'r': 0.4722222222222222, 'f1': 0.4473684210526316}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066252587991718, 'r': 0.7802929427430093, 'f1': 0.6825859056493885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Eng Test for Korean: {'event': {'p': 0.62580054894785, 'r': 0.7817142857142857, 'f1': 0.6951219512195121}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 21 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:05:03.015348: step: 4/527, loss: 0.0003424167516641319 2023-01-23 03:05:04.137114: step: 8/527, loss: 0.014571189880371094 2023-01-23 03:05:05.283704: step: 12/527, loss: 0.03218331187963486 2023-01-23 03:05:06.407853: step: 16/527, loss: 0.00018224716768600047 2023-01-23 03:05:07.545874: step: 20/527, loss: 0.029027651995420456 2023-01-23 03:05:08.663978: step: 24/527, loss: 0.0012723446125164628 2023-01-23 03:05:09.799354: step: 28/527, loss: 0.004144716076552868 2023-01-23 03:05:10.929680: step: 32/527, loss: 0.0002601623418740928 2023-01-23 03:05:12.049046: step: 36/527, loss: 0.018167970702052116 2023-01-23 03:05:13.150308: step: 40/527, loss: 0.0001473009615438059 2023-01-23 03:05:14.236975: step: 44/527, loss: 0.005300998687744141 2023-01-23 03:05:15.365012: step: 48/527, loss: 0.002562999725341797 2023-01-23 03:05:16.459498: step: 52/527, loss: 1.1634827387752011e-05 2023-01-23 03:05:17.576561: step: 56/527, loss: 0.00010318756540073082 2023-01-23 03:05:18.716517: step: 60/527, loss: 0.0003001093864440918 2023-01-23 03:05:19.832741: step: 64/527, loss: 8.59260544530116e-05 2023-01-23 03:05:20.934846: step: 68/527, loss: 0.0017410516738891602 2023-01-23 03:05:22.047267: step: 72/527, loss: 0.009443092159926891 2023-01-23 03:05:23.165566: step: 76/527, loss: 1.9073486612342094e-07 2023-01-23 03:05:24.252178: step: 80/527, loss: 0.0007999420049600303 2023-01-23 03:05:25.396574: step: 84/527, loss: 0.0010536194313317537 2023-01-23 03:05:26.539171: step: 88/527, loss: 0.047819897532463074 2023-01-23 03:05:27.654902: step: 92/527, loss: -2.574920654296875e-05 2023-01-23 03:05:28.755585: step: 96/527, loss: 0.0007807731744833291 2023-01-23 03:05:29.869906: step: 100/527, loss: 0.03480224683880806 2023-01-23 03:05:31.006277: step: 104/527, loss: 0.0004161834658589214 2023-01-23 03:05:32.114904: step: 108/527, loss: 1.869201696536038e-05 2023-01-23 03:05:33.269741: step: 112/527, loss: 0.042073819786310196 2023-01-23 03:05:34.388210: step: 116/527, loss: 0.00010776519775390625 2023-01-23 03:05:35.499824: step: 120/527, loss: 0.0020597458351403475 2023-01-23 03:05:36.616081: step: 124/527, loss: 3.8146959013829473e-07 2023-01-23 03:05:37.714556: step: 128/527, loss: 0.003101825714111328 2023-01-23 03:05:38.832660: step: 132/527, loss: 0.00022038817405700684 2023-01-23 03:05:39.931655: step: 136/527, loss: 0.035575199872255325 2023-01-23 03:05:41.038272: step: 140/527, loss: 1.2683868590102065e-05 2023-01-23 03:05:42.126740: step: 144/527, loss: 0.023530179634690285 2023-01-23 03:05:43.259027: step: 148/527, loss: 0.06080367788672447 2023-01-23 03:05:44.364743: step: 152/527, loss: 0.01611633412539959 2023-01-23 03:05:45.475073: step: 156/527, loss: 0.000623512314632535 2023-01-23 03:05:46.606644: step: 160/527, loss: 0.01434326171875 2023-01-23 03:05:47.710972: step: 164/527, loss: 0.014948463067412376 2023-01-23 03:05:48.825565: step: 168/527, loss: 0.01594219170510769 2023-01-23 03:05:49.975733: step: 172/527, loss: 0.0072536468505859375 2023-01-23 03:05:51.090701: step: 176/527, loss: 0.037351515144109726 2023-01-23 03:05:52.191470: step: 180/527, loss: 0.006028270814567804 2023-01-23 03:05:53.253450: step: 184/527, loss: 0.012895393185317516 2023-01-23 03:05:54.345252: step: 188/527, loss: 0.3417227864265442 2023-01-23 03:05:55.456246: step: 192/527, loss: 0.020996762439608574 2023-01-23 03:05:56.565618: step: 196/527, loss: 0.0029195784591138363 2023-01-23 03:05:57.706351: step: 200/527, loss: 0.0004861831839662045 2023-01-23 03:05:58.818924: step: 204/527, loss: 0.03167114406824112 2023-01-23 03:05:59.911476: step: 208/527, loss: 0.013806914910674095 2023-01-23 03:06:01.038943: step: 212/527, loss: 0.022811222821474075 2023-01-23 03:06:02.137981: step: 216/527, loss: 0.012236405164003372 2023-01-23 03:06:03.227695: step: 220/527, loss: 0.0006712913746014237 2023-01-23 03:06:04.333821: step: 224/527, loss: 0.0008067131275311112 2023-01-23 03:06:05.473010: step: 228/527, loss: 0.00014848708815407008 2023-01-23 03:06:06.579582: step: 232/527, loss: 0.00014181138249114156 2023-01-23 03:06:07.687669: step: 236/527, loss: 9.31739850784652e-05 2023-01-23 03:06:08.808874: step: 240/527, loss: 0.0009866715408861637 2023-01-23 03:06:09.915791: step: 244/527, loss: 0.03864326328039169 2023-01-23 03:06:11.042777: step: 248/527, loss: 0.0003999710315838456 2023-01-23 03:06:12.169578: step: 252/527, loss: 5.9127810345671605e-06 2023-01-23 03:06:13.294141: step: 256/527, loss: 0.005319785792380571 2023-01-23 03:06:14.398059: step: 260/527, loss: 0.021911241114139557 2023-01-23 03:06:15.520787: step: 264/527, loss: 0.00759735144674778 2023-01-23 03:06:16.651251: step: 268/527, loss: 0.02007455937564373 2023-01-23 03:06:17.752535: step: 272/527, loss: 0.0002645492786541581 2023-01-23 03:06:18.863647: step: 276/527, loss: 0.005757856648415327 2023-01-23 03:06:20.006595: step: 280/527, loss: 0.012510204687714577 2023-01-23 03:06:21.136499: step: 284/527, loss: 8.37326078908518e-05 2023-01-23 03:06:22.238459: step: 288/527, loss: 0.010239219292998314 2023-01-23 03:06:23.324602: step: 292/527, loss: 0.005147362127900124 2023-01-23 03:06:24.459234: step: 296/527, loss: 2.021789623540826e-05 2023-01-23 03:06:25.544376: step: 300/527, loss: 0.00013427734666038305 2023-01-23 03:06:26.657531: step: 304/527, loss: 0.008954429998993874 2023-01-23 03:06:27.785580: step: 308/527, loss: 0.01438894309103489 2023-01-23 03:06:28.916038: step: 312/527, loss: 0.03666668012738228 2023-01-23 03:06:30.018862: step: 316/527, loss: 0.0001791954127838835 2023-01-23 03:06:31.119695: step: 320/527, loss: 0.019196892157197 2023-01-23 03:06:32.206991: step: 324/527, loss: 0.0019220353569835424 2023-01-23 03:06:33.334952: step: 328/527, loss: 0.009200858883559704 2023-01-23 03:06:34.461581: step: 332/527, loss: 0.03407297283411026 2023-01-23 03:06:35.566789: step: 336/527, loss: 0.002361297607421875 2023-01-23 03:06:36.683273: step: 340/527, loss: 0.02950306050479412 2023-01-23 03:06:37.819441: step: 344/527, loss: 0.06600666046142578 2023-01-23 03:06:38.935897: step: 348/527, loss: 3.6048892070539296e-05 2023-01-23 03:06:40.062563: step: 352/527, loss: 0.00510482769459486 2023-01-23 03:06:41.164986: step: 356/527, loss: 0.0010512591106817126 2023-01-23 03:06:42.275679: step: 360/527, loss: 0.002140045166015625 2023-01-23 03:06:43.399135: step: 364/527, loss: 0.0003147125244140625 2023-01-23 03:06:44.525893: step: 368/527, loss: 0.00014820098294876516 2023-01-23 03:06:45.637896: step: 372/527, loss: 0.0005359649658203125 2023-01-23 03:06:46.733097: step: 376/527, loss: 0.00019359588623046875 2023-01-23 03:06:47.844822: step: 380/527, loss: 0.20133666694164276 2023-01-23 03:06:48.942123: step: 384/527, loss: 0.0010221959091722965 2023-01-23 03:06:50.062942: step: 388/527, loss: 0.003403282258659601 2023-01-23 03:06:51.206236: step: 392/527, loss: 0.011954116635024548 2023-01-23 03:06:52.313972: step: 396/527, loss: 0.03757696598768234 2023-01-23 03:06:53.419614: step: 400/527, loss: 0.002617359161376953 2023-01-23 03:06:54.535662: step: 404/527, loss: 0.1376585066318512 2023-01-23 03:06:55.675369: step: 408/527, loss: 0.0005266189691610634 2023-01-23 03:06:56.789261: step: 412/527, loss: 0.005866050720214844 2023-01-23 03:06:57.886240: step: 416/527, loss: 6.017684791004285e-05 2023-01-23 03:06:58.979961: step: 420/527, loss: 0.0006245553377084434 2023-01-23 03:07:00.070351: step: 424/527, loss: 0.03697452321648598 2023-01-23 03:07:01.189282: step: 428/527, loss: 0.006893730256706476 2023-01-23 03:07:02.284005: step: 432/527, loss: 0.030045127496123314 2023-01-23 03:07:03.410157: step: 436/527, loss: 0.0015117645962163806 2023-01-23 03:07:04.540131: step: 440/527, loss: 0.0017983437282964587 2023-01-23 03:07:05.641423: step: 444/527, loss: 0.0009522438631393015 2023-01-23 03:07:06.738849: step: 448/527, loss: 0.03591423109173775 2023-01-23 03:07:07.873735: step: 452/527, loss: 0.03116321563720703 2023-01-23 03:07:09.005915: step: 456/527, loss: 0.030733203515410423 2023-01-23 03:07:10.091931: step: 460/527, loss: 0.018243025988340378 2023-01-23 03:07:11.205161: step: 464/527, loss: 0.06861267983913422 2023-01-23 03:07:12.326790: step: 468/527, loss: 0.00024852753267623484 2023-01-23 03:07:13.495245: step: 472/527, loss: 0.5412971377372742 2023-01-23 03:07:14.600171: step: 476/527, loss: 0.025266362354159355 2023-01-23 03:07:15.733144: step: 480/527, loss: 0.0005380630609579384 2023-01-23 03:07:16.848119: step: 484/527, loss: 0.01474151574075222 2023-01-23 03:07:17.984494: step: 488/527, loss: 0.0031697272788733244 2023-01-23 03:07:19.126951: step: 492/527, loss: 0.023918725550174713 2023-01-23 03:07:20.315329: step: 496/527, loss: 0.02289428748190403 2023-01-23 03:07:21.418953: step: 500/527, loss: 0.024773407727479935 2023-01-23 03:07:22.564493: step: 504/527, loss: 0.0009824753506109118 2023-01-23 03:07:23.698328: step: 508/527, loss: 0.012175941839814186 2023-01-23 03:07:24.808901: step: 512/527, loss: 0.022690391167998314 2023-01-23 03:07:25.912094: step: 516/527, loss: 0.005814170930534601 2023-01-23 03:07:27.008838: step: 520/527, loss: 0.018480967730283737 2023-01-23 03:07:28.131582: step: 524/527, loss: 0.00024271011352539062 2023-01-23 03:07:29.245134: step: 528/527, loss: 0.00148601527325809 2023-01-23 03:07:30.345167: step: 532/527, loss: 0.0037738799583166838 2023-01-23 03:07:31.466960: step: 536/527, loss: 0.005963135045021772 2023-01-23 03:07:32.589809: step: 540/527, loss: 0.025493431836366653 2023-01-23 03:07:33.702227: step: 544/527, loss: 1.4925002687959932e-05 2023-01-23 03:07:34.817771: step: 548/527, loss: 0.03330955654382706 2023-01-23 03:07:35.944290: step: 552/527, loss: 0.005772400181740522 2023-01-23 03:07:37.078341: step: 556/527, loss: 0.02334127388894558 2023-01-23 03:07:38.182876: step: 560/527, loss: 0.00025272369384765625 2023-01-23 03:07:39.317335: step: 564/527, loss: 0.0021568299271166325 2023-01-23 03:07:40.425314: step: 568/527, loss: 0.0005167007329873741 2023-01-23 03:07:41.539921: step: 572/527, loss: 0.07779093086719513 2023-01-23 03:07:42.629445: step: 576/527, loss: 2.4652481442899443e-05 2023-01-23 03:07:43.763423: step: 580/527, loss: 0.00013809204392600805 2023-01-23 03:07:44.879849: step: 584/527, loss: 0.0014862060779705644 2023-01-23 03:07:45.962876: step: 588/527, loss: 7.724761962890625e-05 2023-01-23 03:07:47.086512: step: 592/527, loss: 0.00010795592970680445 2023-01-23 03:07:48.224365: step: 596/527, loss: 0.0028567316476255655 2023-01-23 03:07:49.361674: step: 600/527, loss: 0.13070030510425568 2023-01-23 03:07:50.506728: step: 604/527, loss: 0.0011511803604662418 2023-01-23 03:07:51.641095: step: 608/527, loss: 0.024424076080322266 2023-01-23 03:07:52.733260: step: 612/527, loss: 0.014412213116884232 2023-01-23 03:07:53.865173: step: 616/527, loss: 1.983642505365424e-05 2023-01-23 03:07:55.010560: step: 620/527, loss: 0.03953971713781357 2023-01-23 03:07:56.129399: step: 624/527, loss: 0.00085024832515046 2023-01-23 03:07:57.268682: step: 628/527, loss: 0.00047512055607512593 2023-01-23 03:07:58.373048: step: 632/527, loss: 0.0013311386574059725 2023-01-23 03:07:59.475522: step: 636/527, loss: 0.00192003243137151 2023-01-23 03:08:00.574318: step: 640/527, loss: 0.015132216736674309 2023-01-23 03:08:01.667673: step: 644/527, loss: 0.02239055559039116 2023-01-23 03:08:02.759893: step: 648/527, loss: 0.005105114076286554 2023-01-23 03:08:03.876769: step: 652/527, loss: 0.0017021656967699528 2023-01-23 03:08:05.003083: step: 656/527, loss: 0.042104244232177734 2023-01-23 03:08:06.163646: step: 660/527, loss: 0.0001312255917582661 2023-01-23 03:08:07.302266: step: 664/527, loss: 0.021743202582001686 2023-01-23 03:08:08.419492: step: 668/527, loss: 0.0001617431698832661 2023-01-23 03:08:09.505825: step: 672/527, loss: 0.021367549896240234 2023-01-23 03:08:10.620988: step: 676/527, loss: 0.009956836700439453 2023-01-23 03:08:11.748516: step: 680/527, loss: 0.029199600219726562 2023-01-23 03:08:12.871171: step: 684/527, loss: 0.017909621819853783 2023-01-23 03:08:13.990221: step: 688/527, loss: 0.003249645233154297 2023-01-23 03:08:15.108270: step: 692/527, loss: 0.02673787996172905 2023-01-23 03:08:16.215093: step: 696/527, loss: 9.74655122263357e-05 2023-01-23 03:08:17.298430: step: 700/527, loss: 0.0008546352037228644 2023-01-23 03:08:18.427559: step: 704/527, loss: 0.0018526078201830387 2023-01-23 03:08:19.550721: step: 708/527, loss: 0.006857538595795631 2023-01-23 03:08:20.722436: step: 712/527, loss: 0.03363924100995064 2023-01-23 03:08:21.868461: step: 716/527, loss: 0.00023956299992278218 2023-01-23 03:08:22.959378: step: 720/527, loss: 0.029749106615781784 2023-01-23 03:08:24.074980: step: 724/527, loss: 0.0023924827110022306 2023-01-23 03:08:25.230311: step: 728/527, loss: 0.027650736272335052 2023-01-23 03:08:26.343859: step: 732/527, loss: 1.7452239262638614e-05 2023-01-23 03:08:27.497860: step: 736/527, loss: 0.0034271241165697575 2023-01-23 03:08:28.599598: step: 740/527, loss: 0.004085445310920477 2023-01-23 03:08:29.724717: step: 744/527, loss: 0.003986025229096413 2023-01-23 03:08:30.819835: step: 748/527, loss: 6.341934204101562e-05 2023-01-23 03:08:31.899977: step: 752/527, loss: 0.004427909851074219 2023-01-23 03:08:33.012485: step: 756/527, loss: 0.023706817999482155 2023-01-23 03:08:34.105916: step: 760/527, loss: 0.0002662658516783267 2023-01-23 03:08:35.212156: step: 764/527, loss: 0.00347137451171875 2023-01-23 03:08:36.314343: step: 768/527, loss: 0.00011596680269576609 2023-01-23 03:08:37.390232: step: 772/527, loss: 0.00022678376990370452 2023-01-23 03:08:38.533324: step: 776/527, loss: 0.02955188788473606 2023-01-23 03:08:39.640126: step: 780/527, loss: 0.0014501571422442794 2023-01-23 03:08:40.744784: step: 784/527, loss: 0.00016574858454987407 2023-01-23 03:08:41.848539: step: 788/527, loss: 4.9591067181609105e-06 2023-01-23 03:08:42.957768: step: 792/527, loss: 0.00017614364332985133 2023-01-23 03:08:44.050682: step: 796/527, loss: 0.03747148439288139 2023-01-23 03:08:45.185326: step: 800/527, loss: 0.004490852355957031 2023-01-23 03:08:46.278629: step: 804/527, loss: 0.00029907224234193563 2023-01-23 03:08:47.367571: step: 808/527, loss: 0.00022411346435546875 2023-01-23 03:08:48.513232: step: 812/527, loss: 0.0029441833030432463 2023-01-23 03:08:49.616483: step: 816/527, loss: 0.00038137438241392374 2023-01-23 03:08:50.749455: step: 820/527, loss: 0.00714111328125 2023-01-23 03:08:51.886175: step: 824/527, loss: 0.024107933044433594 2023-01-23 03:08:53.015354: step: 828/527, loss: 0.0009182632202282548 2023-01-23 03:08:54.123490: step: 832/527, loss: 0.012457084842026234 2023-01-23 03:08:55.232674: step: 836/527, loss: 0.00029621124849654734 2023-01-23 03:08:56.349031: step: 840/527, loss: 0.009798049926757812 2023-01-23 03:08:57.456605: step: 844/527, loss: 0.0024993896950036287 2023-01-23 03:08:58.579672: step: 848/527, loss: 0.005990410223603249 2023-01-23 03:08:59.690139: step: 852/527, loss: 4.825591895496473e-05 2023-01-23 03:09:00.847135: step: 856/527, loss: 0.00346794119104743 2023-01-23 03:09:01.942890: step: 860/527, loss: 0.003063201904296875 2023-01-23 03:09:03.037424: step: 864/527, loss: 1.3446808225126006e-05 2023-01-23 03:09:04.140614: step: 868/527, loss: 0.0010539054637774825 2023-01-23 03:09:05.278103: step: 872/527, loss: 0.03994961082935333 2023-01-23 03:09:06.403048: step: 876/527, loss: 0.008144378662109375 2023-01-23 03:09:07.521299: step: 880/527, loss: 0.00452690152451396 2023-01-23 03:09:08.633492: step: 884/527, loss: 0.016897965222597122 2023-01-23 03:09:09.764151: step: 888/527, loss: 0.0018390655750408769 2023-01-23 03:09:10.875189: step: 892/527, loss: 0.3178071975708008 2023-01-23 03:09:11.971913: step: 896/527, loss: 0.1257917433977127 2023-01-23 03:09:13.086833: step: 900/527, loss: 0.012685203924775124 2023-01-23 03:09:14.195821: step: 904/527, loss: 0.050023604184389114 2023-01-23 03:09:15.293986: step: 908/527, loss: 0.00380287179723382 2023-01-23 03:09:16.400011: step: 912/527, loss: 4.062652442371473e-05 2023-01-23 03:09:17.539049: step: 916/527, loss: 0.014753341674804688 2023-01-23 03:09:18.674088: step: 920/527, loss: 0.054183200001716614 2023-01-23 03:09:19.805308: step: 924/527, loss: 0.002619075821712613 2023-01-23 03:09:20.913274: step: 928/527, loss: 0.0003921508905477822 2023-01-23 03:09:22.022848: step: 932/527, loss: 3.6191944673191756e-05 2023-01-23 03:09:23.104623: step: 936/527, loss: 0.00014381408982444555 2023-01-23 03:09:24.217117: step: 940/527, loss: 0.0014183043967932463 2023-01-23 03:09:25.320908: step: 944/527, loss: 0.007401895243674517 2023-01-23 03:09:26.445268: step: 948/527, loss: 8.37326078908518e-05 2023-01-23 03:09:27.556733: step: 952/527, loss: 6.0749054682673886e-05 2023-01-23 03:09:28.696049: step: 956/527, loss: 0.009656048379838467 2023-01-23 03:09:29.804980: step: 960/527, loss: 0.011701774783432484 2023-01-23 03:09:30.949876: step: 964/527, loss: 0.03260479122400284 2023-01-23 03:09:32.060229: step: 968/527, loss: 0.0006240844959393144 2023-01-23 03:09:33.186102: step: 972/527, loss: 5.440712266135961e-05 2023-01-23 03:09:34.334654: step: 976/527, loss: 0.0031078339088708162 2023-01-23 03:09:35.443785: step: 980/527, loss: 7.572174217784777e-05 2023-01-23 03:09:36.549182: step: 984/527, loss: 0.028738977387547493 2023-01-23 03:09:37.695447: step: 988/527, loss: 0.028688622638583183 2023-01-23 03:09:38.786906: step: 992/527, loss: 0.03795475885272026 2023-01-23 03:09:39.891680: step: 996/527, loss: 0.008068228140473366 2023-01-23 03:09:41.022018: step: 1000/527, loss: 0.010193252936005592 2023-01-23 03:09:42.128621: step: 1004/527, loss: 7.724762326688506e-06 2023-01-23 03:09:43.274319: step: 1008/527, loss: 0.014235115610063076 2023-01-23 03:09:44.370763: step: 1012/527, loss: 0.010060882195830345 2023-01-23 03:09:45.508859: step: 1016/527, loss: 0.00010719299461925402 2023-01-23 03:09:46.610427: step: 1020/527, loss: 0.024750327691435814 2023-01-23 03:09:47.760916: step: 1024/527, loss: 0.000522613525390625 2023-01-23 03:09:48.883563: step: 1028/527, loss: 0.0025341035798192024 2023-01-23 03:09:50.010848: step: 1032/527, loss: 0.017375566065311432 2023-01-23 03:09:51.107719: step: 1036/527, loss: 0.09704332053661346 2023-01-23 03:09:52.198079: step: 1040/527, loss: 0.029554367065429688 2023-01-23 03:09:53.279679: step: 1044/527, loss: 0.007841014303267002 2023-01-23 03:09:54.394151: step: 1048/527, loss: 0.006303596775978804 2023-01-23 03:09:55.530266: step: 1052/527, loss: 0.001470375107601285 2023-01-23 03:09:56.625981: step: 1056/527, loss: 0.02668152004480362 2023-01-23 03:09:57.740043: step: 1060/527, loss: 0.0004788398800883442 2023-01-23 03:09:58.881074: step: 1064/527, loss: 0.008269691839814186 2023-01-23 03:09:59.981923: step: 1068/527, loss: 0.00036458970862440765 2023-01-23 03:10:01.096406: step: 1072/527, loss: 0.0019134521717205644 2023-01-23 03:10:02.201096: step: 1076/527, loss: 0.03135652840137482 2023-01-23 03:10:03.326854: step: 1080/527, loss: 0.012887001037597656 2023-01-23 03:10:04.439405: step: 1084/527, loss: 0.03917905315756798 2023-01-23 03:10:05.535236: step: 1088/527, loss: 0.03343725576996803 2023-01-23 03:10:06.622467: step: 1092/527, loss: 0.0103880874812603 2023-01-23 03:10:07.759075: step: 1096/527, loss: 0.017304420471191406 2023-01-23 03:10:08.859770: step: 1100/527, loss: 0.017217446118593216 2023-01-23 03:10:09.986407: step: 1104/527, loss: 0.0013104439713060856 2023-01-23 03:10:11.087104: step: 1108/527, loss: 0.014308547601103783 2023-01-23 03:10:12.179182: step: 1112/527, loss: 0.00029239655123092234 2023-01-23 03:10:13.294375: step: 1116/527, loss: 0.0008804321405477822 2023-01-23 03:10:14.449425: step: 1120/527, loss: 0.008081817999482155 2023-01-23 03:10:15.580305: step: 1124/527, loss: 0.010495948605239391 2023-01-23 03:10:16.687576: step: 1128/527, loss: 0.007045269012451172 2023-01-23 03:10:17.780639: step: 1132/527, loss: 0.019229698926210403 2023-01-23 03:10:18.877798: step: 1136/527, loss: 0.04969377443194389 2023-01-23 03:10:19.998698: step: 1140/527, loss: 0.0026632307562977076 2023-01-23 03:10:21.114949: step: 1144/527, loss: 0.007572222035378218 2023-01-23 03:10:22.255612: step: 1148/527, loss: 0.030561067163944244 2023-01-23 03:10:23.367157: step: 1152/527, loss: 0.0003498077276162803 2023-01-23 03:10:24.502744: step: 1156/527, loss: 0.6666473150253296 2023-01-23 03:10:25.595454: step: 1160/527, loss: 0.0023136138916015625 2023-01-23 03:10:26.730117: step: 1164/527, loss: 0.26929742097854614 2023-01-23 03:10:27.857576: step: 1168/527, loss: 0.02358074113726616 2023-01-23 03:10:28.976805: step: 1172/527, loss: 0.0012399672996252775 2023-01-23 03:10:30.150938: step: 1176/527, loss: 0.042407989501953125 2023-01-23 03:10:31.271735: step: 1180/527, loss: 0.0009212493896484375 2023-01-23 03:10:32.336035: step: 1184/527, loss: 0.0014943123096600175 2023-01-23 03:10:33.481924: step: 1188/527, loss: 0.0006429672357626259 2023-01-23 03:10:34.608344: step: 1192/527, loss: 0.001831150148063898 2023-01-23 03:10:35.742722: step: 1196/527, loss: 0.012487792409956455 2023-01-23 03:10:36.840926: step: 1200/527, loss: 0.027876663953065872 2023-01-23 03:10:37.990080: step: 1204/527, loss: 0.0005977630498819053 2023-01-23 03:10:39.095315: step: 1208/527, loss: 0.004257583525031805 2023-01-23 03:10:40.233121: step: 1212/527, loss: 0.06620216369628906 2023-01-23 03:10:41.317895: step: 1216/527, loss: 0.00024099351139739156 2023-01-23 03:10:42.448944: step: 1220/527, loss: 0.03276100009679794 2023-01-23 03:10:43.537566: step: 1224/527, loss: 0.020774461328983307 2023-01-23 03:10:44.653241: step: 1228/527, loss: 0.001195716904476285 2023-01-23 03:10:45.765714: step: 1232/527, loss: 0.026764871552586555 2023-01-23 03:10:46.860416: step: 1236/527, loss: 0.00016899110050871968 2023-01-23 03:10:47.976218: step: 1240/527, loss: 0.033670518547296524 2023-01-23 03:10:49.090619: step: 1244/527, loss: 0.010154534131288528 2023-01-23 03:10:50.231601: step: 1248/527, loss: 0.009581947699189186 2023-01-23 03:10:51.373043: step: 1252/527, loss: 0.01013793982565403 2023-01-23 03:10:52.497506: step: 1256/527, loss: 0.0011025428539142013 2023-01-23 03:10:53.635428: step: 1260/527, loss: 0.008715820498764515 2023-01-23 03:10:54.742063: step: 1264/527, loss: 0.0013222694396972656 2023-01-23 03:10:55.914682: step: 1268/527, loss: 0.0010924339294433594 2023-01-23 03:10:57.007413: step: 1272/527, loss: 0.017766855657100677 2023-01-23 03:10:58.132148: step: 1276/527, loss: 0.001348304795101285 2023-01-23 03:10:59.252124: step: 1280/527, loss: 0.013893604278564453 2023-01-23 03:11:00.333879: step: 1284/527, loss: 0.07488150894641876 2023-01-23 03:11:01.455742: step: 1288/527, loss: 0.04465227201581001 2023-01-23 03:11:02.565947: step: 1292/527, loss: 0.0003437042178120464 2023-01-23 03:11:03.705991: step: 1296/527, loss: 0.01599426381289959 2023-01-23 03:11:04.825967: step: 1300/527, loss: 0.0031141280196607113 2023-01-23 03:11:05.916992: step: 1304/527, loss: 6.532669067382812e-05 2023-01-23 03:11:07.018988: step: 1308/527, loss: 0.00902261771261692 2023-01-23 03:11:08.112367: step: 1312/527, loss: 0.005781936924904585 2023-01-23 03:11:09.226702: step: 1316/527, loss: 0.00010032653517555445 2023-01-23 03:11:10.387298: step: 1320/527, loss: 0.010472106747329235 2023-01-23 03:11:11.486064: step: 1324/527, loss: 0.008134746924042702 2023-01-23 03:11:12.580827: step: 1328/527, loss: 0.007324791047722101 2023-01-23 03:11:13.680801: step: 1332/527, loss: 0.027659133076667786 2023-01-23 03:11:14.785970: step: 1336/527, loss: 0.0016972542507573962 2023-01-23 03:11:15.897047: step: 1340/527, loss: 0.32235851883888245 2023-01-23 03:11:16.993869: step: 1344/527, loss: 0.031434059143066406 2023-01-23 03:11:18.107502: step: 1348/527, loss: 0.0015349150635302067 2023-01-23 03:11:19.252691: step: 1352/527, loss: 0.001064300537109375 2023-01-23 03:11:20.356507: step: 1356/527, loss: 0.005130887031555176 2023-01-23 03:11:21.499378: step: 1360/527, loss: 0.0289827361702919 2023-01-23 03:11:22.658975: step: 1364/527, loss: 0.00982291717082262 2023-01-23 03:11:23.769098: step: 1368/527, loss: 0.016693115234375 2023-01-23 03:11:24.882213: step: 1372/527, loss: 0.01905345916748047 2023-01-23 03:11:26.021146: step: 1376/527, loss: 0.04285154491662979 2023-01-23 03:11:27.145849: step: 1380/527, loss: 0.0007675171364098787 2023-01-23 03:11:28.262543: step: 1384/527, loss: 0.0008178711286745965 2023-01-23 03:11:29.360292: step: 1388/527, loss: 0.0005775452009402215 2023-01-23 03:11:30.462914: step: 1392/527, loss: 2.3508073354605585e-05 2023-01-23 03:11:31.578715: step: 1396/527, loss: 0.004079436883330345 2023-01-23 03:11:32.700639: step: 1400/527, loss: 0.004896759986877441 2023-01-23 03:11:33.831165: step: 1404/527, loss: 0.013026141561567783 2023-01-23 03:11:34.966337: step: 1408/527, loss: 0.03258190304040909 2023-01-23 03:11:36.073813: step: 1412/527, loss: 0.023769784718751907 2023-01-23 03:11:37.196275: step: 1416/527, loss: 0.0005627929931506515 2023-01-23 03:11:38.321303: step: 1420/527, loss: 0.004569435026496649 2023-01-23 03:11:39.455964: step: 1424/527, loss: 0.0072904592379927635 2023-01-23 03:11:40.562509: step: 1428/527, loss: 0.0033453465439379215 2023-01-23 03:11:41.662978: step: 1432/527, loss: 0.0017184257740154862 2023-01-23 03:11:42.774485: step: 1436/527, loss: 0.02543344348669052 2023-01-23 03:11:43.873010: step: 1440/527, loss: 0.0004535675107035786 2023-01-23 03:11:44.979589: step: 1444/527, loss: 0.0018549920059740543 2023-01-23 03:11:46.135487: step: 1448/527, loss: 0.014914131723344326 2023-01-23 03:11:47.250531: step: 1452/527, loss: 0.0006571770063601434 2023-01-23 03:11:48.360690: step: 1456/527, loss: 4.053860902786255e-05 2023-01-23 03:11:49.489839: step: 1460/527, loss: 0.02966766431927681 2023-01-23 03:11:50.608736: step: 1464/527, loss: 9.088516526389867e-05 2023-01-23 03:11:51.800898: step: 1468/527, loss: 0.01150426920503378 2023-01-23 03:11:52.901665: step: 1472/527, loss: 0.8885663747787476 2023-01-23 03:11:53.991709: step: 1476/527, loss: 0.002996444934979081 2023-01-23 03:11:55.114759: step: 1480/527, loss: 0.00023703573970124125 2023-01-23 03:11:56.250862: step: 1484/527, loss: 0.002443981356918812 2023-01-23 03:11:57.351541: step: 1488/527, loss: 1.4495850336970761e-05 2023-01-23 03:11:58.460410: step: 1492/527, loss: 0.0004406929074320942 2023-01-23 03:11:59.588567: step: 1496/527, loss: 0.03025798872113228 2023-01-23 03:12:00.691539: step: 1500/527, loss: 0.0012924193870276213 2023-01-23 03:12:01.766968: step: 1504/527, loss: 0.2954823970794678 2023-01-23 03:12:02.864400: step: 1508/527, loss: 0.0004878044128417969 2023-01-23 03:12:03.960904: step: 1512/527, loss: 0.004134750459343195 2023-01-23 03:12:05.076946: step: 1516/527, loss: 0.20269259810447693 2023-01-23 03:12:06.203685: step: 1520/527, loss: 6.446838960982859e-05 2023-01-23 03:12:07.313472: step: 1524/527, loss: 3.25202927342616e-05 2023-01-23 03:12:08.451506: step: 1528/527, loss: 0.031868983060121536 2023-01-23 03:12:09.532725: step: 1532/527, loss: 0.0007028579711914062 2023-01-23 03:12:10.622663: step: 1536/527, loss: 0.00023546218289993703 2023-01-23 03:12:11.739196: step: 1540/527, loss: 0.005890607833862305 2023-01-23 03:12:12.839193: step: 1544/527, loss: 0.0072532654739916325 2023-01-23 03:12:13.956536: step: 1548/527, loss: 0.006248283665627241 2023-01-23 03:12:15.095303: step: 1552/527, loss: 0.0033077241387218237 2023-01-23 03:12:16.234931: step: 1556/527, loss: 0.0021356106735765934 2023-01-23 03:12:17.329057: step: 1560/527, loss: 0.09970169514417648 2023-01-23 03:12:18.456739: step: 1564/527, loss: 0.0006385803571902215 2023-01-23 03:12:19.581565: step: 1568/527, loss: 0.04650163650512695 2023-01-23 03:12:20.687524: step: 1572/527, loss: 0.0005164146423339844 2023-01-23 03:12:21.774080: step: 1576/527, loss: 0.007397461216896772 2023-01-23 03:12:22.895674: step: 1580/527, loss: 6.008148375258315e-06 2023-01-23 03:12:24.008429: step: 1584/527, loss: 0.0014300346374511719 2023-01-23 03:12:25.130724: step: 1588/527, loss: 0.02004106156527996 2023-01-23 03:12:26.236133: step: 1592/527, loss: 0.0013718605041503906 2023-01-23 03:12:27.356370: step: 1596/527, loss: 0.002473801374435425 2023-01-23 03:12:28.453455: step: 1600/527, loss: 0.0032970430329442024 2023-01-23 03:12:29.570074: step: 1604/527, loss: 0.003894805908203125 2023-01-23 03:12:30.679281: step: 1608/527, loss: 0.12098999321460724 2023-01-23 03:12:31.768916: step: 1612/527, loss: 0.005961323156952858 2023-01-23 03:12:32.868506: step: 1616/527, loss: 0.012486887164413929 2023-01-23 03:12:33.980433: step: 1620/527, loss: 0.013773728162050247 2023-01-23 03:12:35.084405: step: 1624/527, loss: 0.0011380196083337069 2023-01-23 03:12:36.187963: step: 1628/527, loss: 0.0025688172318041325 2023-01-23 03:12:37.317945: step: 1632/527, loss: 0.021014787256717682 2023-01-23 03:12:38.463724: step: 1636/527, loss: 1.773834264895413e-05 2023-01-23 03:12:39.563592: step: 1640/527, loss: 0.006216621492058039 2023-01-23 03:12:40.706120: step: 1644/527, loss: 0.06589861214160919 2023-01-23 03:12:41.833998: step: 1648/527, loss: 0.00445170421153307 2023-01-23 03:12:42.954109: step: 1652/527, loss: 0.028729820623993874 2023-01-23 03:12:44.088194: step: 1656/527, loss: 0.010041999630630016 2023-01-23 03:12:45.197616: step: 1660/527, loss: 0.0007791519165039062 2023-01-23 03:12:46.299914: step: 1664/527, loss: 0.001821804093196988 2023-01-23 03:12:47.426843: step: 1668/527, loss: 0.0048088072799146175 2023-01-23 03:12:48.519950: step: 1672/527, loss: 0.00033655166043899953 2023-01-23 03:12:49.673641: step: 1676/527, loss: 0.03624897450208664 2023-01-23 03:12:50.850764: step: 1680/527, loss: 0.0005367278936319053 2023-01-23 03:12:51.973689: step: 1684/527, loss: 0.022812461480498314 2023-01-23 03:12:53.098763: step: 1688/527, loss: 0.00011477470980025828 2023-01-23 03:12:54.187756: step: 1692/527, loss: 0.010447025299072266 2023-01-23 03:12:55.309771: step: 1696/527, loss: 0.013906383886933327 2023-01-23 03:12:56.430793: step: 1700/527, loss: 0.0009957790607586503 2023-01-23 03:12:57.558588: step: 1704/527, loss: 0.016587449237704277 2023-01-23 03:12:58.675773: step: 1708/527, loss: 0.0001066207914846018 2023-01-23 03:12:59.818477: step: 1712/527, loss: 0.008736801333725452 2023-01-23 03:13:00.959148: step: 1716/527, loss: 0.02396678924560547 2023-01-23 03:13:02.068993: step: 1720/527, loss: 0.0004646301385946572 2023-01-23 03:13:03.192443: step: 1724/527, loss: 0.0338197723031044 2023-01-23 03:13:04.341417: step: 1728/527, loss: 0.011371993459761143 2023-01-23 03:13:05.445601: step: 1732/527, loss: 0.0016618729569017887 2023-01-23 03:13:06.550410: step: 1736/527, loss: 0.11559267342090607 2023-01-23 03:13:07.641728: step: 1740/527, loss: 0.2119472473859787 2023-01-23 03:13:08.743838: step: 1744/527, loss: 0.00013999939255882055 2023-01-23 03:13:09.846366: step: 1748/527, loss: 0.025621414184570312 2023-01-23 03:13:10.966236: step: 1752/527, loss: 0.002800035523250699 2023-01-23 03:13:12.126623: step: 1756/527, loss: 0.003209733869880438 2023-01-23 03:13:13.237279: step: 1760/527, loss: 0.0009115219581872225 2023-01-23 03:13:14.336664: step: 1764/527, loss: 0.012387752532958984 2023-01-23 03:13:15.469667: step: 1768/527, loss: 0.0020169259514659643 2023-01-23 03:13:16.572038: step: 1772/527, loss: 0.05519409477710724 2023-01-23 03:13:17.686133: step: 1776/527, loss: 0.0023979186080396175 2023-01-23 03:13:18.794015: step: 1780/527, loss: 0.03604469448328018 2023-01-23 03:13:19.908050: step: 1784/527, loss: 0.0015453338855877519 2023-01-23 03:13:21.026024: step: 1788/527, loss: 0.0059226988814771175 2023-01-23 03:13:22.182541: step: 1792/527, loss: 0.0022039413452148438 2023-01-23 03:13:23.299280: step: 1796/527, loss: 0.015506553463637829 2023-01-23 03:13:24.397932: step: 1800/527, loss: 0.0003748893504962325 2023-01-23 03:13:25.522879: step: 1804/527, loss: 0.021599579602479935 2023-01-23 03:13:26.622607: step: 1808/527, loss: 0.005167102906852961 2023-01-23 03:13:27.760547: step: 1812/527, loss: 0.008883094415068626 2023-01-23 03:13:28.873132: step: 1816/527, loss: 0.0042368886061012745 2023-01-23 03:13:29.973320: step: 1820/527, loss: 0.0001442909415345639 2023-01-23 03:13:31.079854: step: 1824/527, loss: 0.001965141389518976 2023-01-23 03:13:32.198443: step: 1828/527, loss: 0.16837435960769653 2023-01-23 03:13:33.317389: step: 1832/527, loss: 0.04644737392663956 2023-01-23 03:13:34.441971: step: 1836/527, loss: 0.001495456788688898 2023-01-23 03:13:35.542567: step: 1840/527, loss: 0.02749796025454998 2023-01-23 03:13:36.666719: step: 1844/527, loss: 0.024663161486387253 2023-01-23 03:13:37.740493: step: 1848/527, loss: 0.00043544769869185984 2023-01-23 03:13:38.840499: step: 1852/527, loss: 0.0008251190301962197 2023-01-23 03:13:39.978078: step: 1856/527, loss: 0.00959930382668972 2023-01-23 03:13:41.086707: step: 1860/527, loss: 0.030899692326784134 2023-01-23 03:13:42.219742: step: 1864/527, loss: 0.019400596618652344 2023-01-23 03:13:43.338916: step: 1868/527, loss: 0.0005040168762207031 2023-01-23 03:13:44.473803: step: 1872/527, loss: 0.01944141462445259 2023-01-23 03:13:45.594958: step: 1876/527, loss: 0.024878978729248047 2023-01-23 03:13:46.699572: step: 1880/527, loss: 0.0004665374872274697 2023-01-23 03:13:47.826573: step: 1884/527, loss: 0.004247569944709539 2023-01-23 03:13:48.904357: step: 1888/527, loss: 0.01380624808371067 2023-01-23 03:13:49.995605: step: 1892/527, loss: 6.904602196300402e-05 2023-01-23 03:13:51.092895: step: 1896/527, loss: 0.022628404200077057 2023-01-23 03:13:52.210448: step: 1900/527, loss: 0.03297929838299751 2023-01-23 03:13:53.334035: step: 1904/527, loss: 0.00018376113439444453 2023-01-23 03:13:54.468104: step: 1908/527, loss: 0.0005106925964355469 2023-01-23 03:13:55.547159: step: 1912/527, loss: 0.2286311537027359 2023-01-23 03:13:56.684776: step: 1916/527, loss: 0.003556633135303855 2023-01-23 03:13:57.817596: step: 1920/527, loss: 0.00367660541087389 2023-01-23 03:13:58.938839: step: 1924/527, loss: 0.004868888761848211 2023-01-23 03:14:00.029196: step: 1928/527, loss: 0.0060253143310546875 2023-01-23 03:14:01.146444: step: 1932/527, loss: 0.01381072960793972 2023-01-23 03:14:02.270826: step: 1936/527, loss: 0.0003581047058105469 2023-01-23 03:14:03.388227: step: 1940/527, loss: 0.01874103769659996 2023-01-23 03:14:04.521082: step: 1944/527, loss: 0.0175323486328125 2023-01-23 03:14:05.604849: step: 1948/527, loss: 0.029023267328739166 2023-01-23 03:14:06.739906: step: 1952/527, loss: 0.0393308661878109 2023-01-23 03:14:07.827304: step: 1956/527, loss: 0.004464888479560614 2023-01-23 03:14:08.976252: step: 1960/527, loss: 0.0200418159365654 2023-01-23 03:14:10.106176: step: 1964/527, loss: 0.00018434523371979594 2023-01-23 03:14:11.190944: step: 1968/527, loss: 0.006373309995979071 2023-01-23 03:14:12.291190: step: 1972/527, loss: 0.003254509065300226 2023-01-23 03:14:13.411193: step: 1976/527, loss: 0.0027070045471191406 2023-01-23 03:14:14.503261: step: 1980/527, loss: 0.0002590179501567036 2023-01-23 03:14:15.636428: step: 1984/527, loss: 0.0009178161853924394 2023-01-23 03:14:16.776082: step: 1988/527, loss: 0.012150573544204235 2023-01-23 03:14:17.908399: step: 1992/527, loss: 0.001127052353695035 2023-01-23 03:14:19.038434: step: 1996/527, loss: 0.0005780697101727128 2023-01-23 03:14:20.138758: step: 2000/527, loss: 0.0018157005542889237 2023-01-23 03:14:21.260513: step: 2004/527, loss: 0.016777994111180305 2023-01-23 03:14:22.381618: step: 2008/527, loss: 0.00564079312607646 2023-01-23 03:14:23.487031: step: 2012/527, loss: 3.256797936046496e-05 2023-01-23 03:14:24.618123: step: 2016/527, loss: 0.0010238647228106856 2023-01-23 03:14:25.746506: step: 2020/527, loss: 0.002127408981323242 2023-01-23 03:14:26.879428: step: 2024/527, loss: 0.041478537023067474 2023-01-23 03:14:28.034535: step: 2028/527, loss: 0.03844613954424858 2023-01-23 03:14:29.139534: step: 2032/527, loss: 0.0011966705787926912 2023-01-23 03:14:30.247665: step: 2036/527, loss: 0.16422395408153534 2023-01-23 03:14:31.371517: step: 2040/527, loss: 0.03650417551398277 2023-01-23 03:14:32.524571: step: 2044/527, loss: 0.0003999710315838456 2023-01-23 03:14:33.642660: step: 2048/527, loss: 0.003025627229362726 2023-01-23 03:14:34.744897: step: 2052/527, loss: 0.0007245063898153603 2023-01-23 03:14:35.861195: step: 2056/527, loss: 0.005990314297378063 2023-01-23 03:14:36.982994: step: 2060/527, loss: 0.0006962775951251388 2023-01-23 03:14:38.100354: step: 2064/527, loss: 0.00025615692720748484 2023-01-23 03:14:39.211041: step: 2068/527, loss: 0.0011220932938158512 2023-01-23 03:14:40.334263: step: 2072/527, loss: 0.04845075681805611 2023-01-23 03:14:41.457586: step: 2076/527, loss: 0.0014861106174066663 2023-01-23 03:14:42.572226: step: 2080/527, loss: 0.014460563659667969 2023-01-23 03:14:43.678475: step: 2084/527, loss: 0.02891988679766655 2023-01-23 03:14:44.776811: step: 2088/527, loss: 0.00039224623469635844 2023-01-23 03:14:45.874943: step: 2092/527, loss: 0.05199460685253143 2023-01-23 03:14:46.991312: step: 2096/527, loss: 0.013969993218779564 2023-01-23 03:14:48.119923: step: 2100/527, loss: 0.002536487765610218 2023-01-23 03:14:49.252996: step: 2104/527, loss: 0.04461870342493057 2023-01-23 03:14:50.367169: step: 2108/527, loss: 0.006943607237190008 ================================================== Loss: 0.021 -------------------- Dev: {'event': {'p': 0.5830845771144278, 'r': 0.7802929427430093, 'f1': 0.6674259681093394}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Test: {'event': {'p': 0.6141975308641975, 'r': 0.796, 'f1': 0.6933797909407665}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Chinese: {'event': {'p': 0.5287356321839081, 'r': 0.8518518518518519, 'f1': 0.6524822695035462}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Korean: {'event': {'p': 0.55, 'r': 0.5238095238095238, 'f1': 0.5365853658536585}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Russian: {'event': {'p': 0.47368421052631576, 'r': 0.5, 'f1': 0.4864864864864865}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066252587991718, 'r': 0.7802929427430093, 'f1': 0.6825859056493885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Eng Test for Korean: {'event': {'p': 0.62580054894785, 'r': 0.7817142857142857, 'f1': 0.6951219512195121}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 22 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:15:32.040729: step: 4/527, loss: 0.0005558967823162675 2023-01-23 03:15:33.166009: step: 8/527, loss: 0.0004991531604900956 2023-01-23 03:15:34.285734: step: 12/527, loss: 0.07324127852916718 2023-01-23 03:15:35.372357: step: 16/527, loss: 0.000827789306640625 2023-01-23 03:15:36.483819: step: 20/527, loss: 0.007728958036750555 2023-01-23 03:15:37.572950: step: 24/527, loss: 0.00080957415048033 2023-01-23 03:15:38.668339: step: 28/527, loss: 0.0006683349492959678 2023-01-23 03:15:39.763756: step: 32/527, loss: 0.008914565667510033 2023-01-23 03:15:40.852127: step: 36/527, loss: 0.0027980327140539885 2023-01-23 03:15:41.968144: step: 40/527, loss: 0.0003648757701739669 2023-01-23 03:15:43.079860: step: 44/527, loss: 0.00959391612559557 2023-01-23 03:15:44.181236: step: 48/527, loss: 3.6239625842426904e-06 2023-01-23 03:15:45.311056: step: 52/527, loss: 0.001872253487817943 2023-01-23 03:15:46.428918: step: 56/527, loss: 0.00933923665434122 2023-01-23 03:15:47.532192: step: 60/527, loss: 9.641647920943797e-05 2023-01-23 03:15:48.632008: step: 64/527, loss: 0.019780471920967102 2023-01-23 03:15:49.761185: step: 68/527, loss: 0.0026884081307798624 2023-01-23 03:15:50.876175: step: 72/527, loss: 0.011364745907485485 2023-01-23 03:15:52.015492: step: 76/527, loss: 0.03705139085650444 2023-01-23 03:15:53.148257: step: 80/527, loss: 0.018434714525938034 2023-01-23 03:15:54.239208: step: 84/527, loss: 0.00048060418339446187 2023-01-23 03:15:55.368455: step: 88/527, loss: -3.528594970703125e-05 2023-01-23 03:15:56.518573: step: 92/527, loss: 0.18018054962158203 2023-01-23 03:15:57.657905: step: 96/527, loss: 0.0020869255531579256 2023-01-23 03:15:58.773197: step: 100/527, loss: 0.0002636909775901586 2023-01-23 03:15:59.872463: step: 104/527, loss: 0.01960763894021511 2023-01-23 03:16:00.976268: step: 108/527, loss: 0.00822839792817831 2023-01-23 03:16:02.098617: step: 112/527, loss: 0.0005994796520099044 2023-01-23 03:16:03.193954: step: 116/527, loss: 1.029968279908644e-05 2023-01-23 03:16:04.322962: step: 120/527, loss: 0.0008114815573208034 2023-01-23 03:16:05.436683: step: 124/527, loss: 0.00018100740271620452 2023-01-23 03:16:06.568558: step: 128/527, loss: 0.00969605427235365 2023-01-23 03:16:07.678462: step: 132/527, loss: 3.3378603347955504e-07 2023-01-23 03:16:08.825915: step: 136/527, loss: 0.011018563061952591 2023-01-23 03:16:09.925837: step: 140/527, loss: 0.002222251845523715 2023-01-23 03:16:11.048590: step: 144/527, loss: 0.0002647399960551411 2023-01-23 03:16:12.155920: step: 148/527, loss: 0.00054168701171875 2023-01-23 03:16:13.264563: step: 152/527, loss: 0.01643352583050728 2023-01-23 03:16:14.394054: step: 156/527, loss: 0.011928845196962357 2023-01-23 03:16:15.538630: step: 160/527, loss: 0.005833625793457031 2023-01-23 03:16:16.657494: step: 164/527, loss: 0.000813388847745955 2023-01-23 03:16:17.750331: step: 168/527, loss: 0.004551601596176624 2023-01-23 03:16:18.830878: step: 172/527, loss: 7.62939453125e-06 2023-01-23 03:16:19.986347: step: 176/527, loss: 0.010396957397460938 2023-01-23 03:16:21.099165: step: 180/527, loss: 0.00013246535672806203 2023-01-23 03:16:22.209738: step: 184/527, loss: 0.0003122329944744706 2023-01-23 03:16:23.324268: step: 188/527, loss: 0.0016575813060626388 2023-01-23 03:16:24.454698: step: 192/527, loss: 8.277893357444555e-05 2023-01-23 03:16:25.582548: step: 196/527, loss: 0.0002503395080566406 2023-01-23 03:16:26.731335: step: 200/527, loss: 0.010983943939208984 2023-01-23 03:16:27.848536: step: 204/527, loss: 0.023090267553925514 2023-01-23 03:16:28.981940: step: 208/527, loss: 7.696151442360133e-05 2023-01-23 03:16:30.090352: step: 212/527, loss: 0.013471603393554688 2023-01-23 03:16:31.209712: step: 216/527, loss: 0.0002400398370809853 2023-01-23 03:16:32.337912: step: 220/527, loss: 0.009569359011948109 2023-01-23 03:16:33.432578: step: 224/527, loss: 0.004719185642898083 2023-01-23 03:16:34.548571: step: 228/527, loss: 3.9577484130859375e-05 2023-01-23 03:16:35.647175: step: 232/527, loss: 8.206367056118324e-05 2023-01-23 03:16:36.792589: step: 236/527, loss: 0.0002610206720419228 2023-01-23 03:16:37.906190: step: 240/527, loss: 0.0025115013122558594 2023-01-23 03:16:39.083785: step: 244/527, loss: 0.0019284249283373356 2023-01-23 03:16:40.205346: step: 248/527, loss: 0.00011281967454124242 2023-01-23 03:16:41.294227: step: 252/527, loss: 0.09685325622558594 2023-01-23 03:16:42.409695: step: 256/527, loss: 0.0008448601001873612 2023-01-23 03:16:43.513389: step: 260/527, loss: 0.00021781922259833664 2023-01-23 03:16:44.608508: step: 264/527, loss: 0.014787578955292702 2023-01-23 03:16:45.731076: step: 268/527, loss: 0.0017694473499432206 2023-01-23 03:16:46.844432: step: 272/527, loss: 4.444122168933973e-05 2023-01-23 03:16:47.997798: step: 276/527, loss: 0.0029649734497070312 2023-01-23 03:16:49.108274: step: 280/527, loss: 0.00012035370309604332 2023-01-23 03:16:50.247608: step: 284/527, loss: 0.007779121398925781 2023-01-23 03:16:51.355151: step: 288/527, loss: 0.03949422761797905 2023-01-23 03:16:52.485604: step: 292/527, loss: 0.00016922950453590602 2023-01-23 03:16:53.616349: step: 296/527, loss: 0.2985950708389282 2023-01-23 03:16:54.692347: step: 300/527, loss: 0.014481925405561924 2023-01-23 03:16:55.799053: step: 304/527, loss: 0.00300769810564816 2023-01-23 03:16:56.920861: step: 308/527, loss: 0.015477752313017845 2023-01-23 03:16:58.054788: step: 312/527, loss: 0.0036468505859375 2023-01-23 03:16:59.180975: step: 316/527, loss: 0.019165420904755592 2023-01-23 03:17:00.300756: step: 320/527, loss: 0.00016679763211868703 2023-01-23 03:17:01.368887: step: 324/527, loss: 0.000958347343839705 2023-01-23 03:17:02.447519: step: 328/527, loss: 0.002056217286735773 2023-01-23 03:17:03.614937: step: 332/527, loss: 0.030867289751768112 2023-01-23 03:17:04.694852: step: 336/527, loss: 7.05719003235572e-06 2023-01-23 03:17:05.792043: step: 340/527, loss: 0.020842932164669037 2023-01-23 03:17:06.887525: step: 344/527, loss: 0.009637641720473766 2023-01-23 03:17:07.981478: step: 348/527, loss: 0.01580219343304634 2023-01-23 03:17:09.085252: step: 352/527, loss: 0.0066525936126708984 2023-01-23 03:17:10.180837: step: 356/527, loss: 0.0009056210983544588 2023-01-23 03:17:11.283969: step: 360/527, loss: 0.005259132478386164 2023-01-23 03:17:12.406627: step: 364/527, loss: 0.0005645751953125 2023-01-23 03:17:13.547442: step: 368/527, loss: 0.01938645914196968 2023-01-23 03:17:14.643256: step: 372/527, loss: 0.0196057315915823 2023-01-23 03:17:15.761635: step: 376/527, loss: 3.0517578125e-05 2023-01-23 03:17:16.850554: step: 380/527, loss: 0.004533624742180109 2023-01-23 03:17:18.003872: step: 384/527, loss: 0.02848644368350506 2023-01-23 03:17:19.121013: step: 388/527, loss: 0.00016021728515625 2023-01-23 03:17:20.250148: step: 392/527, loss: 0.0010353089310228825 2023-01-23 03:17:21.387229: step: 396/527, loss: 0.005118751898407936 2023-01-23 03:17:22.521954: step: 400/527, loss: 0.06099729239940643 2023-01-23 03:17:23.643986: step: 404/527, loss: 0.0004983901744708419 2023-01-23 03:17:24.748160: step: 408/527, loss: 0.01264648512005806 2023-01-23 03:17:25.876387: step: 412/527, loss: 0.0031671524047851562 2023-01-23 03:17:26.968492: step: 416/527, loss: 0.013668537139892578 2023-01-23 03:17:28.050607: step: 420/527, loss: 0.00015203953080344945 2023-01-23 03:17:29.157137: step: 424/527, loss: 8.840560622047633e-05 2023-01-23 03:17:30.292619: step: 428/527, loss: 0.023430252447724342 2023-01-23 03:17:31.439551: step: 432/527, loss: 0.00021762849064543843 2023-01-23 03:17:32.566790: step: 436/527, loss: 0.0005807876586914062 2023-01-23 03:17:33.675893: step: 440/527, loss: 0.0004954338073730469 2023-01-23 03:17:34.783907: step: 444/527, loss: 0.8372736573219299 2023-01-23 03:17:35.878017: step: 448/527, loss: 0.009445381350815296 2023-01-23 03:17:36.982113: step: 452/527, loss: 0.009234047494828701 2023-01-23 03:17:38.102773: step: 456/527, loss: 0.0020020485389977694 2023-01-23 03:17:39.206314: step: 460/527, loss: 0.0003277778741903603 2023-01-23 03:17:40.322926: step: 464/527, loss: 0.000448036240413785 2023-01-23 03:17:41.428390: step: 468/527, loss: 0.005885219667106867 2023-01-23 03:17:42.555732: step: 472/527, loss: 0.0005820274818688631 2023-01-23 03:17:43.668808: step: 476/527, loss: 0.0004261017020326108 2023-01-23 03:17:44.804852: step: 480/527, loss: 0.0004280567227397114 2023-01-23 03:17:45.908635: step: 484/527, loss: 0.004059505648910999 2023-01-23 03:17:47.042877: step: 488/527, loss: 0.004740810953080654 2023-01-23 03:17:48.146985: step: 492/527, loss: 0.001324558281339705 2023-01-23 03:17:49.295344: step: 496/527, loss: 0.0010866165393963456 2023-01-23 03:17:50.384558: step: 500/527, loss: 0.011626482009887695 2023-01-23 03:17:51.511955: step: 504/527, loss: 0.0015188216930255294 2023-01-23 03:17:52.630986: step: 508/527, loss: 0.003599930088967085 2023-01-23 03:17:53.739028: step: 512/527, loss: 0.00293140416033566 2023-01-23 03:17:54.836596: step: 516/527, loss: 0.016948889940977097 2023-01-23 03:17:55.968576: step: 520/527, loss: 0.0037254809867590666 2023-01-23 03:17:57.126382: step: 524/527, loss: 0.017209814861416817 2023-01-23 03:17:58.237266: step: 528/527, loss: 0.0046545034274458885 2023-01-23 03:17:59.348888: step: 532/527, loss: 0.004320716951042414 2023-01-23 03:18:00.465389: step: 536/527, loss: 0.00037631989107467234 2023-01-23 03:18:01.578044: step: 540/527, loss: 0.02561493031680584 2023-01-23 03:18:02.680774: step: 544/527, loss: 0.0033080100547522306 2023-01-23 03:18:03.804540: step: 548/527, loss: 0.026097487658262253 2023-01-23 03:18:04.931987: step: 552/527, loss: 0.15177659690380096 2023-01-23 03:18:06.049656: step: 556/527, loss: 0.00023493765911553055 2023-01-23 03:18:07.180384: step: 560/527, loss: 0.03329582139849663 2023-01-23 03:18:08.311297: step: 564/527, loss: 0.00020818710618186742 2023-01-23 03:18:09.421048: step: 568/527, loss: 0.00498156575486064 2023-01-23 03:18:10.541336: step: 572/527, loss: 0.011979294009506702 2023-01-23 03:18:11.663953: step: 576/527, loss: 0.00028972624568268657 2023-01-23 03:18:12.761166: step: 580/527, loss: 0.04299907758831978 2023-01-23 03:18:13.898862: step: 584/527, loss: 0.024701213464140892 2023-01-23 03:18:15.008655: step: 588/527, loss: 0.035338591784238815 2023-01-23 03:18:16.093444: step: 592/527, loss: 0.016944551840424538 2023-01-23 03:18:17.217636: step: 596/527, loss: 0.005271721165627241 2023-01-23 03:18:18.350299: step: 600/527, loss: 0.018448447808623314 2023-01-23 03:18:19.485058: step: 604/527, loss: 0.04133720323443413 2023-01-23 03:18:20.592221: step: 608/527, loss: 0.0001282215234823525 2023-01-23 03:18:21.711115: step: 612/527, loss: 0.1305810511112213 2023-01-23 03:18:22.862601: step: 616/527, loss: 0.017786789685487747 2023-01-23 03:18:23.977976: step: 620/527, loss: 0.12646332383155823 2023-01-23 03:18:25.141426: step: 624/527, loss: 0.0006554127321578562 2023-01-23 03:18:26.268985: step: 628/527, loss: 0.03694958612322807 2023-01-23 03:18:27.387749: step: 632/527, loss: 0.0008533477666787803 2023-01-23 03:18:28.512591: step: 636/527, loss: 2.2935870219953358e-05 2023-01-23 03:18:29.641224: step: 640/527, loss: 0.017215536907315254 2023-01-23 03:18:30.725902: step: 644/527, loss: 0.005025959108024836 2023-01-23 03:18:31.844640: step: 648/527, loss: 0.0014410018920898438 2023-01-23 03:18:32.982878: step: 652/527, loss: 0.0013002397026866674 2023-01-23 03:18:34.107724: step: 656/527, loss: 0.03106250800192356 2023-01-23 03:18:35.248303: step: 660/527, loss: 0.0011707305675372481 2023-01-23 03:18:36.394277: step: 664/527, loss: 0.0026319504249840975 2023-01-23 03:18:37.472773: step: 668/527, loss: 1.926422191900201e-05 2023-01-23 03:18:38.594704: step: 672/527, loss: 0.001215267227962613 2023-01-23 03:18:39.724633: step: 676/527, loss: 0.0006723403930664062 2023-01-23 03:18:40.848500: step: 680/527, loss: 0.05684547498822212 2023-01-23 03:18:42.009371: step: 684/527, loss: 0.019237495958805084 2023-01-23 03:18:43.149503: step: 688/527, loss: 0.00013465881056617945 2023-01-23 03:18:44.261296: step: 692/527, loss: 0.05452442169189453 2023-01-23 03:18:45.370566: step: 696/527, loss: 0.00859603937715292 2023-01-23 03:18:46.449861: step: 700/527, loss: 9.489059448242188e-05 2023-01-23 03:18:47.528564: step: 704/527, loss: 0.0008376121986657381 2023-01-23 03:18:48.637536: step: 708/527, loss: 0.08675622940063477 2023-01-23 03:18:49.752124: step: 712/527, loss: 0.006838035769760609 2023-01-23 03:18:50.888939: step: 716/527, loss: 5.722046262235381e-05 2023-01-23 03:18:52.023357: step: 720/527, loss: 0.01479492150247097 2023-01-23 03:18:53.148298: step: 724/527, loss: 0.01570282131433487 2023-01-23 03:18:54.254640: step: 728/527, loss: 0.4493824541568756 2023-01-23 03:18:55.392437: step: 732/527, loss: 0.0002429485321044922 2023-01-23 03:18:56.479909: step: 736/527, loss: 0.046500302851200104 2023-01-23 03:18:57.581017: step: 740/527, loss: 0.0026671886444091797 2023-01-23 03:18:58.685849: step: 744/527, loss: 0.013757658191025257 2023-01-23 03:18:59.780658: step: 748/527, loss: 9.078979201149195e-05 2023-01-23 03:19:00.890269: step: 752/527, loss: 0.03950929641723633 2023-01-23 03:19:02.059743: step: 756/527, loss: 0.034891605377197266 2023-01-23 03:19:03.163320: step: 760/527, loss: 0.007219600956887007 2023-01-23 03:19:04.249593: step: 764/527, loss: 0.000857925449963659 2023-01-23 03:19:05.382250: step: 768/527, loss: 0.021681785583496094 2023-01-23 03:19:06.500099: step: 772/527, loss: 0.012350846081972122 2023-01-23 03:19:07.605449: step: 776/527, loss: 0.045530129224061966 2023-01-23 03:19:08.714133: step: 780/527, loss: 0.06528101116418839 2023-01-23 03:19:09.855904: step: 784/527, loss: 0.00722503662109375 2023-01-23 03:19:10.965738: step: 788/527, loss: 0.02055356465280056 2023-01-23 03:19:12.062484: step: 792/527, loss: 0.00019121170043945312 2023-01-23 03:19:13.184539: step: 796/527, loss: 0.0017677306896075606 2023-01-23 03:19:14.329701: step: 800/527, loss: 0.07141242176294327 2023-01-23 03:19:15.442030: step: 804/527, loss: -4.0054324017546605e-06 2023-01-23 03:19:16.550499: step: 808/527, loss: 0.010314560495316982 2023-01-23 03:19:17.654147: step: 812/527, loss: 0.026074696332216263 2023-01-23 03:19:18.772344: step: 816/527, loss: 0.14199551939964294 2023-01-23 03:19:19.897147: step: 820/527, loss: 0.02960786782205105 2023-01-23 03:19:21.001639: step: 824/527, loss: 3.252029637224041e-05 2023-01-23 03:19:22.126865: step: 828/527, loss: 0.016623497009277344 2023-01-23 03:19:23.222649: step: 832/527, loss: 0.004015207290649414 2023-01-23 03:19:24.357800: step: 836/527, loss: 0.0009103774791583419 2023-01-23 03:19:25.446153: step: 840/527, loss: 0.008807850070297718 2023-01-23 03:19:26.562733: step: 844/527, loss: 0.002062892774119973 2023-01-23 03:19:27.643562: step: 848/527, loss: 0.011046218685805798 2023-01-23 03:19:28.781915: step: 852/527, loss: 0.005708885379135609 2023-01-23 03:19:29.876637: step: 856/527, loss: 0.0010912418365478516 2023-01-23 03:19:30.969887: step: 860/527, loss: 2.86102294921875e-06 2023-01-23 03:19:32.072966: step: 864/527, loss: 0.04565735161304474 2023-01-23 03:19:33.211551: step: 868/527, loss: 0.010359669104218483 2023-01-23 03:19:34.353104: step: 872/527, loss: 0.005309867672622204 2023-01-23 03:19:35.490686: step: 876/527, loss: 0.0011375427711755037 2023-01-23 03:19:36.602088: step: 880/527, loss: 0.0030698776245117188 2023-01-23 03:19:37.703825: step: 884/527, loss: 0.00040431023808196187 2023-01-23 03:19:38.817117: step: 888/527, loss: 0.0009482384193688631 2023-01-23 03:19:39.960793: step: 892/527, loss: 0.0012921332381665707 2023-01-23 03:19:41.060205: step: 896/527, loss: 0.0362180694937706 2023-01-23 03:19:42.159982: step: 900/527, loss: 0.008481407538056374 2023-01-23 03:19:43.258140: step: 904/527, loss: 0.0001273631933145225 2023-01-23 03:19:44.387355: step: 908/527, loss: 0.004111862741410732 2023-01-23 03:19:45.476959: step: 912/527, loss: 0.0025197984650731087 2023-01-23 03:19:46.576699: step: 916/527, loss: 0.0653291642665863 2023-01-23 03:19:47.695818: step: 920/527, loss: 0.015781832858920097 2023-01-23 03:19:48.879096: step: 924/527, loss: 0.0046710968017578125 2023-01-23 03:19:49.994376: step: 928/527, loss: 1.182556115963962e-05 2023-01-23 03:19:51.107925: step: 932/527, loss: 0.008518218994140625 2023-01-23 03:19:52.210639: step: 936/527, loss: 0.062433816492557526 2023-01-23 03:19:53.362872: step: 940/527, loss: 6.27517729299143e-05 2023-01-23 03:19:54.498772: step: 944/527, loss: 0.0009416580433025956 2023-01-23 03:19:55.619105: step: 948/527, loss: 1.9073486328125e-05 2023-01-23 03:19:56.752696: step: 952/527, loss: 0.03326931223273277 2023-01-23 03:19:57.848573: step: 956/527, loss: 0.0005405425908975303 2023-01-23 03:19:58.954964: step: 960/527, loss: 0.03508793190121651 2023-01-23 03:20:00.086070: step: 964/527, loss: 0.6289817690849304 2023-01-23 03:20:01.235967: step: 968/527, loss: 0.012804890051484108 2023-01-23 03:20:02.395070: step: 972/527, loss: 0.0019335746765136719 2023-01-23 03:20:03.531476: step: 976/527, loss: 0.004665756598114967 2023-01-23 03:20:04.616793: step: 980/527, loss: 0.0023023607209324837 2023-01-23 03:20:05.746116: step: 984/527, loss: 0.00524950074031949 2023-01-23 03:20:06.881208: step: 988/527, loss: 0.028874970972537994 2023-01-23 03:20:07.995970: step: 992/527, loss: 0.00019741058349609375 2023-01-23 03:20:09.135467: step: 996/527, loss: 1.125335711549269e-05 2023-01-23 03:20:10.304061: step: 1000/527, loss: 0.0014068603049963713 2023-01-23 03:20:11.411955: step: 1004/527, loss: 0.0010789871448650956 2023-01-23 03:20:12.532552: step: 1008/527, loss: 0.00011510849435580894 2023-01-23 03:20:13.615518: step: 1012/527, loss: 0.01061267964541912 2023-01-23 03:20:14.730909: step: 1016/527, loss: 0.0029767754022032022 2023-01-23 03:20:15.826439: step: 1020/527, loss: 0.05207844078540802 2023-01-23 03:20:16.953566: step: 1024/527, loss: 0.0007111072773113847 2023-01-23 03:20:18.076345: step: 1028/527, loss: 0.044226959347724915 2023-01-23 03:20:19.180452: step: 1032/527, loss: 0.0357481949031353 2023-01-23 03:20:20.289415: step: 1036/527, loss: 0.0006395339732989669 2023-01-23 03:20:21.400977: step: 1040/527, loss: 0.004071617498993874 2023-01-23 03:20:22.572470: step: 1044/527, loss: 0.054274849593639374 2023-01-23 03:20:23.705814: step: 1048/527, loss: 2.3746490114717744e-05 2023-01-23 03:20:24.796965: step: 1052/527, loss: 0.0006860733265057206 2023-01-23 03:20:25.928975: step: 1056/527, loss: 0.022799300029873848 2023-01-23 03:20:27.028340: step: 1060/527, loss: 0.018406962975859642 2023-01-23 03:20:28.140353: step: 1064/527, loss: 0.004471207037568092 2023-01-23 03:20:29.270834: step: 1068/527, loss: 0.00031604766263626516 2023-01-23 03:20:30.387288: step: 1072/527, loss: 0.0063516139052808285 2023-01-23 03:20:31.504920: step: 1076/527, loss: 0.00361804966814816 2023-01-23 03:20:32.644150: step: 1080/527, loss: 0.03751087188720703 2023-01-23 03:20:33.764738: step: 1084/527, loss: 0.001027870224788785 2023-01-23 03:20:34.866861: step: 1088/527, loss: 0.007287454791367054 2023-01-23 03:20:35.983752: step: 1092/527, loss: 0.025456812232732773 2023-01-23 03:20:37.060632: step: 1096/527, loss: 0.006733942311257124 2023-01-23 03:20:38.156721: step: 1100/527, loss: 0.00207958254031837 2023-01-23 03:20:39.307781: step: 1104/527, loss: 6.32286028121598e-05 2023-01-23 03:20:40.409739: step: 1108/527, loss: 7.009506134636467e-06 2023-01-23 03:20:41.526281: step: 1112/527, loss: 1.697540210443549e-05 2023-01-23 03:20:42.665861: step: 1116/527, loss: 0.017817117273807526 2023-01-23 03:20:43.799237: step: 1120/527, loss: 0.0007919311756268144 2023-01-23 03:20:44.919590: step: 1124/527, loss: 0.015481948852539062 2023-01-23 03:20:46.012258: step: 1128/527, loss: 0.00013103484525345266 2023-01-23 03:20:47.144428: step: 1132/527, loss: 0.022413061931729317 2023-01-23 03:20:48.297671: step: 1136/527, loss: 0.0003266334533691406 2023-01-23 03:20:49.452236: step: 1140/527, loss: 0.00014848708815407008 2023-01-23 03:20:50.577701: step: 1144/527, loss: 0.09833307564258575 2023-01-23 03:20:51.685789: step: 1148/527, loss: 0.02944183349609375 2023-01-23 03:20:52.848741: step: 1152/527, loss: 0.15351122617721558 2023-01-23 03:20:53.976203: step: 1156/527, loss: 0.00013799667067360133 2023-01-23 03:20:55.084276: step: 1160/527, loss: 0.05336713790893555 2023-01-23 03:20:56.215762: step: 1164/527, loss: 7.63893112889491e-05 2023-01-23 03:20:57.371226: step: 1168/527, loss: 0.0007970810402184725 2023-01-23 03:20:58.503755: step: 1172/527, loss: 0.004556465428322554 2023-01-23 03:20:59.624247: step: 1176/527, loss: 0.0010605811839923263 2023-01-23 03:21:00.745146: step: 1180/527, loss: 0.0002649307425599545 2023-01-23 03:21:01.837538: step: 1184/527, loss: 0.060370828956365585 2023-01-23 03:21:02.974132: step: 1188/527, loss: 0.0013395309215411544 2023-01-23 03:21:04.091420: step: 1192/527, loss: 0.001082992646843195 2023-01-23 03:21:05.193536: step: 1196/527, loss: 0.014884661883115768 2023-01-23 03:21:06.303052: step: 1200/527, loss: 0.009298896417021751 2023-01-23 03:21:07.396747: step: 1204/527, loss: 2.96115867968183e-05 2023-01-23 03:21:08.489701: step: 1208/527, loss: 0.0247941967099905 2023-01-23 03:21:09.585981: step: 1212/527, loss: 0.0055252076126635075 2023-01-23 03:21:10.697306: step: 1216/527, loss: 0.001121139619499445 2023-01-23 03:21:11.821279: step: 1220/527, loss: 0.017994403839111328 2023-01-23 03:21:12.929358: step: 1224/527, loss: 0.02340412139892578 2023-01-23 03:21:14.066728: step: 1228/527, loss: 0.00017147065955214202 2023-01-23 03:21:15.174848: step: 1232/527, loss: 0.009339428506791592 2023-01-23 03:21:16.269846: step: 1236/527, loss: 0.0006149769178591669 2023-01-23 03:21:17.389527: step: 1240/527, loss: 0.00137500767596066 2023-01-23 03:21:18.477886: step: 1244/527, loss: 0.043062400072813034 2023-01-23 03:21:19.591859: step: 1248/527, loss: 0.023932933807373047 2023-01-23 03:21:20.703460: step: 1252/527, loss: 0.0021259307395666838 2023-01-23 03:21:21.806417: step: 1256/527, loss: 0.00031986235990189016 2023-01-23 03:21:22.901179: step: 1260/527, loss: 0.0002758025948423892 2023-01-23 03:21:24.011759: step: 1264/527, loss: 0.002234077313914895 2023-01-23 03:21:25.105909: step: 1268/527, loss: 0.05120287090539932 2023-01-23 03:21:26.257783: step: 1272/527, loss: 0.022077560424804688 2023-01-23 03:21:27.377879: step: 1276/527, loss: 0.03183254972100258 2023-01-23 03:21:28.471031: step: 1280/527, loss: 0.027191162109375 2023-01-23 03:21:29.586916: step: 1284/527, loss: 0.0013658524258062243 2023-01-23 03:21:30.696336: step: 1288/527, loss: 0.01043081283569336 2023-01-23 03:21:31.804932: step: 1292/527, loss: 2.7275087632006034e-05 2023-01-23 03:21:32.950104: step: 1296/527, loss: 0.02287764847278595 2023-01-23 03:21:34.020255: step: 1300/527, loss: 0.00013208389282226562 2023-01-23 03:21:35.138734: step: 1304/527, loss: 0.08474164456129074 2023-01-23 03:21:36.246684: step: 1308/527, loss: 0.022499753162264824 2023-01-23 03:21:37.362203: step: 1312/527, loss: 0.015335272997617722 2023-01-23 03:21:38.464067: step: 1316/527, loss: 0.13820815086364746 2023-01-23 03:21:39.582681: step: 1320/527, loss: 0.00022163391986396164 2023-01-23 03:21:40.709562: step: 1324/527, loss: 0.011668014340102673 2023-01-23 03:21:41.807008: step: 1328/527, loss: 0.00010423660569358617 2023-01-23 03:21:42.948475: step: 1332/527, loss: 0.0004512786981649697 2023-01-23 03:21:44.070034: step: 1336/527, loss: 0.012129020877182484 2023-01-23 03:21:45.200413: step: 1340/527, loss: 0.0027151110116392374 2023-01-23 03:21:46.300433: step: 1344/527, loss: 0.0013480186462402344 2023-01-23 03:21:47.429664: step: 1348/527, loss: 0.0011337280739098787 2023-01-23 03:21:48.531755: step: 1352/527, loss: 0.02316303364932537 2023-01-23 03:21:49.656840: step: 1356/527, loss: 0.005997275933623314 2023-01-23 03:21:50.762920: step: 1360/527, loss: 0.010057831183075905 2023-01-23 03:21:51.892754: step: 1364/527, loss: 0.0036478997208178043 2023-01-23 03:21:53.013730: step: 1368/527, loss: 0.10908007621765137 2023-01-23 03:21:54.117884: step: 1372/527, loss: 0.0001846313534770161 2023-01-23 03:21:55.228374: step: 1376/527, loss: 0.0015518426662310958 2023-01-23 03:21:56.336726: step: 1380/527, loss: 0.00038137438241392374 2023-01-23 03:21:57.473579: step: 1384/527, loss: 0.043082524091005325 2023-01-23 03:21:58.565441: step: 1388/527, loss: 7.715225365245715e-05 2023-01-23 03:21:59.673452: step: 1392/527, loss: 0.0017282486660405993 2023-01-23 03:22:00.788641: step: 1396/527, loss: 0.058941081166267395 2023-01-23 03:22:01.909918: step: 1400/527, loss: 1.2063979738741182e-05 2023-01-23 03:22:03.013540: step: 1404/527, loss: 0.06037035211920738 2023-01-23 03:22:04.159338: step: 1408/527, loss: 7.82012921263231e-06 2023-01-23 03:22:05.282365: step: 1412/527, loss: 0.0002521514834370464 2023-01-23 03:22:06.395187: step: 1416/527, loss: 0.009723186492919922 2023-01-23 03:22:07.514437: step: 1420/527, loss: 0.0006910323863849044 2023-01-23 03:22:08.622655: step: 1424/527, loss: 0.0027882575523108244 2023-01-23 03:22:09.725626: step: 1428/527, loss: 0.0022771835792809725 2023-01-23 03:22:10.855576: step: 1432/527, loss: 0.0024145126808434725 2023-01-23 03:22:11.966842: step: 1436/527, loss: 6.67572021484375e-06 2023-01-23 03:22:13.077434: step: 1440/527, loss: 0.011468315497040749 2023-01-23 03:22:14.160805: step: 1444/527, loss: 0.0697694718837738 2023-01-23 03:22:15.269682: step: 1448/527, loss: 5.2928924560546875e-05 2023-01-23 03:22:16.386592: step: 1452/527, loss: 0.007643222808837891 2023-01-23 03:22:17.504649: step: 1456/527, loss: 0.0215485580265522 2023-01-23 03:22:18.578131: step: 1460/527, loss: 0.00822000578045845 2023-01-23 03:22:19.720545: step: 1464/527, loss: 0.0030698776245117188 2023-01-23 03:22:20.812718: step: 1468/527, loss: 0.004712950903922319 2023-01-23 03:22:21.953123: step: 1472/527, loss: 0.0005050658946856856 2023-01-23 03:22:23.094925: step: 1476/527, loss: -2.1457672119140625e-06 2023-01-23 03:22:24.239082: step: 1480/527, loss: 0.0072879791259765625 2023-01-23 03:22:25.330371: step: 1484/527, loss: 0.0498899444937706 2023-01-23 03:22:26.446213: step: 1488/527, loss: 0.002757358830422163 2023-01-23 03:22:27.537295: step: 1492/527, loss: 0.013018417172133923 2023-01-23 03:22:28.636945: step: 1496/527, loss: 0.0013814927078783512 2023-01-23 03:22:29.768498: step: 1500/527, loss: 0.0156676284968853 2023-01-23 03:22:30.917255: step: 1504/527, loss: 0.010724353604018688 2023-01-23 03:22:32.008633: step: 1508/527, loss: 0.04039440304040909 2023-01-23 03:22:33.133804: step: 1512/527, loss: 0.012734705582261086 2023-01-23 03:22:34.258944: step: 1516/527, loss: 0.0029209135100245476 2023-01-23 03:22:35.366582: step: 1520/527, loss: 0.02089405059814453 2023-01-23 03:22:36.493308: step: 1524/527, loss: 0.01139287929981947 2023-01-23 03:22:37.643663: step: 1528/527, loss: 0.03298530727624893 2023-01-23 03:22:38.757845: step: 1532/527, loss: 0.535767138004303 2023-01-23 03:22:39.859865: step: 1536/527, loss: 0.0004978179931640625 2023-01-23 03:22:40.973482: step: 1540/527, loss: 0.015664197504520416 2023-01-23 03:22:42.079489: step: 1544/527, loss: 0.008694302290678024 2023-01-23 03:22:43.179636: step: 1548/527, loss: 0.0002738952753134072 2023-01-23 03:22:44.297885: step: 1552/527, loss: 0.0004563808615785092 2023-01-23 03:22:45.401364: step: 1556/527, loss: 2.527237120375503e-05 2023-01-23 03:22:46.492489: step: 1560/527, loss: 0.00012454987154342234 2023-01-23 03:22:47.601034: step: 1564/527, loss: 0.0661611557006836 2023-01-23 03:22:48.721230: step: 1568/527, loss: 0.0479188933968544 2023-01-23 03:22:49.874343: step: 1572/527, loss: 0.06435756385326385 2023-01-23 03:22:50.969893: step: 1576/527, loss: 0.0600128173828125 2023-01-23 03:22:52.111787: step: 1580/527, loss: 0.007936668582260609 2023-01-23 03:22:53.236622: step: 1584/527, loss: 0.16411086916923523 2023-01-23 03:22:54.320056: step: 1588/527, loss: 2.2758544218959287e-05 2023-01-23 03:22:55.410582: step: 1592/527, loss: 0.0006431579240597785 2023-01-23 03:22:56.547360: step: 1596/527, loss: 0.010871505364775658 2023-01-23 03:22:57.639055: step: 1600/527, loss: 0.0017646790947765112 2023-01-23 03:22:58.738296: step: 1604/527, loss: 0.013074017129838467 2023-01-23 03:22:59.854965: step: 1608/527, loss: 0.0011531829368323088 2023-01-23 03:23:00.974651: step: 1612/527, loss: 0.00038280486478470266 2023-01-23 03:23:02.082004: step: 1616/527, loss: 0.007318305782973766 2023-01-23 03:23:03.181675: step: 1620/527, loss: 0.01648383028805256 2023-01-23 03:23:04.290735: step: 1624/527, loss: 0.006229210179299116 2023-01-23 03:23:05.399809: step: 1628/527, loss: 0.0038916587363928556 2023-01-23 03:23:06.515653: step: 1632/527, loss: 0.013828659430146217 2023-01-23 03:23:07.627913: step: 1636/527, loss: 0.25910741090774536 2023-01-23 03:23:08.735266: step: 1640/527, loss: 3.128051685052924e-05 2023-01-23 03:23:09.830476: step: 1644/527, loss: 0.049620676785707474 2023-01-23 03:23:10.958705: step: 1648/527, loss: 0.003712183330208063 2023-01-23 03:23:12.132377: step: 1652/527, loss: 0.12752917408943176 2023-01-23 03:23:13.227604: step: 1656/527, loss: 0.025815939530730247 2023-01-23 03:23:14.332357: step: 1660/527, loss: 0.0006519794696941972 2023-01-23 03:23:15.428918: step: 1664/527, loss: 0.008729267865419388 2023-01-23 03:23:16.524375: step: 1668/527, loss: 0.006938266567885876 2023-01-23 03:23:17.685783: step: 1672/527, loss: 0.00023117066302802414 2023-01-23 03:23:18.834328: step: 1676/527, loss: 0.0013781548477709293 2023-01-23 03:23:19.982214: step: 1680/527, loss: 0.013305379077792168 2023-01-23 03:23:21.083778: step: 1684/527, loss: 6.866455078125e-05 2023-01-23 03:23:22.225110: step: 1688/527, loss: 0.0016859056195244193 2023-01-23 03:23:23.332945: step: 1692/527, loss: 3.719329833984375e-05 2023-01-23 03:23:24.442865: step: 1696/527, loss: 0.0001548767031636089 2023-01-23 03:23:25.550945: step: 1700/527, loss: 3.719329924933845e-06 2023-01-23 03:23:26.631460: step: 1704/527, loss: 0.028162576258182526 2023-01-23 03:23:27.768795: step: 1708/527, loss: 3.5953522456111386e-05 2023-01-23 03:23:28.884259: step: 1712/527, loss: 0.0257783904671669 2023-01-23 03:23:29.987523: step: 1716/527, loss: 1.3542176020564511e-05 2023-01-23 03:23:31.095633: step: 1720/527, loss: 0.00014452934556175023 2023-01-23 03:23:32.229302: step: 1724/527, loss: 0.0040236469358205795 2023-01-23 03:23:33.367634: step: 1728/527, loss: 1.7929078239831142e-05 2023-01-23 03:23:34.507973: step: 1732/527, loss: 0.0007322788005694747 2023-01-23 03:23:35.611809: step: 1736/527, loss: 0.0008134841918945312 2023-01-23 03:23:36.733225: step: 1740/527, loss: 0.0024483681190758944 2023-01-23 03:23:37.849609: step: 1744/527, loss: 0.0016332627274096012 2023-01-23 03:23:38.958653: step: 1748/527, loss: 0.004801368806511164 2023-01-23 03:23:40.057158: step: 1752/527, loss: 0.0023527145385742188 2023-01-23 03:23:41.203769: step: 1756/527, loss: 0.17571859061717987 2023-01-23 03:23:42.307208: step: 1760/527, loss: 0.0013370513916015625 2023-01-23 03:23:43.443422: step: 1764/527, loss: 0.00010089873831020668 2023-01-23 03:23:44.546125: step: 1768/527, loss: 0.00666465749964118 2023-01-23 03:23:45.659950: step: 1772/527, loss: 0.010736274532973766 2023-01-23 03:23:46.745055: step: 1776/527, loss: 0.0024148940574377775 2023-01-23 03:23:47.879098: step: 1780/527, loss: 0.011734199710190296 2023-01-23 03:23:48.982009: step: 1784/527, loss: 0.013148784637451172 2023-01-23 03:23:50.079450: step: 1788/527, loss: 0.003081083297729492 2023-01-23 03:23:51.216318: step: 1792/527, loss: 0.006851387210190296 2023-01-23 03:23:52.329183: step: 1796/527, loss: 0.002324390457943082 2023-01-23 03:23:53.473307: step: 1800/527, loss: 0.0006364822038449347 2023-01-23 03:23:54.626661: step: 1804/527, loss: 0.00011000633821822703 2023-01-23 03:23:55.731239: step: 1808/527, loss: 0.015353584662079811 2023-01-23 03:23:56.819958: step: 1812/527, loss: 0.016373634338378906 2023-01-23 03:23:57.930405: step: 1816/527, loss: 0.04403867945075035 2023-01-23 03:23:59.064245: step: 1820/527, loss: 0.17885570228099823 2023-01-23 03:24:00.182042: step: 1824/527, loss: 0.012077808380126953 2023-01-23 03:24:01.264718: step: 1828/527, loss: 0.002914238255470991 2023-01-23 03:24:02.384961: step: 1832/527, loss: 0.0011190414661541581 2023-01-23 03:24:03.468556: step: 1836/527, loss: 0.003102922346442938 2023-01-23 03:24:04.607542: step: 1840/527, loss: 0.0005242824554443359 2023-01-23 03:24:05.729846: step: 1844/527, loss: 0.05102214962244034 2023-01-23 03:24:06.875553: step: 1848/527, loss: 0.013502693735063076 2023-01-23 03:24:07.995888: step: 1852/527, loss: 0.015835857018828392 2023-01-23 03:24:09.121491: step: 1856/527, loss: 0.008668041788041592 2023-01-23 03:24:10.237119: step: 1860/527, loss: 0.0017417907947674394 2023-01-23 03:24:11.349428: step: 1864/527, loss: 0.0009460925939492881 2023-01-23 03:24:12.471351: step: 1868/527, loss: 1.5735626220703125e-05 2023-01-23 03:24:13.604575: step: 1872/527, loss: 0.024711990728974342 2023-01-23 03:24:14.699977: step: 1876/527, loss: 0.008853626437485218 2023-01-23 03:24:15.811890: step: 1880/527, loss: 0.008028030395507812 2023-01-23 03:24:16.941879: step: 1884/527, loss: 6.313324411166832e-05 2023-01-23 03:24:18.054062: step: 1888/527, loss: 0.006133460905402899 2023-01-23 03:24:19.197416: step: 1892/527, loss: 0.03036346472799778 2023-01-23 03:24:20.289108: step: 1896/527, loss: 0.00024700164794921875 2023-01-23 03:24:21.383389: step: 1900/527, loss: 0.006574248895049095 2023-01-23 03:24:22.525792: step: 1904/527, loss: 0.004259156994521618 2023-01-23 03:24:23.634137: step: 1908/527, loss: 0.001476716948673129 2023-01-23 03:24:24.807121: step: 1912/527, loss: 0.01608562469482422 2023-01-23 03:24:25.937927: step: 1916/527, loss: 0.020825671032071114 2023-01-23 03:24:27.074908: step: 1920/527, loss: 0.05935373157262802 2023-01-23 03:24:28.214500: step: 1924/527, loss: 0.0001262664736714214 2023-01-23 03:24:29.303858: step: 1928/527, loss: 0.002974605420604348 2023-01-23 03:24:30.416301: step: 1932/527, loss: 1.3923644473834429e-05 2023-01-23 03:24:31.515317: step: 1936/527, loss: 0.00023336410231422633 2023-01-23 03:24:32.616750: step: 1940/527, loss: 0.0008708000532351434 2023-01-23 03:24:33.743868: step: 1944/527, loss: 6.46591215627268e-05 2023-01-23 03:24:34.841965: step: 1948/527, loss: 0.0011211395030841231 2023-01-23 03:24:35.963294: step: 1952/527, loss: 0.004268551245331764 2023-01-23 03:24:37.056382: step: 1956/527, loss: 0.015630245208740234 2023-01-23 03:24:38.184608: step: 1960/527, loss: 0.010922718793153763 2023-01-23 03:24:39.276461: step: 1964/527, loss: 0.0007017135503701866 2023-01-23 03:24:40.412985: step: 1968/527, loss: 0.0008071899646893144 2023-01-23 03:24:41.504507: step: 1972/527, loss: 0.0003615856112446636 2023-01-23 03:24:42.622358: step: 1976/527, loss: 0.0011043548583984375 2023-01-23 03:24:43.763454: step: 1980/527, loss: 0.017474746331572533 2023-01-23 03:24:44.898999: step: 1984/527, loss: 0.00515251187607646 2023-01-23 03:24:46.012257: step: 1988/527, loss: 0.00406303396448493 2023-01-23 03:24:47.111838: step: 1992/527, loss: 2.7227401005802676e-05 2023-01-23 03:24:48.220524: step: 1996/527, loss: 0.003277254058048129 2023-01-23 03:24:49.328355: step: 2000/527, loss: 9.098053124034777e-05 2023-01-23 03:24:50.434945: step: 2004/527, loss: 0.0605618953704834 2023-01-23 03:24:51.532088: step: 2008/527, loss: 2.6702882678364404e-05 2023-01-23 03:24:52.632803: step: 2012/527, loss: 0.00025215151254087687 2023-01-23 03:24:53.732238: step: 2016/527, loss: 0.0003925323544535786 2023-01-23 03:24:54.827455: step: 2020/527, loss: 0.006507683079689741 2023-01-23 03:24:55.924818: step: 2024/527, loss: 0.0029703141190111637 2023-01-23 03:24:57.037694: step: 2028/527, loss: 0.04039516672492027 2023-01-23 03:24:58.160929: step: 2032/527, loss: 0.010406875982880592 2023-01-23 03:24:59.272233: step: 2036/527, loss: 0.013894462957978249 2023-01-23 03:25:00.346365: step: 2040/527, loss: 5.0067901611328125e-06 2023-01-23 03:25:01.452740: step: 2044/527, loss: 0.00015916825213935226 2023-01-23 03:25:02.591887: step: 2048/527, loss: 7.028579420875758e-05 2023-01-23 03:25:03.692161: step: 2052/527, loss: 3.318786548334174e-05 2023-01-23 03:25:04.823356: step: 2056/527, loss: 0.01747283898293972 2023-01-23 03:25:05.942365: step: 2060/527, loss: 0.01985015906393528 2023-01-23 03:25:07.044977: step: 2064/527, loss: 0.0006216049077920616 2023-01-23 03:25:08.160424: step: 2068/527, loss: 0.0006938934093341231 2023-01-23 03:25:09.312764: step: 2072/527, loss: 5.4168704082258046e-05 2023-01-23 03:25:10.427751: step: 2076/527, loss: 0.03682155907154083 2023-01-23 03:25:11.533550: step: 2080/527, loss: 0.010202979668974876 2023-01-23 03:25:12.640530: step: 2084/527, loss: 0.07242298126220703 2023-01-23 03:25:13.764722: step: 2088/527, loss: 0.04967985302209854 2023-01-23 03:25:14.881775: step: 2092/527, loss: 0.23714673519134521 2023-01-23 03:25:16.003915: step: 2096/527, loss: 0.4237120747566223 2023-01-23 03:25:17.127243: step: 2100/527, loss: 0.025271130725741386 2023-01-23 03:25:18.228107: step: 2104/527, loss: 0.022679520770907402 2023-01-23 03:25:19.376899: step: 2108/527, loss: 0.0006148338434286416 ================================================== Loss: 0.021 -------------------- Dev: {'event': {'p': 0.6010526315789474, 'r': 0.7603195739014648, 'f1': 0.6713697824808936}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Test: {'event': {'p': 0.6353974121996303, 'r': 0.7857142857142857, 'f1': 0.7026060296371998}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Chinese: {'event': {'p': 0.5875, 'r': 0.8703703703703703, 'f1': 0.7014925373134329}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Korean: {'event': {'p': 0.6530612244897959, 'r': 0.5079365079365079, 'f1': 0.5714285714285714}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Russian: {'event': {'p': 0.4857142857142857, 'r': 0.4722222222222222, 'f1': 0.47887323943661975}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066252587991718, 'r': 0.7802929427430093, 'f1': 0.6825859056493885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Eng Test for Korean: {'event': {'p': 0.62580054894785, 'r': 0.7817142857142857, 'f1': 0.6951219512195121}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 23 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:26:01.011853: step: 4/527, loss: 0.009901237674057484 2023-01-23 03:26:02.121003: step: 8/527, loss: 0.00029506682767532766 2023-01-23 03:26:03.213078: step: 12/527, loss: 0.011362170800566673 2023-01-23 03:26:04.338172: step: 16/527, loss: 0.00234565744176507 2023-01-23 03:26:05.440093: step: 20/527, loss: 0.0014167786575853825 2023-01-23 03:26:06.559538: step: 24/527, loss: 0.02457427978515625 2023-01-23 03:26:07.687991: step: 28/527, loss: 0.10041961073875427 2023-01-23 03:26:08.799007: step: 32/527, loss: 0.30272847414016724 2023-01-23 03:26:09.928529: step: 36/527, loss: 0.0029817582108080387 2023-01-23 03:26:11.018657: step: 40/527, loss: 0.009951401501893997 2023-01-23 03:26:12.159058: step: 44/527, loss: 0.009654236026108265 2023-01-23 03:26:13.245313: step: 48/527, loss: 0.004967212677001953 2023-01-23 03:26:14.359093: step: 52/527, loss: 0.0023446083068847656 2023-01-23 03:26:15.494962: step: 56/527, loss: 0.00047435759915970266 2023-01-23 03:26:16.575427: step: 60/527, loss: 0.003908920101821423 2023-01-23 03:26:17.738070: step: 64/527, loss: 4.978180004400201e-05 2023-01-23 03:26:18.909481: step: 68/527, loss: 0.00013604163541458547 2023-01-23 03:26:20.013802: step: 72/527, loss: 0.01138296164572239 2023-01-23 03:26:21.122688: step: 76/527, loss: 0.0014940262772142887 2023-01-23 03:26:22.260587: step: 80/527, loss: 2.441406286379788e-05 2023-01-23 03:26:23.388696: step: 84/527, loss: 0.0004268646298442036 2023-01-23 03:26:24.495752: step: 88/527, loss: 0.0005876541254110634 2023-01-23 03:26:25.628760: step: 92/527, loss: 0.0018032074440270662 2023-01-23 03:26:26.736134: step: 96/527, loss: 0.0010448455577716231 2023-01-23 03:26:27.858161: step: 100/527, loss: 1.52587890625e-05 2023-01-23 03:26:28.963993: step: 104/527, loss: 0.0008096694946289062 2023-01-23 03:26:30.087469: step: 108/527, loss: 0.0017808914417400956 2023-01-23 03:26:31.180873: step: 112/527, loss: 0.00030155183048918843 2023-01-23 03:26:32.272721: step: 116/527, loss: 0.04763726890087128 2023-01-23 03:26:33.384582: step: 120/527, loss: 0.02572164684534073 2023-01-23 03:26:34.479639: step: 124/527, loss: 0.0004935264587402344 2023-01-23 03:26:35.579276: step: 128/527, loss: 0.00024518967256881297 2023-01-23 03:26:36.680615: step: 132/527, loss: 0.0001848220854299143 2023-01-23 03:26:37.786302: step: 136/527, loss: 0.06323890388011932 2023-01-23 03:26:38.909405: step: 140/527, loss: 4.844665454584174e-05 2023-01-23 03:26:40.036456: step: 144/527, loss: 0.007431411650031805 2023-01-23 03:26:41.158961: step: 148/527, loss: 5.397796485340223e-05 2023-01-23 03:26:42.305614: step: 152/527, loss: 0.01949443854391575 2023-01-23 03:26:43.425950: step: 156/527, loss: 0.030301380902528763 2023-01-23 03:26:44.524682: step: 160/527, loss: 0.0008165359613485634 2023-01-23 03:26:45.674779: step: 164/527, loss: 0.004011916927993298 2023-01-23 03:26:46.785466: step: 168/527, loss: 0.007187986746430397 2023-01-23 03:26:47.880972: step: 172/527, loss: 0.00019750595674850047 2023-01-23 03:26:48.980138: step: 176/527, loss: 2.6321413315599784e-05 2023-01-23 03:26:50.084641: step: 180/527, loss: 0.0015926361083984375 2023-01-23 03:26:51.243537: step: 184/527, loss: -3.051757857974735e-06 2023-01-23 03:26:52.378470: step: 188/527, loss: 0.002456235932186246 2023-01-23 03:26:53.475273: step: 192/527, loss: 0.0038700581062585115 2023-01-23 03:26:54.589144: step: 196/527, loss: 0.004583549685776234 2023-01-23 03:26:55.701362: step: 200/527, loss: 0.0001430511474609375 2023-01-23 03:26:56.789861: step: 204/527, loss: 0.04462156444787979 2023-01-23 03:26:57.890965: step: 208/527, loss: 0.022519491612911224 2023-01-23 03:26:59.018783: step: 212/527, loss: 0.004508304409682751 2023-01-23 03:27:00.138357: step: 216/527, loss: 0.0004117012140341103 2023-01-23 03:27:01.257323: step: 220/527, loss: 0.006398010067641735 2023-01-23 03:27:02.404469: step: 224/527, loss: 8.79287690622732e-05 2023-01-23 03:27:03.524935: step: 228/527, loss: 0.00012488366337493062 2023-01-23 03:27:04.632341: step: 232/527, loss: 0.0002541542053222656 2023-01-23 03:27:05.756704: step: 236/527, loss: 0.0023065567947924137 2023-01-23 03:27:06.867574: step: 240/527, loss: 0.0010856628650799394 2023-01-23 03:27:07.961992: step: 244/527, loss: 0.005016136448830366 2023-01-23 03:27:09.058454: step: 248/527, loss: 0.012585067190229893 2023-01-23 03:27:10.228595: step: 252/527, loss: 0.030712127685546875 2023-01-23 03:27:11.309358: step: 256/527, loss: 0.03115549124777317 2023-01-23 03:27:12.422338: step: 260/527, loss: 0.005206203553825617 2023-01-23 03:27:13.534537: step: 264/527, loss: 0.06761131435632706 2023-01-23 03:27:14.660138: step: 268/527, loss: 0.023745572194457054 2023-01-23 03:27:15.823571: step: 272/527, loss: 0.0018802642589434981 2023-01-23 03:27:16.939015: step: 276/527, loss: 0.009649086743593216 2023-01-23 03:27:18.051924: step: 280/527, loss: 0.006797504145652056 2023-01-23 03:27:19.174844: step: 284/527, loss: 0.006094837561249733 2023-01-23 03:27:20.301240: step: 288/527, loss: 0.000583267246838659 2023-01-23 03:27:21.411323: step: 292/527, loss: -4.9591067181609105e-06 2023-01-23 03:27:22.554270: step: 296/527, loss: 0.010476112365722656 2023-01-23 03:27:23.684668: step: 300/527, loss: 0.024883801117539406 2023-01-23 03:27:24.778575: step: 304/527, loss: 0.00036878586979582906 2023-01-23 03:27:25.905242: step: 308/527, loss: 0.007901382632553577 2023-01-23 03:27:27.026153: step: 312/527, loss: 0.09342136234045029 2023-01-23 03:27:28.117923: step: 316/527, loss: 1.3256073543743696e-05 2023-01-23 03:27:29.258369: step: 320/527, loss: 0.0015665054088458419 2023-01-23 03:27:30.350672: step: 324/527, loss: 0.00010986328561557457 2023-01-23 03:27:31.464075: step: 328/527, loss: 0.2275102585554123 2023-01-23 03:27:32.581490: step: 332/527, loss: 0.010584068484604359 2023-01-23 03:27:33.704060: step: 336/527, loss: 0.0490722693502903 2023-01-23 03:27:34.837221: step: 340/527, loss: 0.004690027330070734 2023-01-23 03:27:35.969952: step: 344/527, loss: 0.042484235018491745 2023-01-23 03:27:37.077610: step: 348/527, loss: 0.01852741278707981 2023-01-23 03:27:38.176266: step: 352/527, loss: 0.0001046180768753402 2023-01-23 03:27:39.297336: step: 356/527, loss: 0.00253639230504632 2023-01-23 03:27:40.404704: step: 360/527, loss: 0.005726623814553022 2023-01-23 03:27:41.528418: step: 364/527, loss: 0.0002918243408203125 2023-01-23 03:27:42.642320: step: 368/527, loss: 0.013079452328383923 2023-01-23 03:27:43.764073: step: 372/527, loss: 0.0007347107166424394 2023-01-23 03:27:44.882515: step: 376/527, loss: 2.002716064453125e-05 2023-01-23 03:27:45.987568: step: 380/527, loss: 0.021936701610684395 2023-01-23 03:27:47.101860: step: 384/527, loss: 0.0001683235022937879 2023-01-23 03:27:48.206394: step: 388/527, loss: 0.022719383239746094 2023-01-23 03:27:49.309528: step: 392/527, loss: 0.0021144866477698088 2023-01-23 03:27:50.423491: step: 396/527, loss: 1.1205673217773438e-05 2023-01-23 03:27:51.533067: step: 400/527, loss: 0.002982711885124445 2023-01-23 03:27:52.666146: step: 404/527, loss: 0.0012072563404217362 2023-01-23 03:27:53.813198: step: 408/527, loss: 0.03755536302924156 2023-01-23 03:27:54.914449: step: 412/527, loss: 0.015459251590073109 2023-01-23 03:27:56.100555: step: 416/527, loss: 0.006657600402832031 2023-01-23 03:27:57.205894: step: 420/527, loss: 0.00037388800410553813 2023-01-23 03:27:58.314116: step: 424/527, loss: 0.017155839130282402 2023-01-23 03:27:59.405308: step: 428/527, loss: 0.9744386672973633 2023-01-23 03:28:00.521621: step: 432/527, loss: 0.00257701869122684 2023-01-23 03:28:01.617606: step: 436/527, loss: 4.825591895496473e-05 2023-01-23 03:28:02.731897: step: 440/527, loss: 0.32267218828201294 2023-01-23 03:28:03.848689: step: 444/527, loss: 0.026952551677823067 2023-01-23 03:28:04.962916: step: 448/527, loss: 0.00016851424879860133 2023-01-23 03:28:06.081414: step: 452/527, loss: 0.0002056121884379536 2023-01-23 03:28:07.194010: step: 456/527, loss: 0.004977417178452015 2023-01-23 03:28:08.313334: step: 460/527, loss: 2.307891918462701e-05 2023-01-23 03:28:09.430763: step: 464/527, loss: 0.003697967389598489 2023-01-23 03:28:10.547219: step: 468/527, loss: 0.00037741661071777344 2023-01-23 03:28:11.665543: step: 472/527, loss: 0.003179276129230857 2023-01-23 03:28:12.829648: step: 476/527, loss: 0.0006736755603924394 2023-01-23 03:28:13.978183: step: 480/527, loss: 0.017903519794344902 2023-01-23 03:28:15.080814: step: 484/527, loss: 0.000255393999395892 2023-01-23 03:28:16.168829: step: 488/527, loss: 0.04050245136022568 2023-01-23 03:28:17.287365: step: 492/527, loss: 0.025121737271547318 2023-01-23 03:28:18.398126: step: 496/527, loss: 0.0025573731400072575 2023-01-23 03:28:19.555971: step: 500/527, loss: 0.1347423940896988 2023-01-23 03:28:20.700649: step: 504/527, loss: 0.026486970484256744 2023-01-23 03:28:21.810244: step: 508/527, loss: 0.020560551434755325 2023-01-23 03:28:22.925224: step: 512/527, loss: 0.01280517689883709 2023-01-23 03:28:24.042818: step: 516/527, loss: 0.009318160824477673 2023-01-23 03:28:25.178260: step: 520/527, loss: 0.08470630645751953 2023-01-23 03:28:26.288890: step: 524/527, loss: 0.0004506111145019531 2023-01-23 03:28:27.437146: step: 528/527, loss: 1.4495850336970761e-05 2023-01-23 03:28:28.545778: step: 532/527, loss: 0.0031223297119140625 2023-01-23 03:28:29.669797: step: 536/527, loss: 0.001815033028833568 2023-01-23 03:28:30.802919: step: 540/527, loss: 0.2849872410297394 2023-01-23 03:28:31.959011: step: 544/527, loss: 0.002042627427726984 2023-01-23 03:28:33.068825: step: 548/527, loss: 7.805824134266004e-05 2023-01-23 03:28:34.174164: step: 552/527, loss: 0.009734439663589 2023-01-23 03:28:35.284491: step: 556/527, loss: 0.011613654904067516 2023-01-23 03:28:36.393242: step: 560/527, loss: 0.01763901859521866 2023-01-23 03:28:37.543231: step: 564/527, loss: 0.00961456261575222 2023-01-23 03:28:38.653019: step: 568/527, loss: 0.00048232078552246094 2023-01-23 03:28:39.780326: step: 572/527, loss: 0.0001677513209870085 2023-01-23 03:28:40.890897: step: 576/527, loss: 0.1305990219116211 2023-01-23 03:28:41.980231: step: 580/527, loss: 0.5182012319564819 2023-01-23 03:28:43.112454: step: 584/527, loss: 0.008178330026566982 2023-01-23 03:28:44.239367: step: 588/527, loss: 0.011240148916840553 2023-01-23 03:28:45.361255: step: 592/527, loss: 1.144409225162235e-06 2023-01-23 03:28:46.476199: step: 596/527, loss: 0.000789260899182409 2023-01-23 03:28:47.619660: step: 600/527, loss: 0.038184165954589844 2023-01-23 03:28:48.768743: step: 604/527, loss: 0.005100298207253218 2023-01-23 03:28:49.896870: step: 608/527, loss: 1.1539459592313506e-05 2023-01-23 03:28:51.008821: step: 612/527, loss: 0.05793943256139755 2023-01-23 03:28:52.099114: step: 616/527, loss: 0.0009870529174804688 2023-01-23 03:28:53.197355: step: 620/527, loss: 0.00041837693424895406 2023-01-23 03:28:54.324258: step: 624/527, loss: 0.05193343386054039 2023-01-23 03:28:55.459904: step: 628/527, loss: 0.5337886810302734 2023-01-23 03:28:56.575870: step: 632/527, loss: 0.0038826942909508944 2023-01-23 03:28:57.710666: step: 636/527, loss: 4.472732325666584e-05 2023-01-23 03:28:58.839699: step: 640/527, loss: 0.0014015197521075606 2023-01-23 03:28:59.961909: step: 644/527, loss: 0.00017395020404364914 2023-01-23 03:29:01.073584: step: 648/527, loss: 0.00020732879056595266 2023-01-23 03:29:02.200426: step: 652/527, loss: 0.00011711120896507055 2023-01-23 03:29:03.331686: step: 656/527, loss: 0.0033273696899414062 2023-01-23 03:29:04.486753: step: 660/527, loss: 0.029459573328495026 2023-01-23 03:29:05.574703: step: 664/527, loss: 0.01702442206442356 2023-01-23 03:29:06.673707: step: 668/527, loss: 0.03598365932703018 2023-01-23 03:29:07.771549: step: 672/527, loss: 0.009150505065917969 2023-01-23 03:29:08.894555: step: 676/527, loss: 0.003360843751579523 2023-01-23 03:29:10.012668: step: 680/527, loss: 0.008288050070405006 2023-01-23 03:29:11.098809: step: 684/527, loss: 7.62939453125e-06 2023-01-23 03:29:12.211245: step: 688/527, loss: 0.031882286071777344 2023-01-23 03:29:13.326250: step: 692/527, loss: 0.02600417099893093 2023-01-23 03:29:14.457129: step: 696/527, loss: 0.008329391479492188 2023-01-23 03:29:15.559567: step: 700/527, loss: 0.0022581100929528475 2023-01-23 03:29:16.695074: step: 704/527, loss: 0.04228067398071289 2023-01-23 03:29:17.850581: step: 708/527, loss: 0.00466203736141324 2023-01-23 03:29:18.981991: step: 712/527, loss: 0.0011475563514977694 2023-01-23 03:29:20.060586: step: 716/527, loss: 0.0196685791015625 2023-01-23 03:29:21.199163: step: 720/527, loss: 0.0029592516366392374 2023-01-23 03:29:22.310882: step: 724/527, loss: 0.00022468566021416336 2023-01-23 03:29:23.421536: step: 728/527, loss: 0.004058646969497204 2023-01-23 03:29:24.563308: step: 732/527, loss: 0.0015620231861248612 2023-01-23 03:29:25.682355: step: 736/527, loss: 3.8909915019758046e-05 2023-01-23 03:29:26.773787: step: 740/527, loss: 0.2221468985080719 2023-01-23 03:29:27.896383: step: 744/527, loss: 0.0001697540283203125 2023-01-23 03:29:29.035493: step: 748/527, loss: 0.018655015155673027 2023-01-23 03:29:30.129464: step: 752/527, loss: 0.022371292114257812 2023-01-23 03:29:31.222562: step: 756/527, loss: 2.689361645025201e-05 2023-01-23 03:29:32.356180: step: 760/527, loss: 0.00014228820509742945 2023-01-23 03:29:33.486256: step: 764/527, loss: 6.246566954359878e-06 2023-01-23 03:29:34.600685: step: 768/527, loss: 9.1552734375e-05 2023-01-23 03:29:35.715670: step: 772/527, loss: 0.000668430351652205 2023-01-23 03:29:36.866771: step: 776/527, loss: 0.04863777384161949 2023-01-23 03:29:37.979235: step: 780/527, loss: 0.009463215246796608 2023-01-23 03:29:39.085307: step: 784/527, loss: 0.00013408661470748484 2023-01-23 03:29:40.194700: step: 788/527, loss: 7.53879503463395e-05 2023-01-23 03:29:41.313468: step: 792/527, loss: 0.0006320953834801912 2023-01-23 03:29:42.453132: step: 796/527, loss: 0.0005182266468182206 2023-01-23 03:29:43.606804: step: 800/527, loss: 0.018663501366972923 2023-01-23 03:29:44.693573: step: 804/527, loss: 0.023381328210234642 2023-01-23 03:29:45.803174: step: 808/527, loss: 9.670257713878527e-05 2023-01-23 03:29:46.914992: step: 812/527, loss: 0.009738064371049404 2023-01-23 03:29:48.012160: step: 816/527, loss: 0.0011090278858318925 2023-01-23 03:29:49.128692: step: 820/527, loss: 0.024940870702266693 2023-01-23 03:29:50.277391: step: 824/527, loss: 0.00779304513707757 2023-01-23 03:29:51.375749: step: 828/527, loss: 0.007205199915915728 2023-01-23 03:29:52.481906: step: 832/527, loss: 3.347396705066785e-05 2023-01-23 03:29:53.590852: step: 836/527, loss: 0.007741928566247225 2023-01-23 03:29:54.688601: step: 840/527, loss: 2.2315980459097773e-05 2023-01-23 03:29:55.811225: step: 844/527, loss: 0.06525001674890518 2023-01-23 03:29:56.921296: step: 848/527, loss: 0.008314132690429688 2023-01-23 03:29:58.031717: step: 852/527, loss: 2.5987625122070312e-05 2023-01-23 03:29:59.157698: step: 856/527, loss: 0.015436649322509766 2023-01-23 03:30:00.318956: step: 860/527, loss: 0.021979333832859993 2023-01-23 03:30:01.455423: step: 864/527, loss: 0.001811218331567943 2023-01-23 03:30:02.577003: step: 868/527, loss: 0.005060672760009766 2023-01-23 03:30:03.667101: step: 872/527, loss: 0.0005823135143145919 2023-01-23 03:30:04.767506: step: 876/527, loss: 0.0008657455327920616 2023-01-23 03:30:05.905788: step: 880/527, loss: 0.017195511609315872 2023-01-23 03:30:07.018442: step: 884/527, loss: 0.0010107994312420487 2023-01-23 03:30:08.126956: step: 888/527, loss: 0.004335975740104914 2023-01-23 03:30:09.231002: step: 892/527, loss: 0.0005720138433389366 2023-01-23 03:30:10.344341: step: 896/527, loss: 0.0061013223603367805 2023-01-23 03:30:11.466855: step: 900/527, loss: 0.00035772324190475047 2023-01-23 03:30:12.606247: step: 904/527, loss: 0.0610651969909668 2023-01-23 03:30:13.681222: step: 908/527, loss: 1.7070769899873994e-05 2023-01-23 03:30:14.818493: step: 912/527, loss: 0.017061617225408554 2023-01-23 03:30:15.913196: step: 916/527, loss: 0.00043668749276548624 2023-01-23 03:30:17.005823: step: 920/527, loss: 0.024418117478489876 2023-01-23 03:30:18.085819: step: 924/527, loss: 0.0016036033630371094 2023-01-23 03:30:19.304398: step: 928/527, loss: 0.0005802154773846269 2023-01-23 03:30:20.427477: step: 932/527, loss: 0.0049932003021240234 2023-01-23 03:30:21.536622: step: 936/527, loss: 0.0005226611974649131 2023-01-23 03:30:22.655657: step: 940/527, loss: 0.00019984245591331273 2023-01-23 03:30:23.769500: step: 944/527, loss: 7.734298560535535e-05 2023-01-23 03:30:24.862611: step: 948/527, loss: 0.0006738663068972528 2023-01-23 03:30:25.992234: step: 952/527, loss: 0.03094034269452095 2023-01-23 03:30:27.099961: step: 956/527, loss: 0.03349189832806587 2023-01-23 03:30:28.182311: step: 960/527, loss: 0.025272751227021217 2023-01-23 03:30:29.330379: step: 964/527, loss: 0.005921745672821999 2023-01-23 03:30:30.439145: step: 968/527, loss: 0.00013170242891646922 2023-01-23 03:30:31.552142: step: 972/527, loss: 0.002598381135612726 2023-01-23 03:30:32.683596: step: 976/527, loss: 0.00021800995455123484 2023-01-23 03:30:33.802743: step: 980/527, loss: 0.00479049701243639 2023-01-23 03:30:34.909675: step: 984/527, loss: 0.012603998184204102 2023-01-23 03:30:36.036043: step: 988/527, loss: 0.0005575180402956903 2023-01-23 03:30:37.158264: step: 992/527, loss: 0.0004016876337118447 2023-01-23 03:30:38.260525: step: 996/527, loss: 0.025344902649521828 2023-01-23 03:30:39.370694: step: 1000/527, loss: 0.0005071639898233116 2023-01-23 03:30:40.504951: step: 1004/527, loss: 0.003490447998046875 2023-01-23 03:30:41.634178: step: 1008/527, loss: 0.004206848330795765 2023-01-23 03:30:42.762547: step: 1012/527, loss: 0.008918190374970436 2023-01-23 03:30:43.919062: step: 1016/527, loss: 0.05567416921257973 2023-01-23 03:30:44.988340: step: 1020/527, loss: 0.01316604670137167 2023-01-23 03:30:46.116518: step: 1024/527, loss: 0.00023536683875136077 2023-01-23 03:30:47.253501: step: 1028/527, loss: 0.0027950287330895662 2023-01-23 03:30:48.356050: step: 1032/527, loss: 0.002307796385139227 2023-01-23 03:30:49.472614: step: 1036/527, loss: 3.566742088878527e-05 2023-01-23 03:30:50.600555: step: 1040/527, loss: 0.004254436586052179 2023-01-23 03:30:51.743208: step: 1044/527, loss: -2.846717688953504e-05 2023-01-23 03:30:52.877027: step: 1048/527, loss: 0.003581428434699774 2023-01-23 03:30:54.035259: step: 1052/527, loss: 0.0429597869515419 2023-01-23 03:30:55.132870: step: 1056/527, loss: 0.0001621246337890625 2023-01-23 03:30:56.269800: step: 1060/527, loss: 0.0004070282157044858 2023-01-23 03:30:57.396388: step: 1064/527, loss: 0.0016332149971276522 2023-01-23 03:30:58.480286: step: 1068/527, loss: 6.189346458995715e-05 2023-01-23 03:30:59.595778: step: 1072/527, loss: 9.160042100120336e-05 2023-01-23 03:31:00.703889: step: 1076/527, loss: 0.013260078616440296 2023-01-23 03:31:01.809038: step: 1080/527, loss: 0.0001428604155080393 2023-01-23 03:31:02.903949: step: 1084/527, loss: 0.008313274942338467 2023-01-23 03:31:04.016616: step: 1088/527, loss: 0.024941731244325638 2023-01-23 03:31:05.130257: step: 1092/527, loss: 0.009051322937011719 2023-01-23 03:31:06.246999: step: 1096/527, loss: 0.00039281847421079874 2023-01-23 03:31:07.350960: step: 1100/527, loss: 0.00046443939208984375 2023-01-23 03:31:08.488724: step: 1104/527, loss: 0.00027065275935456157 2023-01-23 03:31:09.597963: step: 1108/527, loss: 0.03258323669433594 2023-01-23 03:31:10.719315: step: 1112/527, loss: 0.13284483551979065 2023-01-23 03:31:11.856761: step: 1116/527, loss: 0.058045580983161926 2023-01-23 03:31:12.963918: step: 1120/527, loss: 4.7683710135970614e-07 2023-01-23 03:31:14.070667: step: 1124/527, loss: 0.0050223348662257195 2023-01-23 03:31:15.177477: step: 1128/527, loss: 0.002240181202068925 2023-01-23 03:31:16.290092: step: 1132/527, loss: 0.0042243958450853825 2023-01-23 03:31:17.399858: step: 1136/527, loss: 0.017250681295990944 2023-01-23 03:31:18.518995: step: 1140/527, loss: 0.04178063943982124 2023-01-23 03:31:19.616511: step: 1144/527, loss: 2.059936559817288e-05 2023-01-23 03:31:20.747003: step: 1148/527, loss: 0.0011625289916992188 2023-01-23 03:31:21.868616: step: 1152/527, loss: 0.0033068659249693155 2023-01-23 03:31:22.964675: step: 1156/527, loss: 0.00032396314782090485 2023-01-23 03:31:24.055136: step: 1160/527, loss: 0.0016888618702068925 2023-01-23 03:31:25.169646: step: 1164/527, loss: 0.02366485819220543 2023-01-23 03:31:26.272040: step: 1168/527, loss: 0.0009849548805505037 2023-01-23 03:31:27.364587: step: 1172/527, loss: 0.02193584479391575 2023-01-23 03:31:28.443946: step: 1176/527, loss: 0.0001876354217529297 2023-01-23 03:31:29.574111: step: 1180/527, loss: 0.000842189765535295 2023-01-23 03:31:30.674914: step: 1184/527, loss: 0.0014688492519780993 2023-01-23 03:31:31.819981: step: 1188/527, loss: 0.0004326820489950478 2023-01-23 03:31:32.929737: step: 1192/527, loss: 0.0015844345325604081 2023-01-23 03:31:34.047689: step: 1196/527, loss: 0.0005743980291299522 2023-01-23 03:31:35.186560: step: 1200/527, loss: 0.012994195334613323 2023-01-23 03:31:36.312859: step: 1204/527, loss: 0.005030632019042969 2023-01-23 03:31:37.447988: step: 1208/527, loss: 0.0003017425478901714 2023-01-23 03:31:38.556435: step: 1212/527, loss: 0.0003208160342182964 2023-01-23 03:31:39.655561: step: 1216/527, loss: 0.0008462906116619706 2023-01-23 03:31:40.788609: step: 1220/527, loss: 0.004182625096291304 2023-01-23 03:31:41.899423: step: 1224/527, loss: 0.14578190445899963 2023-01-23 03:31:43.016063: step: 1228/527, loss: 7.62939453125e-06 2023-01-23 03:31:44.126828: step: 1232/527, loss: 0.3640851080417633 2023-01-23 03:31:45.213560: step: 1236/527, loss: 0.0024131773971021175 2023-01-23 03:31:46.318069: step: 1240/527, loss: 0.014840316958725452 2023-01-23 03:31:47.429679: step: 1244/527, loss: 0.004340744111686945 2023-01-23 03:31:48.546718: step: 1248/527, loss: 0.01746845245361328 2023-01-23 03:31:49.711581: step: 1252/527, loss: 0.0006083488697186112 2023-01-23 03:31:50.800341: step: 1256/527, loss: 0.0009681701194494963 2023-01-23 03:31:51.923487: step: 1260/527, loss: 0.00046262741670943797 2023-01-23 03:31:53.040422: step: 1264/527, loss: 0.000327634799759835 2023-01-23 03:31:54.159837: step: 1268/527, loss: 0.06470489501953125 2023-01-23 03:31:55.280520: step: 1272/527, loss: 0.0003659248468466103 2023-01-23 03:31:56.377443: step: 1276/527, loss: 6.95705457474105e-05 2023-01-23 03:31:57.481802: step: 1280/527, loss: 0.005387497134506702 2023-01-23 03:31:58.579758: step: 1284/527, loss: 0.0010467530228197575 2023-01-23 03:31:59.714248: step: 1288/527, loss: 0.009755134582519531 2023-01-23 03:32:00.861362: step: 1292/527, loss: 0.04974942281842232 2023-01-23 03:32:01.955411: step: 1296/527, loss: 0.00016479492478538305 2023-01-23 03:32:03.058525: step: 1300/527, loss: 4.0626528061693534e-05 2023-01-23 03:32:04.179584: step: 1304/527, loss: 0.0004245758173055947 2023-01-23 03:32:05.312297: step: 1308/527, loss: 8.406639244640246e-05 2023-01-23 03:32:06.442790: step: 1312/527, loss: 0.0012098312145099044 2023-01-23 03:32:07.552608: step: 1316/527, loss: 0.0032777786254882812 2023-01-23 03:32:08.697765: step: 1320/527, loss: 0.020536424592137337 2023-01-23 03:32:09.808820: step: 1324/527, loss: 0.00918130949139595 2023-01-23 03:32:10.917736: step: 1328/527, loss: 0.0006234169122762978 2023-01-23 03:32:12.032061: step: 1332/527, loss: 0.0008804321405477822 2023-01-23 03:32:13.137838: step: 1336/527, loss: 0.28104686737060547 2023-01-23 03:32:14.218711: step: 1340/527, loss: 0.07377391308546066 2023-01-23 03:32:15.319722: step: 1344/527, loss: 0.0016926765674725175 2023-01-23 03:32:16.398472: step: 1348/527, loss: 0.01854095607995987 2023-01-23 03:32:17.488781: step: 1352/527, loss: 0.0393003486096859 2023-01-23 03:32:18.605558: step: 1356/527, loss: 0.003024578094482422 2023-01-23 03:32:19.734913: step: 1360/527, loss: 0.01980285719037056 2023-01-23 03:32:20.834036: step: 1364/527, loss: 0.007147598080337048 2023-01-23 03:32:21.976716: step: 1368/527, loss: 9.546280489303172e-05 2023-01-23 03:32:23.105405: step: 1372/527, loss: 0.004654502961784601 2023-01-23 03:32:24.272628: step: 1376/527, loss: 0.0020509720779955387 2023-01-23 03:32:25.379063: step: 1380/527, loss: 0.002462244126945734 2023-01-23 03:32:26.508006: step: 1384/527, loss: 5.2833554946118966e-05 2023-01-23 03:32:27.638020: step: 1388/527, loss: 0.04148240387439728 2023-01-23 03:32:28.746557: step: 1392/527, loss: 0.04448547214269638 2023-01-23 03:32:29.853483: step: 1396/527, loss: 0.00029735564021393657 2023-01-23 03:32:30.925627: step: 1400/527, loss: 0.016714954748749733 2023-01-23 03:32:32.028595: step: 1404/527, loss: 0.00013999939255882055 2023-01-23 03:32:33.148172: step: 1408/527, loss: 0.02235722541809082 2023-01-23 03:32:34.266735: step: 1412/527, loss: 0.014533234760165215 2023-01-23 03:32:35.360811: step: 1416/527, loss: 7.815361459506676e-05 2023-01-23 03:32:36.479031: step: 1420/527, loss: 0.05853080749511719 2023-01-23 03:32:37.638503: step: 1424/527, loss: 0.011784744448959827 2023-01-23 03:32:38.771967: step: 1428/527, loss: 0.012887001037597656 2023-01-23 03:32:39.951870: step: 1432/527, loss: 0.0005016326904296875 2023-01-23 03:32:41.067552: step: 1436/527, loss: 0.022765539586544037 2023-01-23 03:32:42.155274: step: 1440/527, loss: 8.163452002918348e-05 2023-01-23 03:32:43.268490: step: 1444/527, loss: 1.3637541997013614e-05 2023-01-23 03:32:44.380019: step: 1448/527, loss: 0.005961514078080654 2023-01-23 03:32:45.489359: step: 1452/527, loss: 0.022536564618349075 2023-01-23 03:32:46.596129: step: 1456/527, loss: 0.005604791920632124 2023-01-23 03:32:47.725704: step: 1460/527, loss: 2.9277802241267636e-05 2023-01-23 03:32:48.823680: step: 1464/527, loss: 0.0050637247040867805 2023-01-23 03:32:49.959613: step: 1468/527, loss: 0.023099135607481003 2023-01-23 03:32:51.074386: step: 1472/527, loss: 0.010835266672074795 2023-01-23 03:32:52.186168: step: 1476/527, loss: 0.00014810562424827367 2023-01-23 03:32:53.316122: step: 1480/527, loss: 0.012093067169189453 2023-01-23 03:32:54.439808: step: 1484/527, loss: 7.090569124557078e-05 2023-01-23 03:32:55.560511: step: 1488/527, loss: 4.730224463855848e-05 2023-01-23 03:32:56.669917: step: 1492/527, loss: 4.425048973644152e-05 2023-01-23 03:32:57.774945: step: 1496/527, loss: 9.474754187976941e-05 2023-01-23 03:32:58.915094: step: 1500/527, loss: 0.0016165734268724918 2023-01-23 03:33:00.019123: step: 1504/527, loss: 0.0005517005920410156 2023-01-23 03:33:01.123631: step: 1508/527, loss: 0.0015367508167400956 2023-01-23 03:33:02.234986: step: 1512/527, loss: 0.0001291275111725554 2023-01-23 03:33:03.341634: step: 1516/527, loss: 0.05298595875501633 2023-01-23 03:33:04.453437: step: 1520/527, loss: 0.0010730742942541838 2023-01-23 03:33:05.570265: step: 1524/527, loss: 0.0013320923317223787 2023-01-23 03:33:06.692029: step: 1528/527, loss: 0.00012006760516669601 2023-01-23 03:33:07.807521: step: 1532/527, loss: 0.0010648728348314762 2023-01-23 03:33:08.914842: step: 1536/527, loss: 7.534027099609375e-05 2023-01-23 03:33:10.005746: step: 1540/527, loss: 0.009952736087143421 2023-01-23 03:33:11.111905: step: 1544/527, loss: 0.0007658004760742188 2023-01-23 03:33:12.219082: step: 1548/527, loss: 0.05119170993566513 2023-01-23 03:33:13.365608: step: 1552/527, loss: 0.008610057644546032 2023-01-23 03:33:14.483342: step: 1556/527, loss: 1.9931794668082148e-05 2023-01-23 03:33:15.588826: step: 1560/527, loss: 0.0005798340425826609 2023-01-23 03:33:16.710082: step: 1564/527, loss: 0.0007037163013592362 2023-01-23 03:33:17.809619: step: 1568/527, loss: 0.0017225266201421618 2023-01-23 03:33:18.957621: step: 1572/527, loss: 0.013992118649184704 2023-01-23 03:33:20.086427: step: 1576/527, loss: 0.0002193450927734375 2023-01-23 03:33:21.200675: step: 1580/527, loss: 0.006794357672333717 2023-01-23 03:33:22.293484: step: 1584/527, loss: 0.00011739730689441785 2023-01-23 03:33:23.399951: step: 1588/527, loss: 0.0023143768776208162 2023-01-23 03:33:24.483354: step: 1592/527, loss: 3.156661841785535e-05 2023-01-23 03:33:25.586853: step: 1596/527, loss: 0.005384969525039196 2023-01-23 03:33:26.675534: step: 1600/527, loss: 0.0023557664826512337 2023-01-23 03:33:27.776107: step: 1604/527, loss: 0.00022239684767555445 2023-01-23 03:33:28.891825: step: 1608/527, loss: 0.0004681587452068925 2023-01-23 03:33:29.987604: step: 1612/527, loss: 0.01723327860236168 2023-01-23 03:33:31.133510: step: 1616/527, loss: 0.01791858673095703 2023-01-23 03:33:32.257577: step: 1620/527, loss: 0.02886676974594593 2023-01-23 03:33:33.394924: step: 1624/527, loss: 0.011512375436723232 2023-01-23 03:33:34.491830: step: 1628/527, loss: 0.014509772881865501 2023-01-23 03:33:35.618938: step: 1632/527, loss: 0.03004760853946209 2023-01-23 03:33:36.705700: step: 1636/527, loss: 0.00030193329439498484 2023-01-23 03:33:37.803631: step: 1640/527, loss: 0.00290088658221066 2023-01-23 03:33:38.887008: step: 1644/527, loss: 0.012646389193832874 2023-01-23 03:33:40.009601: step: 1648/527, loss: 0.05897979810833931 2023-01-23 03:33:41.092287: step: 1652/527, loss: 3.051757857974735e-06 2023-01-23 03:33:42.217786: step: 1656/527, loss: 0.02617206610739231 2023-01-23 03:33:43.326934: step: 1660/527, loss: 0.014951134100556374 2023-01-23 03:33:44.459210: step: 1664/527, loss: 0.0002899169921875 2023-01-23 03:33:45.555790: step: 1668/527, loss: 0.019494343549013138 2023-01-23 03:33:46.704197: step: 1672/527, loss: 0.000683689140714705 2023-01-23 03:33:47.808888: step: 1676/527, loss: 0.010310744866728783 2023-01-23 03:33:48.913235: step: 1680/527, loss: 0.00010395050048828125 2023-01-23 03:33:50.050920: step: 1684/527, loss: 0.00030202866764739156 2023-01-23 03:33:51.142511: step: 1688/527, loss: 0.004313755314797163 2023-01-23 03:33:52.253255: step: 1692/527, loss: 0.026384973898530006 2023-01-23 03:33:53.374330: step: 1696/527, loss: 0.00076379778329283 2023-01-23 03:33:54.499434: step: 1700/527, loss: 0.00023665429034736007 2023-01-23 03:33:55.620120: step: 1704/527, loss: 0.0001924514799611643 2023-01-23 03:33:56.717012: step: 1708/527, loss: 0.0020292282570153475 2023-01-23 03:33:57.805964: step: 1712/527, loss: 0.09808941185474396 2023-01-23 03:33:58.939373: step: 1716/527, loss: 0.0018439769046381116 2023-01-23 03:34:00.058913: step: 1720/527, loss: 0.00846626702696085 2023-01-23 03:34:01.167671: step: 1724/527, loss: 0.0005520820850506425 2023-01-23 03:34:02.315844: step: 1728/527, loss: 0.0039535523392260075 2023-01-23 03:34:03.439000: step: 1732/527, loss: 0.0019207954173907638 2023-01-23 03:34:04.530351: step: 1736/527, loss: 0.009162330999970436 2023-01-23 03:34:05.634215: step: 1740/527, loss: 0.0017773628933355212 2023-01-23 03:34:06.755530: step: 1744/527, loss: 0.0011793136363849044 2023-01-23 03:34:07.863517: step: 1748/527, loss: 0.0002019882231252268 2023-01-23 03:34:08.954588: step: 1752/527, loss: 0.7095305919647217 2023-01-23 03:34:10.055418: step: 1756/527, loss: 1.7547608877066523e-05 2023-01-23 03:34:11.172448: step: 1760/527, loss: 0.0032100677490234375 2023-01-23 03:34:12.299605: step: 1764/527, loss: 0.03240266069769859 2023-01-23 03:34:13.415299: step: 1768/527, loss: 0.018138503655791283 2023-01-23 03:34:14.518178: step: 1772/527, loss: 7.62939453125e-06 2023-01-23 03:34:15.632885: step: 1776/527, loss: 0.00108251569326967 2023-01-23 03:34:16.805843: step: 1780/527, loss: 0.003858471056446433 2023-01-23 03:34:17.942447: step: 1784/527, loss: 0.00011882782564498484 2023-01-23 03:34:19.048066: step: 1788/527, loss: 0.00010032654245151207 2023-01-23 03:34:20.150372: step: 1792/527, loss: 0.00035119056701660156 2023-01-23 03:34:21.280519: step: 1796/527, loss: 0.3447505831718445 2023-01-23 03:34:22.386977: step: 1800/527, loss: 8.010864803509321e-06 2023-01-23 03:34:23.492540: step: 1804/527, loss: 0.00012130737013649195 2023-01-23 03:34:24.592403: step: 1808/527, loss: 0.0038904191460460424 2023-01-23 03:34:25.692343: step: 1812/527, loss: 3.10870361328125 2023-01-23 03:34:26.803372: step: 1816/527, loss: 0.003504037857055664 2023-01-23 03:34:27.934578: step: 1820/527, loss: 0.00035572052001953125 2023-01-23 03:34:29.046264: step: 1824/527, loss: 0.0002574920654296875 2023-01-23 03:34:30.186953: step: 1828/527, loss: 0.0004428863467182964 2023-01-23 03:34:31.270979: step: 1832/527, loss: 0.005035591311752796 2023-01-23 03:34:32.377543: step: 1836/527, loss: 0.008076191879808903 2023-01-23 03:34:33.495912: step: 1840/527, loss: 0.05245323106646538 2023-01-23 03:34:34.595186: step: 1844/527, loss: 0.024606704711914062 2023-01-23 03:34:35.713097: step: 1848/527, loss: 0.004636859986931086 2023-01-23 03:34:36.805307: step: 1852/527, loss: 1.0204315003647935e-05 2023-01-23 03:34:37.904938: step: 1856/527, loss: 0.013503646478056908 2023-01-23 03:34:39.045446: step: 1860/527, loss: 0.0010825158096849918 2023-01-23 03:34:40.157869: step: 1864/527, loss: 0.0010453223949298263 2023-01-23 03:34:41.277586: step: 1868/527, loss: 0.004374981392174959 2023-01-23 03:34:42.392183: step: 1872/527, loss: 0.006274318788200617 2023-01-23 03:34:43.494019: step: 1876/527, loss: 0.0005322456127032638 2023-01-23 03:34:44.604175: step: 1880/527, loss: 0.005789279937744141 2023-01-23 03:34:45.755784: step: 1884/527, loss: 0.10395793616771698 2023-01-23 03:34:46.858094: step: 1888/527, loss: 0.0005834579933434725 2023-01-23 03:34:47.958749: step: 1892/527, loss: 0.0004730224609375 2023-01-23 03:34:49.054503: step: 1896/527, loss: 0.015216255560517311 2023-01-23 03:34:50.188763: step: 1900/527, loss: 0.00018506050400901586 2023-01-23 03:34:51.295770: step: 1904/527, loss: 0.0008146286127157509 2023-01-23 03:34:52.378343: step: 1908/527, loss: 2.2268295651883818e-05 2023-01-23 03:34:53.508028: step: 1912/527, loss: 0.0007706641918048263 2023-01-23 03:34:54.616818: step: 1916/527, loss: 0.012499618344008923 2023-01-23 03:34:55.729268: step: 1920/527, loss: 0.017778636887669563 2023-01-23 03:34:56.849073: step: 1924/527, loss: 8.296966552734375e-05 2023-01-23 03:34:57.965173: step: 1928/527, loss: 0.04794588312506676 2023-01-23 03:34:59.104842: step: 1932/527, loss: 0.18900719285011292 2023-01-23 03:35:00.244352: step: 1936/527, loss: 0.4262973666191101 2023-01-23 03:35:01.383024: step: 1940/527, loss: 0.009500885382294655 2023-01-23 03:35:02.502504: step: 1944/527, loss: 0.0010272025829181075 2023-01-23 03:35:03.618419: step: 1948/527, loss: 0.004021644592285156 2023-01-23 03:35:04.732756: step: 1952/527, loss: 3.52859501617786e-06 2023-01-23 03:35:05.840840: step: 1956/527, loss: 1.373290979245212e-05 2023-01-23 03:35:06.921568: step: 1960/527, loss: 1.106262243411038e-05 2023-01-23 03:35:08.025648: step: 1964/527, loss: 2.8038026357535273e-05 2023-01-23 03:35:09.137957: step: 1968/527, loss: 0.09510540962219238 2023-01-23 03:35:10.253813: step: 1972/527, loss: 0.0002437591610942036 2023-01-23 03:35:11.363183: step: 1976/527, loss: 0.0005998611450195312 2023-01-23 03:35:12.456919: step: 1980/527, loss: 0.0020483972039073706 2023-01-23 03:35:13.538080: step: 1984/527, loss: 0.08438543975353241 2023-01-23 03:35:14.651811: step: 1988/527, loss: 0.004636954981833696 2023-01-23 03:35:15.768871: step: 1992/527, loss: 2.2029875253792852e-05 2023-01-23 03:35:16.879088: step: 1996/527, loss: 0.0010011672275140882 2023-01-23 03:35:18.000570: step: 2000/527, loss: 0.02397174760699272 2023-01-23 03:35:19.108751: step: 2004/527, loss: 0.03462624549865723 2023-01-23 03:35:20.230166: step: 2008/527, loss: 9.632110959501006e-06 2023-01-23 03:35:21.328527: step: 2012/527, loss: 0.02156086079776287 2023-01-23 03:35:22.406750: step: 2016/527, loss: 0.00016870498075149953 2023-01-23 03:35:23.528464: step: 2020/527, loss: 0.008963203057646751 2023-01-23 03:35:24.634922: step: 2024/527, loss: 0.0026958466041833162 2023-01-23 03:35:25.749169: step: 2028/527, loss: 0.043246079236269 2023-01-23 03:35:26.873857: step: 2032/527, loss: 0.005075711291283369 2023-01-23 03:35:27.991955: step: 2036/527, loss: 0.0039914133958518505 2023-01-23 03:35:29.121031: step: 2040/527, loss: 1.4342349767684937 2023-01-23 03:35:30.253439: step: 2044/527, loss: 0.0011192321544513106 2023-01-23 03:35:31.362547: step: 2048/527, loss: 1.3446808225126006e-05 2023-01-23 03:35:32.493501: step: 2052/527, loss: 0.006104087922722101 2023-01-23 03:35:33.611198: step: 2056/527, loss: 0.04354248195886612 2023-01-23 03:35:34.720726: step: 2060/527, loss: 0.00016536712064407766 2023-01-23 03:35:35.844558: step: 2064/527, loss: 0.004340839572250843 2023-01-23 03:35:36.972412: step: 2068/527, loss: 0.0004107475106138736 2023-01-23 03:35:38.090460: step: 2072/527, loss: 0.054785825312137604 2023-01-23 03:35:39.197642: step: 2076/527, loss: 0.011180782690644264 2023-01-23 03:35:40.313224: step: 2080/527, loss: 0.0047626495361328125 2023-01-23 03:35:41.421893: step: 2084/527, loss: 0.014306164346635342 2023-01-23 03:35:42.504481: step: 2088/527, loss: 0.00014019012451171875 2023-01-23 03:35:43.641135: step: 2092/527, loss: 0.0126221664249897 2023-01-23 03:35:44.738425: step: 2096/527, loss: 0.00021162032498978078 2023-01-23 03:35:45.851604: step: 2100/527, loss: 0.01700897328555584 2023-01-23 03:35:46.959139: step: 2104/527, loss: 0.030184125527739525 2023-01-23 03:35:48.059679: step: 2108/527, loss: 1.5735627130197827e-06 ================================================== Loss: 0.030 -------------------- Dev: {'event': {'p': 0.5964912280701754, 'r': 0.7696404793608522, 'f1': 0.6720930232558139}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Test: {'event': {'p': 0.6298819255222525, 'r': 0.7925714285714286, 'f1': 0.701923076923077}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Chinese: {'event': {'p': 0.5609756097560976, 'r': 0.8518518518518519, 'f1': 0.6764705882352942}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Korean: {'event': {'p': 0.5689655172413793, 'r': 0.5238095238095238, 'f1': 0.5454545454545455}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Russian: {'event': {'p': 0.425, 'r': 0.4722222222222222, 'f1': 0.4473684210526316}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6066252587991718, 'r': 0.7802929427430093, 'f1': 0.6825859056493885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Eng Test for Korean: {'event': {'p': 0.62580054894785, 'r': 0.7817142857142857, 'f1': 0.6951219512195121}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Korean: {'event': {'p': 0.6730769230769231, 'r': 0.5555555555555556, 'f1': 0.6086956521739131}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 24 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:36:29.573416: step: 4/527, loss: 0.0009110451210290194 2023-01-23 03:36:30.658539: step: 8/527, loss: 7.848739915061742e-05 2023-01-23 03:36:31.759926: step: 12/527, loss: 0.010296916589140892 2023-01-23 03:36:32.854164: step: 16/527, loss: 1.2874603271484375e-05 2023-01-23 03:36:33.928452: step: 20/527, loss: 0.005050516687333584 2023-01-23 03:36:35.068646: step: 24/527, loss: 0.0032618525438010693 2023-01-23 03:36:36.178096: step: 28/527, loss: 0.00013599396334029734 2023-01-23 03:36:37.270878: step: 32/527, loss: 0.0010828971862792969 2023-01-23 03:36:38.357816: step: 36/527, loss: 5.836487252963707e-05 2023-01-23 03:36:39.469365: step: 40/527, loss: 0.00027561187744140625 2023-01-23 03:36:40.594445: step: 44/527, loss: 0.00583992013707757 2023-01-23 03:36:41.711708: step: 48/527, loss: 0.002434349153190851 2023-01-23 03:36:42.820989: step: 52/527, loss: 0.0006800651899538934 2023-01-23 03:36:43.924590: step: 56/527, loss: 0.0007221222040243447 2023-01-23 03:36:45.048813: step: 60/527, loss: 0.05299406126141548 2023-01-23 03:36:46.179275: step: 64/527, loss: 0.0016513824230059981 2023-01-23 03:36:47.278662: step: 68/527, loss: 0.006466483697295189 2023-01-23 03:36:48.401031: step: 72/527, loss: 0.004902172368019819 2023-01-23 03:36:49.493923: step: 76/527, loss: 0.028306199237704277 2023-01-23 03:36:50.610855: step: 80/527, loss: 0.0033044815063476562 2023-01-23 03:36:51.744486: step: 84/527, loss: 0.0001655578671488911 2023-01-23 03:36:52.851392: step: 88/527, loss: 0.005522537510842085 2023-01-23 03:36:53.956165: step: 92/527, loss: 0.00021667480177711695 2023-01-23 03:36:55.053996: step: 96/527, loss: 2.6226043701171875e-05 2023-01-23 03:36:56.147548: step: 100/527, loss: 0.010250759311020374 2023-01-23 03:36:57.251493: step: 104/527, loss: 0.0047595021314918995 2023-01-23 03:36:58.370333: step: 108/527, loss: 0.00010604858107399195 2023-01-23 03:36:59.467172: step: 112/527, loss: 0.011271094903349876 2023-01-23 03:37:00.593770: step: 116/527, loss: 0.024996565654873848 2023-01-23 03:37:01.729313: step: 120/527, loss: 0.0003570079570636153 2023-01-23 03:37:02.832977: step: 124/527, loss: 0.0013793945545330644 2023-01-23 03:37:03.948251: step: 128/527, loss: 8.630751835880801e-06 2023-01-23 03:37:05.044621: step: 132/527, loss: 0.00012445449829101562 2023-01-23 03:37:06.180785: step: 136/527, loss: 0.0003688812139444053 2023-01-23 03:37:07.315929: step: 140/527, loss: 0.007424688432365656 2023-01-23 03:37:08.426209: step: 144/527, loss: 0.007475471124053001 2023-01-23 03:37:09.533354: step: 148/527, loss: 0.0024349212180823088 2023-01-23 03:37:10.646703: step: 152/527, loss: 0.0001924514799611643 2023-01-23 03:37:11.771962: step: 156/527, loss: 0.0019601343665271997 2023-01-23 03:37:12.897843: step: 160/527, loss: 0.006792736239731312 2023-01-23 03:37:14.016562: step: 164/527, loss: 0.8472946286201477 2023-01-23 03:37:15.117518: step: 168/527, loss: 2.6702882678364404e-06 2023-01-23 03:37:16.244820: step: 172/527, loss: 0.0067005157470703125 2023-01-23 03:37:17.343341: step: 176/527, loss: 0.00017433166794944555 2023-01-23 03:37:18.456789: step: 180/527, loss: 0.021106814965605736 2023-01-23 03:37:19.555026: step: 184/527, loss: 0.0003991127014160156 2023-01-23 03:37:20.654040: step: 188/527, loss: 0.0012279510265216231 2023-01-23 03:37:21.759997: step: 192/527, loss: 7.4386593951203395e-06 2023-01-23 03:37:22.886899: step: 196/527, loss: 0.0017589569324627519 2023-01-23 03:37:23.998201: step: 200/527, loss: 0.009861993603408337 2023-01-23 03:37:25.144105: step: 204/527, loss: 0.01757373847067356 2023-01-23 03:37:26.280502: step: 208/527, loss: 0.0030303956009447575 2023-01-23 03:37:27.365608: step: 212/527, loss: 2.498626781743951e-05 2023-01-23 03:37:28.493027: step: 216/527, loss: 0.03145027160644531 2023-01-23 03:37:29.592458: step: 220/527, loss: 0.00015382767014671117 2023-01-23 03:37:30.744559: step: 224/527, loss: 0.02449817769229412 2023-01-23 03:37:31.903427: step: 228/527, loss: 0.0024200440384447575 2023-01-23 03:37:32.987395: step: 232/527, loss: 0.01563587225973606 2023-01-23 03:37:34.112152: step: 236/527, loss: 0.001042270683683455 2023-01-23 03:37:35.185859: step: 240/527, loss: 3.0040740966796875e-05 2023-01-23 03:37:36.304143: step: 244/527, loss: 5.111694190418348e-05 2023-01-23 03:37:37.436340: step: 248/527, loss: 2.784729076665826e-05 2023-01-23 03:37:38.515197: step: 252/527, loss: -1.907349087559851e-07 2023-01-23 03:37:39.620971: step: 256/527, loss: 0.0005649566883221269 2023-01-23 03:37:40.738797: step: 260/527, loss: 0.0253111831843853 2023-01-23 03:37:41.859680: step: 264/527, loss: 0.0010314941173419356 2023-01-23 03:37:42.955219: step: 268/527, loss: 0.0006244659889489412 2023-01-23 03:37:44.094551: step: 272/527, loss: 0.028299523517489433 2023-01-23 03:37:45.231765: step: 276/527, loss: 0.00043258670484647155 2023-01-23 03:37:46.325977: step: 280/527, loss: 0.04387526586651802 2023-01-23 03:37:47.416732: step: 284/527, loss: 0.0035158158279955387 2023-01-23 03:37:48.538995: step: 288/527, loss: 0.00032806396484375 2023-01-23 03:37:49.669125: step: 292/527, loss: 2.746581958490424e-05 2023-01-23 03:37:50.801396: step: 296/527, loss: 0.00024552346440032125 2023-01-23 03:37:51.900345: step: 300/527, loss: 3.166198803228326e-05 2023-01-23 03:37:53.015198: step: 304/527, loss: 0.004077625460922718 2023-01-23 03:37:54.121888: step: 308/527, loss: 0.0012128830421715975 2023-01-23 03:37:55.222510: step: 312/527, loss: 3.662109520519152e-05 2023-01-23 03:37:56.353663: step: 316/527, loss: 0.01729745790362358 2023-01-23 03:37:57.475654: step: 320/527, loss: 4.024505687993951e-05 2023-01-23 03:37:58.593364: step: 324/527, loss: 0.0010309219360351562 2023-01-23 03:37:59.699924: step: 328/527, loss: 0.05305824428796768 2023-01-23 03:38:00.779601: step: 332/527, loss: 0.011519813910126686 2023-01-23 03:38:01.906744: step: 336/527, loss: 0.0031494139693677425 2023-01-23 03:38:03.052472: step: 340/527, loss: 0.000179290771484375 2023-01-23 03:38:04.189091: step: 344/527, loss: 0.006348800845444202 2023-01-23 03:38:05.290378: step: 348/527, loss: 0.0012062073219567537 2023-01-23 03:38:06.444809: step: 352/527, loss: 1.277923547604587e-05 2023-01-23 03:38:07.561460: step: 356/527, loss: 0.05075817182660103 2023-01-23 03:38:08.673894: step: 360/527, loss: 0.001114320824854076 2023-01-23 03:38:09.773632: step: 364/527, loss: 8.449555025435984e-05 2023-01-23 03:38:10.889993: step: 368/527, loss: 0.016541672870516777 2023-01-23 03:38:11.989930: step: 372/527, loss: 0.004622983746230602 2023-01-23 03:38:13.113780: step: 376/527, loss: 0.00474814185872674 2023-01-23 03:38:14.209237: step: 380/527, loss: 0.005918407812714577 2023-01-23 03:38:15.347008: step: 384/527, loss: 0.0004203796270303428 2023-01-23 03:38:16.464205: step: 388/527, loss: 0.2650686204433441 2023-01-23 03:38:17.561207: step: 392/527, loss: 0.00012855530076194555 2023-01-23 03:38:18.674241: step: 396/527, loss: 0.023317432031035423 2023-01-23 03:38:19.769024: step: 400/527, loss: 0.0048354147002100945 2023-01-23 03:38:20.903509: step: 404/527, loss: 0.007964467629790306 2023-01-23 03:38:22.004976: step: 408/527, loss: 0.04084453731775284 2023-01-23 03:38:23.125731: step: 412/527, loss: 0.03669404983520508 2023-01-23 03:38:24.246530: step: 416/527, loss: 0.028662966564297676 2023-01-23 03:38:25.359461: step: 420/527, loss: 0.00024499892606399953 2023-01-23 03:38:26.483978: step: 424/527, loss: 0.003098870161920786 2023-01-23 03:38:27.589531: step: 428/527, loss: 0.0018156052101403475 2023-01-23 03:38:28.678276: step: 432/527, loss: 0.007095145992934704 2023-01-23 03:38:29.787841: step: 436/527, loss: 0.00019741058349609375 2023-01-23 03:38:30.898019: step: 440/527, loss: 0.002078437712043524 2023-01-23 03:38:32.024493: step: 444/527, loss: 0.005266761872917414 2023-01-23 03:38:33.130382: step: 448/527, loss: 0.0018029690254479647 2023-01-23 03:38:34.259954: step: 452/527, loss: 0.03513755649328232 2023-01-23 03:38:35.348646: step: 456/527, loss: 0.0023438455536961555 2023-01-23 03:38:36.468802: step: 460/527, loss: 0.006919098552316427 2023-01-23 03:38:37.573062: step: 464/527, loss: 0.0007852555136196315 2023-01-23 03:38:38.663828: step: 468/527, loss: 0.013456153683364391 2023-01-23 03:38:39.750431: step: 472/527, loss: 0.000621795654296875 2023-01-23 03:38:40.920262: step: 476/527, loss: 0.06559744477272034 2023-01-23 03:38:42.037638: step: 480/527, loss: 0.0035276412963867188 2023-01-23 03:38:43.157828: step: 484/527, loss: 0.02904348261654377 2023-01-23 03:38:44.279299: step: 488/527, loss: 0.003094196319580078 2023-01-23 03:38:45.367158: step: 492/527, loss: 0.0008033752674236894 2023-01-23 03:38:46.487340: step: 496/527, loss: 0.00444374093785882 2023-01-23 03:38:47.623490: step: 500/527, loss: 0.014609336853027344 2023-01-23 03:38:48.761655: step: 504/527, loss: 4.720688230008818e-05 2023-01-23 03:38:49.854212: step: 508/527, loss: 0.005415153689682484 2023-01-23 03:38:50.995535: step: 512/527, loss: 0.004528617952018976 2023-01-23 03:38:52.104920: step: 516/527, loss: 0.0003110409015789628 2023-01-23 03:38:53.208911: step: 520/527, loss: 0.00036683081998489797 2023-01-23 03:38:54.315761: step: 524/527, loss: 0.010301709175109863 2023-01-23 03:38:55.444590: step: 528/527, loss: 0.0007524490938521922 2023-01-23 03:38:56.528585: step: 532/527, loss: 0.0020704269409179688 2023-01-23 03:38:57.642669: step: 536/527, loss: 0.031176377087831497 2023-01-23 03:38:58.756369: step: 540/527, loss: 0.01903695985674858 2023-01-23 03:38:59.870981: step: 544/527, loss: 0.004793358035385609 2023-01-23 03:39:01.003112: step: 548/527, loss: 0.005286025814712048 2023-01-23 03:39:02.102287: step: 552/527, loss: 0.0006929397932253778 2023-01-23 03:39:03.228082: step: 556/527, loss: 0.0001911163271870464 2023-01-23 03:39:04.321481: step: 560/527, loss: 0.0026998519897460938 2023-01-23 03:39:05.436562: step: 564/527, loss: 0.06305485218763351 2023-01-23 03:39:06.555921: step: 568/527, loss: 0.028919601812958717 2023-01-23 03:39:07.690648: step: 572/527, loss: 0.00035381317138671875 2023-01-23 03:39:08.777558: step: 576/527, loss: 0.0006907462957315147 2023-01-23 03:39:09.877294: step: 580/527, loss: 0.0004951477167196572 2023-01-23 03:39:10.991310: step: 584/527, loss: 0.00132837297860533 2023-01-23 03:39:12.090859: step: 588/527, loss: 0.06281270831823349 2023-01-23 03:39:13.196047: step: 592/527, loss: 0.0032138824462890625 2023-01-23 03:39:14.342707: step: 596/527, loss: 0.0018033981323242188 2023-01-23 03:39:15.479073: step: 600/527, loss: 0.000164031982421875 2023-01-23 03:39:16.584995: step: 604/527, loss: 0.00018787384033203125 2023-01-23 03:39:17.704280: step: 608/527, loss: 0.0001367092045256868 2023-01-23 03:39:18.799326: step: 612/527, loss: 0.0002193450927734375 2023-01-23 03:39:19.919132: step: 616/527, loss: 7.905960228526965e-05 2023-01-23 03:39:21.003859: step: 620/527, loss: 0.002499866532161832 2023-01-23 03:39:22.111190: step: 624/527, loss: 0.0008541106944903731 2023-01-23 03:39:23.238346: step: 628/527, loss: 0.005959892179816961 2023-01-23 03:39:24.356790: step: 632/527, loss: 0.0015912533272057772 2023-01-23 03:39:25.462071: step: 636/527, loss: 0.001168584800325334 2023-01-23 03:39:26.549910: step: 640/527, loss: 0.003045368241146207 2023-01-23 03:39:27.658969: step: 644/527, loss: 0.031163597479462624 2023-01-23 03:39:28.798121: step: 648/527, loss: 0.0002529144403524697 2023-01-23 03:39:29.893301: step: 652/527, loss: 0.015043544583022594 2023-01-23 03:39:31.010764: step: 656/527, loss: 0.004358482547104359 2023-01-23 03:39:32.114305: step: 660/527, loss: 0.005823039915412664 2023-01-23 03:39:33.242975: step: 664/527, loss: 5.779266211902723e-05 2023-01-23 03:39:34.378265: step: 668/527, loss: 0.0238476749509573 2023-01-23 03:39:35.526997: step: 672/527, loss: 0.015002441592514515 2023-01-23 03:39:36.637123: step: 676/527, loss: 0.00017404557729605585 2023-01-23 03:39:37.732049: step: 680/527, loss: 6.031990051269531e-05 2023-01-23 03:39:38.833137: step: 684/527, loss: 4.291534423828125e-06 2023-01-23 03:39:39.938840: step: 688/527, loss: 0.02594575844705105 2023-01-23 03:39:41.068419: step: 692/527, loss: 0.001591157983057201 2023-01-23 03:39:42.208435: step: 696/527, loss: 0.0002475738583598286 2023-01-23 03:39:43.313240: step: 700/527, loss: 7.534027827205136e-05 2023-01-23 03:39:44.428622: step: 704/527, loss: 1.049041748046875e-05 2023-01-23 03:39:45.534361: step: 708/527, loss: 0.0005067825550213456 2023-01-23 03:39:46.640617: step: 712/527, loss: 0.00020160674466751516 2023-01-23 03:39:47.755806: step: 716/527, loss: 4.57763690064894e-06 2023-01-23 03:39:48.867470: step: 720/527, loss: 0.02637786790728569 2023-01-23 03:39:49.995041: step: 724/527, loss: -9.536743306171047e-08 2023-01-23 03:39:51.129162: step: 728/527, loss: -3.0517576306010596e-06 2023-01-23 03:39:52.268967: step: 732/527, loss: 0.03326826170086861 2023-01-23 03:39:53.385708: step: 736/527, loss: 0.00024242402287200093 2023-01-23 03:39:54.468442: step: 740/527, loss: 0.000480842572869733 2023-01-23 03:39:55.595923: step: 744/527, loss: 0.14041289687156677 2023-01-23 03:39:56.751899: step: 748/527, loss: 0.0016294479137286544 2023-01-23 03:39:57.835278: step: 752/527, loss: 0.00039472582284361124 2023-01-23 03:39:58.932879: step: 756/527, loss: 0.0006849289056845009 2023-01-23 03:40:00.066683: step: 760/527, loss: 0.055683329701423645 2023-01-23 03:40:01.197372: step: 764/527, loss: -5.149840944795869e-06 2023-01-23 03:40:02.301598: step: 768/527, loss: 0.015389394015073776 2023-01-23 03:40:03.425182: step: 772/527, loss: 0.0018640519119799137 2023-01-23 03:40:04.539706: step: 776/527, loss: 0.0010543823009356856 2023-01-23 03:40:05.626429: step: 780/527, loss: 0.0002099990815622732 2023-01-23 03:40:06.733616: step: 784/527, loss: 2.689361645025201e-05 2023-01-23 03:40:07.849247: step: 788/527, loss: 0.004417181480675936 2023-01-23 03:40:08.950813: step: 792/527, loss: 0.018381882458925247 2023-01-23 03:40:10.078443: step: 796/527, loss: 0.01084060687571764 2023-01-23 03:40:11.192257: step: 800/527, loss: 0.004812145140022039 2023-01-23 03:40:12.298320: step: 804/527, loss: 0.08924102783203125 2023-01-23 03:40:13.421071: step: 808/527, loss: 0.014422702603042126 2023-01-23 03:40:14.516593: step: 812/527, loss: 0.030928421765565872 2023-01-23 03:40:15.620674: step: 816/527, loss: 0.01819891855120659 2023-01-23 03:40:16.767193: step: 820/527, loss: 0.003727388335391879 2023-01-23 03:40:17.910267: step: 824/527, loss: 0.06298418343067169 2023-01-23 03:40:19.054031: step: 828/527, loss: 0.0014245033962652087 2023-01-23 03:40:20.152496: step: 832/527, loss: 0.038451578468084335 2023-01-23 03:40:21.287007: step: 836/527, loss: 0.03311176598072052 2023-01-23 03:40:22.410698: step: 840/527, loss: 0.0009989738464355469 2023-01-23 03:40:23.542098: step: 844/527, loss: 0.0011274338467046618 2023-01-23 03:40:24.696360: step: 848/527, loss: 0.00535850552842021 2023-01-23 03:40:25.824514: step: 852/527, loss: 0.007448769174516201 2023-01-23 03:40:26.933162: step: 856/527, loss: 0.010066986083984375 2023-01-23 03:40:28.042340: step: 860/527, loss: 0.0008013487095013261 2023-01-23 03:40:29.155016: step: 864/527, loss: 0.00015277863712981343 2023-01-23 03:40:30.288988: step: 868/527, loss: 9.336470975540578e-05 2023-01-23 03:40:31.392782: step: 872/527, loss: 0.0009654522291384637 2023-01-23 03:40:32.553504: step: 876/527, loss: 0.0037525177467614412 2023-01-23 03:40:33.696953: step: 880/527, loss: 0.025702476501464844 2023-01-23 03:40:34.855547: step: 884/527, loss: 0.0015096664428710938 2023-01-23 03:40:36.024663: step: 888/527, loss: 0.009095096960663795 2023-01-23 03:40:37.158502: step: 892/527, loss: 0.048610687255859375 2023-01-23 03:40:38.296922: step: 896/527, loss: 0.0021051408257335424 2023-01-23 03:40:39.390640: step: 900/527, loss: 0.003425789065659046 2023-01-23 03:40:40.509592: step: 904/527, loss: 0.04430227726697922 2023-01-23 03:40:41.615597: step: 908/527, loss: 0.0002478599490132183 2023-01-23 03:40:42.733931: step: 912/527, loss: 3.1471254260395654e-06 2023-01-23 03:40:43.846080: step: 916/527, loss: 0.0033757209312170744 2023-01-23 03:40:44.971598: step: 920/527, loss: 0.003626060439273715 2023-01-23 03:40:46.096497: step: 924/527, loss: 0.03672304376959801 2023-01-23 03:40:47.266375: step: 928/527, loss: 0.00027179718017578125 2023-01-23 03:40:48.371623: step: 932/527, loss: 0.0011697768932208419 2023-01-23 03:40:49.485043: step: 936/527, loss: 4.291534423828125e-06 2023-01-23 03:40:50.558101: step: 940/527, loss: 0.00032830238342285156 2023-01-23 03:40:51.660489: step: 944/527, loss: 4.425048973644152e-05 2023-01-23 03:40:52.770573: step: 948/527, loss: 5.340576535672881e-06 2023-01-23 03:40:53.877016: step: 952/527, loss: 1.430511474609375e-05 2023-01-23 03:40:54.985326: step: 956/527, loss: 0.00011329651169944555 2023-01-23 03:40:56.103745: step: 960/527, loss: 0.0019049644470214844 2023-01-23 03:40:57.253375: step: 964/527, loss: 0.07014808803796768 2023-01-23 03:40:58.367859: step: 968/527, loss: 0.007043933961540461 2023-01-23 03:40:59.471528: step: 972/527, loss: 0.10995063930749893 2023-01-23 03:41:00.562220: step: 976/527, loss: 0.00012874603271484375 2023-01-23 03:41:01.649676: step: 980/527, loss: 0.013449382968246937 2023-01-23 03:41:02.771194: step: 984/527, loss: 0.01301345881074667 2023-01-23 03:41:03.895953: step: 988/527, loss: 0.007336426060646772 2023-01-23 03:41:04.978757: step: 992/527, loss: -1.5258789289873675e-06 2023-01-23 03:41:06.129660: step: 996/527, loss: 0.011358261108398438 2023-01-23 03:41:07.245187: step: 1000/527, loss: 0.00013909340486861765 2023-01-23 03:41:08.353146: step: 1004/527, loss: 0.0001066207914846018 2023-01-23 03:41:09.451880: step: 1008/527, loss: 0.571603536605835 2023-01-23 03:41:10.543942: step: 1012/527, loss: 0.03208484873175621 2023-01-23 03:41:11.654829: step: 1016/527, loss: 0.00943231675773859 2023-01-23 03:41:12.753749: step: 1020/527, loss: 0.003540611360222101 2023-01-23 03:41:13.895106: step: 1024/527, loss: 0.1077755019068718 2023-01-23 03:41:15.006220: step: 1028/527, loss: 0.04181881248950958 2023-01-23 03:41:16.141884: step: 1032/527, loss: 0.04215993732213974 2023-01-23 03:41:17.241534: step: 1036/527, loss: 0.0014624595642089844 2023-01-23 03:41:18.322216: step: 1040/527, loss: 6.48498553346144e-06 2023-01-23 03:41:19.410284: step: 1044/527, loss: 0.005657482426613569 2023-01-23 03:41:20.531965: step: 1048/527, loss: 0.0014853953616693616 2023-01-23 03:41:21.667786: step: 1052/527, loss: 0.13561153411865234 2023-01-23 03:41:22.763432: step: 1056/527, loss: 0.00016446114750579 2023-01-23 03:41:23.928837: step: 1060/527, loss: 0.11530762165784836 2023-01-23 03:41:25.041714: step: 1064/527, loss: 0.01401300448924303 2023-01-23 03:41:26.144139: step: 1068/527, loss: 0.0003203391970600933 2023-01-23 03:41:27.245449: step: 1072/527, loss: 0.0015596390003338456 2023-01-23 03:41:28.331550: step: 1076/527, loss: 0.0009634971502237022 2023-01-23 03:41:29.452447: step: 1080/527, loss: 6.513595872092992e-05 2023-01-23 03:41:30.600316: step: 1084/527, loss: 0.015500831417739391 2023-01-23 03:41:31.708103: step: 1088/527, loss: 0.007788181304931641 2023-01-23 03:41:32.828011: step: 1092/527, loss: 0.006949997041374445 2023-01-23 03:41:33.916995: step: 1096/527, loss: 0.008608246222138405 2023-01-23 03:41:35.061473: step: 1100/527, loss: 0.00017547607421875 2023-01-23 03:41:36.164990: step: 1104/527, loss: 0.006443119142204523 2023-01-23 03:41:37.277792: step: 1108/527, loss: 0.0023887634743005037 2023-01-23 03:41:38.362750: step: 1112/527, loss: 0.0005298614269122481 2023-01-23 03:41:39.514009: step: 1116/527, loss: 0.0007083893287926912 2023-01-23 03:41:40.665024: step: 1120/527, loss: 0.0016560554504394531 2023-01-23 03:41:41.853263: step: 1124/527, loss: 0.01673126220703125 2023-01-23 03:41:42.951610: step: 1128/527, loss: 0.027311135083436966 2023-01-23 03:41:44.060001: step: 1132/527, loss: 0.01328296773135662 2023-01-23 03:41:45.153774: step: 1136/527, loss: 4.291534423828125e-06 2023-01-23 03:41:46.283393: step: 1140/527, loss: 0.0005595207330770791 2023-01-23 03:41:47.389294: step: 1144/527, loss: 0.0008642196771688759 2023-01-23 03:41:48.539130: step: 1148/527, loss: 0.0006767273298464715 2023-01-23 03:41:49.642354: step: 1152/527, loss: 0.007454490754753351 2023-01-23 03:41:50.780191: step: 1156/527, loss: -3.0517576306010596e-06 2023-01-23 03:41:51.901569: step: 1160/527, loss: 0.0201263427734375 2023-01-23 03:41:53.014940: step: 1164/527, loss: 0.008500671945512295 2023-01-23 03:41:54.123843: step: 1168/527, loss: 0.02196650579571724 2023-01-23 03:41:55.214732: step: 1172/527, loss: 0.0015062332386150956 2023-01-23 03:41:56.305965: step: 1176/527, loss: 3.175735764671117e-05 2023-01-23 03:41:57.425541: step: 1180/527, loss: 0.02458667755126953 2023-01-23 03:41:58.530319: step: 1184/527, loss: 0.006309890653938055 2023-01-23 03:41:59.639216: step: 1188/527, loss: 0.11926989257335663 2023-01-23 03:42:00.726765: step: 1192/527, loss: 0.00024309159198310226 2023-01-23 03:42:01.860411: step: 1196/527, loss: 0.00034198761568404734 2023-01-23 03:42:02.960065: step: 1200/527, loss: 0.0009774207137525082 2023-01-23 03:42:04.103456: step: 1204/527, loss: 0.00083074567373842 2023-01-23 03:42:05.212064: step: 1208/527, loss: 0.012092972174286842 2023-01-23 03:42:06.319006: step: 1212/527, loss: 0.17910784482955933 2023-01-23 03:42:07.410311: step: 1216/527, loss: 8.287429955089465e-05 2023-01-23 03:42:08.536613: step: 1220/527, loss: 0.00026597976102493703 2023-01-23 03:42:09.630968: step: 1224/527, loss: 0.0031692981719970703 2023-01-23 03:42:10.754001: step: 1228/527, loss: 1.621246337890625e-05 2023-01-23 03:42:11.853006: step: 1232/527, loss: 0.0018568038940429688 2023-01-23 03:42:12.971521: step: 1236/527, loss: 0.0008809566497802734 2023-01-23 03:42:14.067620: step: 1240/527, loss: 0.0026684761978685856 2023-01-23 03:42:15.160908: step: 1244/527, loss: 0.010835838504135609 2023-01-23 03:42:16.260275: step: 1248/527, loss: 0.08521823585033417 2023-01-23 03:42:17.382669: step: 1252/527, loss: 0.03731956332921982 2023-01-23 03:42:18.494768: step: 1256/527, loss: 0.00023627281188964844 2023-01-23 03:42:19.614451: step: 1260/527, loss: 6.065368506824598e-05 2023-01-23 03:42:20.756068: step: 1264/527, loss: 0.0001829147367971018 2023-01-23 03:42:21.912609: step: 1268/527, loss: 0.00393333425745368 2023-01-23 03:42:23.035899: step: 1272/527, loss: 0.00014209747314453125 2023-01-23 03:42:24.183842: step: 1276/527, loss: 0.0066648488864302635 2023-01-23 03:42:25.289445: step: 1280/527, loss: 0.002902126405388117 2023-01-23 03:42:26.406084: step: 1284/527, loss: 0.0014165878528729081 2023-01-23 03:42:27.511186: step: 1288/527, loss: 1.735687328618951e-05 2023-01-23 03:42:28.616318: step: 1292/527, loss: 0.07302512973546982 2023-01-23 03:42:29.740819: step: 1296/527, loss: 0.0147247314453125 2023-01-23 03:42:30.836161: step: 1300/527, loss: 5.493163916980848e-05 2023-01-23 03:42:31.944858: step: 1304/527, loss: 2.3746491933707148e-05 2023-01-23 03:42:33.046025: step: 1308/527, loss: 0.009915444999933243 2023-01-23 03:42:34.165766: step: 1312/527, loss: 0.00031299592228606343 2023-01-23 03:42:35.262577: step: 1316/527, loss: 0.008557701483368874 2023-01-23 03:42:36.390060: step: 1320/527, loss: 0.0022821428719908 2023-01-23 03:42:37.499685: step: 1324/527, loss: 0.0010303497547283769 2023-01-23 03:42:38.600642: step: 1328/527, loss: 0.0002884864807128906 2023-01-23 03:42:39.701621: step: 1332/527, loss: 0.008757400326430798 2023-01-23 03:42:40.877183: step: 1336/527, loss: 0.07017135620117188 2023-01-23 03:42:41.981068: step: 1340/527, loss: 0.004674673080444336 2023-01-23 03:42:43.098521: step: 1344/527, loss: 0.07266692817211151 2023-01-23 03:42:44.216535: step: 1348/527, loss: 0.00021200180344749242 2023-01-23 03:42:45.322813: step: 1352/527, loss: 0.00012035370309604332 2023-01-23 03:42:46.444066: step: 1356/527, loss: 0.0030529021751135588 2023-01-23 03:42:47.571628: step: 1360/527, loss: 3.337860107421875e-05 2023-01-23 03:42:48.680225: step: 1364/527, loss: 0.005961894989013672 2023-01-23 03:42:49.813904: step: 1368/527, loss: 1.621246337890625e-05 2023-01-23 03:42:50.943882: step: 1372/527, loss: 0.00015954971604514867 2023-01-23 03:42:52.046393: step: 1376/527, loss: 2.708435022213962e-05 2023-01-23 03:42:53.153996: step: 1380/527, loss: 8.792877633823082e-05 2023-01-23 03:42:54.275809: step: 1384/527, loss: 0.0005931854830123484 2023-01-23 03:42:55.377924: step: 1388/527, loss: 0.004020023159682751 2023-01-23 03:42:56.500648: step: 1392/527, loss: 0.001962375594303012 2023-01-23 03:42:57.638155: step: 1396/527, loss: 0.0006391525384970009 2023-01-23 03:42:58.751798: step: 1400/527, loss: 4.329681542003527e-05 2023-01-23 03:42:59.880565: step: 1404/527, loss: 1.296997106692288e-05 2023-01-23 03:43:01.008776: step: 1408/527, loss: 0.0001470565766794607 2023-01-23 03:43:02.108901: step: 1412/527, loss: 0.013037562370300293 2023-01-23 03:43:03.228461: step: 1416/527, loss: 0.0006107330555096269 2023-01-23 03:43:04.336242: step: 1420/527, loss: 0.0036802294198423624 2023-01-23 03:43:05.472642: step: 1424/527, loss: 2.9659271604032256e-05 2023-01-23 03:43:06.554012: step: 1428/527, loss: 3.027916136488784e-05 2023-01-23 03:43:07.668533: step: 1432/527, loss: 0.0028078078757971525 2023-01-23 03:43:08.766705: step: 1436/527, loss: 0.0001264572056243196 2023-01-23 03:43:09.856468: step: 1440/527, loss: 0.010053062811493874 2023-01-23 03:43:10.965171: step: 1444/527, loss: 3.814697265625e-05 2023-01-23 03:43:12.102080: step: 1448/527, loss: 0.015074157156050205 2023-01-23 03:43:13.226926: step: 1452/527, loss: 0.06313496083021164 2023-01-23 03:43:14.357388: step: 1456/527, loss: 0.00028133392333984375 2023-01-23 03:43:15.505617: step: 1460/527, loss: 0.033357810229063034 2023-01-23 03:43:16.622322: step: 1464/527, loss: 7.505416579078883e-05 2023-01-23 03:43:17.738131: step: 1468/527, loss: 0.0013175965286791325 2023-01-23 03:43:18.903025: step: 1472/527, loss: 0.018235208466649055 2023-01-23 03:43:20.031128: step: 1476/527, loss: 7.152557373046875e-06 2023-01-23 03:43:21.127291: step: 1480/527, loss: 0.020801354199647903 2023-01-23 03:43:22.232649: step: 1484/527, loss: 0.00021457672119140625 2023-01-23 03:43:23.330961: step: 1488/527, loss: 0.0011400223011150956 2023-01-23 03:43:24.446944: step: 1492/527, loss: 0.06320180743932724 2023-01-23 03:43:25.550180: step: 1496/527, loss: 0.0013172149192541838 2023-01-23 03:43:26.690453: step: 1500/527, loss: 0.009605550207197666 2023-01-23 03:43:27.777141: step: 1504/527, loss: 0.0001027107282425277 2023-01-23 03:43:28.924403: step: 1508/527, loss: 0.011391830630600452 2023-01-23 03:43:30.036842: step: 1512/527, loss: 2.441406286379788e-05 2023-01-23 03:43:31.187135: step: 1516/527, loss: 0.003962898626923561 2023-01-23 03:43:32.297027: step: 1520/527, loss: 0.0012102127075195312 2023-01-23 03:43:33.432041: step: 1524/527, loss: 6.29425039733178e-06 2023-01-23 03:43:34.571281: step: 1528/527, loss: 4.6539309551008046e-05 2023-01-23 03:43:35.671628: step: 1532/527, loss: 0.00022726060706190765 2023-01-23 03:43:36.747219: step: 1536/527, loss: 9.202956789522432e-06 2023-01-23 03:43:37.876272: step: 1540/527, loss: 0.03403463587164879 2023-01-23 03:43:38.992506: step: 1544/527, loss: 0.03675422817468643 2023-01-23 03:43:40.093126: step: 1548/527, loss: 0.00046062469482421875 2023-01-23 03:43:41.173083: step: 1552/527, loss: 0.0014482499100267887 2023-01-23 03:43:42.296201: step: 1556/527, loss: 0.006739807315170765 2023-01-23 03:43:43.422302: step: 1560/527, loss: 4.682540748035535e-05 2023-01-23 03:43:44.542941: step: 1564/527, loss: 8.010864803509321e-06 2023-01-23 03:43:45.662408: step: 1568/527, loss: 0.007746315095573664 2023-01-23 03:43:46.807465: step: 1572/527, loss: 0.011471820063889027 2023-01-23 03:43:47.921914: step: 1576/527, loss: 0.0018236160976812243 2023-01-23 03:43:49.048013: step: 1580/527, loss: 9.794235666049644e-05 2023-01-23 03:43:50.256994: step: 1584/527, loss: 0.014097117818892002 2023-01-23 03:43:51.401958: step: 1588/527, loss: 0.054868318140506744 2023-01-23 03:43:52.495244: step: 1592/527, loss: 0.0016113758319988847 2023-01-23 03:43:53.620189: step: 1596/527, loss: 0.000562477158382535 2023-01-23 03:43:54.733852: step: 1600/527, loss: 0.0017742158379405737 2023-01-23 03:43:55.849957: step: 1604/527, loss: 0.007762241642922163 2023-01-23 03:43:56.977747: step: 1608/527, loss: 0.005294323433190584 2023-01-23 03:43:58.084846: step: 1612/527, loss: 0.0006788253667764366 2023-01-23 03:43:59.197423: step: 1616/527, loss: 0.00012226105900481343 2023-01-23 03:44:00.294072: step: 1620/527, loss: 0.00026607513427734375 2023-01-23 03:44:01.408207: step: 1624/527, loss: 0.0003145218070130795 2023-01-23 03:44:02.532884: step: 1628/527, loss: 0.004463577177375555 2023-01-23 03:44:03.671641: step: 1632/527, loss: 3.585815284168348e-05 2023-01-23 03:44:04.790185: step: 1636/527, loss: 0.03625917434692383 2023-01-23 03:44:05.907121: step: 1640/527, loss: 0.0003758430539164692 2023-01-23 03:44:07.037771: step: 1644/527, loss: 0.0035732747055590153 2023-01-23 03:44:08.147096: step: 1648/527, loss: 0.0001291275111725554 2023-01-23 03:44:09.268762: step: 1652/527, loss: 0.005347824189811945 2023-01-23 03:44:10.415627: step: 1656/527, loss: 0.0002368926943745464 2023-01-23 03:44:11.554246: step: 1660/527, loss: 0.00099773402325809 2023-01-23 03:44:12.694749: step: 1664/527, loss: 0.07575778663158417 2023-01-23 03:44:13.802372: step: 1668/527, loss: 9.965896606445312e-05 2023-01-23 03:44:14.959736: step: 1672/527, loss: 0.0036961555015295744 2023-01-23 03:44:16.063919: step: 1676/527, loss: 0.00030803680419921875 2023-01-23 03:44:17.183823: step: 1680/527, loss: 1.602172778802924e-05 2023-01-23 03:44:18.282860: step: 1684/527, loss: 0.0006830215570516884 2023-01-23 03:44:19.391896: step: 1688/527, loss: 0.00043444635230116546 2023-01-23 03:44:20.542971: step: 1692/527, loss: 3.318786548334174e-05 2023-01-23 03:44:21.633432: step: 1696/527, loss: 3.3855438232421875e-05 2023-01-23 03:44:22.735234: step: 1700/527, loss: 0.0003883361641783267 2023-01-23 03:44:23.908566: step: 1704/527, loss: 0.00017051698523573577 2023-01-23 03:44:25.034797: step: 1708/527, loss: 0.00034885405329987407 2023-01-23 03:44:26.179178: step: 1712/527, loss: 0.020542718470096588 2023-01-23 03:44:27.300282: step: 1716/527, loss: 3.5572051274357364e-05 2023-01-23 03:44:28.436920: step: 1720/527, loss: 0.00017442702664993703 2023-01-23 03:44:29.543881: step: 1724/527, loss: 0.0002727508544921875 2023-01-23 03:44:30.637422: step: 1728/527, loss: 0.0009077072609215975 2023-01-23 03:44:31.783366: step: 1732/527, loss: 0.017871523275971413 2023-01-23 03:44:32.893798: step: 1736/527, loss: 0.00011997222463833168 2023-01-23 03:44:34.023938: step: 1740/527, loss: 0.29222604632377625 2023-01-23 03:44:35.115880: step: 1744/527, loss: 0.00028753283550031483 2023-01-23 03:44:36.194512: step: 1748/527, loss: 0.00018997193546965718 2023-01-23 03:44:37.296999: step: 1752/527, loss: 0.0015269280411303043 2023-01-23 03:44:38.405238: step: 1756/527, loss: 0.00023727417283225805 2023-01-23 03:44:39.491910: step: 1760/527, loss: 1.2731552487821318e-05 2023-01-23 03:44:40.601389: step: 1764/527, loss: 0.0012496948475018144 2023-01-23 03:44:41.711419: step: 1768/527, loss: 0.0013566971756517887 2023-01-23 03:44:42.832625: step: 1772/527, loss: 0.0017074585193768144 2023-01-23 03:44:43.963289: step: 1776/527, loss: 0.0005752564175054431 2023-01-23 03:44:45.081704: step: 1780/527, loss: 0.0003880501026287675 2023-01-23 03:44:46.235168: step: 1784/527, loss: 0.5561831593513489 2023-01-23 03:44:47.362184: step: 1788/527, loss: 0.000362205522833392 2023-01-23 03:44:48.505744: step: 1792/527, loss: 0.004054451361298561 2023-01-23 03:44:49.617511: step: 1796/527, loss: 1.5306473869713955e-05 2023-01-23 03:44:50.756199: step: 1800/527, loss: 0.0003002166631631553 2023-01-23 03:44:51.855940: step: 1804/527, loss: 0.00337390904314816 2023-01-23 03:44:52.965887: step: 1808/527, loss: 0.004103279206901789 2023-01-23 03:44:54.116538: step: 1812/527, loss: 0.03952770307660103 2023-01-23 03:44:55.231463: step: 1816/527, loss: 0.07254792004823685 2023-01-23 03:44:56.349029: step: 1820/527, loss: 0.0006922244792804122 2023-01-23 03:44:57.475912: step: 1824/527, loss: 0.00016803742619231343 2023-01-23 03:44:58.586155: step: 1828/527, loss: 0.00020990372286178172 2023-01-23 03:44:59.708373: step: 1832/527, loss: 0.007465553469955921 2023-01-23 03:45:00.860793: step: 1836/527, loss: 0.002485084580257535 2023-01-23 03:45:01.961067: step: 1840/527, loss: 0.00030832289485260844 2023-01-23 03:45:03.084615: step: 1844/527, loss: 0.006155014503747225 2023-01-23 03:45:04.239446: step: 1848/527, loss: 0.01597149483859539 2023-01-23 03:45:05.350507: step: 1852/527, loss: 0.010368538089096546 2023-01-23 03:45:06.467814: step: 1856/527, loss: 0.0009034157264977694 2023-01-23 03:45:07.586731: step: 1860/527, loss: 0.008312702178955078 2023-01-23 03:45:08.729507: step: 1864/527, loss: 0.00018615722365211695 2023-01-23 03:45:09.843419: step: 1868/527, loss: 0.014773559756577015 2023-01-23 03:45:10.945380: step: 1872/527, loss: 0.0020902634132653475 2023-01-23 03:45:12.054902: step: 1876/527, loss: 0.000776100205257535 2023-01-23 03:45:13.200906: step: 1880/527, loss: 3.452301461948082e-05 2023-01-23 03:45:14.319449: step: 1884/527, loss: 0.0002249717799713835 2023-01-23 03:45:15.460171: step: 1888/527, loss: 0.0007179260719567537 2023-01-23 03:45:16.578899: step: 1892/527, loss: 0.0015614510048180819 2023-01-23 03:45:17.676081: step: 1896/527, loss: 0.0034096718300133944 2023-01-23 03:45:18.773626: step: 1900/527, loss: 0.036923788487911224 2023-01-23 03:45:19.889151: step: 1904/527, loss: 0.02938261069357395 2023-01-23 03:45:21.019402: step: 1908/527, loss: 0.001229620072990656 2023-01-23 03:45:22.179181: step: 1912/527, loss: 0.0003037929709535092 2023-01-23 03:45:23.283480: step: 1916/527, loss: 2.7322770620230585e-05 2023-01-23 03:45:24.390534: step: 1920/527, loss: 5.435943421616685e-06 2023-01-23 03:45:25.497498: step: 1924/527, loss: 0.0018215179443359375 2023-01-23 03:45:26.616367: step: 1928/527, loss: 0.024213576689362526 2023-01-23 03:45:27.736939: step: 1932/527, loss: 0.09364891052246094 2023-01-23 03:45:28.849306: step: 1936/527, loss: 8.468628220725805e-05 2023-01-23 03:45:29.960957: step: 1940/527, loss: 0.004463482182472944 2023-01-23 03:45:31.078245: step: 1944/527, loss: 0.0866088941693306 2023-01-23 03:45:32.180172: step: 1948/527, loss: 0.00044760701712220907 2023-01-23 03:45:33.305774: step: 1952/527, loss: 1.564025842526462e-05 2023-01-23 03:45:34.430639: step: 1956/527, loss: 0.048040393739938736 2023-01-23 03:45:35.544246: step: 1960/527, loss: 0.00012545585923362523 2023-01-23 03:45:36.666211: step: 1964/527, loss: 0.0009127140510827303 2023-01-23 03:45:37.747018: step: 1968/527, loss: 0.0009654044988565147 2023-01-23 03:45:38.867336: step: 1972/527, loss: 0.02923450618982315 2023-01-23 03:45:39.971601: step: 1976/527, loss: 0.0028626921121031046 2023-01-23 03:45:41.084439: step: 1980/527, loss: 2.021789623540826e-05 2023-01-23 03:45:42.176979: step: 1984/527, loss: 7.848739915061742e-05 2023-01-23 03:45:43.310707: step: 1988/527, loss: 0.0007068634149618447 2023-01-23 03:45:44.424231: step: 1992/527, loss: 0.0002561568981036544 2023-01-23 03:45:45.543996: step: 1996/527, loss: 0.0010416507720947266 2023-01-23 03:45:46.649240: step: 2000/527, loss: 0.0003029823419637978 2023-01-23 03:45:47.741437: step: 2004/527, loss: 0.01583237573504448 2023-01-23 03:45:48.842073: step: 2008/527, loss: 0.00916681345552206 2023-01-23 03:45:49.953392: step: 2012/527, loss: 9.622573998058215e-05 2023-01-23 03:45:51.029932: step: 2016/527, loss: 0.006411457434296608 2023-01-23 03:45:52.144256: step: 2020/527, loss: 3.82423386326991e-05 2023-01-23 03:45:53.239395: step: 2024/527, loss: 0.009990692138671875 2023-01-23 03:45:54.357723: step: 2028/527, loss: 0.005894470028579235 2023-01-23 03:45:55.487679: step: 2032/527, loss: 0.005634975619614124 2023-01-23 03:45:56.617918: step: 2036/527, loss: 0.005612373352050781 2023-01-23 03:45:57.754505: step: 2040/527, loss: 0.0033271312713623047 2023-01-23 03:45:58.839022: step: 2044/527, loss: 0.09671249985694885 2023-01-23 03:45:59.957318: step: 2048/527, loss: 0.004402351565659046 2023-01-23 03:46:01.022271: step: 2052/527, loss: 0.00016813278489280492 2023-01-23 03:46:02.143311: step: 2056/527, loss: 0.001800537109375 2023-01-23 03:46:03.250807: step: 2060/527, loss: 0.002048015594482422 2023-01-23 03:46:04.369230: step: 2064/527, loss: 0.00409011822193861 2023-01-23 03:46:05.494062: step: 2068/527, loss: 0.02006559446454048 2023-01-23 03:46:06.596224: step: 2072/527, loss: 0.0004901885986328125 2023-01-23 03:46:07.693638: step: 2076/527, loss: 2.6226043701171875e-06 2023-01-23 03:46:08.811380: step: 2080/527, loss: 0.007480430882424116 2023-01-23 03:46:09.903575: step: 2084/527, loss: 0.0010358811123296618 2023-01-23 03:46:11.058893: step: 2088/527, loss: 0.014516926370561123 2023-01-23 03:46:12.185708: step: 2092/527, loss: 0.0018707275157794356 2023-01-23 03:46:13.314589: step: 2096/527, loss: -3.8146959013829473e-07 2023-01-23 03:46:14.454189: step: 2100/527, loss: 0.039951324462890625 2023-01-23 03:46:15.554638: step: 2104/527, loss: 0.00030040740966796875 2023-01-23 03:46:16.688365: step: 2108/527, loss: 0.0022982596419751644 ================================================== Loss: 0.015 -------------------- Dev: {'event': {'p': 0.6063157894736843, 'r': 0.7669773635153129, 'f1': 0.6772486772486773}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Test: {'event': {'p': 0.6427238805970149, 'r': 0.7874285714285715, 'f1': 0.7077555213148434}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Chinese: {'event': {'p': 0.5454545454545454, 'r': 0.8888888888888888, 'f1': 0.676056338028169}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Korean: {'event': {'p': 0.6792452830188679, 'r': 0.5714285714285714, 'f1': 0.6206896551724137}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Russian: {'event': {'p': 0.4634146341463415, 'r': 0.5277777777777778, 'f1': 0.4935064935064935}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} New best korean model... ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6063157894736843, 'r': 0.7669773635153129, 'f1': 0.6772486772486773}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Eng Test for Korean: {'event': {'p': 0.6427238805970149, 'r': 0.7874285714285715, 'f1': 0.7077555213148434}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Korean: {'event': {'p': 0.6792452830188679, 'r': 0.5714285714285714, 'f1': 0.6206896551724137}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 25 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:47:05.492970: step: 4/527, loss: 0.001766204833984375 2023-01-23 03:47:06.605325: step: 8/527, loss: 0.0018249511485919356 2023-01-23 03:47:07.699721: step: 12/527, loss: 0.00418467540293932 2023-01-23 03:47:08.830028: step: 16/527, loss: 0.6899878978729248 2023-01-23 03:47:09.950678: step: 20/527, loss: 0.00040187835111282766 2023-01-23 03:47:11.070102: step: 24/527, loss: 0.027169324457645416 2023-01-23 03:47:12.177791: step: 28/527, loss: 0.00012073517427779734 2023-01-23 03:47:13.274492: step: 32/527, loss: 0.0022459030151367188 2023-01-23 03:47:14.392872: step: 36/527, loss: 0.2458038330078125 2023-01-23 03:47:15.541488: step: 40/527, loss: 0.0003391265927348286 2023-01-23 03:47:16.621925: step: 44/527, loss: 0.0024442672729492188 2023-01-23 03:47:17.755106: step: 48/527, loss: 0.0007211684715002775 2023-01-23 03:47:18.843670: step: 52/527, loss: 6.10351571594947e-06 2023-01-23 03:47:19.982614: step: 56/527, loss: 0.06713838875293732 2023-01-23 03:47:21.063023: step: 60/527, loss: 0.042611028999090195 2023-01-23 03:47:22.193251: step: 64/527, loss: 0.016329767182469368 2023-01-23 03:47:23.310496: step: 68/527, loss: 0.00041255951509810984 2023-01-23 03:47:24.446633: step: 72/527, loss: 0.000431060791015625 2023-01-23 03:47:25.577419: step: 76/527, loss: 0.0003604888916015625 2023-01-23 03:47:26.693766: step: 80/527, loss: 0.008054065518081188 2023-01-23 03:47:27.842924: step: 84/527, loss: 7.266998727573082e-05 2023-01-23 03:47:28.954338: step: 88/527, loss: 0.0009488582727499306 2023-01-23 03:47:30.059127: step: 92/527, loss: 0.0487423874437809 2023-01-23 03:47:31.149855: step: 96/527, loss: 0.022173713892698288 2023-01-23 03:47:32.291146: step: 100/527, loss: 0.0002749442937783897 2023-01-23 03:47:33.419482: step: 104/527, loss: 0.04202428087592125 2023-01-23 03:47:34.526734: step: 108/527, loss: 0.00040903090848587453 2023-01-23 03:47:35.627500: step: 112/527, loss: 0.0008162498706951737 2023-01-23 03:47:36.738871: step: 116/527, loss: 0.005974674131721258 2023-01-23 03:47:37.862416: step: 120/527, loss: 0.013529110699892044 2023-01-23 03:47:38.945274: step: 124/527, loss: 1.0824203855008818e-05 2023-01-23 03:47:40.084452: step: 128/527, loss: 8.37326078908518e-05 2023-01-23 03:47:41.179101: step: 132/527, loss: 0.008611869998276234 2023-01-23 03:47:42.302325: step: 136/527, loss: 0.015115928836166859 2023-01-23 03:47:43.393327: step: 140/527, loss: 0.0003482818719930947 2023-01-23 03:47:44.495351: step: 144/527, loss: 0.00032539371750317514 2023-01-23 03:47:45.600852: step: 148/527, loss: 0.0002548217889852822 2023-01-23 03:47:46.802994: step: 152/527, loss: 0.006144905462861061 2023-01-23 03:47:47.926875: step: 156/527, loss: 0.09411268681287766 2023-01-23 03:47:49.038378: step: 160/527, loss: 0.043656542897224426 2023-01-23 03:47:50.141404: step: 164/527, loss: 0.0013572692405432463 2023-01-23 03:47:51.250141: step: 168/527, loss: 0.0023288726806640625 2023-01-23 03:47:52.328494: step: 172/527, loss: 9.498596773482859e-05 2023-01-23 03:47:53.464018: step: 176/527, loss: 0.013548469170928001 2023-01-23 03:47:54.578144: step: 180/527, loss: 0.012019157409667969 2023-01-23 03:47:55.743856: step: 184/527, loss: 0.0277081485837698 2023-01-23 03:47:56.863004: step: 188/527, loss: 0.0001622468262212351 2023-01-23 03:47:57.994555: step: 192/527, loss: 1.6593934560660273e-05 2023-01-23 03:47:59.080496: step: 196/527, loss: 0.00302047748118639 2023-01-23 03:48:00.169923: step: 200/527, loss: 6.866455805720761e-05 2023-01-23 03:48:01.268383: step: 204/527, loss: 0.00016002656775526702 2023-01-23 03:48:02.359880: step: 208/527, loss: 0.006379508879035711 2023-01-23 03:48:03.504121: step: 212/527, loss: 0.000263023393927142 2023-01-23 03:48:04.594895: step: 216/527, loss: 7.629395213371026e-07 2023-01-23 03:48:05.728436: step: 220/527, loss: 4.580021050060168e-05 2023-01-23 03:48:06.830766: step: 224/527, loss: 1.0156631105928682e-05 2023-01-23 03:48:07.964845: step: 228/527, loss: 0.00014934540376998484 2023-01-23 03:48:09.116921: step: 232/527, loss: 0.015249443240463734 2023-01-23 03:48:10.257726: step: 236/527, loss: 0.00617370568215847 2023-01-23 03:48:11.363543: step: 240/527, loss: -8.96453821042087e-06 2023-01-23 03:48:12.523028: step: 244/527, loss: 0.017554283142089844 2023-01-23 03:48:13.649157: step: 248/527, loss: 0.0010103225940838456 2023-01-23 03:48:14.785114: step: 252/527, loss: 0.0008919715764932334 2023-01-23 03:48:15.936780: step: 256/527, loss: 0.00015735626220703125 2023-01-23 03:48:17.060067: step: 260/527, loss: 0.06901970505714417 2023-01-23 03:48:18.169614: step: 264/527, loss: 0.001715803169645369 2023-01-23 03:48:19.304212: step: 268/527, loss: 0.0003888607316184789 2023-01-23 03:48:20.431518: step: 272/527, loss: 8.78334030858241e-05 2023-01-23 03:48:21.541481: step: 276/527, loss: 0.013718223199248314 2023-01-23 03:48:22.696077: step: 280/527, loss: 0.023568343371152878 2023-01-23 03:48:23.857594: step: 284/527, loss: 0.04051399230957031 2023-01-23 03:48:25.005143: step: 288/527, loss: 0.015955161303281784 2023-01-23 03:48:26.125351: step: 292/527, loss: 5.626678466796875e-05 2023-01-23 03:48:27.219544: step: 296/527, loss: 0.03216380998492241 2023-01-23 03:48:28.345985: step: 300/527, loss: 0.45355138182640076 2023-01-23 03:48:29.470324: step: 304/527, loss: 0.0025525183882564306 2023-01-23 03:48:30.559145: step: 308/527, loss: 0.000495356332976371 2023-01-23 03:48:31.678114: step: 312/527, loss: 0.06972599029541016 2023-01-23 03:48:32.785465: step: 316/527, loss: 0.0017675398848950863 2023-01-23 03:48:33.895577: step: 320/527, loss: 2.136230432370212e-05 2023-01-23 03:48:35.004640: step: 324/527, loss: 0.023733139038085938 2023-01-23 03:48:36.151638: step: 328/527, loss: 0.0008713722345419228 2023-01-23 03:48:37.292131: step: 332/527, loss: 0.004764080047607422 2023-01-23 03:48:38.422657: step: 336/527, loss: 0.00717086810618639 2023-01-23 03:48:39.532735: step: 340/527, loss: 0.013479900546371937 2023-01-23 03:48:40.618462: step: 344/527, loss: 0.029793405905365944 2023-01-23 03:48:41.724266: step: 348/527, loss: 0.047705892473459244 2023-01-23 03:48:42.826615: step: 352/527, loss: 8.39233416627394e-06 2023-01-23 03:48:43.944145: step: 356/527, loss: 0.010256004519760609 2023-01-23 03:48:45.066215: step: 360/527, loss: 0.005286789033561945 2023-01-23 03:48:46.170670: step: 364/527, loss: 0.0057773590087890625 2023-01-23 03:48:47.260965: step: 368/527, loss: 0.01985187642276287 2023-01-23 03:48:48.398991: step: 372/527, loss: 0.00016670227341819555 2023-01-23 03:48:49.519978: step: 376/527, loss: 0.01763916201889515 2023-01-23 03:48:50.654343: step: 380/527, loss: 0.0009875297546386719 2023-01-23 03:48:51.768336: step: 384/527, loss: 0.0005135536193847656 2023-01-23 03:48:52.916810: step: 388/527, loss: 0.00759391812607646 2023-01-23 03:48:54.022712: step: 392/527, loss: 0.05786771699786186 2023-01-23 03:48:55.159270: step: 396/527, loss: 0.12877942621707916 2023-01-23 03:48:56.265904: step: 400/527, loss: 0.004709959030151367 2023-01-23 03:48:57.380500: step: 404/527, loss: 3.147125244140625e-05 2023-01-23 03:48:58.505234: step: 408/527, loss: 0.020836830139160156 2023-01-23 03:48:59.639071: step: 412/527, loss: 5.6743621826171875e-05 2023-01-23 03:49:00.762779: step: 416/527, loss: 0.0009277343633584678 2023-01-23 03:49:01.863048: step: 420/527, loss: 0.007067299447953701 2023-01-23 03:49:02.969207: step: 424/527, loss: 0.012654590420424938 2023-01-23 03:49:04.084721: step: 428/527, loss: 0.0031277656089514494 2023-01-23 03:49:05.178441: step: 432/527, loss: 0.039862822741270065 2023-01-23 03:49:06.309933: step: 436/527, loss: 0.00020027162099722773 2023-01-23 03:49:07.396463: step: 440/527, loss: 0.00017857553029898554 2023-01-23 03:49:08.510212: step: 444/527, loss: 0.06750774383544922 2023-01-23 03:49:09.632524: step: 448/527, loss: 0.011782455258071423 2023-01-23 03:49:10.751828: step: 452/527, loss: 2.8991700673941523e-05 2023-01-23 03:49:11.867830: step: 456/527, loss: 0.0020721436012536287 2023-01-23 03:49:12.965543: step: 460/527, loss: 0.05566387251019478 2023-01-23 03:49:14.100647: step: 464/527, loss: 0.00024700164794921875 2023-01-23 03:49:15.205419: step: 468/527, loss: 0.0006452560191974044 2023-01-23 03:49:16.318119: step: 472/527, loss: 0.005136394407600164 2023-01-23 03:49:17.420511: step: 476/527, loss: 0.0005501746782101691 2023-01-23 03:49:18.534714: step: 480/527, loss: 0.0069564348086714745 2023-01-23 03:49:19.629081: step: 484/527, loss: 0.00016579628572799265 2023-01-23 03:49:20.747797: step: 488/527, loss: 1.3947486877441406e-05 2023-01-23 03:49:21.853148: step: 492/527, loss: 0.0009250640869140625 2023-01-23 03:49:22.978871: step: 496/527, loss: 0.0001674652157817036 2023-01-23 03:49:24.138308: step: 500/527, loss: 0.015229320153594017 2023-01-23 03:49:25.269048: step: 504/527, loss: 0.1258329451084137 2023-01-23 03:49:26.368843: step: 508/527, loss: 0.0037277699448168278 2023-01-23 03:49:27.459143: step: 512/527, loss: 0.0003524780331645161 2023-01-23 03:49:28.571444: step: 516/527, loss: 0.00025873183039948344 2023-01-23 03:49:29.680255: step: 520/527, loss: 0.00721473665907979 2023-01-23 03:49:30.807431: step: 524/527, loss: 0.026122570037841797 2023-01-23 03:49:31.957746: step: 528/527, loss: 0.00026359560433775187 2023-01-23 03:49:33.080719: step: 532/527, loss: 0.00775489816442132 2023-01-23 03:49:34.172234: step: 536/527, loss: 0.00010251998901367188 2023-01-23 03:49:35.281710: step: 540/527, loss: 0.03396415710449219 2023-01-23 03:49:36.393504: step: 544/527, loss: 0.00018768310837913305 2023-01-23 03:49:37.503947: step: 548/527, loss: 0.03217964246869087 2023-01-23 03:49:38.611620: step: 552/527, loss: 6.4373016357421875e-06 2023-01-23 03:49:39.704812: step: 556/527, loss: 0.0004201889387331903 2023-01-23 03:49:40.868614: step: 560/527, loss: 0.0004066467226948589 2023-01-23 03:49:42.010752: step: 564/527, loss: 0.05708971247076988 2023-01-23 03:49:43.127677: step: 568/527, loss: 0.0031879425514489412 2023-01-23 03:49:44.276909: step: 572/527, loss: 0.002782726427540183 2023-01-23 03:49:45.391398: step: 576/527, loss: 0.005340910051018 2023-01-23 03:49:46.497606: step: 580/527, loss: 0.0038767815567553043 2023-01-23 03:49:47.604860: step: 584/527, loss: 0.000177288064151071 2023-01-23 03:49:48.726789: step: 588/527, loss: 0.014242363162338734 2023-01-23 03:49:49.823613: step: 592/527, loss: 5.378723290050402e-05 2023-01-23 03:49:50.981474: step: 596/527, loss: 0.008321666158735752 2023-01-23 03:49:52.100049: step: 600/527, loss: 0.0017938613891601562 2023-01-23 03:49:53.246126: step: 604/527, loss: 0.0003831863577943295 2023-01-23 03:49:54.369479: step: 608/527, loss: 3.24249267578125e-05 2023-01-23 03:49:55.464093: step: 612/527, loss: 0.0170148853212595 2023-01-23 03:49:56.573225: step: 616/527, loss: 0.0013532638549804688 2023-01-23 03:49:57.667934: step: 620/527, loss: 0.0026283501647412777 2023-01-23 03:49:58.776567: step: 624/527, loss: 6.437302363337949e-05 2023-01-23 03:49:59.880451: step: 628/527, loss: 1.8501283193472773e-05 2023-01-23 03:50:01.005069: step: 632/527, loss: 0.045168209820985794 2023-01-23 03:50:02.121835: step: 636/527, loss: 0.0015952109824866056 2023-01-23 03:50:03.243395: step: 640/527, loss: 0.0007852554554119706 2023-01-23 03:50:04.364734: step: 644/527, loss: 0.1273820847272873 2023-01-23 03:50:05.450400: step: 648/527, loss: 0.10925483703613281 2023-01-23 03:50:06.530708: step: 652/527, loss: 0.0014833450550213456 2023-01-23 03:50:07.623658: step: 656/527, loss: 0.0017621993320062757 2023-01-23 03:50:08.747529: step: 660/527, loss: 0.0082283029332757 2023-01-23 03:50:09.902711: step: 664/527, loss: 3.051760359085165e-06 2023-01-23 03:50:10.993610: step: 668/527, loss: 0.0025742053985595703 2023-01-23 03:50:12.113323: step: 672/527, loss: 0.018035316839814186 2023-01-23 03:50:13.256409: step: 676/527, loss: 0.010709285736083984 2023-01-23 03:50:14.424409: step: 680/527, loss: 0.027624178677797318 2023-01-23 03:50:15.558891: step: 684/527, loss: 0.00012254714965820312 2023-01-23 03:50:16.710147: step: 688/527, loss: 5.313157816999592e-05 2023-01-23 03:50:17.822007: step: 692/527, loss: 0.00733718927949667 2023-01-23 03:50:18.959939: step: 696/527, loss: 0.0012041091686114669 2023-01-23 03:50:20.112149: step: 700/527, loss: 0.0008429526933468878 2023-01-23 03:50:21.215630: step: 704/527, loss: 0.29358309507369995 2023-01-23 03:50:22.308784: step: 708/527, loss: 0.0001614093780517578 2023-01-23 03:50:23.411454: step: 712/527, loss: 2.918243444582913e-05 2023-01-23 03:50:24.526309: step: 716/527, loss: 0.0034881592728197575 2023-01-23 03:50:25.647211: step: 720/527, loss: 0.00041294097900390625 2023-01-23 03:50:26.767873: step: 724/527, loss: 1.5258789289873675e-06 2023-01-23 03:50:27.899442: step: 728/527, loss: 0.00028905869112350047 2023-01-23 03:50:29.033848: step: 732/527, loss: 0.0005399704095907509 2023-01-23 03:50:30.168410: step: 736/527, loss: 0.0018918991554528475 2023-01-23 03:50:31.296348: step: 740/527, loss: 0.0028125762473791838 2023-01-23 03:50:32.433628: step: 744/527, loss: 0.0004868984397035092 2023-01-23 03:50:33.558815: step: 748/527, loss: 0.00047664641169831157 2023-01-23 03:50:34.673991: step: 752/527, loss: 0.0010887146927416325 2023-01-23 03:50:35.775938: step: 756/527, loss: 0.012437820434570312 2023-01-23 03:50:36.876267: step: 760/527, loss: 1.8024444216280244e-05 2023-01-23 03:50:37.998974: step: 764/527, loss: 0.00011005402484443039 2023-01-23 03:50:39.102689: step: 768/527, loss: -1.1444091796875e-05 2023-01-23 03:50:40.202623: step: 772/527, loss: 0.05443020164966583 2023-01-23 03:50:41.294081: step: 776/527, loss: 3.0040739602554822e-06 2023-01-23 03:50:42.394678: step: 780/527, loss: 0.00017395020404364914 2023-01-23 03:50:43.478405: step: 784/527, loss: 0.0008347511175088584 2023-01-23 03:50:44.600777: step: 788/527, loss: 0.004263973794877529 2023-01-23 03:50:45.738664: step: 792/527, loss: 0.006775474641472101 2023-01-23 03:50:46.842062: step: 796/527, loss: 7.761418237350881e-05 2023-01-23 03:50:47.965282: step: 800/527, loss: -5.34057608092553e-06 2023-01-23 03:50:49.051934: step: 804/527, loss: 0.0009716272470541298 2023-01-23 03:50:50.167961: step: 808/527, loss: 0.0003389358753338456 2023-01-23 03:50:51.291775: step: 812/527, loss: 1.8441548347473145 2023-01-23 03:50:52.391512: step: 816/527, loss: 0.00016536712064407766 2023-01-23 03:50:53.475243: step: 820/527, loss: 0.010838508605957031 2023-01-23 03:50:54.585017: step: 824/527, loss: 0.020152568817138672 2023-01-23 03:50:55.675031: step: 828/527, loss: 0.6454117298126221 2023-01-23 03:50:56.793254: step: 832/527, loss: 0.01870269887149334 2023-01-23 03:50:57.921657: step: 836/527, loss: 0.05858135223388672 2023-01-23 03:50:59.057168: step: 840/527, loss: 0.00013771058002021164 2023-01-23 03:51:00.200961: step: 844/527, loss: 0.8861551284790039 2023-01-23 03:51:01.358179: step: 848/527, loss: 0.021503638476133347 2023-01-23 03:51:02.459278: step: 852/527, loss: 0.0011535168159753084 2023-01-23 03:51:03.590618: step: 856/527, loss: 0.013278389349579811 2023-01-23 03:51:04.720937: step: 860/527, loss: 0.0007536888588219881 2023-01-23 03:51:05.841398: step: 864/527, loss: 0.010644722729921341 2023-01-23 03:51:06.959030: step: 868/527, loss: 0.0001066207914846018 2023-01-23 03:51:08.091823: step: 872/527, loss: 0.023304367437958717 2023-01-23 03:51:09.189805: step: 876/527, loss: 0.002553558209910989 2023-01-23 03:51:10.281352: step: 880/527, loss: 0.0006555200088769197 2023-01-23 03:51:11.386773: step: 884/527, loss: 0.014714241027832031 2023-01-23 03:51:12.523865: step: 888/527, loss: 0.012941170483827591 2023-01-23 03:51:13.684259: step: 892/527, loss: 0.0006715774652548134 2023-01-23 03:51:14.835732: step: 896/527, loss: 5.6552886235294864e-05 2023-01-23 03:51:15.967835: step: 900/527, loss: 0.00010981559898937121 2023-01-23 03:51:17.102729: step: 904/527, loss: 1.754760705807712e-05 2023-01-23 03:51:18.187698: step: 908/527, loss: 0.03206386789679527 2023-01-23 03:51:19.304110: step: 912/527, loss: 0.0006968498346395791 2023-01-23 03:51:20.410657: step: 916/527, loss: 0.0064668660052120686 2023-01-23 03:51:21.529435: step: 920/527, loss: 0.05810356140136719 2023-01-23 03:51:22.619669: step: 924/527, loss: 0.035750579088926315 2023-01-23 03:51:23.714087: step: 928/527, loss: 0.00388336181640625 2023-01-23 03:51:24.845271: step: 932/527, loss: 0.0009361266857013106 2023-01-23 03:51:25.939780: step: 936/527, loss: 0.0003735542413778603 2023-01-23 03:51:27.040442: step: 940/527, loss: 0.0012674810132011771 2023-01-23 03:51:28.146846: step: 944/527, loss: 0.0068503376096487045 2023-01-23 03:51:29.286068: step: 948/527, loss: 0.0006660461658611894 2023-01-23 03:51:30.377631: step: 952/527, loss: 2.2506712411995977e-05 2023-01-23 03:51:31.499164: step: 956/527, loss: 0.007409381680190563 2023-01-23 03:51:32.597335: step: 960/527, loss: 0.00022554397583007812 2023-01-23 03:51:33.728810: step: 964/527, loss: 0.048370361328125 2023-01-23 03:51:34.842521: step: 968/527, loss: 0.005554294679313898 2023-01-23 03:51:35.935443: step: 972/527, loss: 0.0011470795143395662 2023-01-23 03:51:37.047456: step: 976/527, loss: 0.005016994196921587 2023-01-23 03:51:38.162296: step: 980/527, loss: 1.219009280204773 2023-01-23 03:51:39.292814: step: 984/527, loss: 0.0021209719125181437 2023-01-23 03:51:40.407429: step: 988/527, loss: 0.011798858642578125 2023-01-23 03:51:41.516848: step: 992/527, loss: 0.0017727374797686934 2023-01-23 03:51:42.600787: step: 996/527, loss: 0.0008016586070880294 2023-01-23 03:51:43.727412: step: 1000/527, loss: 0.03096199221909046 2023-01-23 03:51:44.847913: step: 1004/527, loss: 0.0004514694446697831 2023-01-23 03:51:45.960796: step: 1008/527, loss: 0.08549900352954865 2023-01-23 03:51:47.069586: step: 1012/527, loss: 0.009777641855180264 2023-01-23 03:51:48.158391: step: 1016/527, loss: 0.09717349708080292 2023-01-23 03:51:49.297505: step: 1020/527, loss: 0.04846920818090439 2023-01-23 03:51:50.395842: step: 1024/527, loss: 0.0003048896905966103 2023-01-23 03:51:51.458635: step: 1028/527, loss: 2.0122528439969756e-05 2023-01-23 03:51:52.548549: step: 1032/527, loss: 0.00011587143671931699 2023-01-23 03:51:53.654357: step: 1036/527, loss: 0.00012168884859420359 2023-01-23 03:51:54.768657: step: 1040/527, loss: 0.011916160583496094 2023-01-23 03:51:55.893251: step: 1044/527, loss: 0.00013866423978470266 2023-01-23 03:51:57.012484: step: 1048/527, loss: 5.53131121705519e-06 2023-01-23 03:51:58.148671: step: 1052/527, loss: 0.00994567945599556 2023-01-23 03:51:59.304214: step: 1056/527, loss: 0.005994987674057484 2023-01-23 03:52:00.450109: step: 1060/527, loss: 0.10977286845445633 2023-01-23 03:52:01.569953: step: 1064/527, loss: 4.7016143071232364e-05 2023-01-23 03:52:02.705391: step: 1068/527, loss: 0.0015819550026208162 2023-01-23 03:52:03.812270: step: 1072/527, loss: 0.0017896651988849044 2023-01-23 03:52:04.904459: step: 1076/527, loss: 0.02225627936422825 2023-01-23 03:52:06.020808: step: 1080/527, loss: 6.008148193359375e-05 2023-01-23 03:52:07.148772: step: 1084/527, loss: 0.00034160615177825093 2023-01-23 03:52:08.257724: step: 1088/527, loss: 0.0885867103934288 2023-01-23 03:52:09.348288: step: 1092/527, loss: 0.0009214401943609118 2023-01-23 03:52:10.459465: step: 1096/527, loss: 0.0040187835693359375 2023-01-23 03:52:11.571338: step: 1100/527, loss: 0.027106191962957382 2023-01-23 03:52:12.681645: step: 1104/527, loss: 0.00024180412583518773 2023-01-23 03:52:13.829849: step: 1108/527, loss: 0.0012324333656579256 2023-01-23 03:52:14.942536: step: 1112/527, loss: 0.04183988645672798 2023-01-23 03:52:16.035507: step: 1116/527, loss: 0.0172333475202322 2023-01-23 03:52:17.187339: step: 1120/527, loss: 0.011845970526337624 2023-01-23 03:52:18.289582: step: 1124/527, loss: 0.0007647991296835244 2023-01-23 03:52:19.411938: step: 1128/527, loss: 0.061222173273563385 2023-01-23 03:52:20.538490: step: 1132/527, loss: 0.015550518408417702 2023-01-23 03:52:21.651329: step: 1136/527, loss: 6.0749054682673886e-05 2023-01-23 03:52:22.777211: step: 1140/527, loss: 0.006541633512824774 2023-01-23 03:52:23.877259: step: 1144/527, loss: 0.0026005746331065893 2023-01-23 03:52:25.002784: step: 1148/527, loss: 0.0006946563953533769 2023-01-23 03:52:26.160923: step: 1152/527, loss: 8.802414231467992e-05 2023-01-23 03:52:27.290732: step: 1156/527, loss: 0.001219034194946289 2023-01-23 03:52:28.384469: step: 1160/527, loss: 5.6743621826171875e-05 2023-01-23 03:52:29.522486: step: 1164/527, loss: 8.296967280330136e-06 2023-01-23 03:52:30.667263: step: 1168/527, loss: 0.011019611731171608 2023-01-23 03:52:31.804481: step: 1172/527, loss: 0.42521238327026367 2023-01-23 03:52:32.911007: step: 1176/527, loss: 0.0013048171531409025 2023-01-23 03:52:34.051166: step: 1180/527, loss: 0.007025241851806641 2023-01-23 03:52:35.185948: step: 1184/527, loss: 0.004977226257324219 2023-01-23 03:52:36.316408: step: 1188/527, loss: 0.055217742919921875 2023-01-23 03:52:37.413435: step: 1192/527, loss: 0.0002927780442405492 2023-01-23 03:52:38.498426: step: 1196/527, loss: 0.0019769668579101562 2023-01-23 03:52:39.646439: step: 1200/527, loss: 1.296997106692288e-05 2023-01-23 03:52:40.771432: step: 1204/527, loss: 2.18868262891192e-05 2023-01-23 03:52:41.929159: step: 1208/527, loss: 9.34600830078125e-05 2023-01-23 03:52:43.032095: step: 1212/527, loss: 0.01757803000509739 2023-01-23 03:52:44.165123: step: 1216/527, loss: 0.0003582954523153603 2023-01-23 03:52:45.294437: step: 1220/527, loss: 0.00045013427734375 2023-01-23 03:52:46.404770: step: 1224/527, loss: 0.0015214920276775956 2023-01-23 03:52:47.504555: step: 1228/527, loss: 0.001737403916195035 2023-01-23 03:52:48.597853: step: 1232/527, loss: 0.047087907791137695 2023-01-23 03:52:49.722513: step: 1236/527, loss: 2.86102294921875e-06 2023-01-23 03:52:50.812098: step: 1240/527, loss: 0.0061502461321651936 2023-01-23 03:52:51.917636: step: 1244/527, loss: 0.0016082762740552425 2023-01-23 03:52:53.020169: step: 1248/527, loss: 0.000289726274786517 2023-01-23 03:52:54.133713: step: 1252/527, loss: 0.0002201557217631489 2023-01-23 03:52:55.225990: step: 1256/527, loss: 0.013474225997924805 2023-01-23 03:52:56.337501: step: 1260/527, loss: 0.002567243529483676 2023-01-23 03:52:57.472813: step: 1264/527, loss: 0.00024967192439362407 2023-01-23 03:52:58.592801: step: 1268/527, loss: 0.012307548895478249 2023-01-23 03:52:59.713704: step: 1272/527, loss: 0.00017652512178756297 2023-01-23 03:53:00.828773: step: 1276/527, loss: 2.3937225705594756e-05 2023-01-23 03:53:01.917874: step: 1280/527, loss: 2.4080276489257812e-05 2023-01-23 03:53:03.028289: step: 1284/527, loss: 0.048406124114990234 2023-01-23 03:53:04.111208: step: 1288/527, loss: 8.94546537892893e-05 2023-01-23 03:53:05.196196: step: 1292/527, loss: 0.02050619199872017 2023-01-23 03:53:06.298618: step: 1296/527, loss: 0.00960388220846653 2023-01-23 03:53:07.398395: step: 1300/527, loss: 0.0002726554812397808 2023-01-23 03:53:08.506865: step: 1304/527, loss: 0.0005056381342001259 2023-01-23 03:53:09.626105: step: 1308/527, loss: 0.0070314412005245686 2023-01-23 03:53:10.744450: step: 1312/527, loss: 0.00028743743314407766 2023-01-23 03:53:11.818825: step: 1316/527, loss: 0.005698060616850853 2023-01-23 03:53:12.929984: step: 1320/527, loss: 0.00011711120168911293 2023-01-23 03:53:14.044058: step: 1324/527, loss: 4.310607982915826e-05 2023-01-23 03:53:15.190215: step: 1328/527, loss: 0.05271758884191513 2023-01-23 03:53:16.309930: step: 1332/527, loss: 0.0021932125091552734 2023-01-23 03:53:17.394363: step: 1336/527, loss: 9.078979201149195e-05 2023-01-23 03:53:18.560057: step: 1340/527, loss: 0.04548444598913193 2023-01-23 03:53:19.734112: step: 1344/527, loss: 0.016104459762573242 2023-01-23 03:53:20.832685: step: 1348/527, loss: 3.34262840624433e-05 2023-01-23 03:53:21.941780: step: 1352/527, loss: 0.0012344360584393144 2023-01-23 03:53:23.081429: step: 1356/527, loss: 7.467270188499242e-05 2023-01-23 03:53:24.185973: step: 1360/527, loss: 0.002158641815185547 2023-01-23 03:53:25.300983: step: 1364/527, loss: 0.0477568618953228 2023-01-23 03:53:26.430774: step: 1368/527, loss: 0.059859372675418854 2023-01-23 03:53:27.572000: step: 1372/527, loss: 0.09371032565832138 2023-01-23 03:53:28.678131: step: 1376/527, loss: 0.000169658669619821 2023-01-23 03:53:29.810818: step: 1380/527, loss: 0.002362537197768688 2023-01-23 03:53:30.905748: step: 1384/527, loss: 0.0005471229669637978 2023-01-23 03:53:32.026714: step: 1388/527, loss: 0.07060647755861282 2023-01-23 03:53:33.148687: step: 1392/527, loss: 0.00016651154146529734 2023-01-23 03:53:34.244068: step: 1396/527, loss: 0.0010497093899175525 2023-01-23 03:53:35.406944: step: 1400/527, loss: 0.03735800087451935 2023-01-23 03:53:36.523769: step: 1404/527, loss: 0.0015563011402264237 2023-01-23 03:53:37.608796: step: 1408/527, loss: 0.0012318610679358244 2023-01-23 03:53:38.739715: step: 1412/527, loss: 0.03470268473029137 2023-01-23 03:53:39.840483: step: 1416/527, loss: 0.012385845184326172 2023-01-23 03:53:40.940345: step: 1420/527, loss: 0.00012826919555664062 2023-01-23 03:53:42.058367: step: 1424/527, loss: 0.012933493591845036 2023-01-23 03:53:43.157920: step: 1428/527, loss: 0.03939790651202202 2023-01-23 03:53:44.254887: step: 1432/527, loss: 0.000185108176083304 2023-01-23 03:53:45.361419: step: 1436/527, loss: 0.04531509801745415 2023-01-23 03:53:46.473644: step: 1440/527, loss: 0.014312363229691982 2023-01-23 03:53:47.603787: step: 1444/527, loss: 0.021743202582001686 2023-01-23 03:53:48.727749: step: 1448/527, loss: 0.0236084945499897 2023-01-23 03:53:49.847633: step: 1452/527, loss: 0.009135056287050247 2023-01-23 03:53:50.974822: step: 1456/527, loss: 8.316039748024195e-05 2023-01-23 03:53:52.082636: step: 1460/527, loss: 3.046989513677545e-05 2023-01-23 03:53:53.180351: step: 1464/527, loss: 4.8160552978515625e-05 2023-01-23 03:53:54.276922: step: 1468/527, loss: 0.003084564348682761 2023-01-23 03:53:55.396415: step: 1472/527, loss: 0.013629436492919922 2023-01-23 03:53:56.527233: step: 1476/527, loss: 0.0019363403553143144 2023-01-23 03:53:57.614800: step: 1480/527, loss: 7.057189577608369e-06 2023-01-23 03:53:58.709206: step: 1484/527, loss: 1.945495569088962e-05 2023-01-23 03:53:59.834363: step: 1488/527, loss: 0.002985095838084817 2023-01-23 03:54:00.939495: step: 1492/527, loss: 0.004210758022964001 2023-01-23 03:54:02.060028: step: 1496/527, loss: 0.0003247261047363281 2023-01-23 03:54:03.158445: step: 1500/527, loss: 2.6798248654813506e-05 2023-01-23 03:54:04.309721: step: 1504/527, loss: 9.956360736396164e-05 2023-01-23 03:54:05.412043: step: 1508/527, loss: 0.0003939390298910439 2023-01-23 03:54:06.512869: step: 1512/527, loss: 0.0005599975702352822 2023-01-23 03:54:07.594132: step: 1516/527, loss: 0.03544482961297035 2023-01-23 03:54:08.707880: step: 1520/527, loss: -4.95910626341356e-06 2023-01-23 03:54:09.811007: step: 1524/527, loss: 0.0007112980238161981 2023-01-23 03:54:10.901648: step: 1528/527, loss: 1.3089180356473662e-05 2023-01-23 03:54:11.995303: step: 1532/527, loss: 0.004083013627678156 2023-01-23 03:54:13.106111: step: 1536/527, loss: 3.24249276673072e-06 2023-01-23 03:54:14.212224: step: 1540/527, loss: 0.0021806717850267887 2023-01-23 03:54:15.314195: step: 1544/527, loss: 2.5558472771081142e-05 2023-01-23 03:54:16.418047: step: 1548/527, loss: 0.0030982017051428556 2023-01-23 03:54:17.548046: step: 1552/527, loss: 0.007536888588219881 2023-01-23 03:54:18.651525: step: 1556/527, loss: 0.02356109581887722 2023-01-23 03:54:19.766471: step: 1560/527, loss: 0.006760883145034313 2023-01-23 03:54:20.890280: step: 1564/527, loss: 0.01896200142800808 2023-01-23 03:54:22.009628: step: 1568/527, loss: 0.0005858421791344881 2023-01-23 03:54:23.099676: step: 1572/527, loss: 0.003640842391178012 2023-01-23 03:54:24.209311: step: 1576/527, loss: 6.251335435081273e-05 2023-01-23 03:54:25.339494: step: 1580/527, loss: 3.0326846172101796e-05 2023-01-23 03:54:26.479205: step: 1584/527, loss: 0.0047454833984375 2023-01-23 03:54:27.601028: step: 1588/527, loss: 0.00029392243595793843 2023-01-23 03:54:28.705935: step: 1592/527, loss: 0.07987356185913086 2023-01-23 03:54:29.795766: step: 1596/527, loss: 8.583068620282575e-07 2023-01-23 03:54:30.938945: step: 1600/527, loss: 0.0006755829090252519 2023-01-23 03:54:32.062843: step: 1604/527, loss: 0.006155490875244141 2023-01-23 03:54:33.166715: step: 1608/527, loss: 1.735687328618951e-05 2023-01-23 03:54:34.299768: step: 1612/527, loss: 0.0001844406215241179 2023-01-23 03:54:35.400671: step: 1616/527, loss: 0.06156330183148384 2023-01-23 03:54:36.503070: step: 1620/527, loss: 3.814698175119702e-07 2023-01-23 03:54:37.617911: step: 1624/527, loss: 0.014499282464385033 2023-01-23 03:54:38.733806: step: 1628/527, loss: 0.0004937172052450478 2023-01-23 03:54:39.828989: step: 1632/527, loss: 0.023496342822909355 2023-01-23 03:54:40.964977: step: 1636/527, loss: 0.013883400708436966 2023-01-23 03:54:42.086204: step: 1640/527, loss: 0.00010795593698276207 2023-01-23 03:54:43.186448: step: 1644/527, loss: 0.0007696152315475047 2023-01-23 03:54:44.292261: step: 1648/527, loss: 0.00011668205115711316 2023-01-23 03:54:45.387651: step: 1652/527, loss: 0.0470890998840332 2023-01-23 03:54:46.503657: step: 1656/527, loss: 0.0008905887953005731 2023-01-23 03:54:47.649676: step: 1660/527, loss: 7.336139969993383e-05 2023-01-23 03:54:48.752700: step: 1664/527, loss: 0.014044332318007946 2023-01-23 03:54:49.892463: step: 1668/527, loss: 0.03412990644574165 2023-01-23 03:54:50.965512: step: 1672/527, loss: 0.0004258155822753906 2023-01-23 03:54:52.121547: step: 1676/527, loss: 0.17165128886699677 2023-01-23 03:54:53.257571: step: 1680/527, loss: 2.040863000729587e-05 2023-01-23 03:54:54.374767: step: 1684/527, loss: 0.007773781195282936 2023-01-23 03:54:55.477392: step: 1688/527, loss: 0.001656913897022605 2023-01-23 03:54:56.595891: step: 1692/527, loss: 0.00026006699772551656 2023-01-23 03:54:57.738840: step: 1696/527, loss: 0.0008150100475177169 2023-01-23 03:54:58.837952: step: 1700/527, loss: 0.00010719299461925402 2023-01-23 03:54:59.972902: step: 1704/527, loss: 0.0021101476158946753 2023-01-23 03:55:01.107018: step: 1708/527, loss: 1.1444091796875e-05 2023-01-23 03:55:02.241207: step: 1712/527, loss: 1.3256072634248994e-05 2023-01-23 03:55:03.346612: step: 1716/527, loss: 0.09243179112672806 2023-01-23 03:55:04.459410: step: 1720/527, loss: 0.004834366030991077 2023-01-23 03:55:05.568890: step: 1724/527, loss: 0.02069120481610298 2023-01-23 03:55:06.662366: step: 1728/527, loss: 4.749298022943549e-05 2023-01-23 03:55:07.792350: step: 1732/527, loss: 0.044396206736564636 2023-01-23 03:55:08.921612: step: 1736/527, loss: 0.07142200320959091 2023-01-23 03:55:10.052171: step: 1740/527, loss: 2.098083541568485e-06 2023-01-23 03:55:11.164837: step: 1744/527, loss: 0.023014020174741745 2023-01-23 03:55:12.271557: step: 1748/527, loss: 0.0004960060468874872 2023-01-23 03:55:13.389295: step: 1752/527, loss: 0.004168892279267311 2023-01-23 03:55:14.504564: step: 1756/527, loss: 0.023257827386260033 2023-01-23 03:55:15.599494: step: 1760/527, loss: 0.02803640440106392 2023-01-23 03:55:16.744459: step: 1764/527, loss: 0.0006607056129723787 2023-01-23 03:55:17.846590: step: 1768/527, loss: 0.0049835206009447575 2023-01-23 03:55:18.965131: step: 1772/527, loss: 0.00020532608323264867 2023-01-23 03:55:20.073759: step: 1776/527, loss: 0.000135326394229196 2023-01-23 03:55:21.177890: step: 1780/527, loss: 4.57763690064894e-06 2023-01-23 03:55:22.277169: step: 1784/527, loss: 1.7547608877066523e-05 2023-01-23 03:55:23.389629: step: 1788/527, loss: 0.005622482392936945 2023-01-23 03:55:24.515222: step: 1792/527, loss: 0.03742418438196182 2023-01-23 03:55:25.644638: step: 1796/527, loss: 0.04316005855798721 2023-01-23 03:55:26.765062: step: 1800/527, loss: 0.11108584702014923 2023-01-23 03:55:27.880235: step: 1804/527, loss: 0.024437524378299713 2023-01-23 03:55:28.993643: step: 1808/527, loss: 0.01724720001220703 2023-01-23 03:55:30.105686: step: 1812/527, loss: 0.005657291505485773 2023-01-23 03:55:31.242824: step: 1816/527, loss: 0.0009641647338867188 2023-01-23 03:55:32.352190: step: 1820/527, loss: 0.008989906869828701 2023-01-23 03:55:33.449709: step: 1824/527, loss: 0.0002552032528910786 2023-01-23 03:55:34.527789: step: 1828/527, loss: 7.190704491222277e-05 2023-01-23 03:55:35.641138: step: 1832/527, loss: 0.00037364958552643657 2023-01-23 03:55:36.756199: step: 1836/527, loss: 0.00011749268014682457 2023-01-23 03:55:37.855134: step: 1840/527, loss: 0.0005306244129315019 2023-01-23 03:55:38.955257: step: 1844/527, loss: 7.953643944347277e-05 2023-01-23 03:55:40.113765: step: 1848/527, loss: 0.006087684538215399 2023-01-23 03:55:41.220660: step: 1852/527, loss: 0.0018959998851642013 2023-01-23 03:55:42.319177: step: 1856/527, loss: 2.6178360712947324e-05 2023-01-23 03:55:43.410687: step: 1860/527, loss: 0.0012657642364501953 2023-01-23 03:55:44.494098: step: 1864/527, loss: 0.0009847640758380294 2023-01-23 03:55:45.587286: step: 1868/527, loss: 0.13579325377941132 2023-01-23 03:55:46.709412: step: 1872/527, loss: 7.162093970691785e-05 2023-01-23 03:55:47.862899: step: 1876/527, loss: 0.002620744751766324 2023-01-23 03:55:48.985301: step: 1880/527, loss: 0.0008141041034832597 2023-01-23 03:55:50.079751: step: 1884/527, loss: 0.001517009804956615 2023-01-23 03:55:51.220377: step: 1888/527, loss: 0.00018656253814697266 2023-01-23 03:55:52.352991: step: 1892/527, loss: 3.452301098150201e-05 2023-01-23 03:55:53.468819: step: 1896/527, loss: 0.004050922580063343 2023-01-23 03:55:54.601641: step: 1900/527, loss: 0.0002058982936432585 2023-01-23 03:55:55.717310: step: 1904/527, loss: 0.015165328979492188 2023-01-23 03:55:56.840758: step: 1908/527, loss: 0.0002885818830691278 2023-01-23 03:55:57.970563: step: 1912/527, loss: 0.04956522583961487 2023-01-23 03:55:59.060428: step: 1916/527, loss: 0.006954193580895662 2023-01-23 03:56:00.184631: step: 1920/527, loss: 1.0776519957289565e-05 2023-01-23 03:56:01.299213: step: 1924/527, loss: 5.52177443751134e-05 2023-01-23 03:56:02.404822: step: 1928/527, loss: 0.0011095048394054174 2023-01-23 03:56:03.504066: step: 1932/527, loss: 1.52587890625e-05 2023-01-23 03:56:04.625664: step: 1936/527, loss: 0.00528373708948493 2023-01-23 03:56:05.718009: step: 1940/527, loss: 0.033998776227235794 2023-01-23 03:56:06.869129: step: 1944/527, loss: 0.011694718152284622 2023-01-23 03:56:07.960797: step: 1948/527, loss: 0.021518802270293236 2023-01-23 03:56:09.070112: step: 1952/527, loss: 0.0004793167463503778 2023-01-23 03:56:10.169458: step: 1956/527, loss: 0.0004134178161621094 2023-01-23 03:56:11.283860: step: 1960/527, loss: 5.340576535672881e-05 2023-01-23 03:56:12.383207: step: 1964/527, loss: 0.0014345169765874743 2023-01-23 03:56:13.481686: step: 1968/527, loss: 0.002166366670280695 2023-01-23 03:56:14.584171: step: 1972/527, loss: 0.01573152467608452 2023-01-23 03:56:15.668377: step: 1976/527, loss: 4.673004241340095e-06 2023-01-23 03:56:16.807419: step: 1980/527, loss: 0.017440224066376686 2023-01-23 03:56:17.916417: step: 1984/527, loss: 0.00034971238346770406 2023-01-23 03:56:19.027516: step: 1988/527, loss: 6.036758713889867e-05 2023-01-23 03:56:20.143191: step: 1992/527, loss: 2.3746490114717744e-05 2023-01-23 03:56:21.248652: step: 1996/527, loss: 0.0002203464537160471 2023-01-23 03:56:22.365771: step: 2000/527, loss: 0.0018532752292230725 2023-01-23 03:56:23.485472: step: 2004/527, loss: 0.03355293348431587 2023-01-23 03:56:24.616077: step: 2008/527, loss: 0.00022020340838935226 2023-01-23 03:56:25.740096: step: 2012/527, loss: 0.017255593091249466 2023-01-23 03:56:26.861569: step: 2016/527, loss: 0.0003689766163006425 2023-01-23 03:56:27.996358: step: 2020/527, loss: 0.005496406927704811 2023-01-23 03:56:29.118512: step: 2024/527, loss: 0.0512942299246788 2023-01-23 03:56:30.232289: step: 2028/527, loss: 0.036402132362127304 2023-01-23 03:56:31.319281: step: 2032/527, loss: 0.0056473733857274055 2023-01-23 03:56:32.434803: step: 2036/527, loss: 0.032465457916259766 2023-01-23 03:56:33.548165: step: 2040/527, loss: 0.06357435882091522 2023-01-23 03:56:34.672838: step: 2044/527, loss: 0.05133533477783203 2023-01-23 03:56:35.757753: step: 2048/527, loss: 0.0052405837923288345 2023-01-23 03:56:36.863275: step: 2052/527, loss: 0.0002544402959756553 2023-01-23 03:56:37.999957: step: 2056/527, loss: 0.060882568359375 2023-01-23 03:56:39.118030: step: 2060/527, loss: 0.007435703184455633 2023-01-23 03:56:40.225080: step: 2064/527, loss: 0.00031194687471725047 2023-01-23 03:56:41.360996: step: 2068/527, loss: 0.00021085739717818797 2023-01-23 03:56:42.488697: step: 2072/527, loss: 0.03925285115838051 2023-01-23 03:56:43.595982: step: 2076/527, loss: 0.0006368637550622225 2023-01-23 03:56:44.718673: step: 2080/527, loss: 0.00699958810582757 2023-01-23 03:56:45.816066: step: 2084/527, loss: 0.0008995056268759072 2023-01-23 03:56:46.916211: step: 2088/527, loss: 5.9604644775390625e-06 2023-01-23 03:56:48.030908: step: 2092/527, loss: 0.0002870559983421117 2023-01-23 03:56:49.132210: step: 2096/527, loss: 0.015148449689149857 2023-01-23 03:56:50.295285: step: 2100/527, loss: 0.0008975506061688066 2023-01-23 03:56:51.431446: step: 2104/527, loss: 0.007875824347138405 2023-01-23 03:56:52.560389: step: 2108/527, loss: 5.3977968491381034e-05 ================================================== Loss: 0.025 -------------------- Dev: {'event': {'p': 0.6041884816753926, 'r': 0.7683089214380826, 'f1': 0.6764361078546307}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Test: {'event': {'p': 0.631651376146789, 'r': 0.7868571428571428, 'f1': 0.7007633587786259}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Chinese: {'event': {'p': 0.5647058823529412, 'r': 0.8888888888888888, 'f1': 0.6906474820143885}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Korean: {'event': {'p': 0.5555555555555556, 'r': 0.47619047619047616, 'f1': 0.5128205128205129}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Russian: {'event': {'p': 0.47368421052631576, 'r': 0.5, 'f1': 0.4864864864864865}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6063157894736843, 'r': 0.7669773635153129, 'f1': 0.6772486772486773}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Eng Test for Korean: {'event': {'p': 0.6427238805970149, 'r': 0.7874285714285715, 'f1': 0.7077555213148434}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Korean: {'event': {'p': 0.6792452830188679, 'r': 0.5714285714285714, 'f1': 0.6206896551724137}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 26 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 03:57:34.223532: step: 4/527, loss: 0.0010183810954913497 2023-01-23 03:57:35.388067: step: 8/527, loss: 0.00020923613919876516 2023-01-23 03:57:36.497964: step: 12/527, loss: 0.5303603410720825 2023-01-23 03:57:37.594914: step: 16/527, loss: 6.079673767089844e-05 2023-01-23 03:57:38.741593: step: 20/527, loss: 0.01933899149298668 2023-01-23 03:57:39.849511: step: 24/527, loss: 0.002817249158397317 2023-01-23 03:57:40.980557: step: 28/527, loss: 0.0016889572143554688 2023-01-23 03:57:42.111691: step: 32/527, loss: 0.0008220672607421875 2023-01-23 03:57:43.218468: step: 36/527, loss: 0.00550766009837389 2023-01-23 03:57:44.356948: step: 40/527, loss: 0.006059456150978804 2023-01-23 03:57:45.482205: step: 44/527, loss: 5.588531348621473e-05 2023-01-23 03:57:46.601470: step: 48/527, loss: 0.1003929153084755 2023-01-23 03:57:47.726929: step: 52/527, loss: 0.00033965110196731985 2023-01-23 03:57:48.834325: step: 56/527, loss: 0.0001031875581247732 2023-01-23 03:57:50.001079: step: 60/527, loss: 0.0007342338794842362 2023-01-23 03:57:51.110827: step: 64/527, loss: 0.0012675285106524825 2023-01-23 03:57:52.204744: step: 68/527, loss: 0.03591899946331978 2023-01-23 03:57:53.315388: step: 72/527, loss: 0.0001434326113667339 2023-01-23 03:57:54.418247: step: 76/527, loss: 1.010894811770413e-05 2023-01-23 03:57:55.533734: step: 80/527, loss: 0.026571083813905716 2023-01-23 03:57:56.600051: step: 84/527, loss: 1.5592575437040068e-05 2023-01-23 03:57:57.707765: step: 88/527, loss: 0.052752308547496796 2023-01-23 03:57:58.830228: step: 92/527, loss: 0.000523471855558455 2023-01-23 03:57:59.944029: step: 96/527, loss: 0.003355789463967085 2023-01-23 03:58:01.046629: step: 100/527, loss: 0.0023502351250499487 2023-01-23 03:58:02.175515: step: 104/527, loss: 2.4461745852022432e-05 2023-01-23 03:58:03.298611: step: 108/527, loss: 0.0006574630388058722 2023-01-23 03:58:04.452966: step: 112/527, loss: 0.00421750545501709 2023-01-23 03:58:05.551941: step: 116/527, loss: 5.34057608092553e-06 2023-01-23 03:58:06.643794: step: 120/527, loss: 0.0005490303155966103 2023-01-23 03:58:07.781927: step: 124/527, loss: 0.03997693210840225 2023-01-23 03:58:08.906359: step: 128/527, loss: 1.568794141348917e-05 2023-01-23 03:58:09.993748: step: 132/527, loss: 0.00014743805513717234 2023-01-23 03:58:11.121663: step: 136/527, loss: 0.0006561279296875 2023-01-23 03:58:12.207986: step: 140/527, loss: 0.022336388006806374 2023-01-23 03:58:13.320544: step: 144/527, loss: 0.0007814407581463456 2023-01-23 03:58:14.409635: step: 148/527, loss: 2.0599363779183477e-05 2023-01-23 03:58:15.494157: step: 152/527, loss: 0.0001749038783600554 2023-01-23 03:58:16.615012: step: 156/527, loss: 5.3977968491381034e-05 2023-01-23 03:58:17.702299: step: 160/527, loss: -1.62124638336536e-06 2023-01-23 03:58:18.805613: step: 164/527, loss: 0.016738606616854668 2023-01-23 03:58:19.906311: step: 168/527, loss: 0.0006752967601642013 2023-01-23 03:58:21.036874: step: 172/527, loss: 0.5214818120002747 2023-01-23 03:58:22.177419: step: 176/527, loss: 0.062439534813165665 2023-01-23 03:58:23.267710: step: 180/527, loss: 0.0007843017810955644 2023-01-23 03:58:24.372329: step: 184/527, loss: 0.0010223388671875 2023-01-23 03:58:25.503108: step: 188/527, loss: 1.125335711549269e-05 2023-01-23 03:58:26.583856: step: 192/527, loss: 0.00023536682419944555 2023-01-23 03:58:27.677665: step: 196/527, loss: 0.5942993760108948 2023-01-23 03:58:28.792514: step: 200/527, loss: 0.022959327325224876 2023-01-23 03:58:29.864489: step: 204/527, loss: 0.0006401776918210089 2023-01-23 03:58:30.955206: step: 208/527, loss: 0.00012798310490325093 2023-01-23 03:58:32.064144: step: 212/527, loss: 0.024925947189331055 2023-01-23 03:58:33.167426: step: 216/527, loss: 0.00012664795212913305 2023-01-23 03:58:34.261879: step: 220/527, loss: 0.0014165878528729081 2023-01-23 03:58:35.368146: step: 224/527, loss: 0.003051948733627796 2023-01-23 03:58:36.528011: step: 228/527, loss: 1.4877320609230082e-05 2023-01-23 03:58:37.685889: step: 232/527, loss: 0.004054069519042969 2023-01-23 03:58:38.806165: step: 236/527, loss: 7.691383507335559e-05 2023-01-23 03:58:39.941975: step: 240/527, loss: 0.00014896393986418843 2023-01-23 03:58:41.044042: step: 244/527, loss: 0.0003648281272035092 2023-01-23 03:58:42.173782: step: 248/527, loss: 0.006998110096901655 2023-01-23 03:58:43.269701: step: 252/527, loss: 1.0728836059570312e-05 2023-01-23 03:58:44.409396: step: 256/527, loss: 0.013386535458266735 2023-01-23 03:58:45.526298: step: 260/527, loss: 0.043252088129520416 2023-01-23 03:58:46.709662: step: 264/527, loss: 0.019922543317079544 2023-01-23 03:58:47.854591: step: 268/527, loss: 0.00846567191183567 2023-01-23 03:58:48.963240: step: 272/527, loss: 0.08620081096887589 2023-01-23 03:58:50.083844: step: 276/527, loss: 0.0004235267697367817 2023-01-23 03:58:51.188330: step: 280/527, loss: 0.006584834773093462 2023-01-23 03:58:52.333251: step: 284/527, loss: 0.00011687279038596898 2023-01-23 03:58:53.441358: step: 288/527, loss: 0.0021186829544603825 2023-01-23 03:58:54.542134: step: 292/527, loss: 0.00813441351056099 2023-01-23 03:58:55.656581: step: 296/527, loss: 0.02946958690881729 2023-01-23 03:58:56.782867: step: 300/527, loss: 0.04097137600183487 2023-01-23 03:58:57.889657: step: 304/527, loss: 0.0002875328063964844 2023-01-23 03:58:59.039286: step: 308/527, loss: 0.0013885498046875 2023-01-23 03:59:00.156652: step: 312/527, loss: 0.0017648697830736637 2023-01-23 03:59:01.246023: step: 316/527, loss: 0.00010538101196289062 2023-01-23 03:59:02.353609: step: 320/527, loss: 0.001785659696906805 2023-01-23 03:59:03.520066: step: 324/527, loss: 2.5558472771081142e-05 2023-01-23 03:59:04.648022: step: 328/527, loss: 0.0002652168332133442 2023-01-23 03:59:05.747292: step: 332/527, loss: 1.106262243411038e-05 2023-01-23 03:59:06.854507: step: 336/527, loss: 0.00024337769718840718 2023-01-23 03:59:07.934294: step: 340/527, loss: 0.0001246929168701172 2023-01-23 03:59:09.065545: step: 344/527, loss: 0.3799140453338623 2023-01-23 03:59:10.198349: step: 348/527, loss: 0.0017257691361010075 2023-01-23 03:59:11.360616: step: 352/527, loss: 0.09048710018396378 2023-01-23 03:59:12.468482: step: 356/527, loss: 0.07144976407289505 2023-01-23 03:59:13.608003: step: 360/527, loss: 0.0017267226940020919 2023-01-23 03:59:14.750109: step: 364/527, loss: 0.004371833987534046 2023-01-23 03:59:15.886294: step: 368/527, loss: 0.0002040863037109375 2023-01-23 03:59:16.968319: step: 372/527, loss: 0.13012218475341797 2023-01-23 03:59:18.061172: step: 376/527, loss: 0.039885327219963074 2023-01-23 03:59:19.161897: step: 380/527, loss: 0.00011472702317405492 2023-01-23 03:59:20.265953: step: 384/527, loss: 5.865097136847908e-06 2023-01-23 03:59:21.358534: step: 388/527, loss: 7.65800432418473e-05 2023-01-23 03:59:22.443605: step: 392/527, loss: 8.659363084007055e-05 2023-01-23 03:59:23.558204: step: 396/527, loss: 0.00011491775512695312 2023-01-23 03:59:24.686130: step: 400/527, loss: 0.015918731689453125 2023-01-23 03:59:25.803923: step: 404/527, loss: 0.03349361568689346 2023-01-23 03:59:26.940064: step: 408/527, loss: 2.5844572519417852e-05 2023-01-23 03:59:28.023165: step: 412/527, loss: 3.852844383800402e-05 2023-01-23 03:59:29.126937: step: 416/527, loss: 0.0044349669478833675 2023-01-23 03:59:30.237775: step: 420/527, loss: 0.0073455809615552425 2023-01-23 03:59:31.380533: step: 424/527, loss: 0.006294441409409046 2023-01-23 03:59:32.503916: step: 428/527, loss: -4.434585662238533e-06 2023-01-23 03:59:33.574702: step: 432/527, loss: 2.3365021206700476e-06 2023-01-23 03:59:34.664708: step: 436/527, loss: 0.00024366378784179688 2023-01-23 03:59:35.792034: step: 440/527, loss: 0.0005542278522625566 2023-01-23 03:59:36.909213: step: 444/527, loss: 0.0002375602925894782 2023-01-23 03:59:38.015430: step: 448/527, loss: 0.00012903213792014867 2023-01-23 03:59:39.113087: step: 452/527, loss: 0.0015468597412109375 2023-01-23 03:59:40.246983: step: 456/527, loss: 0.02023620717227459 2023-01-23 03:59:41.349251: step: 460/527, loss: 0.0038557052612304688 2023-01-23 03:59:42.487526: step: 464/527, loss: 0.015799619257450104 2023-01-23 03:59:43.622066: step: 468/527, loss: 0.005543041508644819 2023-01-23 03:59:44.721211: step: 472/527, loss: 0.006198692135512829 2023-01-23 03:59:45.810330: step: 476/527, loss: 0.0052689556032419205 2023-01-23 03:59:46.916470: step: 480/527, loss: 6.732940528308973e-05 2023-01-23 03:59:48.085667: step: 484/527, loss: 9.822846550378017e-06 2023-01-23 03:59:49.223443: step: 488/527, loss: 0.0009836197132244706 2023-01-23 03:59:50.325581: step: 492/527, loss: 0.02634620852768421 2023-01-23 03:59:51.407593: step: 496/527, loss: 0.6064134240150452 2023-01-23 03:59:52.540500: step: 500/527, loss: 0.0001789093075785786 2023-01-23 03:59:53.710034: step: 504/527, loss: 0.0005716324085369706 2023-01-23 03:59:54.830619: step: 508/527, loss: 1.163482647825731e-05 2023-01-23 03:59:55.946884: step: 512/527, loss: 0.0005960464477539062 2023-01-23 03:59:57.027887: step: 516/527, loss: 0.0020146372262388468 2023-01-23 03:59:58.148731: step: 520/527, loss: 0.06055726855993271 2023-01-23 03:59:59.251088: step: 524/527, loss: 0.026302719488739967 2023-01-23 04:00:00.350474: step: 528/527, loss: 7.629396350239404e-07 2023-01-23 04:00:01.460163: step: 532/527, loss: 4.94956984766759e-05 2023-01-23 04:00:02.585522: step: 536/527, loss: 0.05432319641113281 2023-01-23 04:00:03.733672: step: 540/527, loss: 0.012856483459472656 2023-01-23 04:00:04.850125: step: 544/527, loss: 5.054473876953125e-05 2023-01-23 04:00:05.966708: step: 548/527, loss: 0.0013012886047363281 2023-01-23 04:00:07.059474: step: 552/527, loss: 0.0007833242416381836 2023-01-23 04:00:08.217071: step: 556/527, loss: 0.012685108929872513 2023-01-23 04:00:09.312010: step: 560/527, loss: 0.03389434888958931 2023-01-23 04:00:10.402008: step: 564/527, loss: 2.2888185412739404e-05 2023-01-23 04:00:11.515336: step: 568/527, loss: 4.425048973644152e-05 2023-01-23 04:00:12.665994: step: 572/527, loss: 0.01061716116964817 2023-01-23 04:00:13.760685: step: 576/527, loss: 7.915496826171875e-05 2023-01-23 04:00:14.877779: step: 580/527, loss: 0.002999782795086503 2023-01-23 04:00:15.980337: step: 584/527, loss: 0.02184000238776207 2023-01-23 04:00:17.110642: step: 588/527, loss: 0.00471344031393528 2023-01-23 04:00:18.262386: step: 592/527, loss: 4.3106076191179454e-05 2023-01-23 04:00:19.385568: step: 596/527, loss: -1.2779236385540571e-05 2023-01-23 04:00:20.490590: step: 600/527, loss: 0.0029702666215598583 2023-01-23 04:00:21.572228: step: 604/527, loss: 0.00016841889009810984 2023-01-23 04:00:22.706719: step: 608/527, loss: 1.9073486612342094e-07 2023-01-23 04:00:23.806578: step: 612/527, loss: 3.6239625842426904e-06 2023-01-23 04:00:24.899786: step: 616/527, loss: -1.0585785275907256e-05 2023-01-23 04:00:26.019817: step: 620/527, loss: 0.0013824462657794356 2023-01-23 04:00:27.143652: step: 624/527, loss: 2.3269654775504023e-05 2023-01-23 04:00:28.257401: step: 628/527, loss: 4.291534423828125e-06 2023-01-23 04:00:29.391070: step: 632/527, loss: 3.61442580469884e-05 2023-01-23 04:00:30.599238: step: 636/527, loss: 0.012423706240952015 2023-01-23 04:00:31.738195: step: 640/527, loss: 0.0020893097389489412 2023-01-23 04:00:32.826645: step: 644/527, loss: 0.0013291359646245837 2023-01-23 04:00:33.936593: step: 648/527, loss: 0.00030770304147154093 2023-01-23 04:00:35.062590: step: 652/527, loss: 0.02173643186688423 2023-01-23 04:00:36.154726: step: 656/527, loss: 5.1116945542162284e-05 2023-01-23 04:00:37.269075: step: 660/527, loss: 0.004600333981215954 2023-01-23 04:00:38.352193: step: 664/527, loss: 0.000610732997301966 2023-01-23 04:00:39.470435: step: 668/527, loss: 0.04113402217626572 2023-01-23 04:00:40.598368: step: 672/527, loss: 0.015576934441924095 2023-01-23 04:00:41.704951: step: 676/527, loss: 0.01519107911735773 2023-01-23 04:00:42.818326: step: 680/527, loss: 0.03730297088623047 2023-01-23 04:00:43.984504: step: 684/527, loss: 0.010211181826889515 2023-01-23 04:00:45.100074: step: 688/527, loss: 0.00141315464861691 2023-01-23 04:00:46.228832: step: 692/527, loss: 0.02148304134607315 2023-01-23 04:00:47.304651: step: 696/527, loss: 0.015772532671689987 2023-01-23 04:00:48.448847: step: 700/527, loss: -3.0994415283203125e-05 2023-01-23 04:00:49.580226: step: 704/527, loss: 0.00022048949904274195 2023-01-23 04:00:50.734701: step: 708/527, loss: 3.24249267578125e-05 2023-01-23 04:00:51.817525: step: 712/527, loss: 0.0003799438418354839 2023-01-23 04:00:52.920260: step: 716/527, loss: 0.000244140625 2023-01-23 04:00:54.046040: step: 720/527, loss: -5.7220458984375e-06 2023-01-23 04:00:55.164987: step: 724/527, loss: 0.002777671907097101 2023-01-23 04:00:56.256892: step: 728/527, loss: 0.0003267287975177169 2023-01-23 04:00:57.367716: step: 732/527, loss: 0.03407011181116104 2023-01-23 04:00:58.540490: step: 736/527, loss: 0.0017590522766113281 2023-01-23 04:00:59.662162: step: 740/527, loss: 3.7860871088923886e-05 2023-01-23 04:01:00.794159: step: 744/527, loss: 0.044820211827754974 2023-01-23 04:01:01.927400: step: 748/527, loss: 0.04671459272503853 2023-01-23 04:01:03.040103: step: 752/527, loss: 1.125335711549269e-05 2023-01-23 04:01:04.181920: step: 756/527, loss: 0.00063323974609375 2023-01-23 04:01:05.310031: step: 760/527, loss: 0.0019325256580486894 2023-01-23 04:01:06.427063: step: 764/527, loss: 0.014039326459169388 2023-01-23 04:01:07.533944: step: 768/527, loss: 3.4046173823298886e-05 2023-01-23 04:01:08.624128: step: 772/527, loss: 4.816055025003152e-06 2023-01-23 04:01:09.722021: step: 776/527, loss: 0.04074068367481232 2023-01-23 04:01:10.810585: step: 780/527, loss: 7.867813110351562e-06 2023-01-23 04:01:11.937634: step: 784/527, loss: 0.0003578186151571572 2023-01-23 04:01:13.046177: step: 788/527, loss: 0.002159976866096258 2023-01-23 04:01:14.183031: step: 792/527, loss: 0.00041131972102448344 2023-01-23 04:01:15.313533: step: 796/527, loss: 2.193450927734375e-05 2023-01-23 04:01:16.438763: step: 800/527, loss: 0.025296688079833984 2023-01-23 04:01:17.551681: step: 804/527, loss: 1.6784666513558477e-05 2023-01-23 04:01:18.673752: step: 808/527, loss: 0.008298492059111595 2023-01-23 04:01:19.773649: step: 812/527, loss: 0.002295970916748047 2023-01-23 04:01:20.892649: step: 816/527, loss: 0.000373649614630267 2023-01-23 04:01:22.020468: step: 820/527, loss: 0.0001390457182424143 2023-01-23 04:01:23.133159: step: 824/527, loss: 0.0032508850563317537 2023-01-23 04:01:24.203796: step: 828/527, loss: 8.144378807628527e-05 2023-01-23 04:01:25.333460: step: 832/527, loss: 0.008536816574633121 2023-01-23 04:01:26.449064: step: 836/527, loss: 0.0018991470569744706 2023-01-23 04:01:27.587572: step: 840/527, loss: 0.0004013061407022178 2023-01-23 04:01:28.689130: step: 844/527, loss: 0.0006505966302938759 2023-01-23 04:01:29.801639: step: 848/527, loss: 0.0023069381713867188 2023-01-23 04:01:30.915457: step: 852/527, loss: 0.0037331103812903166 2023-01-23 04:01:32.022811: step: 856/527, loss: 0.03898124769330025 2023-01-23 04:01:33.134932: step: 860/527, loss: 0.030019378289580345 2023-01-23 04:01:34.245961: step: 864/527, loss: 0.0012832642532885075 2023-01-23 04:01:35.331601: step: 868/527, loss: 0.0006417751428671181 2023-01-23 04:01:36.431960: step: 872/527, loss: 0.008279609493911266 2023-01-23 04:01:37.545146: step: 876/527, loss: 0.004338455386459827 2023-01-23 04:01:38.641818: step: 880/527, loss: -6.9618222369172145e-06 2023-01-23 04:01:39.746722: step: 884/527, loss: 0.005635071080178022 2023-01-23 04:01:40.878827: step: 888/527, loss: 0.03139848634600639 2023-01-23 04:01:41.970722: step: 892/527, loss: 0.003057193709537387 2023-01-23 04:01:43.076352: step: 896/527, loss: 0.0011819839710369706 2023-01-23 04:01:44.199726: step: 900/527, loss: 0.0004093170282430947 2023-01-23 04:01:45.291042: step: 904/527, loss: 0.00039157868013717234 2023-01-23 04:01:46.399144: step: 908/527, loss: 0.005717468447983265 2023-01-23 04:01:47.542224: step: 912/527, loss: 0.0017227173084393144 2023-01-23 04:01:48.659706: step: 916/527, loss: 0.04964065924286842 2023-01-23 04:01:49.835543: step: 920/527, loss: -3.1471249712922145e-06 2023-01-23 04:01:50.978369: step: 924/527, loss: 8.20159948489163e-06 2023-01-23 04:01:52.090827: step: 928/527, loss: 0.01500558853149414 2023-01-23 04:01:53.227487: step: 932/527, loss: 0.01852703094482422 2023-01-23 04:01:54.325026: step: 936/527, loss: 0.011164474301040173 2023-01-23 04:01:55.430084: step: 940/527, loss: 0.04160509258508682 2023-01-23 04:01:56.522317: step: 944/527, loss: 4.596710641635582e-05 2023-01-23 04:01:57.646305: step: 948/527, loss: 0.01620650291442871 2023-01-23 04:01:58.743206: step: 952/527, loss: 2.09808349609375e-05 2023-01-23 04:01:59.872375: step: 956/527, loss: 0.027567576617002487 2023-01-23 04:02:00.992485: step: 960/527, loss: 0.0025661946274340153 2023-01-23 04:02:02.132965: step: 964/527, loss: 0.019494246691465378 2023-01-23 04:02:03.289065: step: 968/527, loss: 0.00299663539044559 2023-01-23 04:02:04.421321: step: 972/527, loss: 0.00027294160099700093 2023-01-23 04:02:05.530138: step: 976/527, loss: 0.021616365760564804 2023-01-23 04:02:06.634997: step: 980/527, loss: 0.0001171588955912739 2023-01-23 04:02:07.742466: step: 984/527, loss: 0.000850296055432409 2023-01-23 04:02:08.851442: step: 988/527, loss: 0.0005934715154580772 2023-01-23 04:02:09.981661: step: 992/527, loss: 0.000865936279296875 2023-01-23 04:02:11.092282: step: 996/527, loss: 1.049041748046875e-05 2023-01-23 04:02:12.216455: step: 1000/527, loss: 0.0022062300704419613 2023-01-23 04:02:13.303312: step: 1004/527, loss: 0.04919185861945152 2023-01-23 04:02:14.409966: step: 1008/527, loss: 0.04399757459759712 2023-01-23 04:02:15.527652: step: 1012/527, loss: 1.678466833254788e-05 2023-01-23 04:02:16.636304: step: 1016/527, loss: 3.7193296975601697e-06 2023-01-23 04:02:17.753367: step: 1020/527, loss: 2.86102294921875e-06 2023-01-23 04:02:18.857051: step: 1024/527, loss: 0.013213539496064186 2023-01-23 04:02:19.998065: step: 1028/527, loss: 0.00796361081302166 2023-01-23 04:02:21.102344: step: 1032/527, loss: 0.03281097486615181 2023-01-23 04:02:22.229760: step: 1036/527, loss: 0.0012125015491619706 2023-01-23 04:02:23.373307: step: 1040/527, loss: 0.052804186940193176 2023-01-23 04:02:24.510205: step: 1044/527, loss: 0.0011572837829589844 2023-01-23 04:02:25.620657: step: 1048/527, loss: 0.00017013550677802414 2023-01-23 04:02:26.734064: step: 1052/527, loss: 2.0980837689421605e-06 2023-01-23 04:02:27.844208: step: 1056/527, loss: 4.38690176451928e-06 2023-01-23 04:02:28.965640: step: 1060/527, loss: 0.007182884030044079 2023-01-23 04:02:30.103626: step: 1064/527, loss: 2.498626781743951e-05 2023-01-23 04:02:31.200438: step: 1068/527, loss: 1.0395049685030244e-05 2023-01-23 04:02:32.307300: step: 1072/527, loss: 0.00010671615018509328 2023-01-23 04:02:33.413166: step: 1076/527, loss: 0.015565109439194202 2023-01-23 04:02:34.515024: step: 1080/527, loss: 1.101493944588583e-05 2023-01-23 04:02:35.638585: step: 1084/527, loss: 1.1825562069134321e-05 2023-01-23 04:02:36.755107: step: 1088/527, loss: 3.9386748539982364e-05 2023-01-23 04:02:37.895384: step: 1092/527, loss: 3.5953522456111386e-05 2023-01-23 04:02:38.987801: step: 1096/527, loss: 5.2261355449445546e-05 2023-01-23 04:02:40.109210: step: 1100/527, loss: 1.926422191900201e-05 2023-01-23 04:02:41.219399: step: 1104/527, loss: 1.4019013178767636e-05 2023-01-23 04:02:42.325114: step: 1108/527, loss: 0.000263214111328125 2023-01-23 04:02:43.446939: step: 1112/527, loss: 0.011109352111816406 2023-01-23 04:02:44.563571: step: 1116/527, loss: 0.09749965369701385 2023-01-23 04:02:45.686403: step: 1120/527, loss: 0.001010894775390625 2023-01-23 04:02:46.793374: step: 1124/527, loss: 0.0011451244354248047 2023-01-23 04:02:47.886030: step: 1128/527, loss: 0.00038824082002975047 2023-01-23 04:02:48.983571: step: 1132/527, loss: 2.1934512915322557e-05 2023-01-23 04:02:50.088464: step: 1136/527, loss: 4.682540748035535e-05 2023-01-23 04:02:51.167390: step: 1140/527, loss: 3.376007225597277e-05 2023-01-23 04:02:52.292003: step: 1144/527, loss: 2.9373170036706142e-05 2023-01-23 04:02:53.428584: step: 1148/527, loss: 7.247925168485381e-06 2023-01-23 04:02:54.545745: step: 1152/527, loss: -1.1444091796875e-05 2023-01-23 04:02:55.674311: step: 1156/527, loss: 0.000469207763671875 2023-01-23 04:02:56.802793: step: 1160/527, loss: 0.0017791748978197575 2023-01-23 04:02:57.909162: step: 1164/527, loss: 0.00014553070650435984 2023-01-23 04:02:59.020221: step: 1168/527, loss: 0.0007131577003747225 2023-01-23 04:03:00.186907: step: 1172/527, loss: 4.3201445805607364e-05 2023-01-23 04:03:01.306621: step: 1176/527, loss: 0.0005456924554891884 2023-01-23 04:03:02.427639: step: 1180/527, loss: 0.01765928417444229 2023-01-23 04:03:03.531466: step: 1184/527, loss: 0.11211776733398438 2023-01-23 04:03:04.671079: step: 1188/527, loss: 3.9768219721736386e-05 2023-01-23 04:03:05.782744: step: 1192/527, loss: 9.937286813510582e-05 2023-01-23 04:03:06.887174: step: 1196/527, loss: 1.3256073543743696e-05 2023-01-23 04:03:08.010996: step: 1200/527, loss: 0.044927217066287994 2023-01-23 04:03:09.118347: step: 1204/527, loss: 2.689361645025201e-05 2023-01-23 04:03:10.246313: step: 1208/527, loss: 0.00011577606346691027 2023-01-23 04:03:11.369829: step: 1212/527, loss: 0.08493976294994354 2023-01-23 04:03:12.498612: step: 1216/527, loss: 0.05260677635669708 2023-01-23 04:03:13.619103: step: 1220/527, loss: 0.0002827644348144531 2023-01-23 04:03:14.779940: step: 1224/527, loss: 0.00042552949162200093 2023-01-23 04:03:15.887959: step: 1228/527, loss: 0.013245105743408203 2023-01-23 04:03:16.989878: step: 1232/527, loss: 0.0005317687755450606 2023-01-23 04:03:18.137744: step: 1236/527, loss: 4.854202416026965e-05 2023-01-23 04:03:19.252919: step: 1240/527, loss: 0.10594673454761505 2023-01-23 04:03:20.373682: step: 1244/527, loss: -2.86102294921875e-06 2023-01-23 04:03:21.478919: step: 1248/527, loss: 0.0040740966796875 2023-01-23 04:03:22.598346: step: 1252/527, loss: 1.8596649624669226e-06 2023-01-23 04:03:23.723651: step: 1256/527, loss: 8.392333256779239e-05 2023-01-23 04:03:24.831881: step: 1260/527, loss: 0.002207183977589011 2023-01-23 04:03:25.943579: step: 1264/527, loss: 7.45773286325857e-05 2023-01-23 04:03:27.055282: step: 1268/527, loss: 0.003558445256203413 2023-01-23 04:03:28.152484: step: 1272/527, loss: 0.02456035651266575 2023-01-23 04:03:29.246949: step: 1276/527, loss: 2.5129318601102568e-05 2023-01-23 04:03:30.366124: step: 1280/527, loss: 5.34057608092553e-06 2023-01-23 04:03:31.472703: step: 1284/527, loss: 0.0025812149979174137 2023-01-23 04:03:32.585154: step: 1288/527, loss: 0.005806351080536842 2023-01-23 04:03:33.725471: step: 1292/527, loss: 4.27722952736076e-05 2023-01-23 04:03:34.848918: step: 1296/527, loss: 1.5258789289873675e-06 2023-01-23 04:03:35.957059: step: 1300/527, loss: 5.3977968491381034e-05 2023-01-23 04:03:37.047924: step: 1304/527, loss: -3.24249267578125e-05 2023-01-23 04:03:38.143216: step: 1308/527, loss: -9.536740890325746e-07 2023-01-23 04:03:39.263670: step: 1312/527, loss: 0.005859804339706898 2023-01-23 04:03:40.367152: step: 1316/527, loss: 3.14712519866589e-06 2023-01-23 04:03:41.480481: step: 1320/527, loss: 0.010534477420151234 2023-01-23 04:03:42.592669: step: 1324/527, loss: 0.011196709237992764 2023-01-23 04:03:43.695146: step: 1328/527, loss: 0.0004432440036907792 2023-01-23 04:03:44.811541: step: 1332/527, loss: 0.00010795592970680445 2023-01-23 04:03:45.905588: step: 1336/527, loss: 0.023223018273711205 2023-01-23 04:03:47.024621: step: 1340/527, loss: 6.008148557157256e-05 2023-01-23 04:03:48.131768: step: 1344/527, loss: 0.0035771848633885384 2023-01-23 04:03:49.243161: step: 1348/527, loss: 9.346009392174892e-06 2023-01-23 04:03:50.336962: step: 1352/527, loss: 0.03470730409026146 2023-01-23 04:03:51.477974: step: 1356/527, loss: 0.007178306579589844 2023-01-23 04:03:52.570189: step: 1360/527, loss: 0.003159904619678855 2023-01-23 04:03:53.676376: step: 1364/527, loss: 0.0009855270618572831 2023-01-23 04:03:54.802972: step: 1368/527, loss: 0.0003097057342529297 2023-01-23 04:03:55.943416: step: 1372/527, loss: 0.0016050338745117188 2023-01-23 04:03:57.048034: step: 1376/527, loss: 0.002596471458673477 2023-01-23 04:03:58.192014: step: 1380/527, loss: 0.062470052391290665 2023-01-23 04:03:59.309960: step: 1384/527, loss: 0.24535445868968964 2023-01-23 04:04:00.413532: step: 1388/527, loss: 3.0231476557673886e-05 2023-01-23 04:04:01.553546: step: 1392/527, loss: 0.009205102920532227 2023-01-23 04:04:02.660224: step: 1396/527, loss: 0.061215974390506744 2023-01-23 04:04:03.753960: step: 1400/527, loss: 0.011213780380785465 2023-01-23 04:04:04.867971: step: 1404/527, loss: 0.00296440115198493 2023-01-23 04:04:05.971058: step: 1408/527, loss: 0.03813991695642471 2023-01-23 04:04:07.059694: step: 1412/527, loss: 0.0005735396989621222 2023-01-23 04:04:08.193533: step: 1416/527, loss: 0.018656635656952858 2023-01-23 04:04:09.289541: step: 1420/527, loss: 2.117157055181451e-05 2023-01-23 04:04:10.390283: step: 1424/527, loss: 0.0003582954523153603 2023-01-23 04:04:11.498797: step: 1428/527, loss: 0.018941879272460938 2023-01-23 04:04:12.608040: step: 1432/527, loss: 0.01845235750079155 2023-01-23 04:04:13.700440: step: 1436/527, loss: -4.5776364459015895e-06 2023-01-23 04:04:14.806422: step: 1440/527, loss: 0.002491474151611328 2023-01-23 04:04:15.920443: step: 1444/527, loss: 0.0017177581321448088 2023-01-23 04:04:17.020703: step: 1448/527, loss: 0.010931205935776234 2023-01-23 04:04:18.116229: step: 1452/527, loss: 0.0004507213889155537 2023-01-23 04:04:19.235653: step: 1456/527, loss: 5.073547436040826e-05 2023-01-23 04:04:20.329861: step: 1460/527, loss: 3.132819983875379e-05 2023-01-23 04:04:21.425932: step: 1464/527, loss: 0.01725759543478489 2023-01-23 04:04:22.544344: step: 1468/527, loss: 0.0009572029230184853 2023-01-23 04:04:23.663306: step: 1472/527, loss: 4.3487551010912284e-05 2023-01-23 04:04:24.763652: step: 1476/527, loss: -2.6702882678364404e-06 2023-01-23 04:04:25.890692: step: 1480/527, loss: 0.7006246447563171 2023-01-23 04:04:27.012983: step: 1484/527, loss: 0.002730798674747348 2023-01-23 04:04:28.145574: step: 1488/527, loss: 0.0753277838230133 2023-01-23 04:04:29.261059: step: 1492/527, loss: 0.00551605224609375 2023-01-23 04:04:30.371594: step: 1496/527, loss: 0.003438663436099887 2023-01-23 04:04:31.504453: step: 1500/527, loss: 0.027538299560546875 2023-01-23 04:04:32.627753: step: 1504/527, loss: 0.009008407592773438 2023-01-23 04:04:33.748306: step: 1508/527, loss: 0.0002904892317019403 2023-01-23 04:04:34.854625: step: 1512/527, loss: 0.0008724212530069053 2023-01-23 04:04:35.971501: step: 1516/527, loss: 0.0010391712421551347 2023-01-23 04:04:37.073267: step: 1520/527, loss: 0.0008005142444744706 2023-01-23 04:04:38.219351: step: 1524/527, loss: 0.0010977268684655428 2023-01-23 04:04:39.331897: step: 1528/527, loss: 0.0004170417960267514 2023-01-23 04:04:40.475022: step: 1532/527, loss: 6.294250852079131e-06 2023-01-23 04:04:41.596437: step: 1536/527, loss: 0.00033254624577239156 2023-01-23 04:04:42.701420: step: 1540/527, loss: 0.0002576351398602128 2023-01-23 04:04:43.831759: step: 1544/527, loss: 0.000232696533203125 2023-01-23 04:04:44.940167: step: 1548/527, loss: 0.033135127276182175 2023-01-23 04:04:46.039734: step: 1552/527, loss: 0.0002883911074604839 2023-01-23 04:04:47.161386: step: 1556/527, loss: 0.00010004044452216476 2023-01-23 04:04:48.262340: step: 1560/527, loss: 6.189346458995715e-05 2023-01-23 04:04:49.400086: step: 1564/527, loss: 0.05324821546673775 2023-01-23 04:04:50.511901: step: 1568/527, loss: 0.0395817756652832 2023-01-23 04:04:51.631556: step: 1572/527, loss: 0.013029957190155983 2023-01-23 04:04:52.759617: step: 1576/527, loss: 0.08099603652954102 2023-01-23 04:04:53.860551: step: 1580/527, loss: 0.03337879106402397 2023-01-23 04:04:54.955369: step: 1584/527, loss: 0.0006295204511843622 2023-01-23 04:04:56.055440: step: 1588/527, loss: 0.0069828033447265625 2023-01-23 04:04:57.168738: step: 1592/527, loss: 0.005482864566147327 2023-01-23 04:04:58.272073: step: 1596/527, loss: 0.004457187373191118 2023-01-23 04:04:59.364480: step: 1600/527, loss: 0.03366050869226456 2023-01-23 04:05:00.460764: step: 1604/527, loss: 0.002764129778370261 2023-01-23 04:05:01.613609: step: 1608/527, loss: 0.03771534189581871 2023-01-23 04:05:02.713944: step: 1612/527, loss: 0.011908340267837048 2023-01-23 04:05:03.858325: step: 1616/527, loss: 6.160735938465223e-05 2023-01-23 04:05:04.961108: step: 1620/527, loss: 0.00014801026554778218 2023-01-23 04:05:06.055815: step: 1624/527, loss: 9.813308861339465e-05 2023-01-23 04:05:07.166602: step: 1628/527, loss: 0.00015125275240279734 2023-01-23 04:05:08.290822: step: 1632/527, loss: 0.0003359794500283897 2023-01-23 04:05:09.455913: step: 1636/527, loss: 0.0027188300155103207 2023-01-23 04:05:10.576803: step: 1640/527, loss: 0.00016899108595680445 2023-01-23 04:05:11.642727: step: 1644/527, loss: 0.00023493767366744578 2023-01-23 04:05:12.788689: step: 1648/527, loss: 0.007884550839662552 2023-01-23 04:05:13.924349: step: 1652/527, loss: 0.002260875655338168 2023-01-23 04:05:15.019449: step: 1656/527, loss: 5.8650970458984375e-05 2023-01-23 04:05:16.129299: step: 1660/527, loss: 1.5354156857938506e-05 2023-01-23 04:05:17.222985: step: 1664/527, loss: 0.00017910002497956157 2023-01-23 04:05:18.346945: step: 1668/527, loss: 0.015706252306699753 2023-01-23 04:05:19.465258: step: 1672/527, loss: 3.42369094141759e-05 2023-01-23 04:05:20.566354: step: 1676/527, loss: 0.0006195068708620965 2023-01-23 04:05:21.654750: step: 1680/527, loss: 0.00016307830810546875 2023-01-23 04:05:22.790258: step: 1684/527, loss: 0.06390685588121414 2023-01-23 04:05:23.890956: step: 1688/527, loss: 2.1266936528263614e-05 2023-01-23 04:05:25.014135: step: 1692/527, loss: 0.037119198590517044 2023-01-23 04:05:26.152795: step: 1696/527, loss: 0.0014846802223473787 2023-01-23 04:05:27.274609: step: 1700/527, loss: 0.006541252601891756 2023-01-23 04:05:28.416678: step: 1704/527, loss: 0.011773491278290749 2023-01-23 04:05:29.550670: step: 1708/527, loss: 0.0006527423392981291 2023-01-23 04:05:30.665166: step: 1712/527, loss: 2.365112231927924e-05 2023-01-23 04:05:31.767561: step: 1716/527, loss: 0.00015468598576262593 2023-01-23 04:05:32.877007: step: 1720/527, loss: 9.91821252682712e-06 2023-01-23 04:05:33.985295: step: 1724/527, loss: 1.33514404296875e-05 2023-01-23 04:05:35.118470: step: 1728/527, loss: 1.33514404296875e-05 2023-01-23 04:05:36.238996: step: 1732/527, loss: 0.005729102995246649 2023-01-23 04:05:37.315365: step: 1736/527, loss: 0.0004498004855122417 2023-01-23 04:05:38.439064: step: 1740/527, loss: 0.005129242315888405 2023-01-23 04:05:39.560383: step: 1744/527, loss: -6.48498553346144e-06 2023-01-23 04:05:40.682869: step: 1748/527, loss: 0.004732513800263405 2023-01-23 04:05:41.784144: step: 1752/527, loss: 3.738403393072076e-05 2023-01-23 04:05:42.897052: step: 1756/527, loss: 0.0002151489316020161 2023-01-23 04:05:44.025585: step: 1760/527, loss: 0.00011606217594817281 2023-01-23 04:05:45.142347: step: 1764/527, loss: 2.460479663568549e-05 2023-01-23 04:05:46.265692: step: 1768/527, loss: 0.00811548251658678 2023-01-23 04:05:47.433750: step: 1772/527, loss: 0.0005331039428710938 2023-01-23 04:05:48.539505: step: 1776/527, loss: 8.80241350387223e-05 2023-01-23 04:05:49.702534: step: 1780/527, loss: 1.888275073724799e-05 2023-01-23 04:05:50.848631: step: 1784/527, loss: 0.06308841705322266 2023-01-23 04:05:51.957099: step: 1788/527, loss: 0.02843456342816353 2023-01-23 04:05:53.066887: step: 1792/527, loss: 0.0036882401909679174 2023-01-23 04:05:54.187905: step: 1796/527, loss: 0.002012062119320035 2023-01-23 04:05:55.351076: step: 1800/527, loss: 0.00254650111310184 2023-01-23 04:05:56.473012: step: 1804/527, loss: 0.0010662077693268657 2023-01-23 04:05:57.567813: step: 1808/527, loss: 0.020600700750947 2023-01-23 04:05:58.686154: step: 1812/527, loss: 0.05712928622961044 2023-01-23 04:05:59.827825: step: 1816/527, loss: 0.03398609533905983 2023-01-23 04:06:00.926209: step: 1820/527, loss: 0.00028486252995207906 2023-01-23 04:06:02.044138: step: 1824/527, loss: 0.004505062010139227 2023-01-23 04:06:03.157596: step: 1828/527, loss: 0.0001009941115626134 2023-01-23 04:06:04.247423: step: 1832/527, loss: 1.4591216313419864e-05 2023-01-23 04:06:05.359156: step: 1836/527, loss: 0.00011768341209972277 2023-01-23 04:06:06.503179: step: 1840/527, loss: 0.0017599106067791581 2023-01-23 04:06:07.601679: step: 1844/527, loss: 0.009038162417709827 2023-01-23 04:06:08.734400: step: 1848/527, loss: 5.0258637202205136e-05 2023-01-23 04:06:09.828473: step: 1852/527, loss: 0.0001902580406749621 2023-01-23 04:06:10.956941: step: 1856/527, loss: 0.0007579803350381553 2023-01-23 04:06:12.058844: step: 1860/527, loss: 0.015275382436811924 2023-01-23 04:06:13.172156: step: 1864/527, loss: 0.0007547378772869706 2023-01-23 04:06:14.295666: step: 1868/527, loss: -9.5367431640625e-07 2023-01-23 04:06:15.402438: step: 1872/527, loss: 0.0003952980332542211 2023-01-23 04:06:16.529996: step: 1876/527, loss: 8.907318260753527e-05 2023-01-23 04:06:17.637086: step: 1880/527, loss: 0.008313274011015892 2023-01-23 04:06:18.752759: step: 1884/527, loss: 0.003382110735401511 2023-01-23 04:06:19.882545: step: 1888/527, loss: 0.6619184613227844 2023-01-23 04:06:20.976456: step: 1892/527, loss: 3.610849307733588e-05 2023-01-23 04:06:22.069503: step: 1896/527, loss: 0.0012271881569176912 2023-01-23 04:06:23.179486: step: 1900/527, loss: 0.00267200474627316 2023-01-23 04:06:24.308223: step: 1904/527, loss: 0.15486736595630646 2023-01-23 04:06:25.407530: step: 1908/527, loss: 0.0021168231032788754 2023-01-23 04:06:26.487535: step: 1912/527, loss: 0.00607223529368639 2023-01-23 04:06:27.585814: step: 1916/527, loss: 0.0002665519714355469 2023-01-23 04:06:28.683922: step: 1920/527, loss: 0.10925684869289398 2023-01-23 04:06:29.834774: step: 1924/527, loss: 5.9700014389818534e-05 2023-01-23 04:06:30.941952: step: 1928/527, loss: 3.6048892070539296e-05 2023-01-23 04:06:32.103672: step: 1932/527, loss: 0.0004403114435262978 2023-01-23 04:06:33.243404: step: 1936/527, loss: 0.00011177062697242945 2023-01-23 04:06:34.371068: step: 1940/527, loss: 0.017100144177675247 2023-01-23 04:06:35.496755: step: 1944/527, loss: 0.007148170378059149 2023-01-23 04:06:36.594708: step: 1948/527, loss: 6.67572021484375e-06 2023-01-23 04:06:37.704720: step: 1952/527, loss: 0.0005556106334552169 2023-01-23 04:06:38.820469: step: 1956/527, loss: 0.009818457998335361 2023-01-23 04:06:39.927896: step: 1960/527, loss: 0.07918872684240341 2023-01-23 04:06:41.056515: step: 1964/527, loss: 0.001754379365593195 2023-01-23 04:06:42.162898: step: 1968/527, loss: 0.0010378838051110506 2023-01-23 04:06:43.287444: step: 1972/527, loss: 0.017582321539521217 2023-01-23 04:06:44.398705: step: 1976/527, loss: 9.479522850597277e-05 2023-01-23 04:06:45.500587: step: 1980/527, loss: 8.535385131835938e-05 2023-01-23 04:06:46.602153: step: 1984/527, loss: 3.075600034208037e-05 2023-01-23 04:06:47.694733: step: 1988/527, loss: 4.0817263652570546e-05 2023-01-23 04:06:48.847765: step: 1992/527, loss: 0.0017286300426349044 2023-01-23 04:06:49.965633: step: 1996/527, loss: 0.00537528982385993 2023-01-23 04:06:51.090187: step: 2000/527, loss: 0.02022418938577175 2023-01-23 04:06:52.203875: step: 2004/527, loss: 0.00023727417283225805 2023-01-23 04:06:53.310074: step: 2008/527, loss: 9.822845458984375e-05 2023-01-23 04:06:54.449453: step: 2012/527, loss: 0.024771880358457565 2023-01-23 04:06:55.568111: step: 2016/527, loss: 4.2533876694506034e-05 2023-01-23 04:06:56.676355: step: 2020/527, loss: 0.0013359070289880037 2023-01-23 04:06:57.803134: step: 2024/527, loss: 0.0007856369484215975 2023-01-23 04:06:58.928177: step: 2028/527, loss: 0.005607319064438343 2023-01-23 04:07:00.024384: step: 2032/527, loss: 0.0003445625479798764 2023-01-23 04:07:01.164101: step: 2036/527, loss: 0.012133216485381126 2023-01-23 04:07:02.251408: step: 2040/527, loss: 0.0001442909415345639 2023-01-23 04:07:03.342561: step: 2044/527, loss: 0.01182627584785223 2023-01-23 04:07:04.434423: step: 2048/527, loss: 0.00011940002150367945 2023-01-23 04:07:05.556266: step: 2052/527, loss: 0.07088293880224228 2023-01-23 04:07:06.643101: step: 2056/527, loss: 0.0004131317255087197 2023-01-23 04:07:07.794477: step: 2060/527, loss: 0.000994873116724193 2023-01-23 04:07:08.926744: step: 2064/527, loss: 9.822845458984375e-05 2023-01-23 04:07:10.048878: step: 2068/527, loss: 0.002620220184326172 2023-01-23 04:07:11.166567: step: 2072/527, loss: 0.00023956298537086695 2023-01-23 04:07:12.282565: step: 2076/527, loss: 0.09332828223705292 2023-01-23 04:07:13.389660: step: 2080/527, loss: 0.0019372940296307206 2023-01-23 04:07:14.527390: step: 2084/527, loss: 0.00027756692725233734 2023-01-23 04:07:15.637950: step: 2088/527, loss: 0.0053806304931640625 2023-01-23 04:07:16.734132: step: 2092/527, loss: 2.09808349609375e-05 2023-01-23 04:07:17.894966: step: 2096/527, loss: 0.0071763996966183186 2023-01-23 04:07:18.992899: step: 2100/527, loss: 0.04844836890697479 2023-01-23 04:07:20.119073: step: 2104/527, loss: 5.0973892939509824e-05 2023-01-23 04:07:21.273629: step: 2108/527, loss: 0.00014972686767578125 ================================================== Loss: 0.018 -------------------- Dev: {'event': {'p': 0.5886524822695035, 'r': 0.7736351531291611, 'f1': 0.668584579976985}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Test: {'event': {'p': 0.6139184397163121, 'r': 0.7914285714285715, 'f1': 0.691462805791313}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Chinese: {'event': {'p': 0.5393258426966292, 'r': 0.8888888888888888, 'f1': 0.6713286713286712}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Korean: {'event': {'p': 0.6101694915254238, 'r': 0.5714285714285714, 'f1': 0.5901639344262296}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Russian: {'event': {'p': 0.4864864864864865, 'r': 0.5, 'f1': 0.4931506849315069}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6063157894736843, 'r': 0.7669773635153129, 'f1': 0.6772486772486773}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Eng Test for Korean: {'event': {'p': 0.6427238805970149, 'r': 0.7874285714285715, 'f1': 0.7077555213148434}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Korean: {'event': {'p': 0.6792452830188679, 'r': 0.5714285714285714, 'f1': 0.6206896551724137}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 27 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 04:08:02.942951: step: 4/527, loss: 0.04659261927008629 2023-01-23 04:08:04.059106: step: 8/527, loss: 0.00010786057100631297 2023-01-23 04:08:05.169616: step: 12/527, loss: 0.007655430119484663 2023-01-23 04:08:06.314731: step: 16/527, loss: 0.004889965057373047 2023-01-23 04:08:07.420938: step: 20/527, loss: 0.000278282182989642 2023-01-23 04:08:08.531947: step: 24/527, loss: 0.01794900745153427 2023-01-23 04:08:09.643944: step: 28/527, loss: 0.0062577249482274055 2023-01-23 04:08:10.773642: step: 32/527, loss: 0.0012624741066247225 2023-01-23 04:08:11.902755: step: 36/527, loss: 3.910064697265625e-05 2023-01-23 04:08:13.033157: step: 40/527, loss: 0.0017003059620037675 2023-01-23 04:08:14.140459: step: 44/527, loss: 0.004430055618286133 2023-01-23 04:08:15.241081: step: 48/527, loss: 0.007754278369247913 2023-01-23 04:08:16.357583: step: 52/527, loss: 0.0007696151733398438 2023-01-23 04:08:17.480160: step: 56/527, loss: 0.019739866256713867 2023-01-23 04:08:18.602876: step: 60/527, loss: 0.058429960161447525 2023-01-23 04:08:19.748963: step: 64/527, loss: 7.514953904319555e-05 2023-01-23 04:08:20.852218: step: 68/527, loss: 0.010947608388960361 2023-01-23 04:08:22.008608: step: 72/527, loss: 0.008004666306078434 2023-01-23 04:08:23.115679: step: 76/527, loss: 3.3092499506892636e-05 2023-01-23 04:08:24.255672: step: 80/527, loss: 9.984970529330894e-05 2023-01-23 04:08:25.351231: step: 84/527, loss: 0.002942848252132535 2023-01-23 04:08:26.473471: step: 88/527, loss: 0.01419687271118164 2023-01-23 04:08:27.603020: step: 92/527, loss: 0.009990310296416283 2023-01-23 04:08:28.730360: step: 96/527, loss: 0.002063322113826871 2023-01-23 04:08:29.832879: step: 100/527, loss: 8.130073547363281e-05 2023-01-23 04:08:30.953417: step: 104/527, loss: -1.5068053471622989e-05 2023-01-23 04:08:32.032029: step: 108/527, loss: 0.018062258139252663 2023-01-23 04:08:33.139243: step: 112/527, loss: 0.027595043182373047 2023-01-23 04:08:34.240047: step: 116/527, loss: 0.00021009445481467992 2023-01-23 04:08:35.350266: step: 120/527, loss: 0.0014997005928307772 2023-01-23 04:08:36.445064: step: 124/527, loss: 3.0040739602554822e-06 2023-01-23 04:08:37.566708: step: 128/527, loss: 0.002384185791015625 2023-01-23 04:08:38.700027: step: 132/527, loss: 0.0009252548334188759 2023-01-23 04:08:39.796607: step: 136/527, loss: 0.00022974015155341476 2023-01-23 04:08:40.889144: step: 140/527, loss: 0.000465202349005267 2023-01-23 04:08:41.996809: step: 144/527, loss: 3.910064697265625e-05 2023-01-23 04:08:43.141894: step: 148/527, loss: 9.72747784544481e-06 2023-01-23 04:08:44.249855: step: 152/527, loss: 0.0008228778606280684 2023-01-23 04:08:45.343010: step: 156/527, loss: 0.01465139351785183 2023-01-23 04:08:46.458485: step: 160/527, loss: 3.1089784897631034e-05 2023-01-23 04:08:47.572218: step: 164/527, loss: 0.06945057213306427 2023-01-23 04:08:48.704434: step: 168/527, loss: 1.5163422176556196e-05 2023-01-23 04:08:49.814924: step: 172/527, loss: 0.03608589246869087 2023-01-23 04:08:50.936145: step: 176/527, loss: 7.486343383789062e-05 2023-01-23 04:08:52.077065: step: 180/527, loss: 0.00010023116919910535 2023-01-23 04:08:53.183171: step: 184/527, loss: 0.011927126906812191 2023-01-23 04:08:54.271045: step: 188/527, loss: 0.051439668983221054 2023-01-23 04:08:55.402618: step: 192/527, loss: 0.021615982055664062 2023-01-23 04:08:56.528626: step: 196/527, loss: 0.009925270453095436 2023-01-23 04:08:57.628891: step: 200/527, loss: 0.03155670315027237 2023-01-23 04:08:58.776144: step: 204/527, loss: 0.00040836335392668843 2023-01-23 04:08:59.905571: step: 208/527, loss: 8.95500197657384e-05 2023-01-23 04:09:01.008322: step: 212/527, loss: 0.0009998321766033769 2023-01-23 04:09:02.123649: step: 216/527, loss: 0.017232513055205345 2023-01-23 04:09:03.260833: step: 220/527, loss: 6.971359835006297e-05 2023-01-23 04:09:04.344303: step: 224/527, loss: 0.0012765884166583419 2023-01-23 04:09:05.448891: step: 228/527, loss: 0.0014699936145916581 2023-01-23 04:09:06.600761: step: 232/527, loss: 0.007521629333496094 2023-01-23 04:09:07.712739: step: 236/527, loss: 0.008309364318847656 2023-01-23 04:09:08.829301: step: 240/527, loss: 0.00019435881404206157 2023-01-23 04:09:09.953461: step: 244/527, loss: 0.027112485840916634 2023-01-23 04:09:11.083045: step: 248/527, loss: 0.0009975910652428865 2023-01-23 04:09:12.176221: step: 252/527, loss: 0.007272863294929266 2023-01-23 04:09:13.340344: step: 256/527, loss: 0.08685588836669922 2023-01-23 04:09:14.448432: step: 260/527, loss: 0.0005687713855877519 2023-01-23 04:09:15.534513: step: 264/527, loss: 2.241134825453628e-05 2023-01-23 04:09:16.628859: step: 268/527, loss: 0.0033886912278831005 2023-01-23 04:09:17.758971: step: 272/527, loss: 0.0006528854137286544 2023-01-23 04:09:18.862195: step: 276/527, loss: 2.555847095209174e-05 2023-01-23 04:09:19.968504: step: 280/527, loss: -4.0531085687689483e-07 2023-01-23 04:09:21.069692: step: 284/527, loss: 5.5313107623078395e-06 2023-01-23 04:09:22.178295: step: 288/527, loss: 0.0001583099365234375 2023-01-23 04:09:23.297587: step: 292/527, loss: 0.010297585278749466 2023-01-23 04:09:24.413007: step: 296/527, loss: 0.0018918036948889494 2023-01-23 04:09:25.506234: step: 300/527, loss: 0.00018196106248069555 2023-01-23 04:09:26.626456: step: 304/527, loss: 0.03812684863805771 2023-01-23 04:09:27.754670: step: 308/527, loss: 0.008020401000976562 2023-01-23 04:09:28.881549: step: 312/527, loss: 0.00012130737013649195 2023-01-23 04:09:29.991452: step: 316/527, loss: 0.0010629653697833419 2023-01-23 04:09:31.139916: step: 320/527, loss: 0.0002644539054017514 2023-01-23 04:09:32.273405: step: 324/527, loss: 0.14924278855323792 2023-01-23 04:09:33.376627: step: 328/527, loss: 0.0007692337385378778 2023-01-23 04:09:34.472432: step: 332/527, loss: 0.002264023059979081 2023-01-23 04:09:35.575744: step: 336/527, loss: 0.009740781970322132 2023-01-23 04:09:36.697588: step: 340/527, loss: 4.100799742445815e-06 2023-01-23 04:09:37.815696: step: 344/527, loss: 3.833770824712701e-05 2023-01-23 04:09:38.946319: step: 348/527, loss: 0.14693059027194977 2023-01-23 04:09:40.072959: step: 352/527, loss: 1.09672544112982e-06 2023-01-23 04:09:41.191948: step: 356/527, loss: 6.67572021484375e-06 2023-01-23 04:09:42.315444: step: 360/527, loss: 0.0019189835293218493 2023-01-23 04:09:43.435350: step: 364/527, loss: 0.0016914368607103825 2023-01-23 04:09:44.560119: step: 368/527, loss: -1.716613724056515e-06 2023-01-23 04:09:45.696863: step: 372/527, loss: 0.017615128308534622 2023-01-23 04:09:46.797727: step: 376/527, loss: 0.0002153396635549143 2023-01-23 04:09:47.963096: step: 380/527, loss: 0.0014480591053143144 2023-01-23 04:09:49.058859: step: 384/527, loss: 0.00011329651169944555 2023-01-23 04:09:50.166234: step: 388/527, loss: 0.006583022885024548 2023-01-23 04:09:51.310625: step: 392/527, loss: 0.02748594433069229 2023-01-23 04:09:52.410971: step: 396/527, loss: 0.027559852227568626 2023-01-23 04:09:53.540384: step: 400/527, loss: 7.410049147438258e-05 2023-01-23 04:09:54.645134: step: 404/527, loss: 0.005669021513313055 2023-01-23 04:09:55.778626: step: 408/527, loss: 0.00027675629826262593 2023-01-23 04:09:56.893393: step: 412/527, loss: 1.621246337890625e-05 2023-01-23 04:09:57.986997: step: 416/527, loss: -3.623962356869015e-06 2023-01-23 04:09:59.062058: step: 420/527, loss: 0.026864241808652878 2023-01-23 04:10:00.169279: step: 424/527, loss: 0.030648043379187584 2023-01-23 04:10:01.269721: step: 428/527, loss: 0.00031280517578125 2023-01-23 04:10:02.373545: step: 432/527, loss: 0.00034165382385253906 2023-01-23 04:10:03.499524: step: 436/527, loss: 2.765655517578125e-05 2023-01-23 04:10:04.628039: step: 440/527, loss: 0.0003564834769349545 2023-01-23 04:10:05.756166: step: 444/527, loss: 0.001743316650390625 2023-01-23 04:10:06.920567: step: 448/527, loss: 0.0004419327015057206 2023-01-23 04:10:08.060965: step: 452/527, loss: 0.0009684563265182078 2023-01-23 04:10:09.173583: step: 456/527, loss: 0.011153602041304111 2023-01-23 04:10:10.256926: step: 460/527, loss: 0.039178136736154556 2023-01-23 04:10:11.379148: step: 464/527, loss: 0.004218864254653454 2023-01-23 04:10:12.461359: step: 468/527, loss: 4.596710277837701e-05 2023-01-23 04:10:13.578777: step: 472/527, loss: 0.4207834303379059 2023-01-23 04:10:14.712354: step: 476/527, loss: 2.7513504392118193e-05 2023-01-23 04:10:15.856755: step: 480/527, loss: 0.005084228236228228 2023-01-23 04:10:16.982786: step: 484/527, loss: 0.030003929510712624 2023-01-23 04:10:18.129372: step: 488/527, loss: 3.9768219721736386e-05 2023-01-23 04:10:19.252245: step: 492/527, loss: 7.343292054429185e-06 2023-01-23 04:10:20.370103: step: 496/527, loss: 0.010692168027162552 2023-01-23 04:10:21.458126: step: 500/527, loss: 0.0036265135277062654 2023-01-23 04:10:22.566553: step: 504/527, loss: 0.001107549760490656 2023-01-23 04:10:23.674867: step: 508/527, loss: 7.53879503463395e-05 2023-01-23 04:10:24.754467: step: 512/527, loss: 0.008500195108354092 2023-01-23 04:10:25.840010: step: 516/527, loss: 9.975433204090223e-05 2023-01-23 04:10:26.931771: step: 520/527, loss: 0.0020113945938646793 2023-01-23 04:10:28.058758: step: 524/527, loss: 0.0007612229092046618 2023-01-23 04:10:29.172649: step: 528/527, loss: 0.007023429963737726 2023-01-23 04:10:30.329009: step: 532/527, loss: 0.03042755089700222 2023-01-23 04:10:31.452998: step: 536/527, loss: 0.00017652510723564774 2023-01-23 04:10:32.548638: step: 540/527, loss: 1.088216781616211 2023-01-23 04:10:33.667354: step: 544/527, loss: 0.0007103919633664191 2023-01-23 04:10:34.813098: step: 548/527, loss: 0.036032866686582565 2023-01-23 04:10:35.942731: step: 552/527, loss: 3.833770824712701e-05 2023-01-23 04:10:37.082084: step: 556/527, loss: 1.4400482541532256e-05 2023-01-23 04:10:38.213702: step: 560/527, loss: 0.013247203081846237 2023-01-23 04:10:39.323218: step: 564/527, loss: 2.956390289909905e-06 2023-01-23 04:10:40.422762: step: 568/527, loss: 0.00011405945406295359 2023-01-23 04:10:41.513873: step: 572/527, loss: 0.0040070535615086555 2023-01-23 04:10:42.633613: step: 576/527, loss: 0.018068790435791016 2023-01-23 04:10:43.750318: step: 580/527, loss: 0.0018008708721026778 2023-01-23 04:10:44.854675: step: 584/527, loss: 2.47955322265625e-05 2023-01-23 04:10:45.947842: step: 588/527, loss: 0.0020657540298998356 2023-01-23 04:10:47.080342: step: 592/527, loss: 0.14208821952342987 2023-01-23 04:10:48.171148: step: 596/527, loss: 1.220703143189894e-05 2023-01-23 04:10:49.306404: step: 600/527, loss: 0.005465984344482422 2023-01-23 04:10:50.440421: step: 604/527, loss: 0.0006066322093829513 2023-01-23 04:10:51.550423: step: 608/527, loss: 0.0057464600540697575 2023-01-23 04:10:52.692673: step: 612/527, loss: 0.0015774727798998356 2023-01-23 04:10:53.815349: step: 616/527, loss: 0.002112960908561945 2023-01-23 04:10:54.901893: step: 620/527, loss: 1.678466833254788e-05 2023-01-23 04:10:56.016210: step: 624/527, loss: 0.006539439782500267 2023-01-23 04:10:57.110988: step: 628/527, loss: 0.0017723083728924394 2023-01-23 04:10:58.222736: step: 632/527, loss: 0.0009443283197470009 2023-01-23 04:10:59.336588: step: 636/527, loss: 2.1839141481905244e-05 2023-01-23 04:11:00.450143: step: 640/527, loss: 0.004162025637924671 2023-01-23 04:11:01.530086: step: 644/527, loss: 0.0003446579212322831 2023-01-23 04:11:02.656586: step: 648/527, loss: 0.0057426453568041325 2023-01-23 04:11:03.790098: step: 652/527, loss: 0.011994934640824795 2023-01-23 04:11:04.913476: step: 656/527, loss: 0.00012216568575240672 2023-01-23 04:11:06.067021: step: 660/527, loss: 1.9073486328125e-06 2023-01-23 04:11:07.174816: step: 664/527, loss: -1.9073486328125e-06 2023-01-23 04:11:08.369010: step: 668/527, loss: 2.536773718020413e-05 2023-01-23 04:11:09.489438: step: 672/527, loss: 0.01849053055047989 2023-01-23 04:11:10.640826: step: 676/527, loss: 1.9073486328125e-05 2023-01-23 04:11:11.753038: step: 680/527, loss: 1.8024444216280244e-05 2023-01-23 04:11:12.865997: step: 684/527, loss: 0.0004604339774232358 2023-01-23 04:11:13.981631: step: 688/527, loss: 0.034531306475400925 2023-01-23 04:11:15.089162: step: 692/527, loss: 0.00037136077298782766 2023-01-23 04:11:16.234272: step: 696/527, loss: 0.0023142099380493164 2023-01-23 04:11:17.354984: step: 700/527, loss: 0.0022110939025878906 2023-01-23 04:11:18.445953: step: 704/527, loss: 7.62939453125e-05 2023-01-23 04:11:19.589428: step: 708/527, loss: 4.806518700206652e-05 2023-01-23 04:11:20.691265: step: 712/527, loss: 7.925033423816785e-05 2023-01-23 04:11:21.823185: step: 716/527, loss: 0.00011453629122115672 2023-01-23 04:11:22.959404: step: 720/527, loss: 0.0009848595364019275 2023-01-23 04:11:24.087162: step: 724/527, loss: 0.004429626744240522 2023-01-23 04:11:25.217321: step: 728/527, loss: 0.0004675865639001131 2023-01-23 04:11:26.319160: step: 732/527, loss: 0.02022247388958931 2023-01-23 04:11:27.421202: step: 736/527, loss: 0.0001201629638671875 2023-01-23 04:11:28.547397: step: 740/527, loss: 0.00289573660120368 2023-01-23 04:11:29.676386: step: 744/527, loss: 0.0012001037830486894 2023-01-23 04:11:30.809671: step: 748/527, loss: 0.002192688174545765 2023-01-23 04:11:31.925302: step: 752/527, loss: 6.322861008811742e-05 2023-01-23 04:11:33.045531: step: 756/527, loss: 0.012088966555893421 2023-01-23 04:11:34.165982: step: 760/527, loss: 0.326596200466156 2023-01-23 04:11:35.325571: step: 764/527, loss: 1.087188684323337e-05 2023-01-23 04:11:36.437266: step: 768/527, loss: 4.482269105210435e-06 2023-01-23 04:11:37.534147: step: 772/527, loss: 0.010147858411073685 2023-01-23 04:11:38.634597: step: 776/527, loss: 0.0014352798461914062 2023-01-23 04:11:39.749493: step: 780/527, loss: 1.201629675051663e-05 2023-01-23 04:11:40.853360: step: 784/527, loss: 5.722046125811175e-07 2023-01-23 04:11:41.965497: step: 788/527, loss: 0.0015343456761911511 2023-01-23 04:11:43.077357: step: 792/527, loss: 0.02372293546795845 2023-01-23 04:11:44.217203: step: 796/527, loss: 0.04090938717126846 2023-01-23 04:11:45.345542: step: 800/527, loss: 0.09455318748950958 2023-01-23 04:11:46.472741: step: 804/527, loss: 0.024419786408543587 2023-01-23 04:11:47.562241: step: 808/527, loss: 0.0001714706450002268 2023-01-23 04:11:48.663612: step: 812/527, loss: 0.0367090217769146 2023-01-23 04:11:49.784797: step: 816/527, loss: 0.0023056031204760075 2023-01-23 04:11:50.908875: step: 820/527, loss: 2.670288040462765e-06 2023-01-23 04:11:52.041189: step: 824/527, loss: 0.0015481949085369706 2023-01-23 04:11:53.116917: step: 828/527, loss: 0.002475929446518421 2023-01-23 04:11:54.254949: step: 832/527, loss: 0.0013447761302813888 2023-01-23 04:11:55.353263: step: 836/527, loss: 0.0020334243308752775 2023-01-23 04:11:56.439284: step: 840/527, loss: 0.002803945681080222 2023-01-23 04:11:57.538858: step: 844/527, loss: 0.04653206095099449 2023-01-23 04:11:58.637574: step: 848/527, loss: 1.8501283193472773e-05 2023-01-23 04:11:59.775588: step: 852/527, loss: 0.0213457103818655 2023-01-23 04:12:00.903476: step: 856/527, loss: 0.04106597974896431 2023-01-23 04:12:02.047352: step: 860/527, loss: 0.0006138801109045744 2023-01-23 04:12:03.143060: step: 864/527, loss: 0.005485725589096546 2023-01-23 04:12:04.246038: step: 868/527, loss: 0.0023593902587890625 2023-01-23 04:12:05.390779: step: 872/527, loss: 0.020300962030887604 2023-01-23 04:12:06.507076: step: 876/527, loss: 0.011713982559740543 2023-01-23 04:12:07.609060: step: 880/527, loss: 0.0007172584882937372 2023-01-23 04:12:08.700800: step: 884/527, loss: 2.918243444582913e-05 2023-01-23 04:12:09.826182: step: 888/527, loss: 0.05857863277196884 2023-01-23 04:12:10.935059: step: 892/527, loss: 0.0005195617559365928 2023-01-23 04:12:12.066997: step: 896/527, loss: 0.00010604858107399195 2023-01-23 04:12:13.191592: step: 900/527, loss: 8.087158494163305e-05 2023-01-23 04:12:14.306544: step: 904/527, loss: 0.00024223329091910273 2023-01-23 04:12:15.376337: step: 908/527, loss: 0.0002206802455475554 2023-01-23 04:12:16.503519: step: 912/527, loss: 0.009944559074938297 2023-01-23 04:12:17.629115: step: 916/527, loss: 0.01507263258099556 2023-01-23 04:12:18.776027: step: 920/527, loss: 0.05059022828936577 2023-01-23 04:12:19.914846: step: 924/527, loss: 0.00016579628572799265 2023-01-23 04:12:21.003684: step: 928/527, loss: 0.010430526919662952 2023-01-23 04:12:22.148398: step: 932/527, loss: 0.05597362667322159 2023-01-23 04:12:23.239316: step: 936/527, loss: 0.011325836181640625 2023-01-23 04:12:24.334775: step: 940/527, loss: 0.002430153079330921 2023-01-23 04:12:25.431429: step: 944/527, loss: 6.86645489622606e-06 2023-01-23 04:12:26.551777: step: 948/527, loss: 0.00010547637793933973 2023-01-23 04:12:27.678026: step: 952/527, loss: 0.06659820675849915 2023-01-23 04:12:28.787804: step: 956/527, loss: 0.003160381456837058 2023-01-23 04:12:29.903141: step: 960/527, loss: 0.00043816567631438375 2023-01-23 04:12:31.009805: step: 964/527, loss: 0.0023853301536291838 2023-01-23 04:12:32.143477: step: 968/527, loss: 0.01980781741440296 2023-01-23 04:12:33.222981: step: 972/527, loss: 0.022972488775849342 2023-01-23 04:12:34.306041: step: 976/527, loss: 0.0002794266038108617 2023-01-23 04:12:35.433072: step: 980/527, loss: 0.00012836456880904734 2023-01-23 04:12:36.541828: step: 984/527, loss: 0.0029923440888524055 2023-01-23 04:12:37.643741: step: 988/527, loss: 0.00010814666893566027 2023-01-23 04:12:38.767010: step: 992/527, loss: 0.0006361007690429688 2023-01-23 04:12:39.865182: step: 996/527, loss: 0.0024063109885901213 2023-01-23 04:12:40.965827: step: 1000/527, loss: 0.0034011839888989925 2023-01-23 04:12:42.104965: step: 1004/527, loss: 0.035730551928281784 2023-01-23 04:12:43.322519: step: 1008/527, loss: 0.0005638122675009072 2023-01-23 04:12:44.446908: step: 1012/527, loss: 1.010894811770413e-05 2023-01-23 04:12:45.567845: step: 1016/527, loss: 0.028648758307099342 2023-01-23 04:12:46.670481: step: 1020/527, loss: 1.7452241081628017e-05 2023-01-23 04:12:47.787960: step: 1024/527, loss: 0.0160935390740633 2023-01-23 04:12:48.907050: step: 1028/527, loss: 0.003731346223503351 2023-01-23 04:12:50.021694: step: 1032/527, loss: 0.38848456740379333 2023-01-23 04:12:51.131404: step: 1036/527, loss: 0.013659859076142311 2023-01-23 04:12:52.232050: step: 1040/527, loss: 0.0004177093505859375 2023-01-23 04:12:53.331741: step: 1044/527, loss: 0.0009197235340252519 2023-01-23 04:12:54.466212: step: 1048/527, loss: 0.02584228478372097 2023-01-23 04:12:55.583248: step: 1052/527, loss: 0.002713489346206188 2023-01-23 04:12:56.680498: step: 1056/527, loss: -2.117157055181451e-05 2023-01-23 04:12:57.783359: step: 1060/527, loss: 1.1992455256404355e-05 2023-01-23 04:12:58.858753: step: 1064/527, loss: 0.0023584368173033 2023-01-23 04:12:59.987019: step: 1068/527, loss: 0.0005010604509152472 2023-01-23 04:13:01.103978: step: 1072/527, loss: 0.009381484240293503 2023-01-23 04:13:02.210922: step: 1076/527, loss: 0.0001177787926280871 2023-01-23 04:13:03.333816: step: 1080/527, loss: 0.020886898040771484 2023-01-23 04:13:04.443104: step: 1084/527, loss: 0.007814550772309303 2023-01-23 04:13:05.543368: step: 1088/527, loss: 0.021423721686005592 2023-01-23 04:13:06.661423: step: 1092/527, loss: 0.0026113511994481087 2023-01-23 04:13:07.768069: step: 1096/527, loss: 0.0763712003827095 2023-01-23 04:13:08.901837: step: 1100/527, loss: 0.0005100250127725303 2023-01-23 04:13:10.024558: step: 1104/527, loss: 0.00018730164447333664 2023-01-23 04:13:11.152842: step: 1108/527, loss: 0.00315685267560184 2023-01-23 04:13:12.288454: step: 1112/527, loss: 0.00045299530029296875 2023-01-23 04:13:13.395371: step: 1116/527, loss: 0.2409219741821289 2023-01-23 04:13:14.517934: step: 1120/527, loss: 0.012544346041977406 2023-01-23 04:13:15.648129: step: 1124/527, loss: 0.0019412994151934981 2023-01-23 04:13:16.746907: step: 1128/527, loss: 5.531311398954131e-05 2023-01-23 04:13:17.864374: step: 1132/527, loss: 0.0935780480504036 2023-01-23 04:13:19.013847: step: 1136/527, loss: 0.006203460972756147 2023-01-23 04:13:20.094927: step: 1140/527, loss: 0.00879664532840252 2023-01-23 04:13:21.224416: step: 1144/527, loss: 0.07566747069358826 2023-01-23 04:13:22.372735: step: 1148/527, loss: 0.00037384036113508046 2023-01-23 04:13:23.505798: step: 1152/527, loss: 7.629394644936838e-07 2023-01-23 04:13:24.626617: step: 1156/527, loss: 0.0005529404152184725 2023-01-23 04:13:25.733345: step: 1160/527, loss: 0.005059623625129461 2023-01-23 04:13:26.860252: step: 1164/527, loss: 9.632110959501006e-06 2023-01-23 04:13:27.950668: step: 1168/527, loss: 0.00043125153752043843 2023-01-23 04:13:29.068782: step: 1172/527, loss: 0.003337860107421875 2023-01-23 04:13:30.204174: step: 1176/527, loss: 0.019627952948212624 2023-01-23 04:13:31.301853: step: 1180/527, loss: 0.01567087136209011 2023-01-23 04:13:32.401411: step: 1184/527, loss: 0.0031160356011241674 2023-01-23 04:13:33.524637: step: 1188/527, loss: 0.00018334388732910156 2023-01-23 04:13:34.643908: step: 1192/527, loss: 0.09085607528686523 2023-01-23 04:13:35.755474: step: 1196/527, loss: 0.0003784179862122983 2023-01-23 04:13:36.864862: step: 1200/527, loss: 0.00029811859712935984 2023-01-23 04:13:37.981220: step: 1204/527, loss: 0.0007266998873092234 2023-01-23 04:13:39.123017: step: 1208/527, loss: 0.000263023393927142 2023-01-23 04:13:40.237535: step: 1212/527, loss: 1.2111663636460435e-05 2023-01-23 04:13:41.381461: step: 1216/527, loss: 3.662109520519152e-05 2023-01-23 04:13:42.519251: step: 1220/527, loss: 0.001142740249633789 2023-01-23 04:13:43.629782: step: 1224/527, loss: 0.18275032937526703 2023-01-23 04:13:44.731411: step: 1228/527, loss: 0.002254486083984375 2023-01-23 04:13:45.842675: step: 1232/527, loss: 9.107588994083926e-06 2023-01-23 04:13:46.951854: step: 1236/527, loss: 0.0022802352905273438 2023-01-23 04:13:48.044307: step: 1240/527, loss: 0.0007385254139080644 2023-01-23 04:13:49.127175: step: 1244/527, loss: 0.0002507210010662675 2023-01-23 04:13:50.267512: step: 1248/527, loss: 0.018927669152617455 2023-01-23 04:13:51.412054: step: 1252/527, loss: 0.0023344040382653475 2023-01-23 04:13:52.546593: step: 1256/527, loss: 0.0031208039727061987 2023-01-23 04:13:53.659039: step: 1260/527, loss: 0.052913546562194824 2023-01-23 04:13:54.775673: step: 1264/527, loss: 5.006790615880163e-06 2023-01-23 04:13:55.905539: step: 1268/527, loss: 5.054473876953125e-05 2023-01-23 04:13:57.027072: step: 1272/527, loss: 0.00620956439524889 2023-01-23 04:13:58.165823: step: 1276/527, loss: 0.02844705618917942 2023-01-23 04:13:59.315551: step: 1280/527, loss: 0.00705032330006361 2023-01-23 04:14:00.394945: step: 1284/527, loss: 0.00817260704934597 2023-01-23 04:14:01.511071: step: 1288/527, loss: 0.0027587893418967724 2023-01-23 04:14:02.624912: step: 1292/527, loss: 0.0001911163271870464 2023-01-23 04:14:03.735462: step: 1296/527, loss: 0.003289127256721258 2023-01-23 04:14:04.840627: step: 1300/527, loss: 0.07440871745347977 2023-01-23 04:14:05.949571: step: 1304/527, loss: 0.0045642852783203125 2023-01-23 04:14:07.052745: step: 1308/527, loss: 7.743835885776207e-05 2023-01-23 04:14:08.162411: step: 1312/527, loss: 0.00011272430856479332 2023-01-23 04:14:09.300888: step: 1316/527, loss: 8.382798114325851e-05 2023-01-23 04:14:10.426277: step: 1320/527, loss: 0.0015691756270825863 2023-01-23 04:14:11.525332: step: 1324/527, loss: 0.0030309678986668587 2023-01-23 04:14:12.645367: step: 1328/527, loss: 0.00040874481783248484 2023-01-23 04:14:13.757440: step: 1332/527, loss: 0.00029354094294831157 2023-01-23 04:14:14.876198: step: 1336/527, loss: 0.005395126529037952 2023-01-23 04:14:15.984742: step: 1340/527, loss: 0.04798431321978569 2023-01-23 04:14:17.084341: step: 1344/527, loss: 3.52859501617786e-06 2023-01-23 04:14:18.203617: step: 1348/527, loss: 0.03640594705939293 2023-01-23 04:14:19.302387: step: 1352/527, loss: 1.1253356205997989e-05 2023-01-23 04:14:20.435363: step: 1356/527, loss: 0.009305287152528763 2023-01-23 04:14:21.523310: step: 1360/527, loss: 0.00022411346435546875 2023-01-23 04:14:22.657392: step: 1364/527, loss: 0.011685753241181374 2023-01-23 04:14:23.751872: step: 1368/527, loss: 0.0002561330911703408 2023-01-23 04:14:24.840055: step: 1372/527, loss: 0.00043125153752043843 2023-01-23 04:14:25.967308: step: 1376/527, loss: 0.0005689620738849044 2023-01-23 04:14:27.076067: step: 1380/527, loss: 7.4386593951203395e-06 2023-01-23 04:14:28.200815: step: 1384/527, loss: 0.004019069951027632 2023-01-23 04:14:29.326025: step: 1388/527, loss: 0.006441688630729914 2023-01-23 04:14:30.436497: step: 1392/527, loss: 1.71661376953125e-05 2023-01-23 04:14:31.570274: step: 1396/527, loss: 0.00011672973778331652 2023-01-23 04:14:32.660770: step: 1400/527, loss: 0.006808471865952015 2023-01-23 04:14:33.755815: step: 1404/527, loss: 0.017092324793338776 2023-01-23 04:14:34.858470: step: 1408/527, loss: 0.0018543243641033769 2023-01-23 04:14:35.952643: step: 1412/527, loss: 7.543564424850047e-05 2023-01-23 04:14:37.060907: step: 1416/527, loss: 0.018691254779696465 2023-01-23 04:14:38.183205: step: 1420/527, loss: 0.47512856125831604 2023-01-23 04:14:39.292069: step: 1424/527, loss: 3.366470627952367e-05 2023-01-23 04:14:40.394017: step: 1428/527, loss: 1.411438006471144e-05 2023-01-23 04:14:41.518470: step: 1432/527, loss: 7.24792471373803e-06 2023-01-23 04:14:42.626019: step: 1436/527, loss: 0.012579345144331455 2023-01-23 04:14:43.747824: step: 1440/527, loss: 0.00023937225341796875 2023-01-23 04:14:44.861212: step: 1444/527, loss: 0.03071027249097824 2023-01-23 04:14:45.966872: step: 1448/527, loss: 0.009813499636948109 2023-01-23 04:14:47.087684: step: 1452/527, loss: 0.03610806539654732 2023-01-23 04:14:48.218972: step: 1456/527, loss: 2.307891918462701e-05 2023-01-23 04:14:49.352783: step: 1460/527, loss: 0.01668224297463894 2023-01-23 04:14:50.472285: step: 1464/527, loss: 0.0004642486746888608 2023-01-23 04:14:51.561452: step: 1468/527, loss: 0.027607537806034088 2023-01-23 04:14:52.691131: step: 1472/527, loss: 0.012082099914550781 2023-01-23 04:14:53.788282: step: 1476/527, loss: 0.00813837070018053 2023-01-23 04:14:54.878509: step: 1480/527, loss: 0.0001066207914846018 2023-01-23 04:14:55.965222: step: 1484/527, loss: 2.870559910661541e-05 2023-01-23 04:14:57.083675: step: 1488/527, loss: 0.010634804144501686 2023-01-23 04:14:58.191078: step: 1492/527, loss: 0.018246840685606003 2023-01-23 04:14:59.293455: step: 1496/527, loss: 0.010386180132627487 2023-01-23 04:15:00.392197: step: 1500/527, loss: 0.028856659308075905 2023-01-23 04:15:01.495281: step: 1504/527, loss: 1.678466833254788e-05 2023-01-23 04:15:02.620239: step: 1508/527, loss: 0.020030593499541283 2023-01-23 04:15:03.722174: step: 1512/527, loss: 0.00045185087947174907 2023-01-23 04:15:04.861898: step: 1516/527, loss: 0.0005821228260174394 2023-01-23 04:15:05.970735: step: 1520/527, loss: 3.547668529790826e-05 2023-01-23 04:15:07.092577: step: 1524/527, loss: 0.0023143768776208162 2023-01-23 04:15:08.223480: step: 1528/527, loss: 0.021855641156435013 2023-01-23 04:15:09.327525: step: 1532/527, loss: 1.583099401614163e-05 2023-01-23 04:15:10.428348: step: 1536/527, loss: -1.9550323031580774e-06 2023-01-23 04:15:11.531558: step: 1540/527, loss: 0.0011399269569665194 2023-01-23 04:15:12.641361: step: 1544/527, loss: 0.0050754547119140625 2023-01-23 04:15:13.744182: step: 1548/527, loss: 0.0017654896946623921 2023-01-23 04:15:14.881776: step: 1552/527, loss: -2.3269651137525216e-05 2023-01-23 04:15:15.983461: step: 1556/527, loss: 7.314682443393394e-05 2023-01-23 04:15:17.072158: step: 1560/527, loss: 3.6239621294953395e-06 2023-01-23 04:15:18.184474: step: 1564/527, loss: 0.00015573501877952367 2023-01-23 04:15:19.315111: step: 1568/527, loss: 0.00044527053250931203 2023-01-23 04:15:20.420607: step: 1572/527, loss: 2.6709365844726562 2023-01-23 04:15:21.525378: step: 1576/527, loss: 0.002544450806453824 2023-01-23 04:15:22.667003: step: 1580/527, loss: 0.024519825354218483 2023-01-23 04:15:23.796431: step: 1584/527, loss: 0.0005123138544149697 2023-01-23 04:15:24.908432: step: 1588/527, loss: 0.0003116607549600303 2023-01-23 04:15:26.018567: step: 1592/527, loss: 0.018262101337313652 2023-01-23 04:15:27.144660: step: 1596/527, loss: 0.0007053375011309981 2023-01-23 04:15:28.285619: step: 1600/527, loss: 0.0007838249439373612 2023-01-23 04:15:29.407681: step: 1604/527, loss: 0.0001655578671488911 2023-01-23 04:15:30.507490: step: 1608/527, loss: 0.0026958466041833162 2023-01-23 04:15:31.606726: step: 1612/527, loss: 0.0035102847032248974 2023-01-23 04:15:32.737704: step: 1616/527, loss: 0.004958248697221279 2023-01-23 04:15:33.890123: step: 1620/527, loss: 0.004815483465790749 2023-01-23 04:15:34.999336: step: 1624/527, loss: 6.294250852079131e-06 2023-01-23 04:15:36.134489: step: 1628/527, loss: 4.816055661649443e-05 2023-01-23 04:15:37.221877: step: 1632/527, loss: 0.0018827916355803609 2023-01-23 04:15:38.345174: step: 1636/527, loss: 3.843307786155492e-05 2023-01-23 04:15:39.476825: step: 1640/527, loss: 0.0004249572812113911 2023-01-23 04:15:40.590320: step: 1644/527, loss: 0.0005807877168990672 2023-01-23 04:15:41.708526: step: 1648/527, loss: 0.10231515020132065 2023-01-23 04:15:42.798659: step: 1652/527, loss: 0.001157569931820035 2023-01-23 04:15:43.936489: step: 1656/527, loss: 0.002237987471744418 2023-01-23 04:15:45.048636: step: 1660/527, loss: 9.574890282237902e-05 2023-01-23 04:15:46.140762: step: 1664/527, loss: 5.197525297262473e-06 2023-01-23 04:15:47.234245: step: 1668/527, loss: 0.00015010833158157766 2023-01-23 04:15:48.380505: step: 1672/527, loss: 0.005751943681389093 2023-01-23 04:15:49.483578: step: 1676/527, loss: 4.329681542003527e-05 2023-01-23 04:15:50.640188: step: 1680/527, loss: 0.02666759490966797 2023-01-23 04:15:51.761017: step: 1684/527, loss: 3.0490757126244716e-05 2023-01-23 04:15:52.909761: step: 1688/527, loss: 0.058798789978027344 2023-01-23 04:15:54.007639: step: 1692/527, loss: 9.994507126975805e-05 2023-01-23 04:15:55.118898: step: 1696/527, loss: 8.20159948489163e-06 2023-01-23 04:15:56.246725: step: 1700/527, loss: 4.76837158203125e-05 2023-01-23 04:15:57.367118: step: 1704/527, loss: 0.34508514404296875 2023-01-23 04:15:58.471670: step: 1708/527, loss: 0.11628475040197372 2023-01-23 04:15:59.589461: step: 1712/527, loss: 0.0008120537386275828 2023-01-23 04:16:00.688370: step: 1716/527, loss: 0.0002593994140625 2023-01-23 04:16:01.802149: step: 1720/527, loss: 0.0031949998810887337 2023-01-23 04:16:02.908445: step: 1724/527, loss: 0.0020835876930505037 2023-01-23 04:16:04.026193: step: 1728/527, loss: 0.0005289077525958419 2023-01-23 04:16:05.131484: step: 1732/527, loss: 0.004260825924575329 2023-01-23 04:16:06.262309: step: 1736/527, loss: 0.016506386920809746 2023-01-23 04:16:07.392130: step: 1740/527, loss: 0.010212182998657227 2023-01-23 04:16:08.481805: step: 1744/527, loss: 0.00013117790513206273 2023-01-23 04:16:09.593194: step: 1748/527, loss: 0.009903526864945889 2023-01-23 04:16:10.705093: step: 1752/527, loss: 0.006413173396140337 2023-01-23 04:16:11.799260: step: 1756/527, loss: 0.00016367435455322266 2023-01-23 04:16:12.925337: step: 1760/527, loss: 0.0014652252430096269 2023-01-23 04:16:14.059506: step: 1764/527, loss: 0.0017039298545569181 2023-01-23 04:16:15.215098: step: 1768/527, loss: 0.00058832170907408 2023-01-23 04:16:16.322682: step: 1772/527, loss: 0.0031113626901060343 2023-01-23 04:16:17.409679: step: 1776/527, loss: 0.014738750644028187 2023-01-23 04:16:18.551858: step: 1780/527, loss: 0.006545448210090399 2023-01-23 04:16:19.641104: step: 1784/527, loss: 0.009801817126572132 2023-01-23 04:16:20.752484: step: 1788/527, loss: 0.001978302141651511 2023-01-23 04:16:21.873247: step: 1792/527, loss: 0.011224651709198952 2023-01-23 04:16:22.998403: step: 1796/527, loss: 0.01539001613855362 2023-01-23 04:16:24.091263: step: 1800/527, loss: 0.0004193306085653603 2023-01-23 04:16:25.205043: step: 1804/527, loss: 0.0024080276489257812 2023-01-23 04:16:26.325709: step: 1808/527, loss: 0.23451752960681915 2023-01-23 04:16:27.421452: step: 1812/527, loss: 0.002439308213070035 2023-01-23 04:16:28.533619: step: 1816/527, loss: 0.13236045837402344 2023-01-23 04:16:29.643291: step: 1820/527, loss: 0.051760233938694 2023-01-23 04:16:30.746082: step: 1824/527, loss: -3.8146970382513246e-07 2023-01-23 04:16:31.857433: step: 1828/527, loss: 0.0007528305286541581 2023-01-23 04:16:32.948689: step: 1832/527, loss: 0.026163483038544655 2023-01-23 04:16:34.085819: step: 1836/527, loss: 5.6838991440599784e-05 2023-01-23 04:16:35.218527: step: 1840/527, loss: 0.0010646820301190019 2023-01-23 04:16:36.331903: step: 1844/527, loss: 0.0003292083565611392 2023-01-23 04:16:37.437631: step: 1848/527, loss: 0.010122299194335938 2023-01-23 04:16:38.567107: step: 1852/527, loss: 0.15897493064403534 2023-01-23 04:16:39.697366: step: 1856/527, loss: 0.08352112770080566 2023-01-23 04:16:40.820380: step: 1860/527, loss: 0.04018288105726242 2023-01-23 04:16:41.902666: step: 1864/527, loss: 2.2220612663659267e-05 2023-01-23 04:16:43.004999: step: 1868/527, loss: 0.002647113986313343 2023-01-23 04:16:44.096654: step: 1872/527, loss: 0.0006129264947958291 2023-01-23 04:16:45.216115: step: 1876/527, loss: 0.0002506256278138608 2023-01-23 04:16:46.363015: step: 1880/527, loss: 0.00805673561990261 2023-01-23 04:16:47.486898: step: 1884/527, loss: -2.6702882678364404e-06 2023-01-23 04:16:48.585704: step: 1888/527, loss: 2.365112231927924e-05 2023-01-23 04:16:49.725201: step: 1892/527, loss: 0.03178234398365021 2023-01-23 04:16:50.818214: step: 1896/527, loss: 0.0005556106334552169 2023-01-23 04:16:51.911475: step: 1900/527, loss: 0.00022506712411995977 2023-01-23 04:16:53.064997: step: 1904/527, loss: 0.00249404925853014 2023-01-23 04:16:54.161635: step: 1908/527, loss: 0.0011820793151855469 2023-01-23 04:16:55.306936: step: 1912/527, loss: 0.00015459061251021922 2023-01-23 04:16:56.408506: step: 1916/527, loss: 0.00032720566377975047 2023-01-23 04:16:57.541577: step: 1920/527, loss: 0.00040073395939543843 2023-01-23 04:16:58.660253: step: 1924/527, loss: 0.00025081634521484375 2023-01-23 04:16:59.770912: step: 1928/527, loss: 0.0005310058477334678 2023-01-23 04:17:00.856262: step: 1932/527, loss: 0.00023217200941871852 2023-01-23 04:17:01.983325: step: 1936/527, loss: 0.009486199356615543 2023-01-23 04:17:03.108684: step: 1940/527, loss: 7.057190487103071e-06 2023-01-23 04:17:04.218884: step: 1944/527, loss: 0.00887689646333456 2023-01-23 04:17:05.309062: step: 1948/527, loss: 0.0026689530350267887 2023-01-23 04:17:06.422990: step: 1952/527, loss: 0.0011229516239836812 2023-01-23 04:17:07.548294: step: 1956/527, loss: 0.01138229388743639 2023-01-23 04:17:08.648506: step: 1960/527, loss: 0.42775943875312805 2023-01-23 04:17:09.761854: step: 1964/527, loss: 6.341934204101562e-05 2023-01-23 04:17:10.874481: step: 1968/527, loss: 0.0005316734313964844 2023-01-23 04:17:11.995500: step: 1972/527, loss: 0.01154871005564928 2023-01-23 04:17:13.117896: step: 1976/527, loss: 0.0087296012789011 2023-01-23 04:17:14.226216: step: 1980/527, loss: 0.00046939851017668843 2023-01-23 04:17:15.371626: step: 1984/527, loss: 0.0003887176571879536 2023-01-23 04:17:16.533519: step: 1988/527, loss: 0.0837644562125206 2023-01-23 04:17:17.720367: step: 1992/527, loss: 0.01765899732708931 2023-01-23 04:17:18.837542: step: 1996/527, loss: 0.1999751180410385 2023-01-23 04:17:19.962777: step: 2000/527, loss: 0.006393528077751398 2023-01-23 04:17:21.093276: step: 2004/527, loss: 5.798339770990424e-05 2023-01-23 04:17:22.196963: step: 2008/527, loss: 0.0020574568770825863 2023-01-23 04:17:23.277620: step: 2012/527, loss: 0.00037631989107467234 2023-01-23 04:17:24.408727: step: 2016/527, loss: 0.01455764751881361 2023-01-23 04:17:25.500405: step: 2020/527, loss: 6.961822691664565e-06 2023-01-23 04:17:26.607201: step: 2024/527, loss: 0.019783783704042435 2023-01-23 04:17:27.715987: step: 2028/527, loss: 0.007009411230683327 2023-01-23 04:17:28.812664: step: 2032/527, loss: 2.713203321036417e-05 2023-01-23 04:17:29.909283: step: 2036/527, loss: 0.004231167491525412 2023-01-23 04:17:31.036435: step: 2040/527, loss: 0.003732299664989114 2023-01-23 04:17:32.156582: step: 2044/527, loss: 0.010844802483916283 2023-01-23 04:17:33.244440: step: 2048/527, loss: -7.05719003235572e-06 2023-01-23 04:17:34.346812: step: 2052/527, loss: 0.045352935791015625 2023-01-23 04:17:35.453070: step: 2056/527, loss: 6.580352783203125e-05 2023-01-23 04:17:36.562784: step: 2060/527, loss: 0.10742244869470596 2023-01-23 04:17:37.707231: step: 2064/527, loss: 0.0011796951293945312 2023-01-23 04:17:38.806310: step: 2068/527, loss: 0.001395416329614818 2023-01-23 04:17:39.902404: step: 2072/527, loss: 0.001425325870513916 2023-01-23 04:17:41.033121: step: 2076/527, loss: 0.019658468663692474 2023-01-23 04:17:42.138857: step: 2080/527, loss: 0.0008678436279296875 2023-01-23 04:17:43.230933: step: 2084/527, loss: 0.0005279540782794356 2023-01-23 04:17:44.341487: step: 2088/527, loss: 0.007942581549286842 2023-01-23 04:17:45.441581: step: 2092/527, loss: 0.03498439863324165 2023-01-23 04:17:46.573552: step: 2096/527, loss: 0.0013710500206798315 2023-01-23 04:17:47.679183: step: 2100/527, loss: 0.004145812708884478 2023-01-23 04:17:48.809609: step: 2104/527, loss: 0.024820709601044655 2023-01-23 04:17:49.924713: step: 2108/527, loss: 2.95162208203692e-05 ================================================== Loss: 0.023 -------------------- Dev: {'event': {'p': 0.5995785036880927, 'r': 0.7576564580559254, 'f1': 0.6694117647058823}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Test: {'event': {'p': 0.6422574626865671, 'r': 0.7868571428571428, 'f1': 0.707241910631741}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Chinese: {'event': {'p': 0.5714285714285714, 'r': 0.8148148148148148, 'f1': 0.6717557251908397}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Korean: {'event': {'p': 0.6363636363636364, 'r': 0.5555555555555556, 'f1': 0.5932203389830508}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Russian: {'event': {'p': 0.5405405405405406, 'r': 0.5555555555555556, 'f1': 0.547945205479452}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6063157894736843, 'r': 0.7669773635153129, 'f1': 0.6772486772486773}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Eng Test for Korean: {'event': {'p': 0.6427238805970149, 'r': 0.7874285714285715, 'f1': 0.7077555213148434}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Korean: {'event': {'p': 0.6792452830188679, 'r': 0.5714285714285714, 'f1': 0.6206896551724137}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 28 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 04:18:31.546464: step: 4/527, loss: 0.04735551029443741 2023-01-23 04:18:32.668295: step: 8/527, loss: 0.010111046023666859 2023-01-23 04:18:33.757480: step: 12/527, loss: 0.00013046263484284282 2023-01-23 04:18:34.860931: step: 16/527, loss: 0.0005805015680380166 2023-01-23 04:18:35.960731: step: 20/527, loss: 0.00017433168250136077 2023-01-23 04:18:37.075780: step: 24/527, loss: 0.006561470218002796 2023-01-23 04:18:38.179876: step: 28/527, loss: 0.001269531319849193 2023-01-23 04:18:39.304970: step: 32/527, loss: 0.0016639232635498047 2023-01-23 04:18:40.447854: step: 36/527, loss: 0.0001375198335153982 2023-01-23 04:18:41.560817: step: 40/527, loss: 0.0001443863002350554 2023-01-23 04:18:42.676439: step: 44/527, loss: 0.012665940448641777 2023-01-23 04:18:43.785935: step: 48/527, loss: 0.012413167394697666 2023-01-23 04:18:44.860798: step: 52/527, loss: 4.529953366727568e-05 2023-01-23 04:18:45.990226: step: 56/527, loss: 0.000675201416015625 2023-01-23 04:18:47.117294: step: 60/527, loss: 0.0002800941583700478 2023-01-23 04:18:48.252912: step: 64/527, loss: 0.00045499802217818797 2023-01-23 04:18:49.384437: step: 68/527, loss: 0.002941703889518976 2023-01-23 04:18:50.483332: step: 72/527, loss: 0.03275804594159126 2023-01-23 04:18:51.593585: step: 76/527, loss: 0.00012168884859420359 2023-01-23 04:18:52.714372: step: 80/527, loss: 9.822845458984375e-05 2023-01-23 04:18:53.812172: step: 84/527, loss: 0.0013421059120446444 2023-01-23 04:18:54.923856: step: 88/527, loss: 0.0009275436750613153 2023-01-23 04:18:56.055707: step: 92/527, loss: 0.07247848808765411 2023-01-23 04:18:57.186674: step: 96/527, loss: 0.0656684935092926 2023-01-23 04:18:58.321519: step: 100/527, loss: 0.025267362594604492 2023-01-23 04:18:59.437906: step: 104/527, loss: 7.805824861861765e-05 2023-01-23 04:19:00.574707: step: 108/527, loss: 0.0002262592315673828 2023-01-23 04:19:01.663492: step: 112/527, loss: 7.724761962890625e-05 2023-01-23 04:19:02.747799: step: 116/527, loss: 0.005851077847182751 2023-01-23 04:19:03.870879: step: 120/527, loss: 0.1161341741681099 2023-01-23 04:19:04.970906: step: 124/527, loss: 7.686614844715223e-05 2023-01-23 04:19:06.072459: step: 128/527, loss: 4.76837158203125e-05 2023-01-23 04:19:07.194619: step: 132/527, loss: 0.0004421234189067036 2023-01-23 04:19:08.313253: step: 136/527, loss: 0.013034629635512829 2023-01-23 04:19:09.427679: step: 140/527, loss: 0.0051322937943041325 2023-01-23 04:19:10.543763: step: 144/527, loss: 0.00010607243166305125 2023-01-23 04:19:11.658285: step: 148/527, loss: 0.0015447615878656507 2023-01-23 04:19:12.764326: step: 152/527, loss: -3.910065061063506e-06 2023-01-23 04:19:13.855586: step: 156/527, loss: 0.017389679327607155 2023-01-23 04:19:14.991292: step: 160/527, loss: 0.0005512237548828125 2023-01-23 04:19:16.096824: step: 164/527, loss: 0.023341991007328033 2023-01-23 04:19:17.203938: step: 168/527, loss: 0.0038568496238440275 2023-01-23 04:19:18.322265: step: 172/527, loss: 1.4400482541532256e-05 2023-01-23 04:19:19.469609: step: 176/527, loss: 0.0002906799491029233 2023-01-23 04:19:20.586879: step: 180/527, loss: 0.4488166868686676 2023-01-23 04:19:21.693955: step: 184/527, loss: 2.6702882678364404e-06 2023-01-23 04:19:22.780376: step: 188/527, loss: 9.965896424546372e-06 2023-01-23 04:19:23.939097: step: 192/527, loss: 3.06129441014491e-05 2023-01-23 04:19:25.076867: step: 196/527, loss: 0.0010047913528978825 2023-01-23 04:19:26.181567: step: 200/527, loss: 0.0002020835963776335 2023-01-23 04:19:27.280404: step: 204/527, loss: 0.0021049499046057463 2023-01-23 04:19:28.401249: step: 208/527, loss: 0.00012683868408203125 2023-01-23 04:19:29.511317: step: 212/527, loss: 0.004429721739143133 2023-01-23 04:19:30.648448: step: 216/527, loss: 0.009278679266571999 2023-01-23 04:19:31.762668: step: 220/527, loss: 0.0005752563592977822 2023-01-23 04:19:32.859624: step: 224/527, loss: 0.02311573177576065 2023-01-23 04:19:33.972876: step: 228/527, loss: 8.544922457076609e-05 2023-01-23 04:19:35.074596: step: 232/527, loss: 0.006171036045998335 2023-01-23 04:19:36.214924: step: 236/527, loss: 0.0009214401361532509 2023-01-23 04:19:37.333500: step: 240/527, loss: 1.945495569088962e-05 2023-01-23 04:19:38.423660: step: 244/527, loss: 0.001588541199453175 2023-01-23 04:19:39.511910: step: 248/527, loss: 9.012222290039062e-05 2023-01-23 04:19:40.616838: step: 252/527, loss: 0.00021457672119140625 2023-01-23 04:19:41.731546: step: 256/527, loss: 6.4849853515625e-05 2023-01-23 04:19:42.855288: step: 260/527, loss: 9.040832810569555e-05 2023-01-23 04:19:43.943353: step: 264/527, loss: 6.85691848048009e-05 2023-01-23 04:19:45.049057: step: 268/527, loss: 0.0003631591971497983 2023-01-23 04:19:46.189880: step: 272/527, loss: 0.0002639770391397178 2023-01-23 04:19:47.299887: step: 276/527, loss: 4.472732689464465e-05 2023-01-23 04:19:48.384160: step: 280/527, loss: 0.00015144348435569555 2023-01-23 04:19:49.489254: step: 284/527, loss: 0.06241626664996147 2023-01-23 04:19:50.597224: step: 288/527, loss: 1.487731969973538e-05 2023-01-23 04:19:51.704390: step: 292/527, loss: 0.4360926151275635 2023-01-23 04:19:52.852871: step: 296/527, loss: 0.012171745300292969 2023-01-23 04:19:53.942039: step: 300/527, loss: 0.2054843008518219 2023-01-23 04:19:55.007850: step: 304/527, loss: 0.0017966270679607987 2023-01-23 04:19:56.122346: step: 308/527, loss: 0.0009456634288653731 2023-01-23 04:19:57.224747: step: 312/527, loss: 0.0014430999290198088 2023-01-23 04:19:58.327711: step: 316/527, loss: 0.0018686294788494706 2023-01-23 04:19:59.440496: step: 320/527, loss: 2.3746491933707148e-05 2023-01-23 04:20:00.556015: step: 324/527, loss: 0.04205608367919922 2023-01-23 04:20:01.657835: step: 328/527, loss: 5.7983401347883046e-05 2023-01-23 04:20:02.762440: step: 332/527, loss: 0.00044040678767487407 2023-01-23 04:20:03.880512: step: 336/527, loss: 0.0015327453147619963 2023-01-23 04:20:04.989755: step: 340/527, loss: 0.001429653144441545 2023-01-23 04:20:06.088309: step: 344/527, loss: 0.0008526802412234247 2023-01-23 04:20:07.216824: step: 348/527, loss: 0.010652541182935238 2023-01-23 04:20:08.336986: step: 352/527, loss: 0.00011787413677666336 2023-01-23 04:20:09.437446: step: 356/527, loss: 3.719329833984375e-05 2023-01-23 04:20:10.535264: step: 360/527, loss: 0.010147476568818092 2023-01-23 04:20:11.632206: step: 364/527, loss: 0.040294744074344635 2023-01-23 04:20:12.761797: step: 368/527, loss: 1.316070574830519e-05 2023-01-23 04:20:13.877862: step: 372/527, loss: 0.0001508712739450857 2023-01-23 04:20:14.989987: step: 376/527, loss: 0.000629425048828125 2023-01-23 04:20:16.086650: step: 380/527, loss: 0.0002520561101846397 2023-01-23 04:20:17.229509: step: 384/527, loss: 4.9781796406023204e-05 2023-01-23 04:20:18.356961: step: 388/527, loss: 0.00012855530076194555 2023-01-23 04:20:19.482634: step: 392/527, loss: 0.02536744996905327 2023-01-23 04:20:20.595876: step: 396/527, loss: 1.9741059077205136e-05 2023-01-23 04:20:21.716499: step: 400/527, loss: 0.011190605349838734 2023-01-23 04:20:22.829856: step: 404/527, loss: 0.0005252361297607422 2023-01-23 04:20:23.939143: step: 408/527, loss: 5.102157501823967e-06 2023-01-23 04:20:25.060036: step: 412/527, loss: 1.239776611328125e-05 2023-01-23 04:20:26.181574: step: 416/527, loss: 0.016800880432128906 2023-01-23 04:20:27.276785: step: 420/527, loss: 0.0001065254255081527 2023-01-23 04:20:28.438465: step: 424/527, loss: 5.760193380410783e-05 2023-01-23 04:20:29.538202: step: 428/527, loss: 2.47955322265625e-05 2023-01-23 04:20:30.670638: step: 432/527, loss: 0.09670142829418182 2023-01-23 04:20:31.773828: step: 436/527, loss: 3.604888843256049e-05 2023-01-23 04:20:32.886876: step: 440/527, loss: 5.91278057981981e-06 2023-01-23 04:20:33.988975: step: 444/527, loss: 2.4795533590804553e-06 2023-01-23 04:20:35.086509: step: 448/527, loss: 0.0638696700334549 2023-01-23 04:20:36.218645: step: 452/527, loss: 2.47955322265625e-05 2023-01-23 04:20:37.330629: step: 456/527, loss: 0.0022597312927246094 2023-01-23 04:20:38.471972: step: 460/527, loss: 2.574920654296875e-05 2023-01-23 04:20:39.582310: step: 464/527, loss: 0.03964271768927574 2023-01-23 04:20:40.709494: step: 468/527, loss: 8.811951556708664e-05 2023-01-23 04:20:41.834823: step: 472/527, loss: 0.047460176050662994 2023-01-23 04:20:42.969795: step: 476/527, loss: 0.008591747842729092 2023-01-23 04:20:44.078945: step: 480/527, loss: 0.015171051025390625 2023-01-23 04:20:45.202565: step: 484/527, loss: 0.0016754150856286287 2023-01-23 04:20:46.310952: step: 488/527, loss: 0.001619052840396762 2023-01-23 04:20:47.424027: step: 492/527, loss: 8.58306884765625e-06 2023-01-23 04:20:48.533750: step: 496/527, loss: 0.0002715110604185611 2023-01-23 04:20:49.644325: step: 500/527, loss: 0.010997009463608265 2023-01-23 04:20:50.767957: step: 504/527, loss: 0.03769245371222496 2023-01-23 04:20:51.871858: step: 508/527, loss: 0.012016678228974342 2023-01-23 04:20:53.017664: step: 512/527, loss: 2.212524486822076e-05 2023-01-23 04:20:54.145533: step: 516/527, loss: 0.01716017723083496 2023-01-23 04:20:55.255183: step: 520/527, loss: 0.0001218795805471018 2023-01-23 04:20:56.341121: step: 524/527, loss: 1.33514404296875e-05 2023-01-23 04:20:57.491947: step: 528/527, loss: 0.019282149150967598 2023-01-23 04:20:58.593559: step: 532/527, loss: 9.956360008800402e-05 2023-01-23 04:20:59.677437: step: 536/527, loss: 0.0005784988170489669 2023-01-23 04:21:00.789697: step: 540/527, loss: 0.00015506744966842234 2023-01-23 04:21:01.949373: step: 544/527, loss: 0.00017766954260878265 2023-01-23 04:21:03.035282: step: 548/527, loss: -1.9073513612966053e-07 2023-01-23 04:21:04.150429: step: 552/527, loss: 0.022127151489257812 2023-01-23 04:21:05.244543: step: 556/527, loss: 0.0011636734707280993 2023-01-23 04:21:06.353913: step: 560/527, loss: 0.009599637240171432 2023-01-23 04:21:07.486814: step: 564/527, loss: 0.0012530327076092362 2023-01-23 04:21:08.647839: step: 568/527, loss: 0.013079453259706497 2023-01-23 04:21:09.756298: step: 572/527, loss: 1.1444091796875e-05 2023-01-23 04:21:10.897319: step: 576/527, loss: 0.01215906161814928 2023-01-23 04:21:12.005248: step: 580/527, loss: 0.004487800411880016 2023-01-23 04:21:13.109801: step: 584/527, loss: 0.00047855376033112407 2023-01-23 04:21:14.253374: step: 588/527, loss: 0.018651390448212624 2023-01-23 04:21:15.381443: step: 592/527, loss: 0.06014423444867134 2023-01-23 04:21:16.489843: step: 596/527, loss: 0.0021610737312585115 2023-01-23 04:21:17.573587: step: 600/527, loss: 3.089904930675402e-05 2023-01-23 04:21:18.700514: step: 604/527, loss: 0.017348574474453926 2023-01-23 04:21:19.813372: step: 608/527, loss: 8.94546537892893e-05 2023-01-23 04:21:20.956788: step: 612/527, loss: 0.00970010831952095 2023-01-23 04:21:22.092766: step: 616/527, loss: 5.34057608092553e-06 2023-01-23 04:21:23.233859: step: 620/527, loss: 0.08303900063037872 2023-01-23 04:21:24.369670: step: 624/527, loss: 0.028952598571777344 2023-01-23 04:21:25.463510: step: 628/527, loss: 3.719329924933845e-06 2023-01-23 04:21:26.557523: step: 632/527, loss: 2.5749204723979346e-06 2023-01-23 04:21:27.686534: step: 636/527, loss: 0.0001691818324616179 2023-01-23 04:21:28.789697: step: 640/527, loss: 0.015911294147372246 2023-01-23 04:21:29.885406: step: 644/527, loss: 0.0034130571875721216 2023-01-23 04:21:31.026055: step: 648/527, loss: 0.0005619049188680947 2023-01-23 04:21:32.154882: step: 652/527, loss: 0.010981273837387562 2023-01-23 04:21:33.259757: step: 656/527, loss: 0.0025578499771654606 2023-01-23 04:21:34.362781: step: 660/527, loss: 2.3698807126493193e-05 2023-01-23 04:21:35.482943: step: 664/527, loss: 0.002362918807193637 2023-01-23 04:21:36.584359: step: 668/527, loss: 0.017154216766357422 2023-01-23 04:21:37.697333: step: 672/527, loss: 1.0490417707842425e-06 2023-01-23 04:21:38.811958: step: 676/527, loss: 0.02232055552303791 2023-01-23 04:21:39.941598: step: 680/527, loss: 0.032080650329589844 2023-01-23 04:21:41.080255: step: 684/527, loss: 9.250640869140625e-05 2023-01-23 04:21:42.158697: step: 688/527, loss: 6.48498553346144e-06 2023-01-23 04:21:43.296764: step: 692/527, loss: 0.0012264251708984375 2023-01-23 04:21:44.446668: step: 696/527, loss: 2.9563905627583154e-05 2023-01-23 04:21:45.547727: step: 700/527, loss: 0.0020650862716138363 2023-01-23 04:21:46.671562: step: 704/527, loss: 1.8787384760798886e-05 2023-01-23 04:21:47.770918: step: 708/527, loss: 0.00014705659123137593 2023-01-23 04:21:48.898149: step: 712/527, loss: 4.57763690064894e-06 2023-01-23 04:21:50.034874: step: 716/527, loss: 3.051757857974735e-06 2023-01-23 04:21:51.142087: step: 720/527, loss: 0.00011768341937568039 2023-01-23 04:21:52.231110: step: 724/527, loss: 0.012250233441591263 2023-01-23 04:21:53.353165: step: 728/527, loss: 1.163482647825731e-05 2023-01-23 04:21:54.522893: step: 732/527, loss: 0.0001356124848825857 2023-01-23 04:21:55.644998: step: 736/527, loss: 0.011827087961137295 2023-01-23 04:21:56.792444: step: 740/527, loss: 0.02020740695297718 2023-01-23 04:21:57.894904: step: 744/527, loss: 0.02274017408490181 2023-01-23 04:21:59.000626: step: 748/527, loss: 0.00014557837857864797 2023-01-23 04:22:00.135079: step: 752/527, loss: 0.005122947972267866 2023-01-23 04:22:01.270645: step: 756/527, loss: 3.910065061063506e-06 2023-01-23 04:22:02.379619: step: 760/527, loss: 0.00037021638127043843 2023-01-23 04:22:03.514968: step: 764/527, loss: 0.01791534572839737 2023-01-23 04:22:04.657775: step: 768/527, loss: 0.0015121460892260075 2023-01-23 04:22:05.768200: step: 772/527, loss: 1.1682510375976562e-05 2023-01-23 04:22:06.978618: step: 776/527, loss: 0.0676511749625206 2023-01-23 04:22:08.097029: step: 780/527, loss: 0.05192718654870987 2023-01-23 04:22:09.231802: step: 784/527, loss: 0.03401775285601616 2023-01-23 04:22:10.362958: step: 788/527, loss: 0.005163765046745539 2023-01-23 04:22:11.468268: step: 792/527, loss: 0.0007431030389852822 2023-01-23 04:22:12.623015: step: 796/527, loss: 6.418228440452367e-05 2023-01-23 04:22:13.747081: step: 800/527, loss: 0.0024358273949474096 2023-01-23 04:22:14.850793: step: 804/527, loss: 0.00030803680419921875 2023-01-23 04:22:15.983675: step: 808/527, loss: 0.0018108368385583162 2023-01-23 04:22:17.085909: step: 812/527, loss: 0.032341767102479935 2023-01-23 04:22:18.194800: step: 816/527, loss: 0.0004055977042298764 2023-01-23 04:22:19.309111: step: 820/527, loss: 0.012580872513353825 2023-01-23 04:22:20.396517: step: 824/527, loss: 0.0013387680519372225 2023-01-23 04:22:21.523076: step: 828/527, loss: 0.0004814147832803428 2023-01-23 04:22:22.656167: step: 832/527, loss: 0.0003933906555175781 2023-01-23 04:22:23.753531: step: 836/527, loss: 0.11872200667858124 2023-01-23 04:22:24.852900: step: 840/527, loss: 0.00018415450176689774 2023-01-23 04:22:25.956081: step: 844/527, loss: 0.012525368481874466 2023-01-23 04:22:27.032698: step: 848/527, loss: 0.014970016665756702 2023-01-23 04:22:28.141224: step: 852/527, loss: 0.026124905794858932 2023-01-23 04:22:29.258323: step: 856/527, loss: 0.0007524490356445312 2023-01-23 04:22:30.385985: step: 860/527, loss: 0.005696296691894531 2023-01-23 04:22:31.488395: step: 864/527, loss: 0.0031391142401844263 2023-01-23 04:22:32.574302: step: 868/527, loss: 0.0009866715408861637 2023-01-23 04:22:33.677867: step: 872/527, loss: 0.01913614384829998 2023-01-23 04:22:34.792165: step: 876/527, loss: 0.020315933972597122 2023-01-23 04:22:35.907572: step: 880/527, loss: 0.01634521596133709 2023-01-23 04:22:37.044668: step: 884/527, loss: 0.00026378632173873484 2023-01-23 04:22:38.152805: step: 888/527, loss: 0.008613204583525658 2023-01-23 04:22:39.279576: step: 892/527, loss: 0.0049607278779149055 2023-01-23 04:22:40.407053: step: 896/527, loss: 0.002787971403449774 2023-01-23 04:22:41.512839: step: 900/527, loss: 0.005481529049575329 2023-01-23 04:22:42.641331: step: 904/527, loss: 3.2901763916015625e-05 2023-01-23 04:22:43.771775: step: 908/527, loss: 0.003894519992172718 2023-01-23 04:22:44.871939: step: 912/527, loss: 0.0002590179501567036 2023-01-23 04:22:45.958857: step: 916/527, loss: 0.016374588012695312 2023-01-23 04:22:47.080702: step: 920/527, loss: 0.00014839171490166336 2023-01-23 04:22:48.196120: step: 924/527, loss: 3.910064697265625e-05 2023-01-23 04:22:49.309208: step: 928/527, loss: 0.029592515900731087 2023-01-23 04:22:50.404426: step: 932/527, loss: 0.0004029273986816406 2023-01-23 04:22:51.499247: step: 936/527, loss: 0.0037467002402991056 2023-01-23 04:22:52.586815: step: 940/527, loss: 1.0633469173626509e-05 2023-01-23 04:22:53.719465: step: 944/527, loss: 0.0017711640102788806 2023-01-23 04:22:54.873568: step: 948/527, loss: 3.509521411615424e-05 2023-01-23 04:22:55.987893: step: 952/527, loss: 0.0012497903080657125 2023-01-23 04:22:57.097764: step: 956/527, loss: 7.190704491222277e-05 2023-01-23 04:22:58.221850: step: 960/527, loss: 0.005519104190170765 2023-01-23 04:22:59.353040: step: 964/527, loss: 0.00021429063053801656 2023-01-23 04:23:00.470618: step: 968/527, loss: 3.337860107421875e-06 2023-01-23 04:23:01.598459: step: 972/527, loss: 0.016888046637177467 2023-01-23 04:23:02.704040: step: 976/527, loss: 0.10534238815307617 2023-01-23 04:23:03.789067: step: 980/527, loss: -5.340576535672881e-06 2023-01-23 04:23:04.910063: step: 984/527, loss: -1.3065337952866685e-05 2023-01-23 04:23:06.019763: step: 988/527, loss: 0.00045223237248137593 2023-01-23 04:23:07.153311: step: 992/527, loss: 0.027906036004424095 2023-01-23 04:23:08.252887: step: 996/527, loss: 0.002360534854233265 2023-01-23 04:23:09.353714: step: 1000/527, loss: 0.0001746654452290386 2023-01-23 04:23:10.456602: step: 1004/527, loss: 4.76837158203125e-07 2023-01-23 04:23:11.570440: step: 1008/527, loss: 0.0008020401000976562 2023-01-23 04:23:12.697611: step: 1012/527, loss: 0.10755062103271484 2023-01-23 04:23:13.792291: step: 1016/527, loss: 0.005852890200912952 2023-01-23 04:23:14.926126: step: 1020/527, loss: 0.0003234863106627017 2023-01-23 04:23:16.010441: step: 1024/527, loss: 0.006233310792595148 2023-01-23 04:23:17.104384: step: 1028/527, loss: 0.025316238403320312 2023-01-23 04:23:18.226413: step: 1032/527, loss: 0.005932998843491077 2023-01-23 04:23:19.356343: step: 1036/527, loss: 0.00048189167864620686 2023-01-23 04:23:20.446585: step: 1040/527, loss: 0.00022716523380950093 2023-01-23 04:23:21.572432: step: 1044/527, loss: 3.843307786155492e-05 2023-01-23 04:23:22.694892: step: 1048/527, loss: 0.011029244400560856 2023-01-23 04:23:23.799651: step: 1052/527, loss: 0.0005187034839764237 2023-01-23 04:23:24.906931: step: 1056/527, loss: 0.0021114349365234375 2023-01-23 04:23:26.030308: step: 1060/527, loss: 0.0026578903198242188 2023-01-23 04:23:27.153964: step: 1064/527, loss: 0.04144611582159996 2023-01-23 04:23:28.261359: step: 1068/527, loss: 0.0001848220854299143 2023-01-23 04:23:29.350478: step: 1072/527, loss: 1.1825562069134321e-05 2023-01-23 04:23:30.473997: step: 1076/527, loss: 1.4305115882962127e-06 2023-01-23 04:23:31.592927: step: 1080/527, loss: 0.0007661819690838456 2023-01-23 04:23:32.717333: step: 1084/527, loss: 0.12997588515281677 2023-01-23 04:23:33.841857: step: 1088/527, loss: 1.950263867911417e-05 2023-01-23 04:23:34.931062: step: 1092/527, loss: 1.4209747860149946e-05 2023-01-23 04:23:36.018629: step: 1096/527, loss: -5.817413693876006e-06 2023-01-23 04:23:37.126811: step: 1100/527, loss: 4.1484832763671875e-05 2023-01-23 04:23:38.214851: step: 1104/527, loss: 2.4509430659236386e-05 2023-01-23 04:23:39.319803: step: 1108/527, loss: 7.696151442360133e-05 2023-01-23 04:23:40.429074: step: 1112/527, loss: 5.645752025884576e-05 2023-01-23 04:23:41.546674: step: 1116/527, loss: 0.008178806863725185 2023-01-23 04:23:42.643397: step: 1120/527, loss: 0.004252815619111061 2023-01-23 04:23:43.753976: step: 1124/527, loss: 0.0008460998651571572 2023-01-23 04:23:44.854593: step: 1128/527, loss: 0.07693938910961151 2023-01-23 04:23:45.975934: step: 1132/527, loss: 0.0008541106944903731 2023-01-23 04:23:47.096860: step: 1136/527, loss: 4.863739013671875e-05 2023-01-23 04:23:48.209976: step: 1140/527, loss: 0.0014945983421057463 2023-01-23 04:23:49.355685: step: 1144/527, loss: 0.004836368374526501 2023-01-23 04:23:50.492946: step: 1148/527, loss: 0.0009296417701989412 2023-01-23 04:23:51.612568: step: 1152/527, loss: 0.0004594803031068295 2023-01-23 04:23:52.718631: step: 1156/527, loss: 6.67572021484375e-06 2023-01-23 04:23:53.817193: step: 1160/527, loss: -1.8119812921213452e-06 2023-01-23 04:23:54.921504: step: 1164/527, loss: 0.0023652436211705208 2023-01-23 04:23:56.030506: step: 1168/527, loss: 0.0007623672718182206 2023-01-23 04:23:57.176373: step: 1172/527, loss: 0.020679855719208717 2023-01-23 04:23:58.290997: step: 1176/527, loss: 1.0681153071345761e-05 2023-01-23 04:23:59.394499: step: 1180/527, loss: 1.316070574830519e-05 2023-01-23 04:24:00.518779: step: 1184/527, loss: 1.449584942747606e-05 2023-01-23 04:24:01.642646: step: 1188/527, loss: 0.0017274857964366674 2023-01-23 04:24:02.745704: step: 1192/527, loss: 6.85691848048009e-05 2023-01-23 04:24:03.856011: step: 1196/527, loss: -2.155303991457913e-05 2023-01-23 04:24:04.960796: step: 1200/527, loss: 0.007727622985839844 2023-01-23 04:24:06.086148: step: 1204/527, loss: 0.0018629074329510331 2023-01-23 04:24:07.193680: step: 1208/527, loss: 3.566742088878527e-05 2023-01-23 04:24:08.337033: step: 1212/527, loss: 2.765655608527595e-06 2023-01-23 04:24:09.444347: step: 1216/527, loss: 0.021380042657256126 2023-01-23 04:24:10.527596: step: 1220/527, loss: 0.002145195147022605 2023-01-23 04:24:11.652337: step: 1224/527, loss: 0.13016262650489807 2023-01-23 04:24:12.760935: step: 1228/527, loss: 0.0023868561256676912 2023-01-23 04:24:13.894786: step: 1232/527, loss: 0.025242233648896217 2023-01-23 04:24:15.003886: step: 1236/527, loss: 0.0061647770926356316 2023-01-23 04:24:16.139454: step: 1240/527, loss: 0.0017040251987054944 2023-01-23 04:24:17.238828: step: 1244/527, loss: 1.8072127204504795e-05 2023-01-23 04:24:18.358381: step: 1248/527, loss: 0.0004831314436160028 2023-01-23 04:24:19.470801: step: 1252/527, loss: 0.0008007049327716231 2023-01-23 04:24:20.583329: step: 1256/527, loss: 0.00021262170048430562 2023-01-23 04:24:21.749783: step: 1260/527, loss: 0.0006473064422607422 2023-01-23 04:24:22.871545: step: 1264/527, loss: -2.2983551389188506e-05 2023-01-23 04:24:23.973765: step: 1268/527, loss: 2.079009937006049e-05 2023-01-23 04:24:25.072668: step: 1272/527, loss: 0.05006871372461319 2023-01-23 04:24:26.187617: step: 1276/527, loss: 2.00748436327558e-05 2023-01-23 04:24:27.287862: step: 1280/527, loss: 3.3092499506892636e-05 2023-01-23 04:24:28.429866: step: 1284/527, loss: 0.07275257259607315 2023-01-23 04:24:29.557645: step: 1288/527, loss: 0.04023456573486328 2023-01-23 04:24:30.665068: step: 1292/527, loss: 0.002548122312873602 2023-01-23 04:24:31.783227: step: 1296/527, loss: 0.0006945609929971397 2023-01-23 04:24:32.899946: step: 1300/527, loss: 0.03854179382324219 2023-01-23 04:24:34.021268: step: 1304/527, loss: 0.0011818886268883944 2023-01-23 04:24:35.162499: step: 1308/527, loss: 0.003962325863540173 2023-01-23 04:24:36.259395: step: 1312/527, loss: 5.34057608092553e-06 2023-01-23 04:24:37.374933: step: 1316/527, loss: 0.00035710333031602204 2023-01-23 04:24:38.469620: step: 1320/527, loss: 0.00016040803166106343 2023-01-23 04:24:39.621974: step: 1324/527, loss: 0.010517120361328125 2023-01-23 04:24:40.753183: step: 1328/527, loss: 3.471374657237902e-05 2023-01-23 04:24:41.840119: step: 1332/527, loss: 0.0023520467802882195 2023-01-23 04:24:42.965652: step: 1336/527, loss: 0.00012979508028365672 2023-01-23 04:24:44.090952: step: 1340/527, loss: 0.0011652946704998612 2023-01-23 04:24:45.227984: step: 1344/527, loss: 1.8119812921213452e-06 2023-01-23 04:24:46.370736: step: 1348/527, loss: 0.007854747585952282 2023-01-23 04:24:47.471148: step: 1352/527, loss: 7.677078428969253e-06 2023-01-23 04:24:48.578275: step: 1356/527, loss: 0.005829811096191406 2023-01-23 04:24:49.724598: step: 1360/527, loss: 0.0025429725646972656 2023-01-23 04:24:50.819520: step: 1364/527, loss: 1.0013580322265625e-05 2023-01-23 04:24:51.933875: step: 1368/527, loss: 0.0012989044189453125 2023-01-23 04:24:53.081426: step: 1372/527, loss: 0.0002666473446879536 2023-01-23 04:24:54.250673: step: 1376/527, loss: 0.0013496398460119963 2023-01-23 04:24:55.371419: step: 1380/527, loss: 0.007849693298339844 2023-01-23 04:24:56.485101: step: 1384/527, loss: 0.0001634597865631804 2023-01-23 04:24:57.596417: step: 1388/527, loss: 0.0018383980495855212 2023-01-23 04:24:58.717302: step: 1392/527, loss: 0.009204864501953125 2023-01-23 04:24:59.831638: step: 1396/527, loss: 0.0009677887428551912 2023-01-23 04:25:00.958457: step: 1400/527, loss: 0.003944301512092352 2023-01-23 04:25:02.062120: step: 1404/527, loss: 0.0002063751162495464 2023-01-23 04:25:03.197238: step: 1408/527, loss: 0.0030055048409849405 2023-01-23 04:25:04.312168: step: 1412/527, loss: 0.009550857357680798 2023-01-23 04:25:05.429366: step: 1416/527, loss: 0.00971241109073162 2023-01-23 04:25:06.553420: step: 1420/527, loss: 0.03001384809613228 2023-01-23 04:25:07.682703: step: 1424/527, loss: 0.0031227110885083675 2023-01-23 04:25:08.791721: step: 1428/527, loss: 0.9736614227294922 2023-01-23 04:25:09.920753: step: 1432/527, loss: 0.018283559009432793 2023-01-23 04:25:11.045051: step: 1436/527, loss: 0.0017353057628497481 2023-01-23 04:25:12.173603: step: 1440/527, loss: 1.6593934560660273e-05 2023-01-23 04:25:13.299408: step: 1444/527, loss: 0.0002774238819256425 2023-01-23 04:25:14.411335: step: 1448/527, loss: 0.006188774015754461 2023-01-23 04:25:15.548586: step: 1452/527, loss: 5.044937279308215e-05 2023-01-23 04:25:16.669300: step: 1456/527, loss: 0.0005575180402956903 2023-01-23 04:25:17.751085: step: 1460/527, loss: 0.00047149660531431437 2023-01-23 04:25:18.879149: step: 1464/527, loss: 7.295608520507812e-05 2023-01-23 04:25:19.969433: step: 1468/527, loss: 0.03987989202141762 2023-01-23 04:25:21.061396: step: 1472/527, loss: 0.0005826950073242188 2023-01-23 04:25:22.167723: step: 1476/527, loss: 0.08063941448926926 2023-01-23 04:25:23.309638: step: 1480/527, loss: 0.0001127243012888357 2023-01-23 04:25:24.456217: step: 1484/527, loss: 0.01488494873046875 2023-01-23 04:25:25.543630: step: 1488/527, loss: 0.00038776398287154734 2023-01-23 04:25:26.638925: step: 1492/527, loss: 0.00019121171499136835 2023-01-23 04:25:27.751238: step: 1496/527, loss: 0.0426454097032547 2023-01-23 04:25:28.853603: step: 1500/527, loss: 0.00022297502437140793 2023-01-23 04:25:29.945623: step: 1504/527, loss: 0.019687939435243607 2023-01-23 04:25:31.071976: step: 1508/527, loss: 0.0024658204056322575 2023-01-23 04:25:32.188087: step: 1512/527, loss: 0.00297813443467021 2023-01-23 04:25:33.294555: step: 1516/527, loss: 0.015665054321289062 2023-01-23 04:25:34.391252: step: 1520/527, loss: 0.00045242311898618937 2023-01-23 04:25:35.502651: step: 1524/527, loss: 1.2350083125056699e-05 2023-01-23 04:25:36.629959: step: 1528/527, loss: 0.11341685801744461 2023-01-23 04:25:37.739532: step: 1532/527, loss: 1.8596649169921875e-05 2023-01-23 04:25:38.877558: step: 1536/527, loss: 0.0017905235290527344 2023-01-23 04:25:40.011167: step: 1540/527, loss: 5.550384958041832e-05 2023-01-23 04:25:41.180284: step: 1544/527, loss: 0.26640960574150085 2023-01-23 04:25:42.317772: step: 1548/527, loss: 5.7220458984375e-06 2023-01-23 04:25:43.439057: step: 1552/527, loss: 0.07365904003381729 2023-01-23 04:25:44.582390: step: 1556/527, loss: 0.00035109519376419485 2023-01-23 04:25:45.723354: step: 1560/527, loss: 1.583099401614163e-05 2023-01-23 04:25:46.854870: step: 1564/527, loss: 0.0001985549897653982 2023-01-23 04:25:47.969100: step: 1568/527, loss: 0.01279306411743164 2023-01-23 04:25:49.059765: step: 1572/527, loss: 0.00038471221341751516 2023-01-23 04:25:50.145469: step: 1576/527, loss: 0.008066177368164062 2023-01-23 04:25:51.290258: step: 1580/527, loss: 0.0946468785405159 2023-01-23 04:25:52.426437: step: 1584/527, loss: 0.0013032435672357678 2023-01-23 04:25:53.560708: step: 1588/527, loss: 0.3102518320083618 2023-01-23 04:25:54.673017: step: 1592/527, loss: 0.009136534295976162 2023-01-23 04:25:55.787500: step: 1596/527, loss: 0.032277487218379974 2023-01-23 04:25:56.890121: step: 1600/527, loss: 0.003069591475650668 2023-01-23 04:25:58.071862: step: 1604/527, loss: 0.00023527145094703883 2023-01-23 04:25:59.160617: step: 1608/527, loss: 0.0001155376376118511 2023-01-23 04:26:00.295910: step: 1612/527, loss: 0.0011808396084234118 2023-01-23 04:26:01.439684: step: 1616/527, loss: 7.22885160939768e-05 2023-01-23 04:26:02.544995: step: 1620/527, loss: 0.00042495731031522155 2023-01-23 04:26:03.655142: step: 1624/527, loss: 0.0016637801891192794 2023-01-23 04:26:04.780319: step: 1628/527, loss: 0.0006689071306027472 2023-01-23 04:26:05.929024: step: 1632/527, loss: 1.1444092706369702e-05 2023-01-23 04:26:07.028467: step: 1636/527, loss: 3.728866431629285e-05 2023-01-23 04:26:08.164793: step: 1640/527, loss: 0.00015182494826149195 2023-01-23 04:26:09.264666: step: 1644/527, loss: 0.03316822275519371 2023-01-23 04:26:10.385648: step: 1648/527, loss: 0.006784438621252775 2023-01-23 04:26:11.478995: step: 1652/527, loss: 0.006494045257568359 2023-01-23 04:26:12.604360: step: 1656/527, loss: 0.014848328195512295 2023-01-23 04:26:13.713427: step: 1660/527, loss: -7.629394644936838e-07 2023-01-23 04:26:14.820492: step: 1664/527, loss: 0.015440178103744984 2023-01-23 04:26:15.937994: step: 1668/527, loss: 0.0447477325797081 2023-01-23 04:26:17.059484: step: 1672/527, loss: 0.8014277219772339 2023-01-23 04:26:18.140635: step: 1676/527, loss: 1.6498564946232364e-05 2023-01-23 04:26:19.300536: step: 1680/527, loss: 0.0001235961972270161 2023-01-23 04:26:20.403979: step: 1684/527, loss: 0.00016412735567428172 2023-01-23 04:26:21.512324: step: 1688/527, loss: 0.023527145385742188 2023-01-23 04:26:22.610412: step: 1692/527, loss: 0.003637599991634488 2023-01-23 04:26:23.698611: step: 1696/527, loss: 0.0001808166562113911 2023-01-23 04:26:24.814820: step: 1700/527, loss: 0.003204345703125 2023-01-23 04:26:25.971058: step: 1704/527, loss: 0.002167606493458152 2023-01-23 04:26:27.091728: step: 1708/527, loss: 0.0015536308055743575 2023-01-23 04:26:28.198660: step: 1712/527, loss: 0.015065384097397327 2023-01-23 04:26:29.326860: step: 1716/527, loss: 6.299018423305824e-05 2023-01-23 04:26:30.446654: step: 1720/527, loss: 0.04913024976849556 2023-01-23 04:26:31.589799: step: 1724/527, loss: 0.0004507065168581903 2023-01-23 04:26:32.727429: step: 1728/527, loss: 6.208419654285535e-05 2023-01-23 04:26:33.842042: step: 1732/527, loss: 0.019384240731596947 2023-01-23 04:26:34.939911: step: 1736/527, loss: 0.30012303590774536 2023-01-23 04:26:36.065829: step: 1740/527, loss: 0.004107857123017311 2023-01-23 04:26:37.182402: step: 1744/527, loss: 0.0014113426441326737 2023-01-23 04:26:38.285082: step: 1748/527, loss: 9.670257713878527e-05 2023-01-23 04:26:39.405855: step: 1752/527, loss: 0.00024690627469681203 2023-01-23 04:26:40.496932: step: 1756/527, loss: 0.00108680734410882 2023-01-23 04:26:41.626600: step: 1760/527, loss: 0.0006296157953329384 2023-01-23 04:26:42.744253: step: 1764/527, loss: 0.01290679071098566 2023-01-23 04:26:43.870603: step: 1768/527, loss: 0.0018275261390954256 2023-01-23 04:26:45.003461: step: 1772/527, loss: 0.0036556245759129524 2023-01-23 04:26:46.120945: step: 1776/527, loss: 0.0007310867076739669 2023-01-23 04:26:47.231886: step: 1780/527, loss: 7.467270188499242e-05 2023-01-23 04:26:48.349869: step: 1784/527, loss: 0.0006832123035565019 2023-01-23 04:26:49.479199: step: 1788/527, loss: 2.1553041733568534e-05 2023-01-23 04:26:50.586968: step: 1792/527, loss: 0.0006723403930664062 2023-01-23 04:26:51.705357: step: 1796/527, loss: 2.365112231927924e-05 2023-01-23 04:26:52.805750: step: 1800/527, loss: 2.908706846938003e-06 2023-01-23 04:26:53.935656: step: 1804/527, loss: 0.013794518075883389 2023-01-23 04:26:55.056122: step: 1808/527, loss: 0.0018934250110760331 2023-01-23 04:26:56.159026: step: 1812/527, loss: 0.0011570453643798828 2023-01-23 04:26:57.285817: step: 1816/527, loss: 0.024321556091308594 2023-01-23 04:26:58.444958: step: 1820/527, loss: 0.0009059906005859375 2023-01-23 04:26:59.572251: step: 1824/527, loss: 0.05170383304357529 2023-01-23 04:27:00.691426: step: 1828/527, loss: 0.003927039913833141 2023-01-23 04:27:01.791399: step: 1832/527, loss: 0.0013512612786144018 2023-01-23 04:27:02.882516: step: 1836/527, loss: 0.035167694091796875 2023-01-23 04:27:03.974774: step: 1840/527, loss: 0.09160614758729935 2023-01-23 04:27:05.102622: step: 1844/527, loss: 0.03256836161017418 2023-01-23 04:27:06.221965: step: 1848/527, loss: 0.00025653839111328125 2023-01-23 04:27:07.349340: step: 1852/527, loss: 0.00012006759061478078 2023-01-23 04:27:08.453291: step: 1856/527, loss: 3.347397068864666e-05 2023-01-23 04:27:09.572749: step: 1860/527, loss: 0.0015501022571697831 2023-01-23 04:27:10.679472: step: 1864/527, loss: 0.00230998988263309 2023-01-23 04:27:11.794131: step: 1868/527, loss: 0.0005203246837481856 2023-01-23 04:27:12.905143: step: 1872/527, loss: 0.01584930531680584 2023-01-23 04:27:14.015201: step: 1876/527, loss: 5.321502612787299e-05 2023-01-23 04:27:15.149757: step: 1880/527, loss: 0.003977966494858265 2023-01-23 04:27:16.285893: step: 1884/527, loss: 0.04806585609912872 2023-01-23 04:27:17.394377: step: 1888/527, loss: 0.016765404492616653 2023-01-23 04:27:18.506926: step: 1892/527, loss: 0.0013014794094488025 2023-01-23 04:27:19.603126: step: 1896/527, loss: 8.726120722712949e-05 2023-01-23 04:27:20.715301: step: 1900/527, loss: 0.0025468827225267887 2023-01-23 04:27:21.810616: step: 1904/527, loss: 0.001960182096809149 2023-01-23 04:27:22.934609: step: 1908/527, loss: 0.044085029512643814 2023-01-23 04:27:24.065938: step: 1912/527, loss: 0.0008513450738973916 2023-01-23 04:27:25.183026: step: 1916/527, loss: 0.005043411627411842 2023-01-23 04:27:26.305528: step: 1920/527, loss: 0.0001661777641857043 2023-01-23 04:27:27.382905: step: 1924/527, loss: 0.012706947512924671 2023-01-23 04:27:28.487769: step: 1928/527, loss: 0.0038674355018883944 2023-01-23 04:27:29.579643: step: 1932/527, loss: 4.1580202378099784e-05 2023-01-23 04:27:30.700991: step: 1936/527, loss: 0.0007404327625408769 2023-01-23 04:27:31.867385: step: 1940/527, loss: 0.0009922027820721269 2023-01-23 04:27:32.998673: step: 1944/527, loss: 1.5060726404190063 2023-01-23 04:27:34.127083: step: 1948/527, loss: 0.0019271851051598787 2023-01-23 04:27:35.274852: step: 1952/527, loss: 9.5367431640625e-06 2023-01-23 04:27:36.394149: step: 1956/527, loss: 0.6008445024490356 2023-01-23 04:27:37.480262: step: 1960/527, loss: 0.0003595828893594444 2023-01-23 04:27:38.602397: step: 1964/527, loss: 0.00016765593318268657 2023-01-23 04:27:39.696753: step: 1968/527, loss: 0.00015335083298850805 2023-01-23 04:27:40.805064: step: 1972/527, loss: 0.0002494812069926411 2023-01-23 04:27:41.923509: step: 1976/527, loss: 1.087188684323337e-05 2023-01-23 04:27:43.049895: step: 1980/527, loss: 0.0006203174707479775 2023-01-23 04:27:44.206624: step: 1984/527, loss: 0.003362560411915183 2023-01-23 04:27:45.333344: step: 1988/527, loss: 0.009557723999023438 2023-01-23 04:27:46.466082: step: 1992/527, loss: 0.0008581161382608116 2023-01-23 04:27:47.549148: step: 1996/527, loss: 2.2029877072782256e-05 2023-01-23 04:27:48.677792: step: 2000/527, loss: 5.6743621826171875e-05 2023-01-23 04:27:49.792128: step: 2004/527, loss: 0.03742380067706108 2023-01-23 04:27:50.867887: step: 2008/527, loss: 0.0015326023567467928 2023-01-23 04:27:52.031872: step: 2012/527, loss: 0.010592365637421608 2023-01-23 04:27:53.167231: step: 2016/527, loss: 0.03694877773523331 2023-01-23 04:27:54.274098: step: 2020/527, loss: 0.0010503769153729081 2023-01-23 04:27:55.405499: step: 2024/527, loss: 0.0022166252601891756 2023-01-23 04:27:56.529880: step: 2028/527, loss: 0.0246169101446867 2023-01-23 04:27:57.642339: step: 2032/527, loss: 0.0015837668906897306 2023-01-23 04:27:58.733189: step: 2036/527, loss: 0.001325893448665738 2023-01-23 04:27:59.849216: step: 2040/527, loss: 0.001745843910612166 2023-01-23 04:28:00.971955: step: 2044/527, loss: 0.00905466079711914 2023-01-23 04:28:02.083501: step: 2048/527, loss: 0.007166385650634766 2023-01-23 04:28:03.204714: step: 2052/527, loss: 0.012956047430634499 2023-01-23 04:28:04.292356: step: 2056/527, loss: 0.012605667114257812 2023-01-23 04:28:05.429533: step: 2060/527, loss: 1.7166138377433526e-06 2023-01-23 04:28:06.546805: step: 2064/527, loss: 0.0010304928291589022 2023-01-23 04:28:07.643323: step: 2068/527, loss: 7.62939453125e-05 2023-01-23 04:28:08.752439: step: 2072/527, loss: 0.0009903907775878906 2023-01-23 04:28:09.871084: step: 2076/527, loss: 3.395080420887098e-05 2023-01-23 04:28:10.992164: step: 2080/527, loss: 0.0005800247308798134 2023-01-23 04:28:12.091673: step: 2084/527, loss: 0.0012081146705895662 2023-01-23 04:28:13.207604: step: 2088/527, loss: 0.004014301113784313 2023-01-23 04:28:14.307692: step: 2092/527, loss: 0.0012030601501464844 2023-01-23 04:28:15.405329: step: 2096/527, loss: 0.00019807815260719508 2023-01-23 04:28:16.529245: step: 2100/527, loss: 0.003503823187202215 2023-01-23 04:28:17.643675: step: 2104/527, loss: 0.0002639770391397178 2023-01-23 04:28:18.754869: step: 2108/527, loss: 4.205703953630291e-05 ================================================== Loss: 0.020 -------------------- Dev: {'event': {'p': 0.5827482447342026, 'r': 0.7736351531291611, 'f1': 0.6647597254004577}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Test: {'event': {'p': 0.6218034993270525, 'r': 0.792, 'f1': 0.6966574516210103}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Chinese: {'event': {'p': 0.5595238095238095, 'r': 0.8703703703703703, 'f1': 0.6811594202898551}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Korean: {'event': {'p': 0.6226415094339622, 'r': 0.5238095238095238, 'f1': 0.5689655172413793}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Russian: {'event': {'p': 0.475, 'r': 0.5277777777777778, 'f1': 0.5}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6063157894736843, 'r': 0.7669773635153129, 'f1': 0.6772486772486773}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Eng Test for Korean: {'event': {'p': 0.6427238805970149, 'r': 0.7874285714285715, 'f1': 0.7077555213148434}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Korean: {'event': {'p': 0.6792452830188679, 'r': 0.5714285714285714, 'f1': 0.6206896551724137}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 29 command: python train.py --model_name trg --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 6e-4 2023-01-23 04:29:00.286824: step: 4/527, loss: 2.145767393813003e-05 2023-01-23 04:29:01.410457: step: 8/527, loss: 0.0010246277088299394 2023-01-23 04:29:02.549221: step: 12/527, loss: 0.003389644669368863 2023-01-23 04:29:03.675298: step: 16/527, loss: 0.0010288238991051912 2023-01-23 04:29:04.820349: step: 20/527, loss: 0.04168825224041939 2023-01-23 04:29:05.929148: step: 24/527, loss: -3.337860107421875e-06 2023-01-23 04:29:07.057135: step: 28/527, loss: -4.57763690064894e-06 2023-01-23 04:29:08.164254: step: 32/527, loss: 0.02978038787841797 2023-01-23 04:29:09.301171: step: 36/527, loss: 0.0001316070556640625 2023-01-23 04:29:10.439495: step: 40/527, loss: 0.00010643005953170359 2023-01-23 04:29:11.564163: step: 44/527, loss: 0.000716137932613492 2023-01-23 04:29:12.651945: step: 48/527, loss: 0.011913490481674671 2023-01-23 04:29:13.819889: step: 52/527, loss: 0.0021017075050622225 2023-01-23 04:29:14.930003: step: 56/527, loss: 0.0006391525384970009 2023-01-23 04:29:16.027403: step: 60/527, loss: 6.408691842807457e-05 2023-01-23 04:29:17.135530: step: 64/527, loss: 0.0007295608520507812 2023-01-23 04:29:18.292370: step: 68/527, loss: 2.8228761948412284e-05 2023-01-23 04:29:19.392052: step: 72/527, loss: 0.0016901969211176038 2023-01-23 04:29:20.517458: step: 76/527, loss: 0.0009433747036382556 2023-01-23 04:29:21.626422: step: 80/527, loss: 1.5878678823355585e-05 2023-01-23 04:29:22.747941: step: 84/527, loss: 0.0006093978881835938 2023-01-23 04:29:23.853747: step: 88/527, loss: 2.293586658197455e-05 2023-01-23 04:29:24.957019: step: 92/527, loss: 0.04685802385210991 2023-01-23 04:29:26.068768: step: 96/527, loss: 0.004385948181152344 2023-01-23 04:29:27.174533: step: 100/527, loss: 4.882812572759576e-05 2023-01-23 04:29:28.291746: step: 104/527, loss: 0.0015909194480627775 2023-01-23 04:29:29.411483: step: 108/527, loss: 0.000127696999697946 2023-01-23 04:29:30.577947: step: 112/527, loss: 0.002540683839470148 2023-01-23 04:29:31.719901: step: 116/527, loss: 0.002883529756218195 2023-01-23 04:29:32.811707: step: 120/527, loss: 0.004446601960808039 2023-01-23 04:29:33.942777: step: 124/527, loss: 0.034989356994628906 2023-01-23 04:29:35.051481: step: 128/527, loss: 1.9073486328125e-05 2023-01-23 04:29:36.144719: step: 132/527, loss: 0.00025768281193450093 2023-01-23 04:29:37.289312: step: 136/527, loss: 0.003114318707957864 2023-01-23 04:29:38.373051: step: 140/527, loss: 0.03297596052289009 2023-01-23 04:29:39.511125: step: 144/527, loss: 4.482269287109375e-05 2023-01-23 04:29:40.629570: step: 148/527, loss: 0.0004102706734556705 2023-01-23 04:29:41.727889: step: 152/527, loss: 0.0004593849298544228 2023-01-23 04:29:42.845172: step: 156/527, loss: 0.03136596828699112 2023-01-23 04:29:43.956244: step: 160/527, loss: 0.00022411346435546875 2023-01-23 04:29:45.086927: step: 164/527, loss: 0.003842306323349476 2023-01-23 04:29:46.218914: step: 168/527, loss: 0.00010113716416526586 2023-01-23 04:29:47.318911: step: 172/527, loss: 0.0002590179501567036 2023-01-23 04:29:48.420011: step: 176/527, loss: 0.00016012191190384328 2023-01-23 04:29:49.511338: step: 180/527, loss: 0.07830315083265305 2023-01-23 04:29:50.636953: step: 184/527, loss: 0.0003841400030069053 2023-01-23 04:29:51.736304: step: 188/527, loss: 4.386902219266631e-06 2023-01-23 04:29:52.857372: step: 192/527, loss: 0.001582336495630443 2023-01-23 04:29:53.970357: step: 196/527, loss: -8.82148754044465e-07 2023-01-23 04:29:55.095355: step: 200/527, loss: 0.00022182465181685984 2023-01-23 04:29:56.241733: step: 204/527, loss: 0.00016613007755950093 2023-01-23 04:29:57.347920: step: 208/527, loss: 0.017932463437318802 2023-01-23 04:29:58.456464: step: 212/527, loss: 0.00012130737741244957 2023-01-23 04:29:59.570711: step: 216/527, loss: 0.00566520681604743 2023-01-23 04:30:00.685172: step: 220/527, loss: 0.011272668838500977 2023-01-23 04:30:01.806936: step: 224/527, loss: 0.03747711330652237 2023-01-23 04:30:02.923447: step: 228/527, loss: 0.001617383910343051 2023-01-23 04:30:04.033104: step: 232/527, loss: 0.0033159255981445312 2023-01-23 04:30:05.136617: step: 236/527, loss: 0.010769272223114967 2023-01-23 04:30:06.239545: step: 240/527, loss: 0.019602585583925247 2023-01-23 04:30:07.390470: step: 244/527, loss: 0.0008128166664391756 2023-01-23 04:30:08.508385: step: 248/527, loss: 0.0005959987756796181 2023-01-23 04:30:09.627000: step: 252/527, loss: 0.0047279358841478825 2023-01-23 04:30:10.731806: step: 256/527, loss: 0.011433792300522327 2023-01-23 04:30:11.850790: step: 260/527, loss: 8.068084571277723e-05 2023-01-23 04:30:12.981273: step: 264/527, loss: 0.03424587473273277 2023-01-23 04:30:14.078105: step: 268/527, loss: 0.06713536381721497 2023-01-23 04:30:15.176076: step: 272/527, loss: 0.050714682787656784 2023-01-23 04:30:16.279121: step: 276/527, loss: 0.0012892247177660465 2023-01-23 04:30:17.383652: step: 280/527, loss: 0.022847890853881836 2023-01-23 04:30:18.489092: step: 284/527, loss: 0.0016241073608398438 2023-01-23 04:30:19.604801: step: 288/527, loss: 0.002081298967823386 2023-01-23 04:30:20.726465: step: 292/527, loss: 0.00010938645573332906 2023-01-23 04:30:21.827745: step: 296/527, loss: 4.58717331639491e-05 2023-01-23 04:30:22.932197: step: 300/527, loss: -1.220703143189894e-05 2023-01-23 04:30:24.046957: step: 304/527, loss: 0.019287873059511185 2023-01-23 04:30:25.145448: step: 308/527, loss: 0.00047740936861373484 2023-01-23 04:30:26.260157: step: 312/527, loss: 2.584457615739666e-05 2023-01-23 04:30:27.365218: step: 316/527, loss: 0.00015068055654410273 2023-01-23 04:30:28.489467: step: 320/527, loss: -7.62939453125e-06 2023-01-23 04:30:29.600344: step: 324/527, loss: 0.008928490802645683 2023-01-23 04:30:30.720937: step: 328/527, loss: 3.8146970382513246e-07 2023-01-23 04:30:31.824146: step: 332/527, loss: 7.114410254871473e-05 2023-01-23 04:30:32.935549: step: 336/527, loss: 0.037627603858709335 2023-01-23 04:30:34.051325: step: 340/527, loss: 0.0003310203901492059 2023-01-23 04:30:35.153760: step: 344/527, loss: 2.5224686396541074e-05 2023-01-23 04:30:36.252043: step: 348/527, loss: 0.011365699581801891 2023-01-23 04:30:37.356942: step: 352/527, loss: 0.005747413728386164 2023-01-23 04:30:38.456423: step: 356/527, loss: 6.389617919921875e-05 2023-01-23 04:30:39.577612: step: 360/527, loss: 9.250641596736386e-06 2023-01-23 04:30:40.677469: step: 364/527, loss: 8.20159912109375e-05 2023-01-23 04:30:41.774671: step: 368/527, loss: 1.006126512947958e-05 2023-01-23 04:30:42.899955: step: 372/527, loss: 0.0001655578671488911 2023-01-23 04:30:44.015416: step: 376/527, loss: 0.001965808914974332 2023-01-23 04:30:45.126722: step: 380/527, loss: 0.0002801895316224545 2023-01-23 04:30:46.260715: step: 384/527, loss: 0.0008577347034588456 2023-01-23 04:30:47.357667: step: 388/527, loss: 7.400512549793348e-05 2023-01-23 04:30:48.491969: step: 392/527, loss: 0.07707786560058594 2023-01-23 04:30:49.629048: step: 396/527, loss: 9.72747802734375e-05 2023-01-23 04:30:50.746732: step: 400/527, loss: -1.7166138377433526e-06 2023-01-23 04:30:51.840364: step: 404/527, loss: -1.001357759378152e-06 2023-01-23 04:30:52.945026: step: 408/527, loss: 4.491805884754285e-05 2023-01-23 04:30:54.112195: step: 412/527, loss: 0.009221792221069336 2023-01-23 04:30:55.224871: step: 416/527, loss: 0.002175617264583707 2023-01-23 04:30:56.325063: step: 420/527, loss: 6.65664701955393e-05 2023-01-23 04:30:57.432161: step: 424/527, loss: 0.0028001891914755106 2023-01-23 04:30:58.554291: step: 428/527, loss: 0.0042473794892430305 2023-01-23 04:30:59.680032: step: 432/527, loss: 0.00014686585927847773 2023-01-23 04:31:00.812610: step: 436/527, loss: 0.072443388402462 2023-01-23 04:31:01.921059: step: 440/527, loss: 8.792877633823082e-05 2023-01-23 04:31:03.048908: step: 444/527, loss: 0.08109913021326065 2023-01-23 04:31:04.176455: step: 448/527, loss: 0.0019282341236248612 2023-01-23 04:31:05.257445: step: 452/527, loss: 0.0811772346496582 2023-01-23 04:31:06.374371: step: 456/527, loss: 0.009087467566132545 2023-01-23 04:31:07.494578: step: 460/527, loss: 0.0022134780883789062 2023-01-23 04:31:08.611903: step: 464/527, loss: 0.005785178858786821 2023-01-23 04:31:09.750790: step: 468/527, loss: 0.0011878968216478825 2023-01-23 04:31:10.894111: step: 472/527, loss: 0.00027332306490279734 2023-01-23 04:31:12.017549: step: 476/527, loss: 0.0026000975631177425 2023-01-23 04:31:13.119923: step: 480/527, loss: 0.01197595614939928 2023-01-23 04:31:14.229315: step: 484/527, loss: 0.006413173861801624 2023-01-23 04:31:15.320793: step: 488/527, loss: 0.0011042595142498612 2023-01-23 04:31:16.403531: step: 492/527, loss: 0.0049867150373756886 2023-01-23 04:31:17.549335: step: 496/527, loss: 9.779930405784398e-05 2023-01-23 04:31:18.673350: step: 500/527, loss: 0.001121091889217496 2023-01-23 04:31:19.763583: step: 504/527, loss: 0.00024118424335028976 2023-01-23 04:31:20.885293: step: 508/527, loss: 8.678435733600054e-06 2023-01-23 04:31:22.031733: step: 512/527, loss: 0.013912391848862171 2023-01-23 04:31:23.154464: step: 516/527, loss: 0.0006174087757244706 2023-01-23 04:31:24.273202: step: 520/527, loss: 6.189346458995715e-05 2023-01-23 04:31:25.381440: step: 524/527, loss: 2.212524486822076e-05 2023-01-23 04:31:26.530896: step: 528/527, loss: 0.00011215210543014109 2023-01-23 04:31:27.648071: step: 532/527, loss: 2.2554397219209932e-05 2023-01-23 04:31:28.771506: step: 536/527, loss: 0.007109927944839001 2023-01-23 04:31:29.884966: step: 540/527, loss: 0.005600738804787397 2023-01-23 04:31:30.999679: step: 544/527, loss: 0.006458759307861328 2023-01-23 04:31:32.146127: step: 548/527, loss: 0.02709989622235298 2023-01-23 04:31:33.265924: step: 552/527, loss: 0.00013465881056617945 2023-01-23 04:31:34.382976: step: 556/527, loss: 0.0007377624860964715 2023-01-23 04:31:35.510521: step: 560/527, loss: 0.0046651363372802734 2023-01-23 04:31:36.609543: step: 564/527, loss: 4.3201445805607364e-05 2023-01-23 04:31:37.725780: step: 568/527, loss: 0.0001491546572651714 2023-01-23 04:31:38.812683: step: 572/527, loss: 4.0483475459041074e-05 2023-01-23 04:31:39.926204: step: 576/527, loss: 0.0001430511474609375 2023-01-23 04:31:41.046845: step: 580/527, loss: 0.0030183792114257812 2023-01-23 04:31:42.190102: step: 584/527, loss: 0.012265205383300781 2023-01-23 04:31:43.274136: step: 588/527, loss: 0.03284502029418945 2023-01-23 04:31:44.404750: step: 592/527, loss: 0.007442665286362171 2023-01-23 04:31:45.521998: step: 596/527, loss: 0.0010752677917480469 2023-01-23 04:31:46.632881: step: 600/527, loss: -4.863739377469756e-06 2023-01-23 04:31:47.768577: step: 604/527, loss: 0.006377029232680798 2023-01-23 04:31:48.881180: step: 608/527, loss: 0.0013196945656090975 2023-01-23 04:31:50.011628: step: 612/527, loss: 0.00010347366333007812 2023-01-23 04:31:51.123470: step: 616/527, loss: 3.24249267578125e-05 2023-01-23 04:31:52.220451: step: 620/527, loss: 0.019945908337831497 2023-01-23 04:31:53.321926: step: 624/527, loss: 0.00022659300884697586 2023-01-23 04:31:54.430700: step: 628/527, loss: 0.09983577579259872 2023-01-23 04:31:55.547638: step: 632/527, loss: 0.004530525766313076 2023-01-23 04:31:56.655456: step: 636/527, loss: 0.00013809204392600805 2023-01-23 04:31:57.779924: step: 640/527, loss: 0.35550469160079956 2023-01-23 04:31:58.894066: step: 644/527, loss: 8.459090895485133e-05 2023-01-23 04:31:59.998349: step: 648/527, loss: 0.05952663719654083 2023-01-23 04:32:01.128283: step: 652/527, loss: 0.0003520965692587197 2023-01-23 04:32:02.210859: step: 656/527, loss: 0.0001398086460540071 2023-01-23 04:32:03.321506: step: 660/527, loss: 4.768372036778601e-06 2023-01-23 04:32:04.468436: step: 664/527, loss: 0.005389214027673006 2023-01-23 04:32:05.596632: step: 668/527, loss: 2.021789623540826e-05 2023-01-23 04:32:06.707856: step: 672/527, loss: 0.00022754669771529734 2023-01-23 04:32:07.808917: step: 676/527, loss: 0.0014707566006109118 2023-01-23 04:32:08.917764: step: 680/527, loss: 0.006165695376694202 2023-01-23 04:32:10.024954: step: 684/527, loss: 0.000705337559338659 2023-01-23 04:32:11.121232: step: 688/527, loss: -3.14712519866589e-06 2023-01-23 04:32:12.232683: step: 692/527, loss: 1.2397766795402276e-06 2023-01-23 04:32:13.335611: step: 696/527, loss: 0.0009949684608727694 2023-01-23 04:32:14.425343: step: 700/527, loss: 0.0005684852949343622 2023-01-23 04:32:15.602203: step: 704/527, loss: 9.422302537132055e-05 2023-01-23 04:32:16.718158: step: 708/527, loss: 2.441406286379788e-05 2023-01-23 04:32:17.850411: step: 712/527, loss: 0.7490178942680359 2023-01-23 04:32:18.992048: step: 716/527, loss: 9.250640687241685e-06 2023-01-23 04:32:20.140513: step: 720/527, loss: 0.0010239601833745837 2023-01-23 04:32:21.247290: step: 724/527, loss: 0.00017337797908112407 2023-01-23 04:32:22.355171: step: 728/527, loss: 0.0012812615605071187 2023-01-23 04:32:23.505875: step: 732/527, loss: 3.6811830796068534e-05 2023-01-23 04:32:24.631010: step: 736/527, loss: 0.0006567001109942794 2023-01-23 04:32:25.750233: step: 740/527, loss: 7.724762326688506e-06 2023-01-23 04:32:26.899603: step: 744/527, loss: 0.0004010200500488281 2023-01-23 04:32:28.036562: step: 748/527, loss: 0.00028896331787109375 2023-01-23 04:32:29.128444: step: 752/527, loss: 0.0002511024649720639 2023-01-23 04:32:30.246796: step: 756/527, loss: 3.9577484130859375e-05 2023-01-23 04:32:31.345562: step: 760/527, loss: 2.3365020751953125e-05 2023-01-23 04:32:32.429692: step: 764/527, loss: 8.02993745310232e-05 2023-01-23 04:32:33.546217: step: 768/527, loss: 5.054474058852065e-06 2023-01-23 04:32:34.667407: step: 772/527, loss: 0.0001241683930857107 2023-01-23 04:32:35.794880: step: 776/527, loss: 0.6181806921958923 2023-01-23 04:32:36.919550: step: 780/527, loss: 0.0011331558926030993 2023-01-23 04:32:38.012210: step: 784/527, loss: 0.0010840415488928556 2023-01-23 04:32:39.130650: step: 788/527, loss: 0.00508227339014411 2023-01-23 04:32:40.237633: step: 792/527, loss: 0.045961376279592514 2023-01-23 04:32:41.347320: step: 796/527, loss: 0.00026483534020371735 2023-01-23 04:32:42.468508: step: 800/527, loss: 0.02138996124267578 2023-01-23 04:32:43.629855: step: 804/527, loss: 0.0574214905500412 2023-01-23 04:32:44.735138: step: 808/527, loss: 0.004658699035644531 2023-01-23 04:32:45.829524: step: 812/527, loss: 0.0013451576232910156 2023-01-23 04:32:46.917781: step: 816/527, loss: 0.0007417678716592491 2023-01-23 04:32:48.049212: step: 820/527, loss: 0.00014381408982444555 2023-01-23 04:32:49.183211: step: 824/527, loss: 0.00038967133150435984 2023-01-23 04:32:50.303938: step: 828/527, loss: 0.0013612747425213456 2023-01-23 04:32:51.488521: step: 832/527, loss: 0.00958862341940403 2023-01-23 04:32:52.612245: step: 836/527, loss: 0.005013084504753351 2023-01-23 04:32:53.716880: step: 840/527, loss: 0.0001201629638671875 2023-01-23 04:32:54.815779: step: 844/527, loss: 0.009236717596650124 2023-01-23 04:32:55.943526: step: 848/527, loss: 1.201629675051663e-05 2023-01-23 04:32:57.055550: step: 852/527, loss: 0.009479904547333717 2023-01-23 04:32:58.130214: step: 856/527, loss: -8.296966370835435e-06 2023-01-23 04:32:59.242455: step: 860/527, loss: 7.23838820704259e-05 2023-01-23 04:33:00.359004: step: 864/527, loss: 9.4366077973973e-05 2023-01-23 04:33:01.492390: step: 868/527, loss: 0.00796804390847683 2023-01-23 04:33:02.591586: step: 872/527, loss: 4.615783836925402e-05 2023-01-23 04:33:03.729316: step: 876/527, loss: 0.0001146316499216482 2023-01-23 04:33:04.860354: step: 880/527, loss: 0.00045642853365279734 2023-01-23 04:33:05.969156: step: 884/527, loss: 0.07573939114809036 2023-01-23 04:33:07.078494: step: 888/527, loss: 0.0003253936883993447 2023-01-23 04:33:08.226872: step: 892/527, loss: 0.015353680588304996 2023-01-23 04:33:09.342340: step: 896/527, loss: 0.045088961720466614 2023-01-23 04:33:10.469541: step: 900/527, loss: 0.002724266145378351 2023-01-23 04:33:11.584908: step: 904/527, loss: 0.00017547607421875 2023-01-23 04:33:12.687634: step: 908/527, loss: 4.849433753406629e-05 2023-01-23 04:33:13.813799: step: 912/527, loss: 0.02476043812930584 2023-01-23 04:33:14.927898: step: 916/527, loss: 0.009362602606415749 2023-01-23 04:33:16.051106: step: 920/527, loss: 0.0013145447010174394 2023-01-23 04:33:17.167393: step: 924/527, loss: 0.00038332940312102437 2023-01-23 04:33:18.325918: step: 928/527, loss: 0.004160690121352673 2023-01-23 04:33:19.465722: step: 932/527, loss: 0.4128970205783844 2023-01-23 04:33:20.590925: step: 936/527, loss: 0.0025076866149902344 2023-01-23 04:33:21.715179: step: 940/527, loss: 2.9563905172835803e-06 2023-01-23 04:33:22.858916: step: 944/527, loss: 2.6082992917508818e-05 2023-01-23 04:33:23.945916: step: 948/527, loss: 0.002614307450130582 2023-01-23 04:33:25.055172: step: 952/527, loss: 0.0018732547760009766 2023-01-23 04:33:26.194433: step: 956/527, loss: 0.006592655088752508 2023-01-23 04:33:27.305232: step: 960/527, loss: 0.0005663871997967362 2023-01-23 04:33:28.396941: step: 964/527, loss: 6.198883056640625e-05 2023-01-23 04:33:29.512253: step: 968/527, loss: 0.05710233375430107 2023-01-23 04:33:30.612737: step: 972/527, loss: 0.003173637669533491 2023-01-23 04:33:31.749326: step: 976/527, loss: 0.009129524230957031 2023-01-23 04:33:32.888044: step: 980/527, loss: 0.0004795074346475303 2023-01-23 04:33:34.005613: step: 984/527, loss: 6.66618361719884e-05 2023-01-23 04:33:35.117522: step: 988/527, loss: 1.3256073543743696e-05 2023-01-23 04:33:36.246373: step: 992/527, loss: 3.347396705066785e-05 2023-01-23 04:33:37.368197: step: 996/527, loss: 0.0008980275015346706 2023-01-23 04:33:38.472633: step: 1000/527, loss: 0.0008060455438680947 2023-01-23 04:33:39.627892: step: 1004/527, loss: 1.2063979738741182e-05 2023-01-23 04:33:40.731060: step: 1008/527, loss: 0.0016671180492267013 2023-01-23 04:33:41.848202: step: 1012/527, loss: 0.0011191368103027344 2023-01-23 04:33:42.966822: step: 1016/527, loss: 0.01555933989584446 2023-01-23 04:33:44.068465: step: 1020/527, loss: 3.852844383800402e-05 2023-01-23 04:33:45.166692: step: 1024/527, loss: 0.0727960616350174 2023-01-23 04:33:46.283639: step: 1028/527, loss: 0.03388824686408043 2023-01-23 04:33:47.407246: step: 1032/527, loss: 0.010137557983398438 2023-01-23 04:33:48.527560: step: 1036/527, loss: 0.00033054352388717234 2023-01-23 04:33:49.660826: step: 1040/527, loss: 0.0014032364124432206 2023-01-23 04:33:50.774494: step: 1044/527, loss: 0.008458137512207031 2023-01-23 04:33:51.905413: step: 1048/527, loss: 0.01613616943359375 2023-01-23 04:33:52.978008: step: 1052/527, loss: 2.899169885495212e-05 2023-01-23 04:33:54.143244: step: 1056/527, loss: 0.0011610031360760331 2023-01-23 04:33:55.279322: step: 1060/527, loss: 0.0020305633079260588 2023-01-23 04:33:56.421709: step: 1064/527, loss: 0.00011873245239257812 2023-01-23 04:33:57.547299: step: 1068/527, loss: 2.555847095209174e-05 2023-01-23 04:33:58.646112: step: 1072/527, loss: 0.0011371612781658769 2023-01-23 04:33:59.748101: step: 1076/527, loss: 0.035219814628362656 2023-01-23 04:34:00.863877: step: 1080/527, loss: 8.58306884765625e-06 2023-01-23 04:34:02.017378: step: 1084/527, loss: 0.003634643740952015 2023-01-23 04:34:03.135453: step: 1088/527, loss: 0.012740135192871094 2023-01-23 04:34:04.218258: step: 1092/527, loss: 0.026059437543153763 2023-01-23 04:34:05.328496: step: 1096/527, loss: 0.00043926239595748484 2023-01-23 04:34:06.424106: step: 1100/527, loss: 1.9073486328125e-06 2023-01-23 04:34:07.534113: step: 1104/527, loss: 0.0035418986808508635 2023-01-23 04:34:08.669897: step: 1108/527, loss: 6.4373016357421875e-06 2023-01-23 04:34:09.797772: step: 1112/527, loss: 0.0604364387691021 2023-01-23 04:34:10.903140: step: 1116/527, loss: 0.0003274917835369706 2023-01-23 04:34:12.011080: step: 1120/527, loss: 0.0017480850219726562 2023-01-23 04:34:13.137938: step: 1124/527, loss: -1.7261503671761602e-05 2023-01-23 04:34:14.262063: step: 1128/527, loss: 6.961823146411916e-06 2023-01-23 04:34:15.383139: step: 1132/527, loss: 0.024448012933135033 2023-01-23 04:34:16.498930: step: 1136/527, loss: 9.880065044853836e-05 2023-01-23 04:34:17.659364: step: 1140/527, loss: 0.009129619225859642 2023-01-23 04:34:18.767738: step: 1144/527, loss: 0.001784515450708568 2023-01-23 04:34:19.915332: step: 1148/527, loss: -1.144409225162235e-06 2023-01-23 04:34:21.042320: step: 1152/527, loss: 1.1348724001436494e-05 2023-01-23 04:34:22.175670: step: 1156/527, loss: 0.00376129150390625 2023-01-23 04:34:23.290597: step: 1160/527, loss: 0.0003010749933309853 2023-01-23 04:34:24.396329: step: 1164/527, loss: 0.0007301330333575606 2023-01-23 04:34:25.492616: step: 1168/527, loss: 0.001605224679224193 2023-01-23 04:34:26.629166: step: 1172/527, loss: 2.9087066195643274e-06 2023-01-23 04:34:27.743828: step: 1176/527, loss: 0.0001768112269928679 2023-01-23 04:34:28.866071: step: 1180/527, loss: 0.0001123428373830393 2023-01-23 04:34:29.988695: step: 1184/527, loss: 0.18721266090869904 2023-01-23 04:34:31.131579: step: 1188/527, loss: 0.0021316527854651213 2023-01-23 04:34:32.229283: step: 1192/527, loss: 0.04095039516687393 2023-01-23 04:34:33.348228: step: 1196/527, loss: 3.62396240234375e-05 2023-01-23 04:34:34.484217: step: 1200/527, loss: 0.008442497812211514 2023-01-23 04:34:35.581026: step: 1204/527, loss: 0.00034561159554868937 2023-01-23 04:34:36.735721: step: 1208/527, loss: 0.0002574920654296875 2023-01-23 04:34:37.839587: step: 1212/527, loss: 4.4536594941746444e-05 2023-01-23 04:34:38.974505: step: 1216/527, loss: 0.0007654189830645919 2023-01-23 04:34:40.085592: step: 1220/527, loss: 7.581710815429688e-05 2023-01-23 04:34:41.196284: step: 1224/527, loss: 0.0023818970657885075 2023-01-23 04:34:42.300038: step: 1228/527, loss: 0.023397158831357956 2023-01-23 04:34:43.399743: step: 1232/527, loss: 0.00258560199290514 2023-01-23 04:34:44.515268: step: 1236/527, loss: 0.020354939624667168 2023-01-23 04:34:45.637378: step: 1240/527, loss: 0.01276092603802681 2023-01-23 04:34:46.739916: step: 1244/527, loss: 0.0013586045242846012 2023-01-23 04:34:47.840223: step: 1248/527, loss: 1.1444091796875e-05 2023-01-23 04:34:48.939886: step: 1252/527, loss: 7.095336331985891e-05 2023-01-23 04:34:50.077739: step: 1256/527, loss: 0.026103973388671875 2023-01-23 04:34:51.200133: step: 1260/527, loss: 2.2602080207434483e-05 2023-01-23 04:34:52.320670: step: 1264/527, loss: 0.0014027596917003393 2023-01-23 04:34:53.449451: step: 1268/527, loss: 0.00012235641770530492 2023-01-23 04:34:54.567650: step: 1272/527, loss: 0.011886787600815296 2023-01-23 04:34:55.671155: step: 1276/527, loss: 0.0004477500915527344 2023-01-23 04:34:56.792839: step: 1280/527, loss: 9.231567673850805e-05 2023-01-23 04:34:57.886890: step: 1284/527, loss: 3.261566234868951e-05 2023-01-23 04:34:58.983318: step: 1288/527, loss: 0.004856491461396217 2023-01-23 04:35:00.089384: step: 1292/527, loss: 1.773834264895413e-05 2023-01-23 04:35:01.228276: step: 1296/527, loss: 5.53131121705519e-06 2023-01-23 04:35:02.341426: step: 1300/527, loss: 0.0018771172035485506 2023-01-23 04:35:03.461289: step: 1304/527, loss: -3.957748504035408e-06 2023-01-23 04:35:04.626735: step: 1308/527, loss: 0.0009178638574667275 2023-01-23 04:35:05.712342: step: 1312/527, loss: 0.0008335114107467234 2023-01-23 04:35:06.832397: step: 1316/527, loss: 2.746581958490424e-05 2023-01-23 04:35:07.938233: step: 1320/527, loss: 0.0013227462768554688 2023-01-23 04:35:09.048161: step: 1324/527, loss: 9.5367431640625e-07 2023-01-23 04:35:10.194695: step: 1328/527, loss: 0.0010253905784338713 2023-01-23 04:35:11.292636: step: 1332/527, loss: 2.57492069977161e-06 2023-01-23 04:35:12.388808: step: 1336/527, loss: 0.008300495333969593 2023-01-23 04:35:13.494214: step: 1340/527, loss: 4.0626528061693534e-05 2023-01-23 04:35:14.581829: step: 1344/527, loss: 0.00031108857365325093 2023-01-23 04:35:15.677387: step: 1348/527, loss: 2.3651125957258046e-05 2023-01-23 04:35:16.781478: step: 1352/527, loss: 0.029747294262051582 2023-01-23 04:35:17.902913: step: 1356/527, loss: 4.100799742445815e-06 2023-01-23 04:35:19.007517: step: 1360/527, loss: 0.0008672713884152472 2023-01-23 04:35:20.123152: step: 1364/527, loss: 0.22238807380199432 2023-01-23 04:35:21.224358: step: 1368/527, loss: 0.325967013835907 2023-01-23 04:35:22.365426: step: 1372/527, loss: 4.509836344368523e-06 2023-01-23 04:35:23.465916: step: 1376/527, loss: 0.0001333236723439768 2023-01-23 04:35:24.558693: step: 1380/527, loss: 1.544952465337701e-05 2023-01-23 04:35:25.692788: step: 1384/527, loss: 4.19616708313697e-06 2023-01-23 04:35:26.812989: step: 1388/527, loss: 3.013610876223538e-05 2023-01-23 04:35:27.925664: step: 1392/527, loss: 0.03813190758228302 2023-01-23 04:35:29.023941: step: 1396/527, loss: 1.9836426872643642e-05 2023-01-23 04:35:30.135578: step: 1400/527, loss: 7.004737562965602e-05 2023-01-23 04:35:31.266232: step: 1404/527, loss: 2.47955313170678e-06 2023-01-23 04:35:32.407097: step: 1408/527, loss: 0.03603959083557129 2023-01-23 04:35:33.512600: step: 1412/527, loss: 2.19821922655683e-05 2023-01-23 04:35:34.597831: step: 1416/527, loss: 1.8596649169921875e-05 2023-01-23 04:35:35.706316: step: 1420/527, loss: 0.00026979445829056203 2023-01-23 04:35:36.820053: step: 1424/527, loss: 0.0006380081176757812 2023-01-23 04:35:37.939776: step: 1428/527, loss: 0.0005802154773846269 2023-01-23 04:35:39.083383: step: 1432/527, loss: 0.0001510620058979839 2023-01-23 04:35:40.197906: step: 1436/527, loss: 0.004744911566376686 2023-01-23 04:35:41.308431: step: 1440/527, loss: 3.519058373058215e-05 2023-01-23 04:35:42.440836: step: 1444/527, loss: 2.784729076665826e-05 2023-01-23 04:35:43.540361: step: 1448/527, loss: 1.049041748046875e-05 2023-01-23 04:35:44.670071: step: 1452/527, loss: -7.62939453125e-06 2023-01-23 04:35:45.792367: step: 1456/527, loss: 0.020812224596738815 2023-01-23 04:35:46.910990: step: 1460/527, loss: 0.034818269312381744 2023-01-23 04:35:48.034175: step: 1464/527, loss: 1.716613724056515e-06 2023-01-23 04:35:49.165372: step: 1468/527, loss: 0.0001184463471872732 2023-01-23 04:35:50.332277: step: 1472/527, loss: 0.0003986358642578125 2023-01-23 04:35:51.410773: step: 1476/527, loss: 1.3542176020564511e-05 2023-01-23 04:35:52.527886: step: 1480/527, loss: 2.6035308110294864e-05 2023-01-23 04:35:53.625438: step: 1484/527, loss: 0.00019655228243209422 2023-01-23 04:35:54.719862: step: 1488/527, loss: 0.016224002465605736 2023-01-23 04:35:55.827925: step: 1492/527, loss: 0.0003681182861328125 2023-01-23 04:35:56.960861: step: 1496/527, loss: 0.000533676182385534 2023-01-23 04:35:58.063574: step: 1500/527, loss: 0.02605123445391655 2023-01-23 04:35:59.194327: step: 1504/527, loss: 1.4686585018353071e-05 2023-01-23 04:36:00.325636: step: 1508/527, loss: 0.007518958766013384 2023-01-23 04:36:01.419733: step: 1512/527, loss: 0.0003223419189453125 2023-01-23 04:36:02.548158: step: 1516/527, loss: 0.00869207363575697 2023-01-23 04:36:03.653984: step: 1520/527, loss: 0.01059265062212944 2023-01-23 04:36:04.773314: step: 1524/527, loss: 6.122589547885582e-05 2023-01-23 04:36:05.864951: step: 1528/527, loss: 5.34057608092553e-06 2023-01-23 04:36:06.995578: step: 1532/527, loss: 0.0017091037007048726 2023-01-23 04:36:08.123256: step: 1536/527, loss: 7.686614844715223e-05 2023-01-23 04:36:09.230413: step: 1540/527, loss: 0.0002845764101948589 2023-01-23 04:36:10.361793: step: 1544/527, loss: 0.03704690933227539 2023-01-23 04:36:11.477750: step: 1548/527, loss: 0.010331916622817516 2023-01-23 04:36:12.588929: step: 1552/527, loss: -4.24384961661417e-06 2023-01-23 04:36:13.676818: step: 1556/527, loss: 0.01786823198199272 2023-01-23 04:36:14.780743: step: 1560/527, loss: 6.50405854685232e-05 2023-01-23 04:36:15.925753: step: 1564/527, loss: 2.47955322265625e-05 2023-01-23 04:36:17.015825: step: 1568/527, loss: 2.8610230629055877e-07 2023-01-23 04:36:18.098779: step: 1572/527, loss: 0.011174154467880726 2023-01-23 04:36:19.183109: step: 1576/527, loss: 0.012739181518554688 2023-01-23 04:36:20.294655: step: 1580/527, loss: 5.626678102998994e-06 2023-01-23 04:36:21.422410: step: 1584/527, loss: 0.060941699892282486 2023-01-23 04:36:22.543259: step: 1588/527, loss: 3.814697265625e-06 2023-01-23 04:36:23.669312: step: 1592/527, loss: 0.0009203911758959293 2023-01-23 04:36:24.754812: step: 1596/527, loss: 1.163482647825731e-05 2023-01-23 04:36:25.904426: step: 1600/527, loss: 0.5229610204696655 2023-01-23 04:36:27.016074: step: 1604/527, loss: 2.098083541568485e-06 2023-01-23 04:36:28.138603: step: 1608/527, loss: 3.795623706537299e-05 2023-01-23 04:36:29.252207: step: 1612/527, loss: 0.0004807472287211567 2023-01-23 04:36:30.369582: step: 1616/527, loss: 4.00543194700731e-06 2023-01-23 04:36:31.483078: step: 1620/527, loss: 0.002050018170848489 2023-01-23 04:36:32.578776: step: 1624/527, loss: -4.95910626341356e-06 2023-01-23 04:36:33.735643: step: 1628/527, loss: 0.0001560211239848286 2023-01-23 04:36:34.849076: step: 1632/527, loss: 0.021405315026640892 2023-01-23 04:36:35.953718: step: 1636/527, loss: 7.629394076502649e-07 2023-01-23 04:36:37.043137: step: 1640/527, loss: 0.005777645390480757 2023-01-23 04:36:38.139304: step: 1644/527, loss: 0.0004177093505859375 2023-01-23 04:36:39.229129: step: 1648/527, loss: 0.006636237725615501 2023-01-23 04:36:40.341907: step: 1652/527, loss: 2.3180245989351533e-05 2023-01-23 04:36:41.459904: step: 1656/527, loss: 0.0038425445090979338 2023-01-23 04:36:42.564900: step: 1660/527, loss: 0.02945546992123127 2023-01-23 04:36:43.670309: step: 1664/527, loss: 2.28881845032447e-06 2023-01-23 04:36:44.817883: step: 1668/527, loss: 0.0005195617559365928 2023-01-23 04:36:45.913098: step: 1672/527, loss: 2.5081635612878017e-05 2023-01-23 04:36:47.022337: step: 1676/527, loss: 0.0001655578671488911 2023-01-23 04:36:48.136467: step: 1680/527, loss: 0.0026155475061386824 2023-01-23 04:36:49.252551: step: 1684/527, loss: 3.6811830796068534e-05 2023-01-23 04:36:50.352640: step: 1688/527, loss: -9.536779543850571e-08 2023-01-23 04:36:51.444372: step: 1692/527, loss: 9.832382056629285e-05 2023-01-23 04:36:52.544259: step: 1696/527, loss: 0.002795314881950617 2023-01-23 04:36:53.667709: step: 1700/527, loss: 0.00029754638671875 2023-01-23 04:36:54.807084: step: 1704/527, loss: 0.004522895906120539 2023-01-23 04:36:55.880599: step: 1708/527, loss: 8.869172233971767e-06 2023-01-23 04:36:56.971896: step: 1712/527, loss: 0.00041866302490234375 2023-01-23 04:36:58.137790: step: 1716/527, loss: 0.006893634796142578 2023-01-23 04:36:59.235943: step: 1720/527, loss: 0.12302589416503906 2023-01-23 04:37:00.328152: step: 1724/527, loss: 0.0013587952125817537 2023-01-23 04:37:01.443020: step: 1728/527, loss: 0.00021495818509720266 2023-01-23 04:37:02.563979: step: 1732/527, loss: -1.716613724056515e-06 2023-01-23 04:37:03.713914: step: 1736/527, loss: 5.245208740234375e-06 2023-01-23 04:37:04.820728: step: 1740/527, loss: 0.0020215988624840975 2023-01-23 04:37:05.940294: step: 1744/527, loss: 0.05284080654382706 2023-01-23 04:37:07.067376: step: 1748/527, loss: 0.00064935686532408 2023-01-23 04:37:08.174911: step: 1752/527, loss: 0.0012876511318609118 2023-01-23 04:37:09.276756: step: 1756/527, loss: 0.0003529548703227192 2023-01-23 04:37:10.435687: step: 1760/527, loss: 0.009379196912050247 2023-01-23 04:37:11.568523: step: 1764/527, loss: 0.046181678771972656 2023-01-23 04:37:12.689981: step: 1768/527, loss: 0.0025760652497410774 2023-01-23 04:37:13.821003: step: 1772/527, loss: 0.0003704070986714214 2023-01-23 04:37:14.930610: step: 1776/527, loss: 1.716613724056515e-06 2023-01-23 04:37:16.032443: step: 1780/527, loss: 0.00019168853759765625 2023-01-23 04:37:17.118770: step: 1784/527, loss: 0.08904609829187393 2023-01-23 04:37:18.230198: step: 1788/527, loss: 2.8514861696748994e-05 2023-01-23 04:37:19.333218: step: 1792/527, loss: 0.023518182337284088 2023-01-23 04:37:20.448402: step: 1796/527, loss: 0.014324379153549671 2023-01-23 04:37:21.563231: step: 1800/527, loss: -1.7642973944020923e-06 2023-01-23 04:37:22.685467: step: 1804/527, loss: 0.0011955262161791325 2023-01-23 04:37:23.815158: step: 1808/527, loss: 0.0001396179141011089 2023-01-23 04:37:24.936663: step: 1812/527, loss: 1.678466833254788e-05 2023-01-23 04:37:26.029962: step: 1816/527, loss: 0.00011739729961846024 2023-01-23 04:37:27.134224: step: 1820/527, loss: 0.02970914915204048 2023-01-23 04:37:28.265400: step: 1824/527, loss: 1.4686585018353071e-05 2023-01-23 04:37:29.370647: step: 1828/527, loss: 0.022895528003573418 2023-01-23 04:37:30.490881: step: 1832/527, loss: 0.0023525238502770662 2023-01-23 04:37:31.605430: step: 1836/527, loss: 0.0007162094116210938 2023-01-23 04:37:32.699265: step: 1840/527, loss: 0.016622615978121758 2023-01-23 04:37:33.836938: step: 1844/527, loss: 0.0005181312444619834 2023-01-23 04:37:34.942102: step: 1848/527, loss: 8.010864803509321e-06 2023-01-23 04:37:36.061943: step: 1852/527, loss: 0.019637491554021835 2023-01-23 04:37:37.182551: step: 1856/527, loss: 0.007338047027587891 2023-01-23 04:37:38.284714: step: 1860/527, loss: 0.0011165142059326172 2023-01-23 04:37:39.411810: step: 1864/527, loss: 0.022028256207704544 2023-01-23 04:37:40.535080: step: 1868/527, loss: 3.662109520519152e-05 2023-01-23 04:37:41.646264: step: 1872/527, loss: 2.86102294921875e-06 2023-01-23 04:37:42.748841: step: 1876/527, loss: 0.00021104812913108617 2023-01-23 04:37:43.851445: step: 1880/527, loss: 6.647109694313258e-05 2023-01-23 04:37:44.977941: step: 1884/527, loss: 0.025273799896240234 2023-01-23 04:37:46.100081: step: 1888/527, loss: 0.000213623046875 2023-01-23 04:37:47.230440: step: 1892/527, loss: 0.3446022868156433 2023-01-23 04:37:48.367223: step: 1896/527, loss: 0.001374244806356728 2023-01-23 04:37:49.516245: step: 1900/527, loss: 3.123283386230469e-05 2023-01-23 04:37:50.641001: step: 1904/527, loss: 0.004478550050407648 2023-01-23 04:37:51.769299: step: 1908/527, loss: 0.014357281848788261 2023-01-23 04:37:52.894707: step: 1912/527, loss: 0.047567371279001236 2023-01-23 04:37:54.013765: step: 1916/527, loss: 0.03014068678021431 2023-01-23 04:37:55.116343: step: 1920/527, loss: 0.0006321906694211066 2023-01-23 04:37:56.207578: step: 1924/527, loss: 2.09808349609375e-05 2023-01-23 04:37:57.362253: step: 1928/527, loss: 1.0013580322265625e-05 2023-01-23 04:37:58.471797: step: 1932/527, loss: 0.0006980896578170359 2023-01-23 04:37:59.558168: step: 1936/527, loss: 4.615783836925402e-05 2023-01-23 04:38:00.648673: step: 1940/527, loss: 0.0006071567768231034 2023-01-23 04:38:01.767359: step: 1944/527, loss: 0.00034227370633743703 2023-01-23 04:38:02.868652: step: 1948/527, loss: 0.04827642813324928 2023-01-23 04:38:04.036829: step: 1952/527, loss: 0.004059219267219305 2023-01-23 04:38:05.177752: step: 1956/527, loss: 0.004584193229675293 2023-01-23 04:38:06.306084: step: 1960/527, loss: 3.070831371587701e-05 2023-01-23 04:38:07.434820: step: 1964/527, loss: 0.0015237807529047132 2023-01-23 04:38:08.539122: step: 1968/527, loss: 0.0026638987474143505 2023-01-23 04:38:09.653523: step: 1972/527, loss: 0.00034313200740143657 2023-01-23 04:38:10.779744: step: 1976/527, loss: 0.016387557610869408 2023-01-23 04:38:11.901172: step: 1980/527, loss: 0.0035765646025538445 2023-01-23 04:38:13.014239: step: 1984/527, loss: 0.00015559197345282882 2023-01-23 04:38:14.085233: step: 1988/527, loss: 0.0016122817760333419 2023-01-23 04:38:15.187819: step: 1992/527, loss: 0.01073303259909153 2023-01-23 04:38:16.297837: step: 1996/527, loss: 0.022158240899443626 2023-01-23 04:38:17.418789: step: 2000/527, loss: 0.029164601117372513 2023-01-23 04:38:18.501387: step: 2004/527, loss: 0.00039505958557128906 2023-01-23 04:38:19.629985: step: 2008/527, loss: 9.460449655307457e-05 2023-01-23 04:38:20.742303: step: 2012/527, loss: 0.0002344131498830393 2023-01-23 04:38:21.851972: step: 2016/527, loss: 0.01551132183521986 2023-01-23 04:38:22.953672: step: 2020/527, loss: 0.0062233926728367805 2023-01-23 04:38:24.053232: step: 2024/527, loss: 0.0006309509626589715 2023-01-23 04:38:25.162984: step: 2028/527, loss: 0.00014123917208053172 2023-01-23 04:38:26.288012: step: 2032/527, loss: 6.384849257301539e-05 2023-01-23 04:38:27.409398: step: 2036/527, loss: 0.002294349716976285 2023-01-23 04:38:28.500407: step: 2040/527, loss: 0.0001321315940003842 2023-01-23 04:38:29.605139: step: 2044/527, loss: 0.016176223754882812 2023-01-23 04:38:30.708749: step: 2048/527, loss: 0.00023927689471747726 2023-01-23 04:38:31.807145: step: 2052/527, loss: 0.01070108450949192 2023-01-23 04:38:32.943452: step: 2056/527, loss: 0.0030249597039073706 2023-01-23 04:38:34.084472: step: 2060/527, loss: 0.0004129410081077367 2023-01-23 04:38:35.201914: step: 2064/527, loss: 3.871917579090223e-05 2023-01-23 04:38:36.353726: step: 2068/527, loss: 0.0024806975852698088 2023-01-23 04:38:37.463732: step: 2072/527, loss: 0.023093605414032936 2023-01-23 04:38:38.562654: step: 2076/527, loss: 0.0001046180768753402 2023-01-23 04:38:39.684336: step: 2080/527, loss: 0.0001642227143747732 2023-01-23 04:38:40.790615: step: 2084/527, loss: 7.224082946777344e-05 2023-01-23 04:38:41.915101: step: 2088/527, loss: 0.13199271261692047 2023-01-23 04:38:43.068387: step: 2092/527, loss: 0.013424873352050781 2023-01-23 04:38:44.172308: step: 2096/527, loss: 0.0001222610444528982 2023-01-23 04:38:45.285147: step: 2100/527, loss: 0.0011671066749840975 2023-01-23 04:38:46.371644: step: 2104/527, loss: 0.0020716667640954256 2023-01-23 04:38:47.474556: step: 2108/527, loss: 0.0015638350741937757 ================================================== Loss: 0.015 -------------------- Dev: {'event': {'p': 0.6049129989764586, 'r': 0.7869507323568575, 'f1': 0.6840277777777778}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Test: {'event': {'p': 0.6380996739636703, 'r': 0.7828571428571428, 'f1': 0.7031049525275853}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Chinese: {'event': {'p': 0.5393258426966292, 'r': 0.8888888888888888, 'f1': 0.6713286713286712}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Korean: {'event': {'p': 0.7, 'r': 0.4444444444444444, 'f1': 0.5436893203883495}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Russian: {'event': {'p': 0.4473684210526316, 'r': 0.4722222222222222, 'f1': 0.4594594594594595}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} ================================================== Current best result: -------------------- Eng Dev for Chinese: {'event': {'p': 0.6241758241758242, 'r': 0.7563249001331558, 'f1': 0.6839253461770018}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Eng Test for Chinese: {'event': {'p': 0.6433059449009183, 'r': 0.7605714285714286, 'f1': 0.6970411102382822}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Chinese: {'event': {'p': 0.5949367088607594, 'r': 0.8703703703703703, 'f1': 0.706766917293233}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Eng Dev for Korean: {'event': {'p': 0.6063157894736843, 'r': 0.7669773635153129, 'f1': 0.6772486772486773}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Eng Test for Korean: {'event': {'p': 0.6427238805970149, 'r': 0.7874285714285715, 'f1': 0.7077555213148434}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Korean: {'event': {'p': 0.6792452830188679, 'r': 0.5714285714285714, 'f1': 0.6206896551724137}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} -------------------- Eng Dev for Russian: {'event': {'p': 0.6400462962962963, 'r': 0.7363515312916112, 'f1': 0.6848297213622292}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Eng Test for Russian: {'event': {'p': 0.6463168516649849, 'r': 0.732, 'f1': 0.6864951768488746}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'event': {'p': 0.625, 'r': 0.5555555555555556, 'f1': 0.5882352941176471}, 'argument': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3}