Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trans_rep.weight: torch.Size([1024, 2048]) >>> trans_rep.bias: torch.Size([1024]) >>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768]) >>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Etiplate.layers.0.weight: 
torch.Size([450, 768]) >>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6]) >>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> 
completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([4]) >>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> 
overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) n_trainable_params: 582185936, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:45:33.626187: step: 4/77, loss: 1.0521974563598633 2023-01-23 22:45:34.955242: step: 8/77, loss: 1.0530740022659302 2023-01-23 22:45:36.279033: step: 12/77, loss: 1.048304796218872 2023-01-23 22:45:37.593250: step: 16/77, loss: 1.0575125217437744 2023-01-23 22:45:38.904751: step: 20/77, loss: 1.0364112854003906 2023-01-23 22:45:40.200413: step: 24/77, loss: 1.0486927032470703 2023-01-23 22:45:41.553230: step: 28/77, loss: 1.0608787536621094 2023-01-23 22:45:42.871898: step: 32/77, loss: 1.0362920761108398 2023-01-23 22:45:44.164238: step: 36/77, loss: 1.0434200763702393 2023-01-23 22:45:45.461778: step: 40/77, loss: 1.0297248363494873 2023-01-23 22:45:46.772640: step: 44/77, loss: 1.0186331272125244 2023-01-23 22:45:48.080741: step: 48/77, loss: 1.0153753757476807 2023-01-23 22:45:49.373603: step: 52/77, loss: 1.0195648670196533 2023-01-23 22:45:50.668194: step: 56/77, loss: 1.0007667541503906 2023-01-23 22:45:51.949446: step: 60/77, loss: 0.9879387617111206 2023-01-23 22:45:53.235274: step: 64/77, loss: 0.9750910997390747 2023-01-23 22:45:54.549155: step: 68/77, loss: 0.9763144254684448 2023-01-23 22:45:55.885366: step: 72/77, loss: 0.9537844657897949 2023-01-23 22:45:57.204600: step: 76/77, loss: 0.9456419348716736 2023-01-23 22:45:58.539797: step: 80/77, loss: 0.9266098737716675 2023-01-23 22:45:59.824310: step: 84/77, loss: 0.9407334327697754 2023-01-23 22:46:01.129120: step: 88/77, loss: 0.9128175973892212 2023-01-23 22:46:02.491790: step: 92/77, loss: 0.8884884119033813 2023-01-23 22:46:03.825887: step: 96/77, loss: 0.8754992485046387 2023-01-23 22:46:05.078662: step: 100/77, loss: 0.8558071851730347 2023-01-23 22:46:06.412880: step: 104/77, loss: 0.8275822997093201 2023-01-23 22:46:07.724559: step: 108/77, loss: 0.8083636164665222 2023-01-23 22:46:09.029745: step: 112/77, loss: 0.7995848059654236 2023-01-23 22:46:10.314037: step: 116/77, loss: 0.7754606604576111 2023-01-23 22:46:11.651428: step: 120/77, loss: 0.7789733409881592 2023-01-23 22:46:12.961926: step: 124/77, loss: 0.7553587555885315 2023-01-23 22:46:14.284637: step: 128/77, loss: 0.75539231300354 2023-01-23 22:46:15.598785: step: 132/77, loss: 0.7256723642349243 2023-01-23 22:46:16.920587: step: 136/77, loss: 0.6737915277481079 2023-01-23 22:46:18.191031: step: 140/77, loss: 0.6848892569541931 2023-01-23 22:46:19.472740: step: 144/77, loss: 0.6683143377304077 2023-01-23 22:46:20.800256: step: 148/77, loss: 0.6761552095413208 2023-01-23 22:46:22.109806: step: 152/77, loss: 0.6221305131912231 2023-01-23 22:46:23.415170: step: 156/77, loss: 0.6018381118774414 2023-01-23 22:46:24.747118: step: 160/77, loss: 0.6005375981330872 2023-01-23 22:46:26.091229: step: 164/77, loss: 0.5284562110900879 2023-01-23 22:46:27.461572: step: 168/77, loss: 0.5597098469734192 2023-01-23 22:46:28.744999: step: 172/77, loss: 0.43947166204452515 2023-01-23 22:46:30.022587: step: 176/77, loss: 0.4276687204837799 2023-01-23 22:46:31.370225: step: 180/77, loss: 0.4454874098300934 2023-01-23 22:46:32.668706: step: 184/77, loss: 0.5441113710403442 2023-01-23 22:46:34.016972: step: 188/77, loss: 0.35866567492485046 2023-01-23 22:46:35.359430: step: 192/77, loss: 0.4040197730064392 2023-01-23 22:46:36.708118: step: 196/77, loss: 0.3254753053188324 2023-01-23 22:46:38.042712: step: 200/77, loss: 0.3546521067619324 2023-01-23 22:46:39.392348: step: 204/77, loss: 0.3514381945133209 2023-01-23 22:46:40.668159: step: 
208/77, loss: 0.3122267425060272 2023-01-23 22:46:41.999673: step: 212/77, loss: 0.2432302087545395 2023-01-23 22:46:43.316987: step: 216/77, loss: 0.1995767056941986 2023-01-23 22:46:44.605682: step: 220/77, loss: 0.33912259340286255 2023-01-23 22:46:45.903164: step: 224/77, loss: 0.2183302938938141 2023-01-23 22:46:47.203337: step: 228/77, loss: 0.33341485261917114 2023-01-23 22:46:48.518305: step: 232/77, loss: 0.1881929337978363 2023-01-23 22:46:49.825743: step: 236/77, loss: 0.24369250237941742 2023-01-23 22:46:51.179909: step: 240/77, loss: 0.21067927777767181 2023-01-23 22:46:52.496012: step: 244/77, loss: 0.16403478384017944 2023-01-23 22:46:53.811396: step: 248/77, loss: 0.30606308579444885 2023-01-23 22:46:55.135287: step: 252/77, loss: 0.1937190592288971 2023-01-23 22:46:56.474514: step: 256/77, loss: 0.32572799921035767 2023-01-23 22:46:57.768635: step: 260/77, loss: 0.13746631145477295 2023-01-23 22:46:59.055105: step: 264/77, loss: 0.1280936598777771 2023-01-23 22:47:00.370437: step: 268/77, loss: 0.10665931552648544 2023-01-23 22:47:01.663775: step: 272/77, loss: 0.10630533844232559 2023-01-23 22:47:03.008236: step: 276/77, loss: 0.12797018885612488 2023-01-23 22:47:04.341583: step: 280/77, loss: 0.2352885901927948 2023-01-23 22:47:05.710852: step: 284/77, loss: 0.06179904192686081 2023-01-23 22:47:07.068256: step: 288/77, loss: 0.1331750750541687 2023-01-23 22:47:08.352228: step: 292/77, loss: 0.06892704963684082 2023-01-23 22:47:09.680899: step: 296/77, loss: 0.1391991227865219 2023-01-23 22:47:10.988435: step: 300/77, loss: 0.07123759388923645 2023-01-23 22:47:12.318293: step: 304/77, loss: 0.11932633817195892 2023-01-23 22:47:13.681883: step: 308/77, loss: 0.1587085872888565 2023-01-23 22:47:15.000298: step: 312/77, loss: 0.034506428986787796 2023-01-23 22:47:16.313324: step: 316/77, loss: 0.05934888869524002 2023-01-23 22:47:17.636625: step: 320/77, loss: 0.05228797718882561 2023-01-23 22:47:18.953149: step: 324/77, loss: 0.05070921778678894 2023-01-23 22:47:20.260982: step: 328/77, loss: 0.25672921538352966 2023-01-23 22:47:21.571030: step: 332/77, loss: 0.07970165461301804 2023-01-23 22:47:22.886713: step: 336/77, loss: 0.11222337186336517 2023-01-23 22:47:24.153593: step: 340/77, loss: 0.09403430670499802 2023-01-23 22:47:25.472363: step: 344/77, loss: 0.05787937343120575 2023-01-23 22:47:26.782745: step: 348/77, loss: 0.04168379306793213 2023-01-23 22:47:28.144419: step: 352/77, loss: 0.09637323021888733 2023-01-23 22:47:29.487617: step: 356/77, loss: 0.05214162915945053 2023-01-23 22:47:30.763452: step: 360/77, loss: 0.13377325236797333 2023-01-23 22:47:32.030784: step: 364/77, loss: 0.09389052540063858 2023-01-23 22:47:33.331718: step: 368/77, loss: 0.18593792617321014 2023-01-23 22:47:34.669633: step: 372/77, loss: 0.13386821746826172 2023-01-23 22:47:36.009703: step: 376/77, loss: 0.10935309529304504 2023-01-23 22:47:37.338645: step: 380/77, loss: 0.0837806761264801 2023-01-23 22:47:38.635709: step: 384/77, loss: 0.07167628407478333 2023-01-23 22:47:39.997335: step: 388/77, loss: 0.19204410910606384 ================================================== Loss: 0.486 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} 
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:49:51.161914: step: 4/77, loss: 0.16772520542144775 2023-01-23 22:49:52.508659: step: 8/77, loss: 0.09386086463928223 2023-01-23 22:49:53.763450: step: 12/77, loss: 0.07116525620222092 2023-01-23 22:49:55.048330: step: 16/77, loss: 0.08739854395389557 2023-01-23 22:49:56.362855: step: 20/77, loss: 0.09066418558359146 2023-01-23 22:49:57.645666: step: 24/77, loss: 0.09214691072702408 2023-01-23 22:49:58.950508: step: 28/77, loss: 0.07001110166311264 2023-01-23 22:50:00.263332: step: 32/77, loss: 0.24295248091220856 2023-01-23 22:50:01.600242: step: 36/77, loss: 0.2596581280231476 2023-01-23 22:50:02.896250: step: 40/77, loss: 0.10341985523700714 2023-01-23 22:50:04.203420: step: 44/77, loss: 0.06520126760005951 2023-01-23 22:50:05.518871: step: 48/77, loss: 0.08200832456350327 2023-01-23 22:50:06.805520: step: 52/77, loss: 0.07770586013793945 2023-01-23 22:50:08.092108: step: 56/77, loss: 0.3515990078449249 2023-01-23 22:50:09.402031: step: 60/77, loss: 0.061179328709840775 2023-01-23 22:50:10.753714: step: 64/77, loss: 0.10807536542415619 2023-01-23 22:50:12.075015: step: 68/77, loss: 
0.0880051702260971 2023-01-23 22:50:13.466504: step: 72/77, loss: 0.09584487974643707 2023-01-23 22:50:14.804930: step: 76/77, loss: 0.08828597515821457 2023-01-23 22:50:16.137460: step: 80/77, loss: 0.2787051498889923 2023-01-23 22:50:17.505570: step: 84/77, loss: 0.07855068892240524 2023-01-23 22:50:18.826873: step: 88/77, loss: 0.33545881509780884 2023-01-23 22:50:20.196081: step: 92/77, loss: 0.22064723074436188 2023-01-23 22:50:21.541542: step: 96/77, loss: 0.07103629410266876 2023-01-23 22:50:22.885268: step: 100/77, loss: 0.05532563850283623 2023-01-23 22:50:24.214411: step: 104/77, loss: 0.14419548213481903 2023-01-23 22:50:25.485915: step: 108/77, loss: 0.1311594545841217 2023-01-23 22:50:26.777658: step: 112/77, loss: 0.03030647709965706 2023-01-23 22:50:28.069375: step: 116/77, loss: 0.14231641590595245 2023-01-23 22:50:29.383516: step: 120/77, loss: 0.049726489931344986 2023-01-23 22:50:30.717270: step: 124/77, loss: 0.14961367845535278 2023-01-23 22:50:32.077983: step: 128/77, loss: 0.10820247232913971 2023-01-23 22:50:33.413663: step: 132/77, loss: 0.05236474797129631 2023-01-23 22:50:34.719660: step: 136/77, loss: 0.24166175723075867 2023-01-23 22:50:36.000786: step: 140/77, loss: 0.08192186802625656 2023-01-23 22:50:37.265636: step: 144/77, loss: 0.10944778472185135 2023-01-23 22:50:38.573769: step: 148/77, loss: 0.10985735058784485 2023-01-23 22:50:39.923737: step: 152/77, loss: 0.1409648358821869 2023-01-23 22:50:41.281718: step: 156/77, loss: 0.07578420639038086 2023-01-23 22:50:42.605459: step: 160/77, loss: 0.10720621794462204 2023-01-23 22:50:43.910159: step: 164/77, loss: 0.13833269476890564 2023-01-23 22:50:45.247535: step: 168/77, loss: 0.06345750391483307 2023-01-23 22:50:46.606142: step: 172/77, loss: 0.053549427539110184 2023-01-23 22:50:47.919783: step: 176/77, loss: 0.050545014441013336 2023-01-23 22:50:49.290506: step: 180/77, loss: 0.060750193893909454 2023-01-23 22:50:50.621848: step: 184/77, loss: 0.1301671266555786 2023-01-23 22:50:51.909237: step: 188/77, loss: 0.07948384433984756 2023-01-23 22:50:53.218804: step: 192/77, loss: 0.07410618662834167 2023-01-23 22:50:54.543693: step: 196/77, loss: 0.1077779084444046 2023-01-23 22:50:55.844058: step: 200/77, loss: 0.06981280446052551 2023-01-23 22:50:57.153390: step: 204/77, loss: 0.06006040796637535 2023-01-23 22:50:58.480778: step: 208/77, loss: 0.11348582059144974 2023-01-23 22:50:59.775099: step: 212/77, loss: 0.11600235104560852 2023-01-23 22:51:01.044740: step: 216/77, loss: 0.09750326722860336 2023-01-23 22:51:02.359484: step: 220/77, loss: 0.04756221920251846 2023-01-23 22:51:03.664524: step: 224/77, loss: 0.18947850167751312 2023-01-23 22:51:04.928115: step: 228/77, loss: 0.02307966724038124 2023-01-23 22:51:06.205839: step: 232/77, loss: 0.12566381692886353 2023-01-23 22:51:07.533114: step: 236/77, loss: 0.05205581337213516 2023-01-23 22:51:08.888454: step: 240/77, loss: 0.20388080179691315 2023-01-23 22:51:10.200414: step: 244/77, loss: 0.15827858448028564 2023-01-23 22:51:11.511771: step: 248/77, loss: 0.08616123348474503 2023-01-23 22:51:12.872032: step: 252/77, loss: 0.08386712521314621 2023-01-23 22:51:14.171921: step: 256/77, loss: 0.15348979830741882 2023-01-23 22:51:15.500420: step: 260/77, loss: 0.11783164739608765 2023-01-23 22:51:16.785663: step: 264/77, loss: 0.0749620795249939 2023-01-23 22:51:18.068789: step: 268/77, loss: 0.1427043080329895 2023-01-23 22:51:19.387674: step: 272/77, loss: 0.1360216736793518 2023-01-23 22:51:20.723491: step: 276/77, loss: 0.04299698770046234 2023-01-23 
22:51:22.031017: step: 280/77, loss: 0.032745327800512314 2023-01-23 22:51:23.383346: step: 284/77, loss: 0.06415650248527527 2023-01-23 22:51:24.708121: step: 288/77, loss: 0.02924133650958538 2023-01-23 22:51:26.059811: step: 292/77, loss: 0.1073332279920578 2023-01-23 22:51:27.342148: step: 296/77, loss: 0.035075489431619644 2023-01-23 22:51:28.613029: step: 300/77, loss: 0.14287783205509186 2023-01-23 22:51:29.904003: step: 304/77, loss: 0.289726197719574 2023-01-23 22:51:31.226819: step: 308/77, loss: 0.0739937275648117 2023-01-23 22:51:32.596454: step: 312/77, loss: 0.08734475821256638 2023-01-23 22:51:33.927683: step: 316/77, loss: 0.30002105236053467 2023-01-23 22:51:35.251414: step: 320/77, loss: 0.09770867228507996 2023-01-23 22:51:36.583399: step: 324/77, loss: 0.1629657745361328 2023-01-23 22:51:37.878644: step: 328/77, loss: 0.14150488376617432 2023-01-23 22:51:39.168425: step: 332/77, loss: 0.0872669443488121 2023-01-23 22:51:40.484058: step: 336/77, loss: 0.069187693297863 2023-01-23 22:51:41.773320: step: 340/77, loss: 0.06144079566001892 2023-01-23 22:51:43.114510: step: 344/77, loss: 0.21710869669914246 2023-01-23 22:51:44.405844: step: 348/77, loss: 0.10837674140930176 2023-01-23 22:51:45.710567: step: 352/77, loss: 0.050491150468587875 2023-01-23 22:51:47.086276: step: 356/77, loss: 0.08301954716444016 2023-01-23 22:51:48.386898: step: 360/77, loss: 0.08111888915300369 2023-01-23 22:51:49.712574: step: 364/77, loss: 0.18378156423568726 2023-01-23 22:51:51.021275: step: 368/77, loss: 0.08763030171394348 2023-01-23 22:51:52.352494: step: 372/77, loss: 0.12362384051084518 2023-01-23 22:51:53.636624: step: 376/77, loss: 0.16146932542324066 2023-01-23 22:51:54.928854: step: 380/77, loss: 0.11777414381504059 2023-01-23 22:51:56.229838: step: 384/77, loss: 0.11122996360063553 2023-01-23 22:51:57.545566: step: 388/77, loss: 0.056869350373744965 ================================================== Loss: 0.114 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 
0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:53:51.217583: step: 4/77, loss: 0.08978293836116791 2023-01-23 22:53:52.495468: step: 8/77, loss: 0.0897291973233223 2023-01-23 22:53:53.822793: step: 12/77, loss: 0.09467065334320068 2023-01-23 22:53:55.113283: step: 16/77, loss: 0.05945632606744766 2023-01-23 22:53:56.416406: step: 20/77, loss: 0.08201459795236588 2023-01-23 22:53:57.743565: step: 24/77, loss: 0.10762184858322144 2023-01-23 22:53:59.051908: step: 28/77, loss: 0.10683409869670868 2023-01-23 22:54:00.391687: step: 32/77, loss: 0.15592831373214722 2023-01-23 22:54:01.717521: step: 36/77, loss: 0.15853601694107056 2023-01-23 22:54:03.033023: step: 40/77, loss: 0.1696840524673462 2023-01-23 22:54:04.363118: step: 44/77, loss: 0.07610610127449036 2023-01-23 22:54:05.643880: step: 48/77, loss: 0.04936008155345917 2023-01-23 22:54:06.940210: step: 52/77, loss: 0.06662851572036743 2023-01-23 22:54:08.154391: step: 56/77, loss: 0.11901339143514633 2023-01-23 22:54:09.513182: step: 60/77, loss: 0.06335229426622391 2023-01-23 22:54:10.829682: step: 64/77, loss: 0.15157847106456757 2023-01-23 22:54:12.107255: step: 68/77, loss: 0.028425315394997597 2023-01-23 22:54:13.442826: step: 72/77, loss: 0.07254324853420258 2023-01-23 22:54:14.679111: step: 76/77, loss: 0.026222839951515198 2023-01-23 22:54:16.044618: step: 80/77, loss: 0.25892165303230286 2023-01-23 22:54:17.372808: step: 84/77, loss: 0.1859690099954605 2023-01-23 22:54:18.696814: step: 88/77, loss: 0.19821254909038544 2023-01-23 22:54:19.993879: step: 92/77, loss: 0.05598260462284088 2023-01-23 22:54:21.285296: step: 96/77, loss: 0.026238400489091873 2023-01-23 22:54:22.584265: step: 100/77, loss: 0.14643670618534088 2023-01-23 22:54:23.898227: step: 104/77, loss: 0.025064485147595406 2023-01-23 22:54:25.221595: step: 108/77, loss: 0.04936981201171875 2023-01-23 22:54:26.531091: step: 112/77, loss: 0.031216200441122055 2023-01-23 22:54:27.875539: step: 116/77, loss: 0.07248411327600479 2023-01-23 22:54:29.196813: step: 120/77, loss: 0.0912466049194336 2023-01-23 22:54:30.520293: step: 124/77, loss: 0.053715743124485016 2023-01-23 22:54:31.781748: step: 128/77, loss: 0.026041915640234947 2023-01-23 22:54:33.066894: step: 132/77, loss: 0.033120620995759964 2023-01-23 22:54:34.411450: step: 136/77, loss: 0.06037403270602226 2023-01-23 22:54:35.665724: step: 140/77, loss: 0.0261702761054039 2023-01-23 22:54:36.982077: step: 
144/77, loss: 0.07391418516635895 2023-01-23 22:54:38.308247: step: 148/77, loss: 0.04960927367210388 2023-01-23 22:54:39.589612: step: 152/77, loss: 0.045701827853918076 2023-01-23 22:54:40.934158: step: 156/77, loss: 0.10179883986711502 2023-01-23 22:54:42.224627: step: 160/77, loss: 0.020967040210962296 2023-01-23 22:54:43.511725: step: 164/77, loss: 0.09591569006443024 2023-01-23 22:54:44.810093: step: 168/77, loss: 0.03472476080060005 2023-01-23 22:54:46.091666: step: 172/77, loss: 0.03430356830358505 2023-01-23 22:54:47.390711: step: 176/77, loss: 0.08539354801177979 2023-01-23 22:54:48.676532: step: 180/77, loss: 0.012025153264403343 2023-01-23 22:54:49.941495: step: 184/77, loss: 0.12098461389541626 2023-01-23 22:54:51.223024: step: 188/77, loss: 0.08685184270143509 2023-01-23 22:54:52.536209: step: 192/77, loss: 0.008195833303034306 2023-01-23 22:54:53.825537: step: 196/77, loss: 0.06708987802267075 2023-01-23 22:54:55.103451: step: 200/77, loss: 0.03092000260949135 2023-01-23 22:54:56.381791: step: 204/77, loss: 0.028394218534231186 2023-01-23 22:54:57.738970: step: 208/77, loss: 0.06200258433818817 2023-01-23 22:54:59.015375: step: 212/77, loss: 0.028375042602419853 2023-01-23 22:55:00.333680: step: 216/77, loss: 0.03608822450041771 2023-01-23 22:55:01.684761: step: 220/77, loss: 0.13196060061454773 2023-01-23 22:55:02.994049: step: 224/77, loss: 0.04849873483181 2023-01-23 22:55:04.333964: step: 228/77, loss: 0.024536605924367905 2023-01-23 22:55:05.609619: step: 232/77, loss: 0.02170020341873169 2023-01-23 22:55:06.937596: step: 236/77, loss: 0.13406196236610413 2023-01-23 22:55:08.232513: step: 240/77, loss: 0.04485291987657547 2023-01-23 22:55:09.532445: step: 244/77, loss: 0.015026187524199486 2023-01-23 22:55:10.797973: step: 248/77, loss: 0.1131889820098877 2023-01-23 22:55:12.116659: step: 252/77, loss: 0.11656603217124939 2023-01-23 22:55:13.417370: step: 256/77, loss: 0.07655934244394302 2023-01-23 22:55:14.718187: step: 260/77, loss: 0.005910799838602543 2023-01-23 22:55:16.011376: step: 264/77, loss: 0.02708623930811882 2023-01-23 22:55:17.302939: step: 268/77, loss: 0.07830090820789337 2023-01-23 22:55:18.672447: step: 272/77, loss: 0.005537528544664383 2023-01-23 22:55:19.999444: step: 276/77, loss: 0.030812840908765793 2023-01-23 22:55:21.292135: step: 280/77, loss: 0.05393827706575394 2023-01-23 22:55:22.636134: step: 284/77, loss: 0.011534723453223705 2023-01-23 22:55:23.992387: step: 288/77, loss: 0.03256256505846977 2023-01-23 22:55:25.303816: step: 292/77, loss: 0.05714274197816849 2023-01-23 22:55:26.622980: step: 296/77, loss: 0.052277807146310806 2023-01-23 22:55:27.930106: step: 300/77, loss: 0.04839061200618744 2023-01-23 22:55:29.218546: step: 304/77, loss: 0.02827790565788746 2023-01-23 22:55:30.521400: step: 308/77, loss: 0.012759722769260406 2023-01-23 22:55:31.843920: step: 312/77, loss: 0.004941337741911411 2023-01-23 22:55:33.133543: step: 316/77, loss: 0.011740414425730705 2023-01-23 22:55:34.439385: step: 320/77, loss: 0.03277549147605896 2023-01-23 22:55:35.759819: step: 324/77, loss: 0.3591340184211731 2023-01-23 22:55:37.016927: step: 328/77, loss: 0.04585202783346176 2023-01-23 22:55:38.287540: step: 332/77, loss: 0.0037703411653637886 2023-01-23 22:55:39.566808: step: 336/77, loss: 0.0018533715046942234 2023-01-23 22:55:40.893751: step: 340/77, loss: 0.010116029530763626 2023-01-23 22:55:42.220866: step: 344/77, loss: 0.016683772206306458 2023-01-23 22:55:43.520024: step: 348/77, loss: 0.09151042997837067 2023-01-23 22:55:44.801266: step: 
352/77, loss: 0.13456642627716064 2023-01-23 22:55:46.108850: step: 356/77, loss: 0.05715903639793396 2023-01-23 22:55:47.394433: step: 360/77, loss: 0.02043786831200123 2023-01-23 22:55:48.716670: step: 364/77, loss: 0.026198705658316612 2023-01-23 22:55:49.979737: step: 368/77, loss: 0.0319807305932045 2023-01-23 22:55:51.345243: step: 372/77, loss: 0.017337413504719734 2023-01-23 22:55:52.700578: step: 376/77, loss: 0.05395495891571045 2023-01-23 22:55:54.047346: step: 380/77, loss: 0.08604675531387329 2023-01-23 22:55:55.345487: step: 384/77, loss: 0.03814491257071495 2023-01-23 22:55:56.693709: step: 388/77, loss: 0.039043229073286057 ================================================== Loss: 0.066 -------------------- Dev Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test Chinese: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test Korean: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test Russian: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
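From the evaluation blocks above, the 'combined' score appears to be the product of the template F1 and the slot F1, with each F1 the usual harmonic mean 2pr/(p+r). A minimal sketch in Python that reproduces the epoch-2 Dev Chinese figures; the helper names are illustrative and not taken from train.py:

def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    # 'combined' in the log matches template F1 * slot F1.
    return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])

# Epoch-2 Dev Chinese values taken from the log above.
template = {"p": 0.9722222222222222, "r": 0.5833333333333334}
slot = {"p": 0.5, "r": 0.03780718336483932}
print(f1(template["p"], template["r"]))   # ~0.7291666666666666
print(combined_score(template, slot))     # ~0.05125951962507322

The same relation holds for the test rows, e.g. 0.5402843601895734 * 0.033362598770851626 ~= 0.018025290331171017 for Test Chinese above.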
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test for Chinese: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test for Korean: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test for Russian: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:58:05.018654: step: 4/77, loss: 0.008569440804421902 2023-01-23 22:58:06.325053: step: 8/77, loss: 0.06104029715061188 2023-01-23 22:58:07.671524: step: 12/77, loss: 0.03824828565120697 2023-01-23 22:58:09.024458: step: 16/77, loss: 0.008139440789818764 2023-01-23 22:58:10.368780: step: 20/77, loss: 0.011608504690229893 2023-01-23 22:58:11.674130: step: 24/77, loss: 0.07698570191860199 2023-01-23 22:58:12.960052: step: 28/77, loss: 0.02622179500758648 2023-01-23 22:58:14.287193: step: 32/77, loss: 0.016003597527742386 2023-01-23 22:58:15.581994: step: 36/77, loss: 0.058047618716955185 2023-01-23 22:58:16.927188: step: 40/77, loss: 0.05063202604651451 2023-01-23 22:58:18.227710: step: 44/77, loss: 0.026681777089834213 2023-01-23 22:58:19.577260: step: 48/77, loss: 0.019537555053830147 2023-01-23 22:58:20.892688: step: 52/77, loss: 0.03459141403436661 2023-01-23 22:58:22.226499: step: 56/77, loss: 0.004173712804913521 2023-01-23 22:58:23.509426: step: 60/77, loss: 0.0021231891587376595 2023-01-23 22:58:24.831734: step: 64/77, loss: 0.05070459842681885 2023-01-23 22:58:26.122207: step: 68/77, loss: 0.019765764474868774 2023-01-23 22:58:27.393548: step: 72/77, loss: 0.011237685568630695 2023-01-23 22:58:28.709303: step: 76/77, loss: 0.01307184062898159 2023-01-23 22:58:30.004674: step: 
80/77, loss: 0.12346996366977692 2023-01-23 22:58:31.315667: step: 84/77, loss: 0.08720341324806213 2023-01-23 22:58:32.633363: step: 88/77, loss: 0.04888058453798294 2023-01-23 22:58:33.916751: step: 92/77, loss: 0.013795820064842701 2023-01-23 22:58:35.238638: step: 96/77, loss: 0.013095969334244728 2023-01-23 22:58:36.560970: step: 100/77, loss: 0.013364549726247787 2023-01-23 22:58:37.900885: step: 104/77, loss: 0.041961900889873505 2023-01-23 22:58:39.228774: step: 108/77, loss: 0.0419904962182045 2023-01-23 22:58:40.539252: step: 112/77, loss: 0.0536038838326931 2023-01-23 22:58:41.820549: step: 116/77, loss: 0.03844211995601654 2023-01-23 22:58:43.125239: step: 120/77, loss: 0.07768292725086212 2023-01-23 22:58:44.432881: step: 124/77, loss: 0.015956323593854904 2023-01-23 22:58:45.715087: step: 128/77, loss: 0.03323855251073837 2023-01-23 22:58:47.077293: step: 132/77, loss: 0.14282429218292236 2023-01-23 22:58:48.407292: step: 136/77, loss: 0.03105779178440571 2023-01-23 22:58:49.659124: step: 140/77, loss: 0.04877779632806778 2023-01-23 22:58:50.978405: step: 144/77, loss: 0.012325471267104149 2023-01-23 22:58:52.294318: step: 148/77, loss: 0.0034946876112371683 2023-01-23 22:58:53.623808: step: 152/77, loss: 0.05085130035877228 2023-01-23 22:58:54.987415: step: 156/77, loss: 0.04637058824300766 2023-01-23 22:58:56.310337: step: 160/77, loss: 0.01782352849841118 2023-01-23 22:58:57.652089: step: 164/77, loss: 0.00708090839907527 2023-01-23 22:58:59.021323: step: 168/77, loss: 0.03184691071510315 2023-01-23 22:59:00.325604: step: 172/77, loss: 0.10453486442565918 2023-01-23 22:59:01.620099: step: 176/77, loss: 0.0255681574344635 2023-01-23 22:59:02.918995: step: 180/77, loss: 0.026515550911426544 2023-01-23 22:59:04.223262: step: 184/77, loss: 0.042595818638801575 2023-01-23 22:59:05.537995: step: 188/77, loss: 0.01371677964925766 2023-01-23 22:59:06.845845: step: 192/77, loss: 0.0416012778878212 2023-01-23 22:59:08.150960: step: 196/77, loss: 0.08964186161756516 2023-01-23 22:59:09.467037: step: 200/77, loss: 0.006344792433083057 2023-01-23 22:59:10.756668: step: 204/77, loss: 0.11798113584518433 2023-01-23 22:59:12.068875: step: 208/77, loss: 0.028100017458200455 2023-01-23 22:59:13.380158: step: 212/77, loss: 0.0631520226597786 2023-01-23 22:59:14.717214: step: 216/77, loss: 0.03390933945775032 2023-01-23 22:59:16.028380: step: 220/77, loss: 0.16270163655281067 2023-01-23 22:59:17.305972: step: 224/77, loss: 0.03585375100374222 2023-01-23 22:59:18.641171: step: 228/77, loss: 0.05041792616248131 2023-01-23 22:59:19.953910: step: 232/77, loss: 0.11344284564256668 2023-01-23 22:59:21.276753: step: 236/77, loss: 0.04633091390132904 2023-01-23 22:59:22.538604: step: 240/77, loss: 0.005465330556035042 2023-01-23 22:59:23.863516: step: 244/77, loss: 0.052321139723062515 2023-01-23 22:59:25.134193: step: 248/77, loss: 0.004656112752854824 2023-01-23 22:59:26.463298: step: 252/77, loss: 0.0010259983828291297 2023-01-23 22:59:27.739409: step: 256/77, loss: 0.0053174905478954315 2023-01-23 22:59:29.048401: step: 260/77, loss: 0.017421012744307518 2023-01-23 22:59:30.350530: step: 264/77, loss: 0.0311984121799469 2023-01-23 22:59:31.647308: step: 268/77, loss: 0.042203038930892944 2023-01-23 22:59:32.921241: step: 272/77, loss: 0.06962239742279053 2023-01-23 22:59:34.232353: step: 276/77, loss: 0.10471386462450027 2023-01-23 22:59:35.577193: step: 280/77, loss: 0.0454871691763401 2023-01-23 22:59:36.890468: step: 284/77, loss: 0.08277568221092224 2023-01-23 22:59:38.191232: step: 288/77, 
loss: 0.022217385470867157 2023-01-23 22:59:39.455896: step: 292/77, loss: 0.022832242771983147 2023-01-23 22:59:40.802670: step: 296/77, loss: 0.0851108729839325 2023-01-23 22:59:42.131309: step: 300/77, loss: 0.04043712839484215 2023-01-23 22:59:43.456634: step: 304/77, loss: 0.034135933965444565 2023-01-23 22:59:44.838330: step: 308/77, loss: 0.019902069121599197 2023-01-23 22:59:46.162669: step: 312/77, loss: 0.028999945148825645 2023-01-23 22:59:47.523155: step: 316/77, loss: 0.011801834218204021 2023-01-23 22:59:48.877356: step: 320/77, loss: 0.003783810418099165 2023-01-23 22:59:50.158049: step: 324/77, loss: 0.08780589699745178 2023-01-23 22:59:51.475258: step: 328/77, loss: 0.034666549414396286 2023-01-23 22:59:52.806095: step: 332/77, loss: 0.017708729952573776 2023-01-23 22:59:54.112485: step: 336/77, loss: 0.006578100845217705 2023-01-23 22:59:55.457433: step: 340/77, loss: 0.0038012894801795483 2023-01-23 22:59:56.780417: step: 344/77, loss: 0.016202326864004135 2023-01-23 22:59:58.138945: step: 348/77, loss: 0.07350071519613266 2023-01-23 22:59:59.478680: step: 352/77, loss: 0.031798187643289566 2023-01-23 23:00:00.823701: step: 356/77, loss: 0.04961419478058815 2023-01-23 23:00:02.114370: step: 360/77, loss: 0.03064483404159546 2023-01-23 23:00:03.379114: step: 364/77, loss: 0.01778862625360489 2023-01-23 23:00:04.695984: step: 368/77, loss: 0.05175955593585968 2023-01-23 23:00:05.999525: step: 372/77, loss: 0.08155137300491333 2023-01-23 23:00:07.311899: step: 376/77, loss: 0.20118819177150726 2023-01-23 23:00:08.660055: step: 380/77, loss: 0.04779066890478134 2023-01-23 23:00:09.988289: step: 384/77, loss: 0.043782129883766174 2023-01-23 23:00:11.292439: step: 388/77, loss: 0.021426646038889885 ================================================== Loss: 0.042 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 
'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:02:32.056786: step: 4/77, loss: 0.04137624800205231 2023-01-23 23:02:33.364029: step: 8/77, loss: 0.038542598485946655 2023-01-23 23:02:34.643689: step: 12/77, loss: 0.009367510676383972 2023-01-23 23:02:35.921845: step: 16/77, loss: 0.022862209007143974 2023-01-23 23:02:37.280096: step: 20/77, loss: 0.08002949506044388 2023-01-23 23:02:38.616628: step: 24/77, loss: 0.0319170281291008 2023-01-23 23:02:39.957588: step: 28/77, loss: 0.0006610316340811551 2023-01-23 23:02:41.230426: step: 32/77, loss: 0.02024516463279724 2023-01-23 23:02:42.481567: step: 36/77, loss: 0.01656205765902996 2023-01-23 23:02:43.760598: step: 40/77, loss: 0.013603459112346172 2023-01-23 23:02:45.113761: step: 44/77, loss: 0.002709874650463462 2023-01-23 23:02:46.430599: step: 48/77, loss: 0.06883476674556732 2023-01-23 23:02:47.731232: step: 52/77, loss: 0.01029855664819479 2023-01-23 23:02:49.081144: step: 56/77, loss: 0.07245709747076035 2023-01-23 23:02:50.375658: step: 60/77, loss: 0.012648254632949829 2023-01-23 23:02:51.667832: step: 64/77, loss: 0.02669510245323181 2023-01-23 23:02:52.914074: 
step: 68/77, loss: 0.028342055156826973 2023-01-23 23:02:54.195017: step: 72/77, loss: 0.07287586480379105 2023-01-23 23:02:55.522466: step: 76/77, loss: 0.011641661636531353 2023-01-23 23:02:56.810954: step: 80/77, loss: 0.03511600196361542 2023-01-23 23:02:58.092256: step: 84/77, loss: 0.009830279275774956 2023-01-23 23:02:59.359794: step: 88/77, loss: 0.008189433254301548 2023-01-23 23:03:00.712589: step: 92/77, loss: 0.01783733069896698 2023-01-23 23:03:02.025243: step: 96/77, loss: 0.00765819801017642 2023-01-23 23:03:03.354343: step: 100/77, loss: 0.023452438414096832 2023-01-23 23:03:04.674989: step: 104/77, loss: 0.019945833832025528 2023-01-23 23:03:05.983766: step: 108/77, loss: 0.02258324809372425 2023-01-23 23:03:07.226874: step: 112/77, loss: 0.020979253575205803 2023-01-23 23:03:08.521053: step: 116/77, loss: 0.015185080468654633 2023-01-23 23:03:09.811570: step: 120/77, loss: 0.1529376208782196 2023-01-23 23:03:11.134234: step: 124/77, loss: 0.07699036598205566 2023-01-23 23:03:12.509280: step: 128/77, loss: 0.0060681188479065895 2023-01-23 23:03:13.846086: step: 132/77, loss: 0.02876776084303856 2023-01-23 23:03:15.145797: step: 136/77, loss: 0.007397600449621677 2023-01-23 23:03:16.403538: step: 140/77, loss: 0.04085937887430191 2023-01-23 23:03:17.709947: step: 144/77, loss: 0.01059473305940628 2023-01-23 23:03:19.010333: step: 148/77, loss: 0.028688378632068634 2023-01-23 23:03:20.308493: step: 152/77, loss: 0.007237972691655159 2023-01-23 23:03:21.633595: step: 156/77, loss: 0.020491447299718857 2023-01-23 23:03:22.935797: step: 160/77, loss: 0.02271532267332077 2023-01-23 23:03:24.247358: step: 164/77, loss: 0.05541416257619858 2023-01-23 23:03:25.518750: step: 168/77, loss: 0.016528375446796417 2023-01-23 23:03:26.839752: step: 172/77, loss: 0.06430349498987198 2023-01-23 23:03:28.175944: step: 176/77, loss: 0.04448110982775688 2023-01-23 23:03:29.486936: step: 180/77, loss: 0.008215603418648243 2023-01-23 23:03:30.787176: step: 184/77, loss: 0.0498255118727684 2023-01-23 23:03:32.105751: step: 188/77, loss: 0.013322685845196247 2023-01-23 23:03:33.451942: step: 192/77, loss: 0.03279253840446472 2023-01-23 23:03:34.774472: step: 196/77, loss: 0.008535334840416908 2023-01-23 23:03:36.136127: step: 200/77, loss: 0.020143844187259674 2023-01-23 23:03:37.437369: step: 204/77, loss: 0.015944061800837517 2023-01-23 23:03:38.705910: step: 208/77, loss: 0.023468907922506332 2023-01-23 23:03:40.019309: step: 212/77, loss: 0.017244024202227592 2023-01-23 23:03:41.340963: step: 216/77, loss: 0.039418622851371765 2023-01-23 23:03:42.673333: step: 220/77, loss: 0.019093813374638557 2023-01-23 23:03:43.991889: step: 224/77, loss: 0.014170338399708271 2023-01-23 23:03:45.313919: step: 228/77, loss: 0.010956652462482452 2023-01-23 23:03:46.659427: step: 232/77, loss: 0.06017700955271721 2023-01-23 23:03:47.915082: step: 236/77, loss: 0.012261530384421349 2023-01-23 23:03:49.199962: step: 240/77, loss: 0.006446256302297115 2023-01-23 23:03:50.496789: step: 244/77, loss: 0.0848538726568222 2023-01-23 23:03:51.784168: step: 248/77, loss: 0.012097777798771858 2023-01-23 23:03:53.075904: step: 252/77, loss: 0.04693004861474037 2023-01-23 23:03:54.380202: step: 256/77, loss: 0.056142307817935944 2023-01-23 23:03:55.664513: step: 260/77, loss: 0.042162343859672546 2023-01-23 23:03:57.006542: step: 264/77, loss: 0.06296426802873611 2023-01-23 23:03:58.348463: step: 268/77, loss: 0.005121381487697363 2023-01-23 23:03:59.622080: step: 272/77, loss: 0.0776234120130539 2023-01-23 
23:04:00.972520: step: 276/77, loss: 0.1735001653432846 2023-01-23 23:04:02.280391: step: 280/77, loss: 0.042865149676799774 2023-01-23 23:04:03.575570: step: 284/77, loss: 0.004002364352345467 2023-01-23 23:04:04.867110: step: 288/77, loss: 0.020344601944088936 2023-01-23 23:04:06.187208: step: 292/77, loss: 0.06679122149944305 2023-01-23 23:04:07.494502: step: 296/77, loss: 0.003160992171615362 2023-01-23 23:04:08.803057: step: 300/77, loss: 0.001810312969610095 2023-01-23 23:04:10.102291: step: 304/77, loss: 0.031477395445108414 2023-01-23 23:04:11.386270: step: 308/77, loss: 0.09544059634208679 2023-01-23 23:04:12.694409: step: 312/77, loss: 0.038570526987314224 2023-01-23 23:04:14.016893: step: 316/77, loss: 0.012956952676177025 2023-01-23 23:04:15.301798: step: 320/77, loss: 0.13954588770866394 2023-01-23 23:04:16.655296: step: 324/77, loss: 0.07902668416500092 2023-01-23 23:04:17.963841: step: 328/77, loss: 0.018463322892785072 2023-01-23 23:04:19.273838: step: 332/77, loss: 0.019219795241951942 2023-01-23 23:04:20.572363: step: 336/77, loss: 0.05267266929149628 2023-01-23 23:04:21.897293: step: 340/77, loss: 0.020226020365953445 2023-01-23 23:04:23.238166: step: 344/77, loss: 0.017935145646333694 2023-01-23 23:04:24.524281: step: 348/77, loss: 0.12345309555530548 2023-01-23 23:04:25.814709: step: 352/77, loss: 0.07177512347698212 2023-01-23 23:04:27.153515: step: 356/77, loss: 0.05010971054434776 2023-01-23 23:04:28.488513: step: 360/77, loss: 0.05248473584651947 2023-01-23 23:04:29.800699: step: 364/77, loss: 0.1049971655011177 2023-01-23 23:04:31.197042: step: 368/77, loss: 0.02946937084197998 2023-01-23 23:04:32.511932: step: 372/77, loss: 0.023977501317858696 2023-01-23 23:04:33.813008: step: 376/77, loss: 0.05141597241163254 2023-01-23 23:04:35.098825: step: 380/77, loss: 0.02865361049771309 2023-01-23 23:04:36.366808: step: 384/77, loss: 0.08176169544458389 2023-01-23 23:04:37.664016: step: 388/77, loss: 0.0032761467155069113 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 4} Test Chinese: {'template': {'p': 0.9245283018867925, 'r': 0.37404580152671757, 'f1': 0.5326086956521738}, 'slot': {'p': 0.5625, 'r': 0.00818926296633303, 'f1': 0.016143497757847534}, 'combined': 0.008598167284070968, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 4} Test Korean: {'template': {'p': 0.9245283018867925, 'r': 0.37404580152671757, 'f1': 0.5326086956521738}, 'slot': {'p': 0.5625, 'r': 0.00818926296633303, 'f1': 0.016143497757847534}, 'combined': 0.008598167284070968, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 4} Test Russian: {'template': {'p': 0.9245283018867925, 'r': 0.37404580152671757, 'f1': 0.5326086956521738}, 'slot': {'p': 0.5625, 'r': 0.00818926296633303, 'f1': 0.016143497757847534}, 'combined': 0.008598167284070968, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 
'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:06:31.591294: step: 4/77, loss: 0.02670639380812645 2023-01-23 23:06:32.888355: step: 8/77, loss: 0.011545292101800442 2023-01-23 23:06:34.193490: step: 12/77, loss: 0.02020694874227047 2023-01-23 23:06:35.548386: step: 16/77, loss: 0.027690250426530838 2023-01-23 23:06:36.830422: step: 20/77, loss: 0.02767796441912651 2023-01-23 23:06:38.176068: step: 24/77, loss: 0.009519163519144058 2023-01-23 23:06:39.474277: step: 28/77, loss: 0.025326617062091827 2023-01-23 23:06:40.809950: step: 32/77, loss: 0.04947042465209961 2023-01-23 23:06:42.166795: step: 36/77, loss: 0.0008288826793432236 2023-01-23 23:06:43.488299: step: 40/77, loss: 0.02415475994348526 2023-01-23 23:06:44.764702: step: 44/77, loss: 0.03522857651114464 2023-01-23 23:06:46.086987: step: 48/77, loss: 0.0046325488947331905 2023-01-23 23:06:47.416615: step: 52/77, loss: 0.04867120087146759 2023-01-23 23:06:48.688200: step: 56/77, loss: 0.004921335726976395 2023-01-23 
23:06:50.006865: step: 60/77, loss: 0.027160894125699997 2023-01-23 23:06:51.339957: step: 64/77, loss: 0.01739886961877346 2023-01-23 23:06:52.643048: step: 68/77, loss: 0.0029781265184283257 2023-01-23 23:06:53.926563: step: 72/77, loss: 0.011755847372114658 2023-01-23 23:06:55.219660: step: 76/77, loss: 0.002122030593454838 2023-01-23 23:06:56.512390: step: 80/77, loss: 0.0113228689879179 2023-01-23 23:06:57.805582: step: 84/77, loss: 0.038615792989730835 2023-01-23 23:06:59.141349: step: 88/77, loss: 0.04461674764752388 2023-01-23 23:07:00.470811: step: 92/77, loss: 0.010664014145731926 2023-01-23 23:07:01.775543: step: 96/77, loss: 0.03429734334349632 2023-01-23 23:07:03.091972: step: 100/77, loss: 0.005036661867052317 2023-01-23 23:07:04.354862: step: 104/77, loss: 0.026485158130526543 2023-01-23 23:07:05.709262: step: 108/77, loss: 0.019079767167568207 2023-01-23 23:07:07.006448: step: 112/77, loss: 0.015616081655025482 2023-01-23 23:07:08.342676: step: 116/77, loss: 0.042128726840019226 2023-01-23 23:07:09.643762: step: 120/77, loss: 0.009291108697652817 2023-01-23 23:07:10.930492: step: 124/77, loss: 0.02643146552145481 2023-01-23 23:07:12.235818: step: 128/77, loss: 9.87911771517247e-05 2023-01-23 23:07:13.573298: step: 132/77, loss: 0.0171172134578228 2023-01-23 23:07:14.887353: step: 136/77, loss: 0.07349997013807297 2023-01-23 23:07:16.197984: step: 140/77, loss: 0.01097109168767929 2023-01-23 23:07:17.513719: step: 144/77, loss: 0.020437665283679962 2023-01-23 23:07:18.772324: step: 148/77, loss: 0.011600209400057793 2023-01-23 23:07:20.107060: step: 152/77, loss: 0.0519060380756855 2023-01-23 23:07:21.460160: step: 156/77, loss: 0.013362875208258629 2023-01-23 23:07:22.685257: step: 160/77, loss: 0.025527773424983025 2023-01-23 23:07:24.014796: step: 164/77, loss: 0.004494365304708481 2023-01-23 23:07:25.328537: step: 168/77, loss: 0.03141060471534729 2023-01-23 23:07:26.592424: step: 172/77, loss: 0.02037169598042965 2023-01-23 23:07:27.920138: step: 176/77, loss: 0.024241285398602486 2023-01-23 23:07:29.243341: step: 180/77, loss: 0.012550034560263157 2023-01-23 23:07:30.587415: step: 184/77, loss: 0.06026304140686989 2023-01-23 23:07:31.954489: step: 188/77, loss: 0.012313876301050186 2023-01-23 23:07:33.262195: step: 192/77, loss: 0.016531143337488174 2023-01-23 23:07:34.606724: step: 196/77, loss: 0.043189384043216705 2023-01-23 23:07:35.950929: step: 200/77, loss: 0.013896309770643711 2023-01-23 23:07:37.289755: step: 204/77, loss: 0.0076545728370547295 2023-01-23 23:07:38.609773: step: 208/77, loss: 0.043936245143413544 2023-01-23 23:07:39.889855: step: 212/77, loss: 0.0009481979068368673 2023-01-23 23:07:41.175300: step: 216/77, loss: 0.040946513414382935 2023-01-23 23:07:42.464018: step: 220/77, loss: 0.021142808720469475 2023-01-23 23:07:43.790695: step: 224/77, loss: 0.039410270750522614 2023-01-23 23:07:45.109370: step: 228/77, loss: 0.004747491329908371 2023-01-23 23:07:46.416249: step: 232/77, loss: 0.025292804464697838 2023-01-23 23:07:47.735067: step: 236/77, loss: 0.05518924817442894 2023-01-23 23:07:49.081167: step: 240/77, loss: 0.005559179000556469 2023-01-23 23:07:50.412701: step: 244/77, loss: 0.018109621480107307 2023-01-23 23:07:51.696089: step: 248/77, loss: 0.1298125833272934 2023-01-23 23:07:53.025803: step: 252/77, loss: 0.022552266716957092 2023-01-23 23:07:54.334909: step: 256/77, loss: 0.17460715770721436 2023-01-23 23:07:55.675581: step: 260/77, loss: 0.02985825017094612 2023-01-23 23:07:56.978550: step: 264/77, loss: 0.019722308963537216 
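Each per-step line in these epochs has a fixed shape: an ISO-style timestamp, a batch counter that advances by 4 between consecutive prints (the same value as --accumulate_step 4), and the loss for that batch; note the counter runs well past the '/77' denominator, so that figure is evidently not the number of printed steps per epoch. A minimal sketch of a loop that would emit lines in this format, assuming a standard PyTorch-style setup; model, loader, optimizer and total are placeholders, not names from train.py:

import datetime

ACCUMULATE_STEP = 4  # from the command line above

def train_one_epoch(model, loader, optimizer, total):
    model.train()
    for i, batch in enumerate(loader, start=1):
        loss = model(batch)                    # assumed: forward pass returns a 0-dim loss tensor
        (loss / ACCUMULATE_STEP).backward()    # scale gradients for accumulation
        if i % ACCUMULATE_STEP == 0:           # update and log every 4th batch, matching the log cadence
            optimizer.step()
            optimizer.zero_grad()
            # str(datetime.datetime.now()) yields "2023-01-23 22:50:13.466504"-style timestamps
            print(f"{datetime.datetime.now()}: step: {i}/{total}, loss: {loss.item()}")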
2023-01-23 23:07:58.288642: step: 268/77, loss: 0.009370415471494198 2023-01-23 23:07:59.581177: step: 272/77, loss: 0.05955535173416138 2023-01-23 23:08:00.942115: step: 276/77, loss: 0.11409030854701996 2023-01-23 23:08:02.284519: step: 280/77, loss: 0.025609876960515976 2023-01-23 23:08:03.593027: step: 284/77, loss: 0.0021118037402629852 2023-01-23 23:08:04.842821: step: 288/77, loss: 0.018015822395682335 2023-01-23 23:08:06.133518: step: 292/77, loss: 0.026650303974747658 2023-01-23 23:08:07.461111: step: 296/77, loss: 0.044930506497621536 2023-01-23 23:08:08.775529: step: 300/77, loss: 0.1032029390335083 2023-01-23 23:08:10.098447: step: 304/77, loss: 0.005074769724160433 2023-01-23 23:08:11.381203: step: 308/77, loss: 0.036885812878608704 2023-01-23 23:08:12.748717: step: 312/77, loss: 0.006821052171289921 2023-01-23 23:08:14.062177: step: 316/77, loss: 0.036779746413230896 2023-01-23 23:08:15.394699: step: 320/77, loss: 0.009868866764008999 2023-01-23 23:08:16.722292: step: 324/77, loss: 0.026132123544812202 2023-01-23 23:08:18.070059: step: 328/77, loss: 0.0059155626222491264 2023-01-23 23:08:19.387909: step: 332/77, loss: 0.022484319284558296 2023-01-23 23:08:20.755189: step: 336/77, loss: 0.04049109295010567 2023-01-23 23:08:22.059028: step: 340/77, loss: 0.039941079914569855 2023-01-23 23:08:23.407054: step: 344/77, loss: 0.028673361986875534 2023-01-23 23:08:24.767962: step: 348/77, loss: 0.05313975736498833 2023-01-23 23:08:26.106340: step: 352/77, loss: 0.03900352865457535 2023-01-23 23:08:27.412546: step: 356/77, loss: 0.04854501783847809 2023-01-23 23:08:28.679809: step: 360/77, loss: 0.05382800102233887 2023-01-23 23:08:30.002481: step: 364/77, loss: 0.001742333872243762 2023-01-23 23:08:31.312172: step: 368/77, loss: 0.024771321564912796 2023-01-23 23:08:32.599742: step: 372/77, loss: 0.03442703187465668 2023-01-23 23:08:33.951145: step: 376/77, loss: 0.03055429458618164 2023-01-23 23:08:35.309212: step: 380/77, loss: 0.0014426014386117458 2023-01-23 23:08:36.594435: step: 384/77, loss: 0.035622939467430115 2023-01-23 23:08:37.886851: step: 388/77, loss: 0.0279096569865942 ================================================== Loss: 0.029 -------------------- Dev Chinese: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05014287031062771, 'epoch': 5} Test Chinese: {'template': {'p': 0.8888888888888888, 'r': 0.42748091603053434, 'f1': 0.5773195876288659}, 'slot': {'p': 0.5862068965517241, 'r': 0.015468607825295723, 'f1': 0.030141843971631208}, 'combined': 0.017401476932075746, 'epoch': 5} Dev Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05014287031062771, 'epoch': 5} Test Korean: {'template': {'p': 0.8870967741935484, 'r': 0.4198473282442748, 'f1': 0.5699481865284974}, 'slot': {'p': 0.5714285714285714, 'r': 0.014558689717925387, 'f1': 0.028393966282165038}, 'combined': 0.016183089590871266, 'epoch': 5} Dev Russian: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05014287031062771, 'epoch': 5} Test Russian: {'template': {'p': 0.8888888888888888, 'r': 0.42748091603053434, 'f1': 0.5773195876288659}, 'slot': {'p': 0.5862068965517241, 'r': 
0.015468607825295723, 'f1': 0.030141843971631208}, 'combined': 0.017401476932075746, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:10:31.333377: step: 4/77, loss: 0.0005450226599350572 2023-01-23 23:10:32.622079: step: 8/77, loss: 0.005259404890239239 2023-01-23 23:10:33.921768: step: 12/77, loss: 0.019649960100650787 2023-01-23 23:10:35.201534: step: 16/77, loss: 0.0025430868845432997 2023-01-23 23:10:36.538990: step: 20/77, loss: 0.004815719556063414 2023-01-23 23:10:37.813629: step: 24/77, loss: 0.00034457709989510477 2023-01-23 23:10:39.170722: step: 28/77, loss: 0.01655828207731247 2023-01-23 23:10:40.474723: step: 32/77, loss: 0.016402754932641983 2023-01-23 23:10:41.800281: step: 36/77, loss: 0.006076469086110592 2023-01-23 23:10:43.133652: step: 40/77, loss: 0.019712939858436584 
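The 'Current best result' blocks track, separately for each language, the scores of the epoch with the highest dev 'combined' seen so far: epoch 3 (dev combined 0.05179909351586346) stays in place through epochs 4 and 5 because neither improves on it, and a 'New best <language> model...' line is printed only when that record is beaten. A minimal sketch of the selection logic, assuming per-language result dicts shaped like the ones printed above; the function and variable names are illustrative, not from train.py:

best = {}  # best[lang] = {'dev': ..., 'test': ..., 'sample': ..., 'epoch': ...}

def update_best(lang, dev, test, sample, epoch):
    # Keep the epoch with the highest dev 'combined' so far; announce when it improves.
    current = best.get(lang)
    if current is None or dev["combined"] > current["dev"]["combined"]:
        best[lang] = {"dev": dev, "test": test, "sample": sample, "epoch": epoch}
        print(f"New best {lang} model...")

Called once per language after each epoch's evaluation, this reproduces the pattern above: epochs 2 and 3 each trigger the three 'New best ... model...' lines, while epochs 4 and 5 leave the epoch-3 entry unchanged.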
2023-01-23 23:10:44.470614: step: 44/77, loss: 0.057978931814432144 2023-01-23 23:10:45.757207: step: 48/77, loss: 0.010279682464897633 2023-01-23 23:10:47.110868: step: 52/77, loss: 0.028372686356306076 2023-01-23 23:10:48.416944: step: 56/77, loss: 0.030109090730547905 2023-01-23 23:10:49.702027: step: 60/77, loss: 0.0027865376323461533 2023-01-23 23:10:50.992434: step: 64/77, loss: 0.03058498352766037 2023-01-23 23:10:52.290793: step: 68/77, loss: 0.05856921151280403 2023-01-23 23:10:53.633343: step: 72/77, loss: 0.02341497875750065 2023-01-23 23:10:54.926440: step: 76/77, loss: 0.07214264571666718 2023-01-23 23:10:56.206895: step: 80/77, loss: 0.0024079028517007828 2023-01-23 23:10:57.522164: step: 84/77, loss: 0.02960231341421604 2023-01-23 23:10:58.773184: step: 88/77, loss: 0.041948989033699036 2023-01-23 23:11:00.083730: step: 92/77, loss: 0.0790402740240097 2023-01-23 23:11:01.379341: step: 96/77, loss: 0.025003444403409958 2023-01-23 23:11:02.686281: step: 100/77, loss: 0.03993772715330124 2023-01-23 23:11:03.977989: step: 104/77, loss: 0.0716061219573021 2023-01-23 23:11:05.264864: step: 108/77, loss: 0.010484387166798115 2023-01-23 23:11:06.597844: step: 112/77, loss: 0.004315068945288658 2023-01-23 23:11:07.880418: step: 116/77, loss: 0.0012634468730539083 2023-01-23 23:11:09.223146: step: 120/77, loss: 0.005074080545455217 2023-01-23 23:11:10.522023: step: 124/77, loss: 0.04028032720088959 2023-01-23 23:11:11.851712: step: 128/77, loss: 0.0018830453045666218 2023-01-23 23:11:13.150445: step: 132/77, loss: 0.0014695585705339909 2023-01-23 23:11:14.427531: step: 136/77, loss: 0.023692918941378593 2023-01-23 23:11:15.750661: step: 140/77, loss: 0.0029780231416225433 2023-01-23 23:11:17.058903: step: 144/77, loss: 0.016965823248028755 2023-01-23 23:11:18.366609: step: 148/77, loss: 0.06457682698965073 2023-01-23 23:11:19.622562: step: 152/77, loss: 0.007344848942011595 2023-01-23 23:11:20.897358: step: 156/77, loss: 0.012553246691823006 2023-01-23 23:11:22.186165: step: 160/77, loss: 0.03706691041588783 2023-01-23 23:11:23.481940: step: 164/77, loss: 0.03157980740070343 2023-01-23 23:11:24.755692: step: 168/77, loss: 0.009069087915122509 2023-01-23 23:11:26.072550: step: 172/77, loss: 0.15711651742458344 2023-01-23 23:11:27.428484: step: 176/77, loss: 0.01679708994925022 2023-01-23 23:11:28.718861: step: 180/77, loss: 0.043935131281614304 2023-01-23 23:11:30.034976: step: 184/77, loss: 0.00011045370774809271 2023-01-23 23:11:31.322654: step: 188/77, loss: 0.06068781763315201 2023-01-23 23:11:32.593779: step: 192/77, loss: 0.020912062376737595 2023-01-23 23:11:33.871120: step: 196/77, loss: 0.050123170018196106 2023-01-23 23:11:35.151121: step: 200/77, loss: 0.032013922929763794 2023-01-23 23:11:36.468765: step: 204/77, loss: 0.0369759202003479 2023-01-23 23:11:37.816712: step: 208/77, loss: 0.016368011012673378 2023-01-23 23:11:39.128644: step: 212/77, loss: 0.007947578094899654 2023-01-23 23:11:40.471998: step: 216/77, loss: 0.015159336850047112 2023-01-23 23:11:41.789101: step: 220/77, loss: 0.04020078480243683 2023-01-23 23:11:43.133930: step: 224/77, loss: 0.007312280125916004 2023-01-23 23:11:44.455861: step: 228/77, loss: 0.02025928534567356 2023-01-23 23:11:45.772899: step: 232/77, loss: 0.004918534308671951 2023-01-23 23:11:47.120997: step: 236/77, loss: 0.025332044810056686 2023-01-23 23:11:48.463505: step: 240/77, loss: 0.054991841316223145 2023-01-23 23:11:49.762834: step: 244/77, loss: 0.0054244897328317165 2023-01-23 23:11:51.051003: step: 248/77, loss: 
0.04276290535926819 2023-01-23 23:11:52.305424: step: 252/77, loss: 0.004564788192510605 2023-01-23 23:11:53.662749: step: 256/77, loss: 0.010132751427590847 2023-01-23 23:11:54.995645: step: 260/77, loss: 0.05739155039191246 2023-01-23 23:11:56.248987: step: 264/77, loss: 0.055207569152116776 2023-01-23 23:11:57.580573: step: 268/77, loss: 0.01842823065817356 2023-01-23 23:11:58.874776: step: 272/77, loss: 0.004211059771478176 2023-01-23 23:12:00.181310: step: 276/77, loss: 0.0071431114338338375 2023-01-23 23:12:01.515301: step: 280/77, loss: 0.016630031168460846 2023-01-23 23:12:02.854365: step: 284/77, loss: 0.0799979418516159 2023-01-23 23:12:04.152166: step: 288/77, loss: 0.04651796817779541 2023-01-23 23:12:05.454702: step: 292/77, loss: 0.0067449212074279785 2023-01-23 23:12:06.808965: step: 296/77, loss: 0.0007620899705216289 2023-01-23 23:12:08.140254: step: 300/77, loss: 0.05327056720852852 2023-01-23 23:12:09.478468: step: 304/77, loss: 0.000819915032479912 2023-01-23 23:12:10.772477: step: 308/77, loss: 0.06647086143493652 2023-01-23 23:12:12.083797: step: 312/77, loss: 0.04102443531155586 2023-01-23 23:12:13.434812: step: 316/77, loss: 0.009362781420350075 2023-01-23 23:12:14.749122: step: 320/77, loss: 0.006439851596951485 2023-01-23 23:12:16.050771: step: 324/77, loss: 0.0069610318168997765 2023-01-23 23:12:17.367560: step: 328/77, loss: 0.02070482261478901 2023-01-23 23:12:18.684838: step: 332/77, loss: 0.004380959086120129 2023-01-23 23:12:20.001529: step: 336/77, loss: 0.02864585630595684 2023-01-23 23:12:21.306914: step: 340/77, loss: 0.018239619210362434 2023-01-23 23:12:22.638812: step: 344/77, loss: 0.0008536122040823102 2023-01-23 23:12:23.966296: step: 348/77, loss: 0.015499288216233253 2023-01-23 23:12:25.288509: step: 352/77, loss: 0.08047214150428772 2023-01-23 23:12:26.596261: step: 356/77, loss: 0.06944243609905243 2023-01-23 23:12:27.956895: step: 360/77, loss: 0.0006930158706381917 2023-01-23 23:12:29.225997: step: 364/77, loss: 0.01632937416434288 2023-01-23 23:12:30.532133: step: 368/77, loss: 0.10148210823535919 2023-01-23 23:12:31.900788: step: 372/77, loss: 0.024788234382867813 2023-01-23 23:12:33.231908: step: 376/77, loss: 0.024317339062690735 2023-01-23 23:12:34.487635: step: 380/77, loss: 0.0365629717707634 2023-01-23 23:12:35.820293: step: 384/77, loss: 0.010954131372272968 2023-01-23 23:12:37.130197: step: 388/77, loss: 0.002385278232395649 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Chinese: {'template': {'p': 0.9016393442622951, 'r': 0.4198473282442748, 'f1': 0.5729166666666666}, 'slot': {'p': 0.5, 'r': 0.006369426751592357, 'f1': 0.012578616352201259}, 'combined': 0.0072064989517819705, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Korean: {'template': {'p': 0.9016393442622951, 'r': 0.4198473282442748, 'f1': 0.5729166666666666}, 'slot': {'p': 0.5, 'r': 0.006369426751592357, 'f1': 0.012578616352201259}, 'combined': 0.0072064989517819705, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 6} Test Russian: {'template': {'p': 0.9016393442622951, 'r': 0.4198473282442748, 'f1': 0.5729166666666666}, 'slot': {'p': 0.5, 'r': 0.006369426751592357, 'f1': 0.012578616352201259}, 'combined': 0.0072064989517819705, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:14:30.995294: step: 4/77, loss: 0.005366637371480465 2023-01-23 23:14:32.318284: step: 8/77, loss: 0.12268777936697006 2023-01-23 23:14:33.587925: step: 12/77, loss: 0.09095238894224167 2023-01-23 23:14:34.874658: step: 16/77, loss: 0.029042690992355347 2023-01-23 23:14:36.183804: step: 20/77, loss: 0.03922734782099724 2023-01-23 23:14:37.411626: step: 24/77, loss: 0.046124693006277084 2023-01-23 23:14:38.731046: step: 28/77, loss: 0.013856401666998863 2023-01-23 23:14:40.030960: step: 32/77, loss: 
0.008207427337765694 2023-01-23 23:14:41.322442: step: 36/77, loss: 0.0077070193365216255 2023-01-23 23:14:42.586474: step: 40/77, loss: 0.017933424562215805 2023-01-23 23:14:43.873908: step: 44/77, loss: 0.05957060679793358 2023-01-23 23:14:45.170034: step: 48/77, loss: 0.006128543522208929 2023-01-23 23:14:46.490767: step: 52/77, loss: 0.018483951687812805 2023-01-23 23:14:47.821773: step: 56/77, loss: 0.0024321037344634533 2023-01-23 23:14:49.127707: step: 60/77, loss: 0.015778936445713043 2023-01-23 23:14:50.452461: step: 64/77, loss: 0.024857226759195328 2023-01-23 23:14:51.782507: step: 68/77, loss: 0.052271053194999695 2023-01-23 23:14:53.097607: step: 72/77, loss: 0.006552603095769882 2023-01-23 23:14:54.421751: step: 76/77, loss: 0.008574525825679302 2023-01-23 23:14:55.761393: step: 80/77, loss: 0.078099325299263 2023-01-23 23:14:57.080716: step: 84/77, loss: 0.04347037151455879 2023-01-23 23:14:58.402242: step: 88/77, loss: 0.014808414503932 2023-01-23 23:14:59.717156: step: 92/77, loss: 0.004002019762992859 2023-01-23 23:15:01.034463: step: 96/77, loss: 0.038667820394039154 2023-01-23 23:15:02.416399: step: 100/77, loss: 0.0060221110470592976 2023-01-23 23:15:03.745177: step: 104/77, loss: 0.027203550562262535 2023-01-23 23:15:05.008106: step: 108/77, loss: 0.053258925676345825 2023-01-23 23:15:06.359881: step: 112/77, loss: 0.00490473210811615 2023-01-23 23:15:07.707468: step: 116/77, loss: 0.0033097327686846256 2023-01-23 23:15:09.015322: step: 120/77, loss: 0.0017886572750285268 2023-01-23 23:15:10.355095: step: 124/77, loss: 0.03101409412920475 2023-01-23 23:15:11.652004: step: 128/77, loss: 0.028752855956554413 2023-01-23 23:15:12.931045: step: 132/77, loss: 0.0011797421611845493 2023-01-23 23:15:14.213250: step: 136/77, loss: 0.027044154703617096 2023-01-23 23:15:15.509824: step: 140/77, loss: 0.0021191895939409733 2023-01-23 23:15:16.843565: step: 144/77, loss: 0.004732126835733652 2023-01-23 23:15:18.118823: step: 148/77, loss: 0.15178532898426056 2023-01-23 23:15:19.420712: step: 152/77, loss: 0.04195141792297363 2023-01-23 23:15:20.687044: step: 156/77, loss: 0.001734606921672821 2023-01-23 23:15:22.023137: step: 160/77, loss: 0.004672032780945301 2023-01-23 23:15:23.343957: step: 164/77, loss: 0.04151839017868042 2023-01-23 23:15:24.667027: step: 168/77, loss: 0.03094092383980751 2023-01-23 23:15:25.960122: step: 172/77, loss: 0.012236851267516613 2023-01-23 23:15:27.263000: step: 176/77, loss: 0.012080186977982521 2023-01-23 23:15:28.585840: step: 180/77, loss: 0.0012469030916690826 2023-01-23 23:15:29.907366: step: 184/77, loss: 0.009040589444339275 2023-01-23 23:15:31.190991: step: 188/77, loss: 0.003566809929907322 2023-01-23 23:15:32.483169: step: 192/77, loss: 0.01853303611278534 2023-01-23 23:15:33.793024: step: 196/77, loss: 0.024924924597144127 2023-01-23 23:15:35.096673: step: 200/77, loss: 0.009820442646741867 2023-01-23 23:15:36.391273: step: 204/77, loss: 0.005277830176055431 2023-01-23 23:15:37.743240: step: 208/77, loss: 0.0072280727326869965 2023-01-23 23:15:39.031554: step: 212/77, loss: 5.547390173887834e-05 2023-01-23 23:15:40.333215: step: 216/77, loss: 0.09233056008815765 2023-01-23 23:15:41.693714: step: 220/77, loss: 0.0022794553078711033 2023-01-23 23:15:43.088633: step: 224/77, loss: 0.06972534954547882 2023-01-23 23:15:44.366751: step: 228/77, loss: 0.007869555614888668 2023-01-23 23:15:45.659620: step: 232/77, loss: 0.00014271988766267896 2023-01-23 23:15:46.955678: step: 236/77, loss: 0.03628724440932274 2023-01-23 23:15:48.258403: step: 
240/77, loss: 0.03799661621451378 2023-01-23 23:15:49.551643: step: 244/77, loss: 0.007743775844573975 2023-01-23 23:15:50.881848: step: 248/77, loss: 0.0032844683155417442 2023-01-23 23:15:52.186832: step: 252/77, loss: 0.08080201596021652 2023-01-23 23:15:53.530111: step: 256/77, loss: 0.030658353120088577 2023-01-23 23:15:54.822294: step: 260/77, loss: 0.030249860137701035 2023-01-23 23:15:56.122080: step: 264/77, loss: 0.01483116950839758 2023-01-23 23:15:57.450148: step: 268/77, loss: 0.01604858599603176 2023-01-23 23:15:58.759557: step: 272/77, loss: 0.010052401572465897 2023-01-23 23:16:00.121999: step: 276/77, loss: 0.0016898037865757942 2023-01-23 23:16:01.448240: step: 280/77, loss: 0.0067345574498176575 2023-01-23 23:16:02.792667: step: 284/77, loss: 3.294476482551545e-05 2023-01-23 23:16:04.117695: step: 288/77, loss: 0.016307225450873375 2023-01-23 23:16:05.488668: step: 292/77, loss: 0.014899727888405323 2023-01-23 23:16:06.817501: step: 296/77, loss: 0.045760296285152435 2023-01-23 23:16:08.129906: step: 300/77, loss: 0.03345262631773949 2023-01-23 23:16:09.448751: step: 304/77, loss: 0.008122695609927177 2023-01-23 23:16:10.746763: step: 308/77, loss: 0.01835659332573414 2023-01-23 23:16:12.093582: step: 312/77, loss: 0.01669490337371826 2023-01-23 23:16:13.472529: step: 316/77, loss: 0.008289380930364132 2023-01-23 23:16:14.756172: step: 320/77, loss: 0.011867504566907883 2023-01-23 23:16:16.125112: step: 324/77, loss: 0.03273586556315422 2023-01-23 23:16:17.491323: step: 328/77, loss: 0.012369364500045776 2023-01-23 23:16:18.823834: step: 332/77, loss: 0.03042646124958992 2023-01-23 23:16:20.092588: step: 336/77, loss: 0.03948419541120529 2023-01-23 23:16:21.358844: step: 340/77, loss: 0.07989335060119629 2023-01-23 23:16:22.655062: step: 344/77, loss: 0.020313072949647903 2023-01-23 23:16:23.980806: step: 348/77, loss: 0.013975740410387516 2023-01-23 23:16:25.303655: step: 352/77, loss: 0.05108807608485222 2023-01-23 23:16:26.609158: step: 356/77, loss: 0.014707177877426147 2023-01-23 23:16:27.908596: step: 360/77, loss: 0.04847146198153496 2023-01-23 23:16:29.213932: step: 364/77, loss: 0.004290463402867317 2023-01-23 23:16:30.449998: step: 368/77, loss: 0.015383971855044365 2023-01-23 23:16:31.770281: step: 372/77, loss: 0.002747519174590707 2023-01-23 23:16:33.066614: step: 376/77, loss: 0.008910756558179855 2023-01-23 23:16:34.341822: step: 380/77, loss: 0.009819927625358105 2023-01-23 23:16:35.672007: step: 384/77, loss: 0.0014792424626648426 2023-01-23 23:16:36.939125: step: 388/77, loss: 0.027898721396923065 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9166666666666666, 'r': 0.4198473282442748, 'f1': 0.5759162303664921}, 'slot': {'p': 0.5238095238095238, 'r': 0.010009099181073703, 'f1': 0.019642857142857142}, 'combined': 0.011312640239341809, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9166666666666666, 'r': 0.4198473282442748, 'f1': 0.5759162303664921}, 'slot': {'p': 0.5238095238095238, 'r': 0.010009099181073703, 'f1': 0.019642857142857142}, 'combined': 0.011312640239341809, 'epoch': 
7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.9166666666666666, 'r': 0.4198473282442748, 'f1': 0.5759162303664921}, 'slot': {'p': 0.5238095238095238, 'r': 0.010009099181073703, 'f1': 0.019642857142857142}, 'combined': 0.011312640239341809, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:18:30.395844: step: 4/77, loss: 0.0008204178884625435 2023-01-23 23:18:31.649956: step: 8/77, loss: 0.011769868433475494 2023-01-23 23:18:32.915604: step: 12/77, loss: 0.0043268040753901005 2023-01-23 23:18:34.243543: step: 16/77, loss: 0.0022208222653716803 2023-01-23 23:18:35.589628: step: 20/77, loss: 
0.0023637483827769756 2023-01-23 23:18:36.881571: step: 24/77, loss: 0.002883343491703272 2023-01-23 23:18:38.236422: step: 28/77, loss: 0.003954778891056776 2023-01-23 23:18:39.559960: step: 32/77, loss: 0.02303282544016838 2023-01-23 23:18:40.877395: step: 36/77, loss: 0.01751818135380745 2023-01-23 23:18:42.166633: step: 40/77, loss: 0.04591015726327896 2023-01-23 23:18:43.466833: step: 44/77, loss: 7.463184010703117e-05 2023-01-23 23:18:44.788749: step: 48/77, loss: 0.009197239764034748 2023-01-23 23:18:46.120538: step: 52/77, loss: 0.028276560828089714 2023-01-23 23:18:47.429949: step: 56/77, loss: 0.008027189411222935 2023-01-23 23:18:48.724634: step: 60/77, loss: 0.012188486754894257 2023-01-23 23:18:50.060316: step: 64/77, loss: 0.06895316392183304 2023-01-23 23:18:51.392795: step: 68/77, loss: 0.0003524815256241709 2023-01-23 23:18:52.715792: step: 72/77, loss: 0.004892442375421524 2023-01-23 23:18:54.039964: step: 76/77, loss: 0.007600066717714071 2023-01-23 23:18:55.350628: step: 80/77, loss: 0.00019381933088880032 2023-01-23 23:18:56.657368: step: 84/77, loss: 0.003237048164010048 2023-01-23 23:18:57.978267: step: 88/77, loss: 0.009226077236235142 2023-01-23 23:18:59.319939: step: 92/77, loss: 0.007336574140936136 2023-01-23 23:19:00.665008: step: 96/77, loss: 0.003843441605567932 2023-01-23 23:19:01.978441: step: 100/77, loss: 0.026638394221663475 2023-01-23 23:19:03.315822: step: 104/77, loss: 0.01824391447007656 2023-01-23 23:19:04.632743: step: 108/77, loss: 0.013902065344154835 2023-01-23 23:19:05.941982: step: 112/77, loss: 0.06014961376786232 2023-01-23 23:19:07.276020: step: 116/77, loss: 0.049358613789081573 2023-01-23 23:19:08.570024: step: 120/77, loss: 0.0051002344116568565 2023-01-23 23:19:09.869459: step: 124/77, loss: 0.12966176867485046 2023-01-23 23:19:11.144798: step: 128/77, loss: 0.03141997009515762 2023-01-23 23:19:12.440379: step: 132/77, loss: 0.012090899981558323 2023-01-23 23:19:13.762093: step: 136/77, loss: 0.08095104247331619 2023-01-23 23:19:15.043001: step: 140/77, loss: 0.019286353141069412 2023-01-23 23:19:16.336113: step: 144/77, loss: 0.012866070494055748 2023-01-23 23:19:17.616370: step: 148/77, loss: 0.03086090460419655 2023-01-23 23:19:18.906782: step: 152/77, loss: 0.11356306821107864 2023-01-23 23:19:20.243437: step: 156/77, loss: 0.05788556486368179 2023-01-23 23:19:21.509167: step: 160/77, loss: 0.014221753925085068 2023-01-23 23:19:22.828351: step: 164/77, loss: 0.0002134163660230115 2023-01-23 23:19:24.122100: step: 168/77, loss: 0.0071030305698513985 2023-01-23 23:19:25.418657: step: 172/77, loss: 0.036305420100688934 2023-01-23 23:19:26.756364: step: 176/77, loss: 0.024705661460757256 2023-01-23 23:19:28.039106: step: 180/77, loss: 0.019936632364988327 2023-01-23 23:19:29.327362: step: 184/77, loss: 0.00013617813237942755 2023-01-23 23:19:30.609188: step: 188/77, loss: 0.051699113100767136 2023-01-23 23:19:31.894287: step: 192/77, loss: 0.04250229150056839 2023-01-23 23:19:33.179778: step: 196/77, loss: 0.027253786101937294 2023-01-23 23:19:34.512172: step: 200/77, loss: 0.024770382791757584 2023-01-23 23:19:35.850075: step: 204/77, loss: 0.025686733424663544 2023-01-23 23:19:37.146957: step: 208/77, loss: 0.009457824751734734 2023-01-23 23:19:38.468995: step: 212/77, loss: 0.09252268075942993 2023-01-23 23:19:39.753007: step: 216/77, loss: 0.03673015534877777 2023-01-23 23:19:41.078033: step: 220/77, loss: 0.0023359288461506367 2023-01-23 23:19:42.351029: step: 224/77, loss: 0.02088875323534012 2023-01-23 23:19:43.668674: step: 
228/77, loss: 0.02472914569079876 2023-01-23 23:19:44.946530: step: 232/77, loss: 0.008620699867606163 2023-01-23 23:19:46.251326: step: 236/77, loss: 0.07274501025676727 2023-01-23 23:19:47.540760: step: 240/77, loss: 0.014064528979361057 2023-01-23 23:19:48.886791: step: 244/77, loss: 0.05628375709056854 2023-01-23 23:19:50.190130: step: 248/77, loss: 0.017337357625365257 2023-01-23 23:19:51.524805: step: 252/77, loss: 2.4178843887057155e-05 2023-01-23 23:19:52.819870: step: 256/77, loss: 0.004507299512624741 2023-01-23 23:19:54.137832: step: 260/77, loss: 0.015918653458356857 2023-01-23 23:19:55.407218: step: 264/77, loss: 0.029176900163292885 2023-01-23 23:19:56.664352: step: 268/77, loss: 0.02472537010908127 2023-01-23 23:19:57.926071: step: 272/77, loss: 0.020769568160176277 2023-01-23 23:19:59.238767: step: 276/77, loss: 0.033161960542201996 2023-01-23 23:20:00.558023: step: 280/77, loss: 0.010773420333862305 2023-01-23 23:20:01.870462: step: 284/77, loss: 0.006724311038851738 2023-01-23 23:20:03.197393: step: 288/77, loss: 0.0062055690214037895 2023-01-23 23:20:04.511695: step: 292/77, loss: 0.01167929358780384 2023-01-23 23:20:05.823257: step: 296/77, loss: 0.03370339423418045 2023-01-23 23:20:07.144751: step: 300/77, loss: 0.006366947665810585 2023-01-23 23:20:08.444012: step: 304/77, loss: 0.01827992871403694 2023-01-23 23:20:09.726640: step: 308/77, loss: 0.005884686019271612 2023-01-23 23:20:11.024547: step: 312/77, loss: 0.010750525631010532 2023-01-23 23:20:12.349716: step: 316/77, loss: 0.005311709363013506 2023-01-23 23:20:13.671439: step: 320/77, loss: 0.0023131745401769876 2023-01-23 23:20:15.056158: step: 324/77, loss: 0.007912065833806992 2023-01-23 23:20:16.441359: step: 328/77, loss: 0.0042124297469854355 2023-01-23 23:20:17.755273: step: 332/77, loss: 0.024382663890719414 2023-01-23 23:20:19.082375: step: 336/77, loss: 0.00031083874637261033 2023-01-23 23:20:20.347617: step: 340/77, loss: 0.017951298505067825 2023-01-23 23:20:21.642592: step: 344/77, loss: 0.023033462464809418 2023-01-23 23:20:22.976959: step: 348/77, loss: 0.027386359870433807 2023-01-23 23:20:24.269100: step: 352/77, loss: 0.05732313543558121 2023-01-23 23:20:25.595143: step: 356/77, loss: 0.0039208317175507545 2023-01-23 23:20:26.933742: step: 360/77, loss: 0.049601756036281586 2023-01-23 23:20:28.236842: step: 364/77, loss: 0.06384100764989853 2023-01-23 23:20:29.536779: step: 368/77, loss: 0.027912341058254242 2023-01-23 23:20:30.856165: step: 372/77, loss: 0.0020587339531630278 2023-01-23 23:20:32.189684: step: 376/77, loss: 0.003806506050750613 2023-01-23 23:20:33.494263: step: 380/77, loss: 0.002064730739220977 2023-01-23 23:20:34.821966: step: 384/77, loss: 0.0003069471858907491 2023-01-23 23:20:36.109360: step: 388/77, loss: 0.002804083051159978 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 8} Test Chinese: {'template': {'p': 0.9047619047619048, 'r': 0.4351145038167939, 'f1': 0.5876288659793814}, 'slot': {'p': 0.5, 'r': 0.009099181073703366, 'f1': 0.017873100983020553}, 'combined': 0.010502750062187333, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 8} Test Korean: {'template': {'p': 
0.9047619047619048, 'r': 0.4351145038167939, 'f1': 0.5876288659793814}, 'slot': {'p': 0.5, 'r': 0.009099181073703366, 'f1': 0.017873100983020553}, 'combined': 0.010502750062187333, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 8} Test Russian: {'template': {'p': 0.9047619047619048, 'r': 0.4351145038167939, 'f1': 0.5876288659793814}, 'slot': {'p': 0.5, 'r': 0.009099181073703366, 'f1': 0.017873100983020553}, 'combined': 0.010502750062187333, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:22:29.399269: step: 4/77, loss: 0.02468053065240383 2023-01-23 23:22:30.701202: step: 8/77, loss: 0.027540545910596848 2023-01-23 
23:22:32.014799: step: 12/77, loss: 0.030903879553079605 2023-01-23 23:22:33.288842: step: 16/77, loss: 0.010813569650053978 2023-01-23 23:22:34.610220: step: 20/77, loss: 0.003732758341357112 2023-01-23 23:22:35.856020: step: 24/77, loss: 0.03615504503250122 2023-01-23 23:22:37.191993: step: 28/77, loss: 0.009996151551604271 2023-01-23 23:22:38.520695: step: 32/77, loss: 0.0005582488956861198 2023-01-23 23:22:39.817075: step: 36/77, loss: 0.040143415331840515 2023-01-23 23:22:41.123889: step: 40/77, loss: 0.048482924699783325 2023-01-23 23:22:42.445383: step: 44/77, loss: 0.002379587385803461 2023-01-23 23:22:43.744907: step: 48/77, loss: 0.009470704942941666 2023-01-23 23:22:45.086137: step: 52/77, loss: 0.015861298888921738 2023-01-23 23:22:46.390311: step: 56/77, loss: 0.0008028687443584204 2023-01-23 23:22:47.686463: step: 60/77, loss: 0.00591190904378891 2023-01-23 23:22:48.993457: step: 64/77, loss: 0.023707589134573936 2023-01-23 23:22:50.250280: step: 68/77, loss: 0.00041971640894189477 2023-01-23 23:22:51.579966: step: 72/77, loss: 0.02400209940969944 2023-01-23 23:22:52.862382: step: 76/77, loss: 0.010313374921679497 2023-01-23 23:22:54.145561: step: 80/77, loss: 0.017669744789600372 2023-01-23 23:22:55.493962: step: 84/77, loss: 0.00019596553465817124 2023-01-23 23:22:56.802350: step: 88/77, loss: 0.030334951356053352 2023-01-23 23:22:58.126664: step: 92/77, loss: 0.00572825875133276 2023-01-23 23:22:59.450906: step: 96/77, loss: 0.029115553945302963 2023-01-23 23:23:00.755723: step: 100/77, loss: 0.006687483750283718 2023-01-23 23:23:02.065433: step: 104/77, loss: 0.006783424410969019 2023-01-23 23:23:03.363967: step: 108/77, loss: 0.0012595838634297252 2023-01-23 23:23:04.692350: step: 112/77, loss: 0.01739436574280262 2023-01-23 23:23:05.964939: step: 116/77, loss: 0.014384800568223 2023-01-23 23:23:07.286470: step: 120/77, loss: 0.06246951222419739 2023-01-23 23:23:08.610048: step: 124/77, loss: 0.010280019603669643 2023-01-23 23:23:09.916044: step: 128/77, loss: 0.028107155114412308 2023-01-23 23:23:11.223366: step: 132/77, loss: 0.00990170519798994 2023-01-23 23:23:12.526283: step: 136/77, loss: 0.0022096431348472834 2023-01-23 23:23:13.831340: step: 140/77, loss: 0.008368104696273804 2023-01-23 23:23:15.109405: step: 144/77, loss: 0.02378884330391884 2023-01-23 23:23:16.440166: step: 148/77, loss: 0.07476845383644104 2023-01-23 23:23:17.808902: step: 152/77, loss: 0.009604285471141338 2023-01-23 23:23:19.125431: step: 156/77, loss: 0.1093854010105133 2023-01-23 23:23:20.447244: step: 160/77, loss: 0.011621728539466858 2023-01-23 23:23:21.797762: step: 164/77, loss: 0.056823261082172394 2023-01-23 23:23:23.109334: step: 168/77, loss: 0.037123072892427444 2023-01-23 23:23:24.434948: step: 172/77, loss: 0.07171599566936493 2023-01-23 23:23:25.733879: step: 176/77, loss: 0.025615889579057693 2023-01-23 23:23:27.036927: step: 180/77, loss: 0.022923659533262253 2023-01-23 23:23:28.336576: step: 184/77, loss: 0.02796180173754692 2023-01-23 23:23:29.611970: step: 188/77, loss: 0.004988554865121841 2023-01-23 23:23:30.926668: step: 192/77, loss: 0.00012879565474577248 2023-01-23 23:23:32.231206: step: 196/77, loss: 0.02959856204688549 2023-01-23 23:23:33.553834: step: 200/77, loss: 0.03901619464159012 2023-01-23 23:23:34.860945: step: 204/77, loss: 0.04571767896413803 2023-01-23 23:23:36.181449: step: 208/77, loss: 0.014097994193434715 2023-01-23 23:23:37.526593: step: 212/77, loss: 0.010788314044475555 2023-01-23 23:23:38.850807: step: 216/77, loss: 0.05809639394283295 
2023-01-23 23:23:40.155613: step: 220/77, loss: 0.0019819687586277723 2023-01-23 23:23:41.522414: step: 224/77, loss: 0.004335891455411911 2023-01-23 23:23:42.797767: step: 228/77, loss: 0.0004905299283564091 2023-01-23 23:23:44.119819: step: 232/77, loss: 0.06842028349637985 2023-01-23 23:23:45.442074: step: 236/77, loss: 0.007669140584766865 2023-01-23 23:23:46.738915: step: 240/77, loss: 0.01616811752319336 2023-01-23 23:23:48.050076: step: 244/77, loss: 0.03694730997085571 2023-01-23 23:23:49.354570: step: 248/77, loss: 0.015833966434001923 2023-01-23 23:23:50.638839: step: 252/77, loss: 0.11062652617692947 2023-01-23 23:23:51.936133: step: 256/77, loss: 0.028664622455835342 2023-01-23 23:23:53.298708: step: 260/77, loss: 0.003139983396977186 2023-01-23 23:23:54.577615: step: 264/77, loss: 0.04194479063153267 2023-01-23 23:23:55.899812: step: 268/77, loss: 0.007503472734242678 2023-01-23 23:23:57.196413: step: 272/77, loss: 0.02690306305885315 2023-01-23 23:23:58.566763: step: 276/77, loss: 0.01763150654733181 2023-01-23 23:23:59.862922: step: 280/77, loss: 0.001771294279024005 2023-01-23 23:24:01.172120: step: 284/77, loss: 0.000397772149881348 2023-01-23 23:24:02.516034: step: 288/77, loss: 0.006749084684997797 2023-01-23 23:24:03.842549: step: 292/77, loss: 0.04002555459737778 2023-01-23 23:24:05.110166: step: 296/77, loss: 0.009476988576352596 2023-01-23 23:24:06.460426: step: 300/77, loss: 0.010371961630880833 2023-01-23 23:24:07.769344: step: 304/77, loss: 0.0004451674467418343 2023-01-23 23:24:09.055150: step: 308/77, loss: 0.05008751153945923 2023-01-23 23:24:10.353688: step: 312/77, loss: 0.013565540313720703 2023-01-23 23:24:11.659742: step: 316/77, loss: 0.00715932622551918 2023-01-23 23:24:12.972737: step: 320/77, loss: 0.00995059683918953 2023-01-23 23:24:14.275289: step: 324/77, loss: 0.049439121037721634 2023-01-23 23:24:15.624174: step: 328/77, loss: 0.007938910275697708 2023-01-23 23:24:16.994064: step: 332/77, loss: 0.025580620393157005 2023-01-23 23:24:18.289106: step: 336/77, loss: 0.05015741288661957 2023-01-23 23:24:19.593111: step: 340/77, loss: 0.014777681790292263 2023-01-23 23:24:20.929653: step: 344/77, loss: 0.011329288594424725 2023-01-23 23:24:22.294525: step: 348/77, loss: 9.849500929703936e-05 2023-01-23 23:24:23.653359: step: 352/77, loss: 0.0007578809163533151 2023-01-23 23:24:25.020980: step: 356/77, loss: 0.0006742964615114033 2023-01-23 23:24:26.330024: step: 360/77, loss: 0.03660057112574577 2023-01-23 23:24:27.689699: step: 364/77, loss: 0.011663177981972694 2023-01-23 23:24:29.016441: step: 368/77, loss: 0.00030943931778892875 2023-01-23 23:24:30.265752: step: 372/77, loss: 0.00023238870198838413 2023-01-23 23:24:31.604193: step: 376/77, loss: 0.011897479183971882 2023-01-23 23:24:32.909701: step: 380/77, loss: 0.06606089323759079 2023-01-23 23:24:34.256033: step: 384/77, loss: 0.000210434605833143 2023-01-23 23:24:35.574200: step: 388/77, loss: 0.02883332222700119 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.8405797101449275, 'r': 0.44274809160305345, 'f1': 0.5800000000000001}, 'slot': {'p': 0.47058823529411764, 'r': 0.014558689717925387, 'f1': 0.02824360105913504}, 'combined': 0.016381288614298325, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.8382352941176471, 'r': 0.4351145038167939, 'f1': 0.5728643216080401}, 'slot': {'p': 0.47058823529411764, 'r': 0.014558689717925387, 'f1': 0.02824360105913504}, 'combined': 0.016179751360509517, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.8405797101449275, 'r': 0.44274809160305345, 'f1': 0.5800000000000001}, 'slot': {'p': 0.47058823529411764, 'r': 0.014558689717925387, 'f1': 0.02824360105913504}, 'combined': 0.016381288614298325, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 
--accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:26:28.604622: step: 4/77, loss: 0.007230168208479881 2023-01-23 23:26:29.959657: step: 8/77, loss: 0.028481965884566307 2023-01-23 23:26:31.300760: step: 12/77, loss: 0.004072078038007021 2023-01-23 23:26:32.578803: step: 16/77, loss: 9.96085800579749e-05 2023-01-23 23:26:33.889267: step: 20/77, loss: 0.013363640755414963 2023-01-23 23:26:35.218331: step: 24/77, loss: 0.021185237914323807 2023-01-23 23:26:36.550620: step: 28/77, loss: 0.0012835885863751173 2023-01-23 23:26:37.838442: step: 32/77, loss: 0.02527708373963833 2023-01-23 23:26:39.138206: step: 36/77, loss: 0.0063269371166825294 2023-01-23 23:26:40.392867: step: 40/77, loss: 0.0035296031273901463 2023-01-23 23:26:41.702899: step: 44/77, loss: 0.017808442935347557 2023-01-23 23:26:43.001047: step: 48/77, loss: 0.06293917447328568 2023-01-23 23:26:44.295474: step: 52/77, loss: 0.00010214370558969676 2023-01-23 23:26:45.605840: step: 56/77, loss: 0.0526653528213501 2023-01-23 23:26:46.920220: step: 60/77, loss: 0.021576354280114174 2023-01-23 23:26:48.211527: step: 64/77, loss: 0.0004523025418166071 2023-01-23 23:26:49.493458: step: 68/77, loss: 0.036175526678562164 2023-01-23 23:26:50.776456: step: 72/77, loss: 0.016709093004465103 2023-01-23 23:26:52.067038: step: 76/77, loss: 0.006656886078417301 2023-01-23 23:26:53.351577: step: 80/77, loss: 0.012240472249686718 2023-01-23 23:26:54.631810: step: 84/77, loss: 0.020623495802283287 2023-01-23 23:26:55.947266: step: 88/77, loss: 0.026960119605064392 2023-01-23 23:26:57.237032: step: 92/77, loss: 0.019822752103209496 2023-01-23 23:26:58.561864: step: 96/77, loss: 0.0030144131742417812 2023-01-23 23:26:59.853613: step: 100/77, loss: 0.019956866279244423 2023-01-23 23:27:01.184169: step: 104/77, loss: 0.012890620157122612 2023-01-23 23:27:02.555097: step: 108/77, loss: 0.019601669162511826 2023-01-23 23:27:03.845972: step: 112/77, loss: 0.009381377138197422 2023-01-23 23:27:05.104525: step: 116/77, loss: 0.005626625847071409 2023-01-23 23:27:06.421127: step: 120/77, loss: 0.017440086230635643 2023-01-23 23:27:07.757502: step: 124/77, loss: 0.02458116225898266 2023-01-23 23:27:09.087060: step: 128/77, loss: 0.001182088628411293 2023-01-23 23:27:10.372229: step: 132/77, loss: 0.015194879844784737 2023-01-23 23:27:11.744249: step: 136/77, loss: 0.058345600962638855 2023-01-23 23:27:13.161047: step: 140/77, loss: 0.0004595243954099715 2023-01-23 23:27:14.496391: step: 144/77, loss: 0.00038895985926501453 2023-01-23 23:27:15.813121: step: 148/77, loss: 0.002521295566111803 2023-01-23 23:27:17.124476: step: 152/77, loss: 0.004439116455614567 2023-01-23 23:27:18.469260: step: 156/77, loss: 0.021072743460536003 2023-01-23 23:27:19.812719: step: 160/77, loss: 0.0015568241942673922 2023-01-23 23:27:21.123730: step: 164/77, loss: 0.00011058291420340538 2023-01-23 23:27:22.417863: step: 168/77, loss: 0.008715417236089706 2023-01-23 23:27:23.771181: step: 172/77, loss: 0.0002488511090632528 2023-01-23 23:27:25.066929: step: 176/77, loss: 0.0044037941843271255 2023-01-23 23:27:26.377835: step: 180/77, loss: 2.851781937351916e-05 2023-01-23 23:27:27.764981: step: 184/77, loss: 0.025874529033899307 2023-01-23 23:27:29.062656: step: 188/77, loss: 6.360773113556206e-05 2023-01-23 23:27:30.375472: step: 192/77, loss: 0.020662926137447357 2023-01-23 23:27:31.743333: step: 196/77, loss: 6.761529948562384e-05 2023-01-23 23:27:33.056250: step: 200/77, loss: 2.6647994673112407e-05 2023-01-23 23:27:34.368845: step: 
204/77, loss: 0.0935317799448967 2023-01-23 23:27:35.682220: step: 208/77, loss: 0.0009675032342784107 2023-01-23 23:27:37.000384: step: 212/77, loss: 0.006623557303100824 2023-01-23 23:27:38.324840: step: 216/77, loss: 0.01613423600792885 2023-01-23 23:27:39.634237: step: 220/77, loss: 0.005601785145699978 2023-01-23 23:27:40.960880: step: 224/77, loss: 0.043269336223602295 2023-01-23 23:27:42.323341: step: 228/77, loss: 0.058971501886844635 2023-01-23 23:27:43.667398: step: 232/77, loss: 0.04367101565003395 2023-01-23 23:27:45.012606: step: 236/77, loss: 0.04910704120993614 2023-01-23 23:27:46.315680: step: 240/77, loss: 0.012337159365415573 2023-01-23 23:27:47.613712: step: 244/77, loss: 0.005926759447902441 2023-01-23 23:27:48.894392: step: 248/77, loss: 0.007234785705804825 2023-01-23 23:27:50.193618: step: 252/77, loss: 0.05337817594408989 2023-01-23 23:27:51.493817: step: 256/77, loss: 0.0013377940049394965 2023-01-23 23:27:52.751284: step: 260/77, loss: 0.029988370835781097 2023-01-23 23:27:54.087109: step: 264/77, loss: 0.02918725088238716 2023-01-23 23:27:55.401827: step: 268/77, loss: 0.017394710332155228 2023-01-23 23:27:56.647018: step: 272/77, loss: 0.007960842922329903 2023-01-23 23:27:57.898667: step: 276/77, loss: 0.015655148774385452 2023-01-23 23:27:59.257564: step: 280/77, loss: 0.07385125756263733 2023-01-23 23:28:00.609916: step: 284/77, loss: 0.01918143779039383 2023-01-23 23:28:01.893843: step: 288/77, loss: 0.027690613642334938 2023-01-23 23:28:03.203989: step: 292/77, loss: 0.004097505938261747 2023-01-23 23:28:04.494922: step: 296/77, loss: 0.014483789913356304 2023-01-23 23:28:05.859449: step: 300/77, loss: 0.006000719498842955 2023-01-23 23:28:07.185540: step: 304/77, loss: 0.004739431664347649 2023-01-23 23:28:08.504413: step: 308/77, loss: 0.03346220403909683 2023-01-23 23:28:09.801628: step: 312/77, loss: 0.009710513986647129 2023-01-23 23:28:11.145412: step: 316/77, loss: 0.016323618590831757 2023-01-23 23:28:12.470428: step: 320/77, loss: 0.0007509322604164481 2023-01-23 23:28:13.774276: step: 324/77, loss: 0.01381689589470625 2023-01-23 23:28:15.081877: step: 328/77, loss: 0.0016642776317894459 2023-01-23 23:28:16.399402: step: 332/77, loss: 0.005551490001380444 2023-01-23 23:28:17.722900: step: 336/77, loss: 0.004239788744598627 2023-01-23 23:28:19.049735: step: 340/77, loss: 0.006677926052361727 2023-01-23 23:28:20.373319: step: 344/77, loss: 0.00010080473293783143 2023-01-23 23:28:21.647488: step: 348/77, loss: 0.011997531168162823 2023-01-23 23:28:22.960202: step: 352/77, loss: 0.014606297016143799 2023-01-23 23:28:24.266249: step: 356/77, loss: 0.00018841848941519856 2023-01-23 23:28:25.574173: step: 360/77, loss: 0.01075925026088953 2023-01-23 23:28:26.911589: step: 364/77, loss: 0.046107031404972076 2023-01-23 23:28:28.296697: step: 368/77, loss: 0.0002987585321534425 2023-01-23 23:28:29.631856: step: 372/77, loss: 0.014896288514137268 2023-01-23 23:28:30.932250: step: 376/77, loss: 0.033940427005290985 2023-01-23 23:28:32.304323: step: 380/77, loss: 0.0470426082611084 2023-01-23 23:28:33.603032: step: 384/77, loss: 0.003909729886800051 2023-01-23 23:28:34.972935: step: 388/77, loss: 0.01414463110268116 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9076923076923077, 'r': 
0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.4782608695652174, 'r': 0.010009099181073703, 'f1': 0.0196078431372549}, 'combined': 0.011804721888755502, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.4782608695652174, 'r': 0.010009099181073703, 'f1': 0.0196078431372549}, 'combined': 0.011804721888755502, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.4782608695652174, 'r': 0.010009099181073703, 'f1': 0.0196078431372549}, 'combined': 0.011804721888755502, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 
'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:30:28.746975: step: 4/77, loss: 0.001212775707244873 2023-01-23 23:30:30.027004: step: 8/77, loss: 0.0010248932521790266 2023-01-23 23:30:31.350513: step: 12/77, loss: 0.003597670467570424 2023-01-23 23:30:32.661216: step: 16/77, loss: 0.00012636728934012353 2023-01-23 23:30:34.001970: step: 20/77, loss: 0.01793883740901947 2023-01-23 23:30:35.292442: step: 24/77, loss: 0.004484567791223526 2023-01-23 23:30:36.565273: step: 28/77, loss: 0.013867422007024288 2023-01-23 23:30:37.905370: step: 32/77, loss: 0.00443370221182704 2023-01-23 23:30:39.202492: step: 36/77, loss: 0.03909948095679283 2023-01-23 23:30:40.541820: step: 40/77, loss: 0.04470532760024071 2023-01-23 23:30:41.877481: step: 44/77, loss: 0.004239007830619812 2023-01-23 23:30:43.196901: step: 48/77, loss: 0.008453510701656342 2023-01-23 23:30:44.547129: step: 52/77, loss: 0.017034407705068588 2023-01-23 23:30:45.818897: step: 56/77, loss: 0.04876886308193207 2023-01-23 23:30:47.114181: step: 60/77, loss: 0.0005208022193983197 2023-01-23 23:30:48.404730: step: 64/77, loss: 0.019471313804388046 2023-01-23 23:30:49.735968: step: 68/77, loss: 0.03318634629249573 2023-01-23 23:30:51.067688: step: 72/77, loss: 0.011108007282018661 2023-01-23 23:30:52.371349: step: 76/77, loss: 0.01431676559150219 2023-01-23 23:30:53.655961: step: 80/77, loss: 0.005948154721409082 2023-01-23 23:30:54.977470: step: 84/77, loss: 0.021397370845079422 2023-01-23 23:30:56.311666: step: 88/77, loss: 0.02325313724577427 2023-01-23 23:30:57.644496: step: 92/77, loss: 0.020098941400647163 2023-01-23 23:30:58.965209: step: 96/77, loss: 0.021467300131917 2023-01-23 23:31:00.298417: step: 100/77, loss: 0.00042401679093018174 2023-01-23 23:31:01.594665: step: 104/77, loss: 0.012996762990951538 2023-01-23 23:31:02.892456: step: 108/77, loss: 0.0010982566745951772 2023-01-23 23:31:04.217347: step: 112/77, loss: 0.010368159040808678 2023-01-23 23:31:05.477769: step: 116/77, loss: 0.0032198550179600716 2023-01-23 23:31:06.813841: step: 120/77, loss: 0.012814019806683064 2023-01-23 23:31:08.126985: step: 124/77, loss: 0.004655920900404453 2023-01-23 23:31:09.401086: step: 128/77, loss: 0.024697577580809593 2023-01-23 23:31:10.713624: step: 132/77, loss: 0.02245374023914337 2023-01-23 23:31:11.967127: step: 136/77, loss: 0.004386830143630505 2023-01-23 23:31:13.298350: step: 140/77, loss: 0.0107192387804389 2023-01-23 23:31:14.661042: step: 144/77, loss: 0.0033273466397076845 2023-01-23 23:31:16.016287: step: 148/77, loss: 0.043100181967020035 2023-01-23 23:31:17.340058: step: 152/77, loss: 0.0023186816833913326 2023-01-23 23:31:18.668224: step: 156/77, loss: 0.015858765691518784 2023-01-23 23:31:19.969766: step: 160/77, loss: 0.030266083776950836 2023-01-23 23:31:21.330294: step: 164/77, loss: 0.0035603016149252653 2023-01-23 23:31:22.645313: step: 168/77, loss: 0.01956053264439106 2023-01-23 23:31:23.914653: step: 172/77, loss: 0.003212408162653446 2023-01-23 23:31:25.231178: step: 176/77, loss: 0.0023228703066706657 2023-01-23 23:31:26.506207: step: 180/77, loss: 0.014069067314267159 2023-01-23 23:31:27.788953: step: 184/77, loss: 0.0016356257256120443 2023-01-23 23:31:29.146461: step: 188/77, loss: 0.018511053174734116 2023-01-23 23:31:30.456186: 
step: 192/77, loss: 0.0004446762031875551 2023-01-23 23:31:31.782757: step: 196/77, loss: 0.004548283759504557 2023-01-23 23:31:33.128467: step: 200/77, loss: 0.05551350489258766 2023-01-23 23:31:34.441647: step: 204/77, loss: 0.0006663898238912225 2023-01-23 23:31:35.739554: step: 208/77, loss: 0.02572382427752018 2023-01-23 23:31:37.049146: step: 212/77, loss: 0.034336600452661514 2023-01-23 23:31:38.335311: step: 216/77, loss: 0.003346733283251524 2023-01-23 23:31:39.667960: step: 220/77, loss: 0.00020372634753584862 2023-01-23 23:31:41.011836: step: 224/77, loss: 0.002461690455675125 2023-01-23 23:31:42.288555: step: 228/77, loss: 0.0158668365329504 2023-01-23 23:31:43.598754: step: 232/77, loss: 0.03975234553217888 2023-01-23 23:31:44.887453: step: 236/77, loss: 0.004216344561427832 2023-01-23 23:31:46.209340: step: 240/77, loss: 0.0110662542283535 2023-01-23 23:31:47.529184: step: 244/77, loss: 0.002729236613959074 2023-01-23 23:31:48.871186: step: 248/77, loss: 0.0018763559637591243 2023-01-23 23:31:50.210970: step: 252/77, loss: 0.015068122185766697 2023-01-23 23:31:51.577899: step: 256/77, loss: 0.013370676897466183 2023-01-23 23:31:52.864915: step: 260/77, loss: 0.03214767947793007 2023-01-23 23:31:54.202648: step: 264/77, loss: 0.04682979732751846 2023-01-23 23:31:55.539695: step: 268/77, loss: 0.0191793330013752 2023-01-23 23:31:56.855004: step: 272/77, loss: 0.005866359919309616 2023-01-23 23:31:58.121536: step: 276/77, loss: 0.021344557404518127 2023-01-23 23:31:59.404528: step: 280/77, loss: 0.03047803044319153 2023-01-23 23:32:00.758390: step: 284/77, loss: 0.0007430142723023891 2023-01-23 23:32:02.050171: step: 288/77, loss: 0.04519880190491676 2023-01-23 23:32:03.368614: step: 292/77, loss: 0.005685959476977587 2023-01-23 23:32:04.695840: step: 296/77, loss: 0.003991344012320042 2023-01-23 23:32:06.030207: step: 300/77, loss: 0.00977976992726326 2023-01-23 23:32:07.351096: step: 304/77, loss: 0.014140845276415348 2023-01-23 23:32:08.672964: step: 308/77, loss: 0.03848596289753914 2023-01-23 23:32:10.002319: step: 312/77, loss: 0.001883206656202674 2023-01-23 23:32:11.332987: step: 316/77, loss: 0.013112076558172703 2023-01-23 23:32:12.648246: step: 320/77, loss: 0.006420728750526905 2023-01-23 23:32:13.975871: step: 324/77, loss: 0.029596269130706787 2023-01-23 23:32:15.296351: step: 328/77, loss: 0.0002997311530634761 2023-01-23 23:32:16.575632: step: 332/77, loss: 0.016038116067647934 2023-01-23 23:32:17.927314: step: 336/77, loss: 0.005305239465087652 2023-01-23 23:32:19.217395: step: 340/77, loss: 0.02268035151064396 2023-01-23 23:32:20.516096: step: 344/77, loss: 0.005031340289860964 2023-01-23 23:32:21.868325: step: 348/77, loss: 0.001761500840075314 2023-01-23 23:32:23.206158: step: 352/77, loss: 0.04548073187470436 2023-01-23 23:32:24.566380: step: 356/77, loss: 0.014924336224794388 2023-01-23 23:32:25.892487: step: 360/77, loss: 0.007789536379277706 2023-01-23 23:32:27.239170: step: 364/77, loss: 4.7290675865951926e-05 2023-01-23 23:32:28.542535: step: 368/77, loss: 0.0004604816494975239 2023-01-23 23:32:29.847623: step: 372/77, loss: 0.0022913378197699785 2023-01-23 23:32:31.180954: step: 376/77, loss: 0.0002854047925211489 2023-01-23 23:32:32.483367: step: 380/77, loss: 0.00608748709782958 2023-01-23 23:32:33.779606: step: 384/77, loss: 0.035436298698186874 2023-01-23 23:32:35.089168: step: 388/77, loss: 0.06410303711891174 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.8923076923076924, 'r': 0.44274809160305345, 'f1': 0.5918367346938777}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014717076884039587, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.8923076923076924, 'r': 0.44274809160305345, 'f1': 0.5918367346938777}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014717076884039587, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.8923076923076924, 'r': 0.44274809160305345, 'f1': 0.5918367346938777}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014717076884039587, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 
0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:34:28.615597: step: 4/77, loss: 0.008268819190561771 2023-01-23 23:34:29.922978: step: 8/77, loss: 0.0013845355715602636 2023-01-23 23:34:31.193446: step: 12/77, loss: 0.014348512515425682 2023-01-23 23:34:32.476603: step: 16/77, loss: 0.00025518261827528477 2023-01-23 23:34:33.735638: step: 20/77, loss: 0.00025453310809098184 2023-01-23 23:34:35.019726: step: 24/77, loss: 0.005054094363003969 2023-01-23 23:34:36.292318: step: 28/77, loss: 0.015327653847634792 2023-01-23 23:34:37.624423: step: 32/77, loss: 0.007553777191787958 2023-01-23 23:34:38.908790: step: 36/77, loss: 2.335933459107764e-05 2023-01-23 23:34:40.222548: step: 40/77, loss: 0.008073270320892334 2023-01-23 23:34:41.501207: step: 44/77, loss: 2.5041801563929766e-05 2023-01-23 23:34:42.788456: step: 48/77, loss: 0.013722700998187065 2023-01-23 23:34:44.101231: step: 52/77, loss: 0.0007456339080817997 2023-01-23 23:34:45.401717: step: 56/77, loss: 0.0017448263242840767 2023-01-23 23:34:46.716703: step: 60/77, loss: 0.029238741844892502 2023-01-23 23:34:47.966483: step: 64/77, loss: 0.007461803965270519 2023-01-23 23:34:49.269870: step: 68/77, loss: 0.014584846794605255 2023-01-23 23:34:50.606757: step: 72/77, loss: 0.0127757228910923 2023-01-23 23:34:51.898245: step: 76/77, loss: 0.0109567167237401 2023-01-23 23:34:53.267895: step: 80/77, loss: 0.014077206142246723 2023-01-23 23:34:54.635009: step: 84/77, loss: 4.79845330119133e-05 2023-01-23 23:34:55.932832: step: 88/77, loss: 0.05143750086426735 2023-01-23 23:34:57.269762: step: 92/77, loss: 0.021490877494215965 2023-01-23 23:34:58.578220: step: 96/77, loss: 0.0003919099981430918 2023-01-23 23:34:59.912819: step: 100/77, loss: 0.001257411320693791 2023-01-23 23:35:01.228963: step: 104/77, loss: 0.07764703780412674 2023-01-23 23:35:02.499736: step: 108/77, loss: 0.04847249388694763 2023-01-23 23:35:03.780551: step: 112/77, loss: 0.00677803810685873 2023-01-23 23:35:05.082985: step: 116/77, loss: 0.014240864664316177 2023-01-23 23:35:06.449203: step: 120/77, loss: 0.022181320935487747 2023-01-23 23:35:07.757846: step: 124/77, loss: 0.019869450479745865 2023-01-23 23:35:09.034314: step: 128/77, loss: 0.006858724169433117 2023-01-23 23:35:10.374208: step: 132/77, loss: 0.0060902857221663 2023-01-23 23:35:11.710296: step: 136/77, loss: 0.0004512839368544519 2023-01-23 23:35:13.006238: step: 140/77, loss: 0.0038215219974517822 2023-01-23 23:35:14.297823: step: 144/77, loss: 0.0006012011435814202 2023-01-23 23:35:15.610231: step: 148/77, loss: 0.003443785710260272 2023-01-23 23:35:16.952409: step: 152/77, loss: 0.003140839748084545 2023-01-23 23:35:18.269768: step: 156/77, loss: 0.03932955116033554 2023-01-23 23:35:19.564063: step: 160/77, loss: 0.005046913865953684 2023-01-23 23:35:20.871786: step: 164/77, loss: 0.000966257881373167 2023-01-23 23:35:22.170133: step: 168/77, loss: 0.0014475728385150433 2023-01-23 23:35:23.462625: step: 172/77, loss: 0.003415848594158888 2023-01-23 23:35:24.732632: step: 176/77, loss: 
0.017180226743221283 2023-01-23 23:35:26.060609: step: 180/77, loss: 0.0027672951109707355 2023-01-23 23:35:27.368708: step: 184/77, loss: 0.01768960990011692 2023-01-23 23:35:28.638424: step: 188/77, loss: 0.0016348720528185368 2023-01-23 23:35:29.965418: step: 192/77, loss: 0.0016627004370093346 2023-01-23 23:35:31.285618: step: 196/77, loss: 0.018234293907880783 2023-01-23 23:35:32.575865: step: 200/77, loss: 0.014059138484299183 2023-01-23 23:35:33.941068: step: 204/77, loss: 0.004478732589632273 2023-01-23 23:35:35.256951: step: 208/77, loss: 0.004280396271497011 2023-01-23 23:35:36.576041: step: 212/77, loss: 0.028340879827737808 2023-01-23 23:35:37.885570: step: 216/77, loss: 0.004287842195481062 2023-01-23 23:35:39.221137: step: 220/77, loss: 0.00017998297698795795 2023-01-23 23:35:40.563099: step: 224/77, loss: 0.005983490496873856 2023-01-23 23:35:41.874961: step: 228/77, loss: 0.00011496020306367427 2023-01-23 23:35:43.216617: step: 232/77, loss: 0.05257324501872063 2023-01-23 23:35:44.505983: step: 236/77, loss: 0.003619940485805273 2023-01-23 23:35:45.824646: step: 240/77, loss: 0.0004241685091983527 2023-01-23 23:35:47.124367: step: 244/77, loss: 0.04979011043906212 2023-01-23 23:35:48.443650: step: 248/77, loss: 7.13120925865951e-06 2023-01-23 23:35:49.750038: step: 252/77, loss: 0.0001238850090885535 2023-01-23 23:35:51.115228: step: 256/77, loss: 0.0006984564824961126 2023-01-23 23:35:52.396647: step: 260/77, loss: 0.00970840360969305 2023-01-23 23:35:53.705777: step: 264/77, loss: 0.021301377564668655 2023-01-23 23:35:55.059793: step: 268/77, loss: 0.0007670613704249263 2023-01-23 23:35:56.374296: step: 272/77, loss: 2.0593079170794226e-06 2023-01-23 23:35:57.733678: step: 276/77, loss: 0.018182558938860893 2023-01-23 23:35:59.060918: step: 280/77, loss: 0.0001049708153004758 2023-01-23 23:36:00.396427: step: 284/77, loss: 0.02122705802321434 2023-01-23 23:36:01.612978: step: 288/77, loss: 0.0013556723715737462 2023-01-23 23:36:02.964763: step: 292/77, loss: 0.001371250138618052 2023-01-23 23:36:04.331264: step: 296/77, loss: 0.013504598289728165 2023-01-23 23:36:05.634014: step: 300/77, loss: 0.0032943664118647575 2023-01-23 23:36:06.978283: step: 304/77, loss: 0.00025259374524466693 2023-01-23 23:36:08.273077: step: 308/77, loss: 0.0003105059149675071 2023-01-23 23:36:09.575911: step: 312/77, loss: 6.09213238931261e-05 2023-01-23 23:36:10.889586: step: 316/77, loss: 0.00036613934207707644 2023-01-23 23:36:12.177791: step: 320/77, loss: 0.0021243938244879246 2023-01-23 23:36:13.495611: step: 324/77, loss: 0.022916313260793686 2023-01-23 23:36:14.800065: step: 328/77, loss: 0.002108396030962467 2023-01-23 23:36:16.077371: step: 332/77, loss: 0.00017366012616548687 2023-01-23 23:36:17.369947: step: 336/77, loss: 0.02667391486465931 2023-01-23 23:36:18.667130: step: 340/77, loss: 0.03359730914235115 2023-01-23 23:36:20.028724: step: 344/77, loss: 0.08052822202444077 2023-01-23 23:36:21.308065: step: 348/77, loss: 0.00590179581195116 2023-01-23 23:36:22.591091: step: 352/77, loss: 0.00030119650182314217 2023-01-23 23:36:23.875223: step: 356/77, loss: 0.0023873820900917053 2023-01-23 23:36:25.183072: step: 360/77, loss: 0.0006035061087459326 2023-01-23 23:36:26.504080: step: 364/77, loss: 0.05599663779139519 2023-01-23 23:36:27.821102: step: 368/77, loss: 0.026590893045067787 2023-01-23 23:36:29.153048: step: 372/77, loss: 0.012441445142030716 2023-01-23 23:36:30.464362: step: 376/77, loss: 5.755682286689989e-05 2023-01-23 23:36:31.747006: step: 380/77, loss: 
0.01148482970893383 2023-01-23 23:36:33.049830: step: 384/77, loss: 0.00023403289378620684 2023-01-23 23:36:34.365062: step: 388/77, loss: 0.008566263131797314 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04686584651435266, 'epoch': 12} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 0.4122137404580153, 'f1': 0.574468085106383}, 'slot': {'p': 0.5454545454545454, 'r': 0.01091901728844404, 'f1': 0.021409455842997326}, 'combined': 0.012299049101296337, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04789586511906372, 'epoch': 12} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.4122137404580153, 'f1': 0.574468085106383}, 'slot': {'p': 0.5454545454545454, 'r': 0.01091901728844404, 'f1': 0.021409455842997326}, 'combined': 0.012299049101296337, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04686584651435266, 'epoch': 12} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.4122137404580153, 'f1': 0.574468085106383}, 'slot': {'p': 0.5454545454545454, 'r': 0.01091901728844404, 'f1': 0.021409455842997326}, 'combined': 0.012299049101296337, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:38:27.900472: step: 4/77, loss: 0.0029923836700618267 2023-01-23 23:38:29.153553: step: 8/77, loss: 0.0071807983331382275 2023-01-23 23:38:30.449721: step: 12/77, loss: 0.02864694409072399 2023-01-23 23:38:31.790777: step: 16/77, loss: 0.0010537790367379785 2023-01-23 23:38:33.077396: step: 20/77, loss: 0.0022675837390124798 2023-01-23 23:38:34.382825: step: 24/77, loss: 0.008036395534873009 2023-01-23 23:38:35.709045: step: 28/77, loss: 0.0007599538657814264 2023-01-23 23:38:37.098237: step: 32/77, loss: 0.00028435743297450244 2023-01-23 23:38:38.477667: step: 36/77, loss: 0.02268192358314991 2023-01-23 23:38:39.768145: step: 40/77, loss: 0.005586005747318268 2023-01-23 23:38:41.087697: step: 44/77, loss: 0.00229069497436285 2023-01-23 23:38:42.379204: step: 48/77, loss: 0.005013742949813604 2023-01-23 23:38:43.728987: step: 52/77, loss: 0.0004219284455757588 2023-01-23 23:38:45.008955: step: 56/77, loss: 0.04628149792551994 2023-01-23 23:38:46.349543: step: 60/77, loss: 0.00035025522811338305 2023-01-23 23:38:47.643831: step: 64/77, loss: 0.00048070665798150003 2023-01-23 23:38:48.965094: step: 68/77, loss: 0.022792479023337364 2023-01-23 23:38:50.272479: step: 72/77, loss: 0.0006048143841326237 2023-01-23 23:38:51.613175: step: 76/77, loss: 0.00026993860956281424 2023-01-23 23:38:52.921252: step: 80/77, loss: 0.0002230935642728582 2023-01-23 23:38:54.258521: step: 84/77, loss: 0.033240292221307755 2023-01-23 23:38:55.603357: step: 88/77, loss: 0.0315207839012146 2023-01-23 23:38:56.900787: step: 92/77, loss: 0.013940885663032532 2023-01-23 23:38:58.237960: step: 96/77, loss: 0.014073438942432404 2023-01-23 23:38:59.593597: step: 100/77, loss: 0.03296903520822525 2023-01-23 23:39:00.896432: step: 104/77, loss: 0.007324971258640289 2023-01-23 23:39:02.216735: step: 108/77, loss: 0.001288596075028181 2023-01-23 23:39:03.513345: step: 112/77, loss: 5.987027179799043e-06 2023-01-23 23:39:04.835371: step: 116/77, loss: 0.005261044949293137 2023-01-23 23:39:06.110804: step: 120/77, loss: 0.03368813917040825 2023-01-23 23:39:07.410108: step: 124/77, loss: 0.012227150611579418 2023-01-23 23:39:08.728059: step: 128/77, loss: 0.005527400877326727 2023-01-23 23:39:10.030085: step: 132/77, loss: 0.0014371307333931327 2023-01-23 23:39:11.367532: step: 136/77, loss: 0.0013739938149228692 2023-01-23 23:39:12.733404: step: 140/77, loss: 0.005380359478294849 2023-01-23 23:39:14.121843: step: 144/77, loss: 0.006290000397711992 2023-01-23 23:39:15.421447: step: 148/77, loss: 0.008836659602820873 2023-01-23 23:39:16.677425: step: 152/77, loss: 0.005232426803559065 2023-01-23 23:39:17.978787: step: 156/77, loss: 0.00048308397526852787 2023-01-23 23:39:19.266420: step: 160/77, loss: 0.003274317365139723 2023-01-23 
23:39:20.632503: step: 164/77, loss: 0.01170845702290535 2023-01-23 23:39:21.921671: step: 168/77, loss: 0.04738888889551163 2023-01-23 23:39:23.277825: step: 172/77, loss: 0.07354629039764404 2023-01-23 23:39:24.627965: step: 176/77, loss: 0.011457343585789204 2023-01-23 23:39:25.947315: step: 180/77, loss: 0.02155444398522377 2023-01-23 23:39:27.246290: step: 184/77, loss: 0.0016696201637387276 2023-01-23 23:39:28.533531: step: 188/77, loss: 0.0005738473264500499 2023-01-23 23:39:29.890956: step: 192/77, loss: 0.001273950794711709 2023-01-23 23:39:31.137412: step: 196/77, loss: 0.03267135098576546 2023-01-23 23:39:32.428905: step: 200/77, loss: 9.959276212612167e-05 2023-01-23 23:39:33.729085: step: 204/77, loss: 0.0008442182443104684 2023-01-23 23:39:35.006829: step: 208/77, loss: 0.03439297527074814 2023-01-23 23:39:36.330992: step: 212/77, loss: 0.06101587787270546 2023-01-23 23:39:37.621449: step: 216/77, loss: 3.854815804515965e-05 2023-01-23 23:39:38.961287: step: 220/77, loss: 0.0412127859890461 2023-01-23 23:39:40.304661: step: 224/77, loss: 0.005135328974574804 2023-01-23 23:39:41.573569: step: 228/77, loss: 0.0020034238696098328 2023-01-23 23:39:42.875556: step: 232/77, loss: 2.5629915967329e-07 2023-01-23 23:39:44.189660: step: 236/77, loss: 0.07443392276763916 2023-01-23 23:39:45.510046: step: 240/77, loss: 0.0030464939773082733 2023-01-23 23:39:46.869190: step: 244/77, loss: 0.0017192356754094362 2023-01-23 23:39:48.171739: step: 248/77, loss: 2.1758773073088378e-05 2023-01-23 23:39:49.499447: step: 252/77, loss: 0.07711545377969742 2023-01-23 23:39:50.756967: step: 256/77, loss: 0.0010215912479907274 2023-01-23 23:39:52.045605: step: 260/77, loss: 0.00018177239689975977 2023-01-23 23:39:53.413740: step: 264/77, loss: 4.1622581193223596e-05 2023-01-23 23:39:54.692214: step: 268/77, loss: 0.047255661338567734 2023-01-23 23:39:56.051021: step: 272/77, loss: 0.00015721115050837398 2023-01-23 23:39:57.365613: step: 276/77, loss: 0.002577256876975298 2023-01-23 23:39:58.675363: step: 280/77, loss: 0.0019506815588101745 2023-01-23 23:39:59.975076: step: 284/77, loss: 0.00016758375568315387 2023-01-23 23:40:01.300861: step: 288/77, loss: 0.026300586760044098 2023-01-23 23:40:02.638867: step: 292/77, loss: 0.00026921986136585474 2023-01-23 23:40:03.994931: step: 296/77, loss: 0.017224635928869247 2023-01-23 23:40:05.290347: step: 300/77, loss: 0.003580394433811307 2023-01-23 23:40:06.561664: step: 304/77, loss: 0.02821994386613369 2023-01-23 23:40:07.855555: step: 308/77, loss: 0.0003362175193615258 2023-01-23 23:40:09.166049: step: 312/77, loss: 0.0016050392296165228 2023-01-23 23:40:10.489568: step: 316/77, loss: 2.1911566363996826e-05 2023-01-23 23:40:11.852689: step: 320/77, loss: 0.04316862300038338 2023-01-23 23:40:13.172721: step: 324/77, loss: 0.002501176670193672 2023-01-23 23:40:14.464160: step: 328/77, loss: 0.004361604806035757 2023-01-23 23:40:15.812765: step: 332/77, loss: 0.0011792670702561736 2023-01-23 23:40:17.123431: step: 336/77, loss: 0.0005191811360418797 2023-01-23 23:40:18.419333: step: 340/77, loss: 0.01508795004338026 2023-01-23 23:40:19.705420: step: 344/77, loss: 0.00047959492076188326 2023-01-23 23:40:21.027114: step: 348/77, loss: 0.0011582657461985946 2023-01-23 23:40:22.357656: step: 352/77, loss: 0.04251958802342415 2023-01-23 23:40:23.665795: step: 356/77, loss: 0.07447528094053268 2023-01-23 23:40:24.955354: step: 360/77, loss: 0.005645543336868286 2023-01-23 23:40:26.249958: step: 364/77, loss: 0.0001554302725708112 2023-01-23 23:40:27.556971: 
step: 368/77, loss: 0.00981360487639904 2023-01-23 23:40:28.867955: step: 372/77, loss: 0.011518244631588459 2023-01-23 23:40:30.222137: step: 376/77, loss: 0.000307155423797667 2023-01-23 23:40:31.557718: step: 380/77, loss: 0.03827136382460594 2023-01-23 23:40:32.862439: step: 384/77, loss: 0.0002442160330247134 2023-01-23 23:40:34.151042: step: 388/77, loss: 0.01064755767583847 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 13} Test Chinese: {'template': {'p': 0.9, 'r': 0.4122137404580153, 'f1': 0.5654450261780105}, 'slot': {'p': 0.4827586206896552, 'r': 0.012738853503184714, 'f1': 0.024822695035460994}, 'combined': 0.014035869444135013, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 13} Test Korean: {'template': {'p': 0.9, 'r': 0.4122137404580153, 'f1': 0.5654450261780105}, 'slot': {'p': 0.4642857142857143, 'r': 0.011828935395814377, 'f1': 0.023070097604259095}, 'combined': 0.013044871943769541, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 13} Test Russian: {'template': {'p': 0.9, 'r': 0.4122137404580153, 'f1': 0.5654450261780105}, 'slot': {'p': 0.4827586206896552, 'r': 0.012738853503184714, 'f1': 0.024822695035460994}, 'combined': 0.014035869444135013, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} 
-------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:42:27.896021: step: 4/77, loss: 0.0006164819351397455 2023-01-23 23:42:29.151579: step: 8/77, loss: 0.00018814537907019258 2023-01-23 23:42:30.518358: step: 12/77, loss: 0.005015483126044273 2023-01-23 23:42:31.846446: step: 16/77, loss: 0.0001742543827276677 2023-01-23 23:42:33.193490: step: 20/77, loss: 0.0781288668513298 2023-01-23 23:42:34.484084: step: 24/77, loss: 0.008677495643496513 2023-01-23 23:42:35.785230: step: 28/77, loss: 0.10340835154056549 2023-01-23 23:42:37.089294: step: 32/77, loss: 0.0005294712027534842 2023-01-23 23:42:38.413377: step: 36/77, loss: 0.0043997676111757755 2023-01-23 23:42:39.732396: step: 40/77, loss: 0.017868174239993095 2023-01-23 23:42:41.032967: step: 44/77, loss: 0.0005032032495364547 2023-01-23 23:42:42.292568: step: 48/77, loss: 0.02159172110259533 2023-01-23 23:42:43.650444: step: 52/77, loss: 0.00976879708468914 2023-01-23 23:42:44.899221: step: 56/77, loss: 0.07533251494169235 2023-01-23 23:42:46.188692: step: 60/77, loss: 0.001760625746101141 2023-01-23 23:42:47.507794: step: 64/77, loss: 0.0021721036173403263 2023-01-23 23:42:48.765354: step: 68/77, loss: 0.021930739283561707 2023-01-23 23:42:50.046537: step: 72/77, loss: 0.00019939450430683792 2023-01-23 23:42:51.357361: step: 76/77, loss: 0.008485385216772556 2023-01-23 23:42:52.629903: step: 80/77, loss: 0.0008418073994107544 2023-01-23 23:42:53.941029: step: 84/77, loss: 0.026486661285161972 2023-01-23 23:42:55.216047: step: 88/77, loss: 0.05777119845151901 2023-01-23 23:42:56.558804: step: 92/77, loss: 0.06704266369342804 2023-01-23 23:42:57.872300: step: 96/77, loss: 0.017733411863446236 2023-01-23 23:42:59.162663: step: 100/77, loss: 0.0038883714005351067 2023-01-23 23:43:00.416563: step: 104/77, loss: 0.00019030642579309642 2023-01-23 23:43:01.727318: step: 108/77, loss: 0.004776171408593655 2023-01-23 23:43:03.063990: step: 112/77, loss: 0.019326455891132355 2023-01-23 23:43:04.364178: step: 116/77, loss: 6.101990584284067e-05 2023-01-23 23:43:05.634741: step: 120/77, loss: 0.004292602185159922 2023-01-23 23:43:06.948628: step: 124/77, loss: 0.006115391850471497 2023-01-23 23:43:08.263306: step: 128/77, loss: 0.05205165594816208 2023-01-23 23:43:09.535962: step: 132/77, loss: 0.02748934179544449 2023-01-23 23:43:10.838937: step: 136/77, loss: 0.017037197947502136 2023-01-23 23:43:12.191731: step: 140/77, loss: 0.00018027987971436232 2023-01-23 23:43:13.507095: step: 144/77, loss: 0.00023836392210796475 2023-01-23 23:43:14.786255: step: 148/77, loss: 0.04402411729097366 2023-01-23 23:43:16.090738: step: 152/77, loss: 0.02829231135547161 2023-01-23 
23:43:17.363566: step: 156/77, loss: 0.05941590666770935 2023-01-23 23:43:18.700481: step: 160/77, loss: 0.023701639845967293 2023-01-23 23:43:19.999539: step: 164/77, loss: 0.0004671411879826337 2023-01-23 23:43:21.338423: step: 168/77, loss: 0.01830999366939068 2023-01-23 23:43:22.639778: step: 172/77, loss: 0.0014468998415395617 2023-01-23 23:43:23.971649: step: 176/77, loss: 0.0030556246638298035 2023-01-23 23:43:25.303578: step: 180/77, loss: 0.025845110416412354 2023-01-23 23:43:26.609740: step: 184/77, loss: 0.0004510443250183016 2023-01-23 23:43:27.898972: step: 188/77, loss: 0.00019627483561635017 2023-01-23 23:43:29.235566: step: 192/77, loss: 0.006268838420510292 2023-01-23 23:43:30.524214: step: 196/77, loss: 0.003117799060419202 2023-01-23 23:43:31.878214: step: 200/77, loss: 0.00012128011439926922 2023-01-23 23:43:33.192241: step: 204/77, loss: 0.004503394011408091 2023-01-23 23:43:34.456721: step: 208/77, loss: 0.0002909237227868289 2023-01-23 23:43:35.791036: step: 212/77, loss: 0.029473312199115753 2023-01-23 23:43:37.145881: step: 216/77, loss: 0.007408645004034042 2023-01-23 23:43:38.394609: step: 220/77, loss: 0.027147958055138588 2023-01-23 23:43:39.706027: step: 224/77, loss: 0.008658119477331638 2023-01-23 23:43:41.016006: step: 228/77, loss: 8.345770766027272e-06 2023-01-23 23:43:42.332710: step: 232/77, loss: 0.007440881337970495 2023-01-23 23:43:43.675762: step: 236/77, loss: 0.00030215029255487025 2023-01-23 23:43:45.018898: step: 240/77, loss: 0.000730294908862561 2023-01-23 23:43:46.337380: step: 244/77, loss: 0.008102841675281525 2023-01-23 23:43:47.632654: step: 248/77, loss: 0.04456748068332672 2023-01-23 23:43:48.913017: step: 252/77, loss: 0.0004915382014587522 2023-01-23 23:43:50.221805: step: 256/77, loss: 1.1332228496030439e-05 2023-01-23 23:43:51.594725: step: 260/77, loss: 0.000332854688167572 2023-01-23 23:43:52.908349: step: 264/77, loss: 6.480792944785208e-05 2023-01-23 23:43:54.223057: step: 268/77, loss: 0.00437159463763237 2023-01-23 23:43:55.519887: step: 272/77, loss: 0.00953389797359705 2023-01-23 23:43:56.829621: step: 276/77, loss: 0.01486000046133995 2023-01-23 23:43:58.144526: step: 280/77, loss: 0.0023728306405246258 2023-01-23 23:43:59.488941: step: 284/77, loss: 0.0014381734654307365 2023-01-23 23:44:00.811108: step: 288/77, loss: 0.002384813502430916 2023-01-23 23:44:02.142795: step: 292/77, loss: 0.04980777949094772 2023-01-23 23:44:03.473559: step: 296/77, loss: 0.0008250401588156819 2023-01-23 23:44:04.750845: step: 300/77, loss: 0.04165349900722504 2023-01-23 23:44:06.032946: step: 304/77, loss: 0.0007814334239810705 2023-01-23 23:44:07.328832: step: 308/77, loss: 0.014640234410762787 2023-01-23 23:44:08.657415: step: 312/77, loss: 0.006188351195305586 2023-01-23 23:44:09.972666: step: 316/77, loss: 0.002096428768709302 2023-01-23 23:44:11.320548: step: 320/77, loss: 0.012077968567609787 2023-01-23 23:44:12.627932: step: 324/77, loss: 0.006855163723230362 2023-01-23 23:44:13.946069: step: 328/77, loss: 0.011390695348381996 2023-01-23 23:44:15.272786: step: 332/77, loss: 0.0008925123256631196 2023-01-23 23:44:16.601747: step: 336/77, loss: 0.0035996062215417624 2023-01-23 23:44:17.896768: step: 340/77, loss: 0.003936652559787035 2023-01-23 23:44:19.211440: step: 344/77, loss: 0.021909981966018677 2023-01-23 23:44:20.527917: step: 348/77, loss: 0.0045426394790410995 2023-01-23 23:44:21.822367: step: 352/77, loss: 0.009716725908219814 2023-01-23 23:44:23.110023: step: 356/77, loss: 0.0018331398023292422 2023-01-23 23:44:24.440385: 
step: 360/77, loss: 0.003199656493961811 2023-01-23 23:44:25.732702: step: 364/77, loss: 0.004381906241178513 2023-01-23 23:44:27.035002: step: 368/77, loss: 0.0004424109938554466 2023-01-23 23:44:28.307985: step: 372/77, loss: 0.00012694572797045112 2023-01-23 23:44:29.594974: step: 376/77, loss: 0.02966800332069397 2023-01-23 23:44:30.919043: step: 380/77, loss: 0.0018701194785535336 2023-01-23 23:44:32.289515: step: 384/77, loss: 0.09938880801200867 2023-01-23 23:44:33.635204: step: 388/77, loss: 0.0001236020470969379 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 14} Test Chinese: {'template': {'p': 0.8857142857142857, 'r': 0.4732824427480916, 'f1': 0.6169154228855721}, 'slot': {'p': 0.5, 'r': 0.014558689717925387, 'f1': 0.028293545534924844}, 'combined': 0.01745472460861035, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 14} Test Korean: {'template': {'p': 0.8857142857142857, 'r': 0.4732824427480916, 'f1': 0.6169154228855721}, 'slot': {'p': 0.5, 'r': 0.014558689717925387, 'f1': 0.028293545534924844}, 'combined': 0.01745472460861035, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 14} Test Russian: {'template': {'p': 0.8857142857142857, 'r': 0.4732824427480916, 'f1': 0.6169154228855721}, 'slot': {'p': 0.5, 'r': 0.014558689717925387, 'f1': 0.028293545534924844}, 'combined': 0.01745472460861035, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 
0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:46:27.298236: step: 4/77, loss: 0.049520883709192276 2023-01-23 23:46:28.611263: step: 8/77, loss: 2.924517320934683e-05 2023-01-23 23:46:29.950821: step: 12/77, loss: 0.0001714784448267892 2023-01-23 23:46:31.198908: step: 16/77, loss: 0.0006058126455172896 2023-01-23 23:46:32.487336: step: 20/77, loss: 0.00029697720310650766 2023-01-23 23:46:33.828333: step: 24/77, loss: 0.017097534611821175 2023-01-23 23:46:35.157375: step: 28/77, loss: 0.015893323346972466 2023-01-23 23:46:36.441482: step: 32/77, loss: 0.0008558035478927195 2023-01-23 23:46:37.758300: step: 36/77, loss: 0.001351369428448379 2023-01-23 23:46:39.066172: step: 40/77, loss: 0.00041135685751214623 2023-01-23 23:46:40.394065: step: 44/77, loss: 0.009848172776401043 2023-01-23 23:46:41.750949: step: 48/77, loss: 8.295777661260217e-05 2023-01-23 23:46:43.096923: step: 52/77, loss: 0.024789001792669296 2023-01-23 23:46:44.398955: step: 56/77, loss: 0.027326753363013268 2023-01-23 23:46:45.731307: step: 60/77, loss: 0.0001087711425498128 2023-01-23 23:46:47.030251: step: 64/77, loss: 5.2774827054236084e-05 2023-01-23 23:46:48.329268: step: 68/77, loss: 0.00295753194950521 2023-01-23 23:46:49.653525: step: 72/77, loss: 0.00012791951303370297 2023-01-23 23:46:50.997887: step: 76/77, loss: 0.034874122589826584 2023-01-23 23:46:52.300907: step: 80/77, loss: 3.91757239412982e-05 2023-01-23 23:46:53.644990: step: 84/77, loss: 0.008262600749731064 2023-01-23 23:46:54.978546: step: 88/77, loss: 4.044691741000861e-05 2023-01-23 23:46:56.268506: step: 92/77, loss: 0.0029091283213347197 2023-01-23 23:46:57.605157: step: 96/77, loss: 0.026152852922677994 2023-01-23 23:46:58.891743: step: 100/77, loss: 0.00038738909643143415 2023-01-23 23:47:00.196480: step: 104/77, loss: 0.0052565522491931915 2023-01-23 23:47:01.473819: step: 108/77, loss: 0.0004201162955723703 2023-01-23 23:47:02.847590: step: 112/77, loss: 0.0010721203871071339 2023-01-23 23:47:04.161751: step: 116/77, loss: 0.004700690042227507 2023-01-23 23:47:05.463475: step: 120/77, loss: 6.839706475147977e-05 2023-01-23 23:47:06.798572: step: 124/77, loss: 0.0038220605347305536 2023-01-23 23:47:08.076977: step: 128/77, loss: 0.02317044325172901 2023-01-23 23:47:09.413846: step: 132/77, loss: 0.0005767315160483122 2023-01-23 23:47:10.739170: step: 136/77, loss: 0.03289420157670975 2023-01-23 23:47:12.041933: step: 140/77, loss: 0.0020907102152705193 2023-01-23 23:47:13.384486: step: 144/77, loss: 
0.0028302576392889023 2023-01-23 23:47:14.716818: step: 148/77, loss: 0.1757725179195404 2023-01-23 23:47:16.047590: step: 152/77, loss: 4.9558102546143346e-06 2023-01-23 23:47:17.359126: step: 156/77, loss: 0.01258833333849907 2023-01-23 23:47:18.684654: step: 160/77, loss: 0.031340211629867554 2023-01-23 23:47:20.035949: step: 164/77, loss: 0.005098341032862663 2023-01-23 23:47:21.315654: step: 168/77, loss: 0.0032492538448423147 2023-01-23 23:47:22.631264: step: 172/77, loss: 0.0015646882820874453 2023-01-23 23:47:23.844534: step: 176/77, loss: 0.008963462896645069 2023-01-23 23:47:25.132246: step: 180/77, loss: 0.0030308859422802925 2023-01-23 23:47:26.435940: step: 184/77, loss: 0.003062993520870805 2023-01-23 23:47:27.734290: step: 188/77, loss: 0.0017014848999679089 2023-01-23 23:47:29.055736: step: 192/77, loss: 0.009832756593823433 2023-01-23 23:47:30.419153: step: 196/77, loss: 3.9916165405884385e-05 2023-01-23 23:47:31.757128: step: 200/77, loss: 0.0001980918023036793 2023-01-23 23:47:33.104717: step: 204/77, loss: 0.009089184924960136 2023-01-23 23:47:34.380132: step: 208/77, loss: 8.612525562057272e-06 2023-01-23 23:47:35.761715: step: 212/77, loss: 0.039376288652420044 2023-01-23 23:47:37.156809: step: 216/77, loss: 0.10439729690551758 2023-01-23 23:47:38.483887: step: 220/77, loss: 0.0011328778928145766 2023-01-23 23:47:39.845490: step: 224/77, loss: 0.013515803962945938 2023-01-23 23:47:41.152966: step: 228/77, loss: 0.00017722068878356367 2023-01-23 23:47:42.513390: step: 232/77, loss: 0.04382976144552231 2023-01-23 23:47:43.803261: step: 236/77, loss: 0.010091815143823624 2023-01-23 23:47:45.078321: step: 240/77, loss: 7.790833478793502e-05 2023-01-23 23:47:46.425454: step: 244/77, loss: 0.0025701867416501045 2023-01-23 23:47:47.737639: step: 248/77, loss: 0.00022096386237535626 2023-01-23 23:47:49.032973: step: 252/77, loss: 0.02954775094985962 2023-01-23 23:47:50.387785: step: 256/77, loss: 0.0404554083943367 2023-01-23 23:47:51.679532: step: 260/77, loss: 0.048444170504808426 2023-01-23 23:47:52.976180: step: 264/77, loss: 0.016291512176394463 2023-01-23 23:47:54.293298: step: 268/77, loss: 0.00013737943663727492 2023-01-23 23:47:55.657019: step: 272/77, loss: 0.0019735561218112707 2023-01-23 23:47:56.997081: step: 276/77, loss: 4.825846554012969e-05 2023-01-23 23:47:58.353950: step: 280/77, loss: 0.07272603362798691 2023-01-23 23:47:59.694129: step: 284/77, loss: 0.0001095397092285566 2023-01-23 23:48:00.972964: step: 288/77, loss: 5.367924313759431e-05 2023-01-23 23:48:02.269687: step: 292/77, loss: 0.004074377473443747 2023-01-23 23:48:03.576294: step: 296/77, loss: 0.0006497162394225597 2023-01-23 23:48:04.877639: step: 300/77, loss: 0.0055480943992733955 2023-01-23 23:48:06.161007: step: 304/77, loss: 2.211880564573221e-05 2023-01-23 23:48:07.488405: step: 308/77, loss: 7.929770072223619e-05 2023-01-23 23:48:08.807025: step: 312/77, loss: 0.005669772159308195 2023-01-23 23:48:10.100756: step: 316/77, loss: 4.754487599711865e-05 2023-01-23 23:48:11.402150: step: 320/77, loss: 0.03589208796620369 2023-01-23 23:48:12.722085: step: 324/77, loss: 0.01228588167577982 2023-01-23 23:48:14.085848: step: 328/77, loss: 0.002937039127573371 2023-01-23 23:48:15.451766: step: 332/77, loss: 0.09612338989973068 2023-01-23 23:48:16.770159: step: 336/77, loss: 0.04751873016357422 2023-01-23 23:48:18.132681: step: 340/77, loss: 0.0006323698908090591 2023-01-23 23:48:19.459740: step: 344/77, loss: 0.004371923394501209 2023-01-23 23:48:20.760365: step: 348/77, loss: 
8.652117685414851e-05 2023-01-23 23:48:22.076077: step: 352/77, loss: 6.106193904997781e-05 2023-01-23 23:48:23.392723: step: 356/77, loss: 0.015023558400571346 2023-01-23 23:48:24.703539: step: 360/77, loss: 0.24702905118465424 2023-01-23 23:48:25.992239: step: 364/77, loss: 1.2379175424575806 2023-01-23 23:48:27.276988: step: 368/77, loss: 0.08244233578443527 2023-01-23 23:48:28.540429: step: 372/77, loss: 0.057010870426893234 2023-01-23 23:48:29.849302: step: 376/77, loss: 0.035383787006139755 2023-01-23 23:48:31.186478: step: 380/77, loss: 0.02665356732904911 2023-01-23 23:48:32.494475: step: 384/77, loss: 0.00020606214820872992 2023-01-23 23:48:33.801490: step: 388/77, loss: 0.04542049020528793 ================================================== Loss: 0.030 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Chinese: {'template': {'p': 0.9264705882352942, 'r': 0.48091603053435117, 'f1': 0.6331658291457286}, 'slot': {'p': 0.5357142857142857, 'r': 0.01364877161055505, 'f1': 0.026619343389529728}, 'combined': 0.01685445862854646, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Korean: {'template': {'p': 0.9264705882352942, 'r': 0.48091603053435117, 'f1': 0.6331658291457286}, 'slot': {'p': 0.5357142857142857, 'r': 0.01364877161055505, 'f1': 0.026619343389529728}, 'combined': 0.01685445862854646, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Russian: {'template': {'p': 0.9264705882352942, 'r': 0.48091603053435117, 'f1': 0.6331658291457286}, 'slot': {'p': 0.5357142857142857, 'r': 0.01364877161055505, 'f1': 0.026619343389529728}, 'combined': 0.01685445862854646, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:50:27.450379: step: 4/77, loss: 0.004865787457674742 2023-01-23 23:50:28.768141: step: 8/77, loss: 0.002044866792857647 2023-01-23 23:50:30.122433: step: 12/77, loss: 0.004267704673111439 2023-01-23 23:50:31.443997: step: 16/77, loss: 0.0016755261458456516 2023-01-23 23:50:32.761264: step: 20/77, loss: 0.0039411126635968685 2023-01-23 23:50:34.061098: step: 24/77, loss: 0.011254651471972466 2023-01-23 23:50:35.344444: step: 28/77, loss: 0.038321368396282196 2023-01-23 23:50:36.658291: step: 32/77, loss: 4.7605128202121705e-05 2023-01-23 23:50:37.955905: step: 36/77, loss: 0.0011922240955755115 2023-01-23 23:50:39.228484: step: 40/77, loss: 0.0005886238650418818 2023-01-23 23:50:40.545795: step: 44/77, loss: 0.10590315610170364 2023-01-23 23:50:41.805602: step: 48/77, loss: 0.04780968278646469 2023-01-23 23:50:43.134313: step: 52/77, loss: 0.00025455671129748225 2023-01-23 23:50:44.460491: step: 56/77, loss: 0.0006137334275990725 2023-01-23 23:50:45.767577: step: 60/77, loss: 0.006472788285464048 2023-01-23 23:50:47.109254: step: 64/77, loss: 0.000599176564719528 2023-01-23 23:50:48.414436: step: 68/77, loss: 0.03149518370628357 2023-01-23 23:50:49.746365: step: 72/77, loss: 0.03944842144846916 2023-01-23 23:50:51.085892: step: 76/77, loss: 0.0012047612108290195 2023-01-23 23:50:52.357428: step: 80/77, loss: 0.006904952228069305 2023-01-23 23:50:53.635464: step: 84/77, loss: 0.04259157180786133 2023-01-23 23:50:54.941936: step: 88/77, loss: 0.0003069272788707167 2023-01-23 23:50:56.210932: step: 92/77, loss: 0.0002839508524630219 2023-01-23 23:50:57.506560: step: 96/77, loss: 0.03305383026599884 2023-01-23 23:50:58.813018: step: 100/77, loss: 0.0010568131692707539 2023-01-23 23:51:00.102691: step: 104/77, loss: 1.5277404600055888e-05 2023-01-23 23:51:01.385659: step: 108/77, loss: 0.003008649218827486 2023-01-23 23:51:02.699541: step: 112/77, loss: 0.00354586960747838 2023-01-23 23:51:04.061542: step: 116/77, loss: 0.0304990466684103 2023-01-23 23:51:05.411307: step: 120/77, loss: 0.0013609578600153327 2023-01-23 23:51:06.751754: step: 124/77, loss: 8.456044452032074e-05 2023-01-23 23:51:08.023871: step: 128/77, loss: 
0.00015098247968126088 2023-01-23 23:51:09.343995: step: 132/77, loss: 0.0738513171672821 2023-01-23 23:51:10.606233: step: 136/77, loss: 9.456242696614936e-05 2023-01-23 23:51:11.904967: step: 140/77, loss: 0.00028515647863969207 2023-01-23 23:51:13.214755: step: 144/77, loss: 0.08551955223083496 2023-01-23 23:51:14.537345: step: 148/77, loss: 0.005715628154575825 2023-01-23 23:51:15.837847: step: 152/77, loss: 0.00018103979527950287 2023-01-23 23:51:17.208442: step: 156/77, loss: 0.1641266644001007 2023-01-23 23:51:18.534462: step: 160/77, loss: 0.003106701420620084 2023-01-23 23:51:19.812479: step: 164/77, loss: 0.0006164918886497617 2023-01-23 23:51:21.114887: step: 168/77, loss: 1.5115554560907185e-05 2023-01-23 23:51:22.444148: step: 172/77, loss: 0.042311687022447586 2023-01-23 23:51:23.800193: step: 176/77, loss: 0.012022551149129868 2023-01-23 23:51:25.063648: step: 180/77, loss: 0.0002095772506436333 2023-01-23 23:51:26.330608: step: 184/77, loss: 0.005340333096683025 2023-01-23 23:51:27.626140: step: 188/77, loss: 0.00031833286629989743 2023-01-23 23:51:28.979544: step: 192/77, loss: 0.03311312943696976 2023-01-23 23:51:30.278816: step: 196/77, loss: 0.0021633312571793795 2023-01-23 23:51:31.576367: step: 200/77, loss: 6.1553277191706e-05 2023-01-23 23:51:32.905399: step: 204/77, loss: 0.08515505492687225 2023-01-23 23:51:34.180447: step: 208/77, loss: 0.040249742567539215 2023-01-23 23:51:35.507664: step: 212/77, loss: 0.03576679155230522 2023-01-23 23:51:36.826210: step: 216/77, loss: 0.03553896024823189 2023-01-23 23:51:38.163267: step: 220/77, loss: 0.005703304894268513 2023-01-23 23:51:39.469310: step: 224/77, loss: 0.038781605660915375 2023-01-23 23:51:40.751233: step: 228/77, loss: 0.054898396134376526 2023-01-23 23:51:42.071782: step: 232/77, loss: 0.0010724844178184867 2023-01-23 23:51:43.410301: step: 236/77, loss: 0.001274686073884368 2023-01-23 23:51:44.703739: step: 240/77, loss: 0.004894025158137083 2023-01-23 23:51:46.032403: step: 244/77, loss: 8.713423994777258e-06 2023-01-23 23:51:47.355168: step: 248/77, loss: 0.04005669802427292 2023-01-23 23:51:48.654388: step: 252/77, loss: 0.07183679193258286 2023-01-23 23:51:49.955576: step: 256/77, loss: 0.024867655709385872 2023-01-23 23:51:51.253967: step: 260/77, loss: 0.005784600507467985 2023-01-23 23:51:52.552300: step: 264/77, loss: 0.0013034702278673649 2023-01-23 23:51:53.870714: step: 268/77, loss: 1.900579809444025e-05 2023-01-23 23:51:55.215200: step: 272/77, loss: 0.0062468210235238075 2023-01-23 23:51:56.542266: step: 276/77, loss: 0.11800191551446915 2023-01-23 23:51:57.881662: step: 280/77, loss: 9.783930727280676e-05 2023-01-23 23:51:59.196898: step: 284/77, loss: 0.006113114301115274 2023-01-23 23:52:00.513737: step: 288/77, loss: 0.0004273669619578868 2023-01-23 23:52:01.844123: step: 292/77, loss: 5.662437985165525e-08 2023-01-23 23:52:03.133692: step: 296/77, loss: 0.0011079148389399052 2023-01-23 23:52:04.384802: step: 300/77, loss: 0.00418076990172267 2023-01-23 23:52:05.704952: step: 304/77, loss: 0.0006017254781909287 2023-01-23 23:52:07.012293: step: 308/77, loss: 0.008612751960754395 2023-01-23 23:52:08.340565: step: 312/77, loss: 0.0019411951070651412 2023-01-23 23:52:09.625008: step: 316/77, loss: 0.0028733594808727503 2023-01-23 23:52:10.960055: step: 320/77, loss: 0.002464856719598174 2023-01-23 23:52:12.279227: step: 324/77, loss: 0.00017895545170176774 2023-01-23 23:52:13.561528: step: 328/77, loss: 0.0009144581854343414 2023-01-23 23:52:14.901427: step: 332/77, loss: 
0.0010908320546150208 2023-01-23 23:52:16.197167: step: 336/77, loss: 0.000478222849778831 2023-01-23 23:52:17.475713: step: 340/77, loss: 5.4164491302799433e-05 2023-01-23 23:52:18.771602: step: 344/77, loss: 0.00020739153842441738 2023-01-23 23:52:20.071865: step: 348/77, loss: 0.05378265306353569 2023-01-23 23:52:21.381455: step: 352/77, loss: 0.0001235952222486958 2023-01-23 23:52:22.688915: step: 356/77, loss: 1.373855866404483e-06 2023-01-23 23:52:24.028518: step: 360/77, loss: 0.02540205419063568 2023-01-23 23:52:25.349845: step: 364/77, loss: 0.04071442410349846 2023-01-23 23:52:26.684803: step: 368/77, loss: 0.0007655530935153365 2023-01-23 23:52:28.016563: step: 372/77, loss: 0.001699113636277616 2023-01-23 23:52:29.360571: step: 376/77, loss: 0.0020794568117707968 2023-01-23 23:52:30.667006: step: 380/77, loss: 4.321304913901258e-07 2023-01-23 23:52:31.947676: step: 384/77, loss: 5.594743925030343e-05 2023-01-23 23:52:33.300056: step: 388/77, loss: 0.00025005993666127324 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 16} Test Chinese: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.5666666666666667, 'r': 0.015468607825295723, 'f1': 0.03011514614703277}, 'combined': 0.018130547170152382, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 16} Test Korean: {'template': {'p': 0.8923076923076924, 'r': 0.44274809160305345, 'f1': 0.5918367346938777}, 'slot': {'p': 0.5483870967741935, 'r': 0.015468607825295723, 'f1': 0.03008849557522124}, 'combined': 0.017807476973090125, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 16} Test Russian: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.5666666666666667, 'r': 0.015468607825295723, 'f1': 0.03011514614703277}, 'combined': 0.018130547170152382, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:54:26.822598: step: 4/77, loss: 0.00017559500702191144 2023-01-23 23:54:28.114096: step: 8/77, loss: 0.0042427037842571735 2023-01-23 23:54:29.394368: step: 12/77, loss: 0.00010297487460775301 2023-01-23 23:54:30.667619: step: 16/77, loss: 0.0001678880216786638 2023-01-23 23:54:31.964026: step: 20/77, loss: 0.00013916024181526154 2023-01-23 23:54:33.271789: step: 24/77, loss: 1.2970897842023987e-05 2023-01-23 23:54:34.607892: step: 28/77, loss: 0.014945005998015404 2023-01-23 23:54:35.905996: step: 32/77, loss: 0.006989219691604376 2023-01-23 23:54:37.247233: step: 36/77, loss: 0.06558720022439957 2023-01-23 23:54:38.559603: step: 40/77, loss: 0.005165797658264637 2023-01-23 23:54:39.857805: step: 44/77, loss: 0.007396637462079525 2023-01-23 23:54:41.210897: step: 48/77, loss: 0.00520114041864872 2023-01-23 23:54:42.543151: step: 52/77, loss: 0.013953941874206066 2023-01-23 23:54:43.876660: step: 56/77, loss: 0.012274347245693207 2023-01-23 23:54:45.135324: step: 60/77, loss: 0.008953189477324486 2023-01-23 23:54:46.452550: step: 64/77, loss: 0.0005845665000379086 2023-01-23 23:54:47.791594: step: 68/77, loss: 0.0281231626868248 2023-01-23 23:54:49.070234: step: 72/77, loss: 0.00018540435121394694 2023-01-23 23:54:50.381046: step: 76/77, loss: 0.0007554187322966754 2023-01-23 23:54:51.709196: step: 80/77, loss: 0.0003465822373982519 2023-01-23 23:54:53.043185: step: 84/77, loss: 0.0049270521849393845 2023-01-23 23:54:54.365965: step: 88/77, loss: 0.00021336837380658835 2023-01-23 23:54:55.659322: step: 92/77, loss: 2.4633269276819192e-05 2023-01-23 23:54:56.967875: step: 96/77, loss: 1.9529574274201877e-05 2023-01-23 23:54:58.285316: step: 100/77, loss: 0.0019179012160748243 2023-01-23 23:54:59.559681: step: 104/77, loss: 0.11362450569868088 2023-01-23 23:55:00.829250: step: 108/77, loss: 0.008741766214370728 2023-01-23 23:55:02.127377: step: 112/77, loss: 0.014528511092066765 2023-01-23 
23:55:03.432675: step: 116/77, loss: 0.00031644152477383614 2023-01-23 23:55:04.836928: step: 120/77, loss: 0.00115126499440521 2023-01-23 23:55:06.180255: step: 124/77, loss: 0.00016333360690623522 2023-01-23 23:55:07.518398: step: 128/77, loss: 0.0008891576435416937 2023-01-23 23:55:08.868213: step: 132/77, loss: 0.10228507220745087 2023-01-23 23:55:10.193011: step: 136/77, loss: 0.00025623812689445913 2023-01-23 23:55:11.543834: step: 140/77, loss: 1.4330006706586573e-05 2023-01-23 23:55:12.862074: step: 144/77, loss: 0.01142636127769947 2023-01-23 23:55:14.145965: step: 148/77, loss: 0.0019180062226951122 2023-01-23 23:55:15.505577: step: 152/77, loss: 1.33709854708286e-05 2023-01-23 23:55:16.796542: step: 156/77, loss: 0.05863146856427193 2023-01-23 23:55:18.093712: step: 160/77, loss: 0.0013161511160433292 2023-01-23 23:55:19.435799: step: 164/77, loss: 0.00979958102107048 2023-01-23 23:55:20.799470: step: 168/77, loss: 0.008042233996093273 2023-01-23 23:55:22.162321: step: 172/77, loss: 0.0037187940906733274 2023-01-23 23:55:23.442410: step: 176/77, loss: 0.002606304595246911 2023-01-23 23:55:24.733001: step: 180/77, loss: 0.0007418693858198822 2023-01-23 23:55:26.068388: step: 184/77, loss: 1.9120017896057107e-05 2023-01-23 23:55:27.407742: step: 188/77, loss: 0.0017145859310403466 2023-01-23 23:55:28.702397: step: 192/77, loss: 0.002218936337158084 2023-01-23 23:55:30.001768: step: 196/77, loss: 8.69428549776785e-06 2023-01-23 23:55:31.332270: step: 200/77, loss: 0.0025453991256654263 2023-01-23 23:55:32.629472: step: 204/77, loss: 0.00490312185138464 2023-01-23 23:55:33.937280: step: 208/77, loss: 0.00011400566290831193 2023-01-23 23:55:35.277825: step: 212/77, loss: 0.00499136233702302 2023-01-23 23:55:36.572447: step: 216/77, loss: 0.0014865536941215396 2023-01-23 23:55:37.833818: step: 220/77, loss: 0.0018823903519660234 2023-01-23 23:55:39.206691: step: 224/77, loss: 0.0008539587142877281 2023-01-23 23:55:40.515890: step: 228/77, loss: 3.7336732930270955e-05 2023-01-23 23:55:41.842025: step: 232/77, loss: 0.04712314158678055 2023-01-23 23:55:43.173044: step: 236/77, loss: 4.869756230618805e-05 2023-01-23 23:55:44.465931: step: 240/77, loss: 2.1560317691182718e-05 2023-01-23 23:55:45.719159: step: 244/77, loss: 3.2407467642769916e-06 2023-01-23 23:55:46.995180: step: 248/77, loss: 0.007513156160712242 2023-01-23 23:55:48.323951: step: 252/77, loss: 3.0624123610323295e-05 2023-01-23 23:55:49.602821: step: 256/77, loss: 0.0002540667774155736 2023-01-23 23:55:50.868577: step: 260/77, loss: 0.002834441838786006 2023-01-23 23:55:52.146128: step: 264/77, loss: 0.00018282295786775649 2023-01-23 23:55:53.500616: step: 268/77, loss: 0.005002613645046949 2023-01-23 23:55:54.818326: step: 272/77, loss: 0.005725946743041277 2023-01-23 23:55:56.164091: step: 276/77, loss: 0.0191806573420763 2023-01-23 23:55:57.483534: step: 280/77, loss: 0.05966510251164436 2023-01-23 23:55:58.819840: step: 284/77, loss: 0.009301283396780491 2023-01-23 23:56:00.177388: step: 288/77, loss: 0.02285054698586464 2023-01-23 23:56:01.522980: step: 292/77, loss: 0.004749453626573086 2023-01-23 23:56:02.877887: step: 296/77, loss: 0.0002422179386485368 2023-01-23 23:56:04.195476: step: 300/77, loss: 0.0013061568606644869 2023-01-23 23:56:05.512879: step: 304/77, loss: 0.017308104783296585 2023-01-23 23:56:06.885935: step: 308/77, loss: 0.0003579688200261444 2023-01-23 23:56:08.185744: step: 312/77, loss: 0.004894661717116833 2023-01-23 23:56:09.500821: step: 316/77, loss: 0.0006621659849770367 2023-01-23 
23:56:10.819340: step: 320/77, loss: 0.002932305447757244 2023-01-23 23:56:12.115112: step: 324/77, loss: 7.0598043748759665e-06 2023-01-23 23:56:13.422695: step: 328/77, loss: 0.0931524932384491 2023-01-23 23:56:14.725658: step: 332/77, loss: 0.00038095767376944423 2023-01-23 23:56:16.056593: step: 336/77, loss: 0.030057495459914207 2023-01-23 23:56:17.325864: step: 340/77, loss: 0.0006561462068930268 2023-01-23 23:56:18.631919: step: 344/77, loss: 6.38489582343027e-05 2023-01-23 23:56:19.930004: step: 348/77, loss: 0.028494885191321373 2023-01-23 23:56:21.269693: step: 352/77, loss: 0.00018237180483993143 2023-01-23 23:56:22.591003: step: 356/77, loss: 0.009861637838184834 2023-01-23 23:56:23.930307: step: 360/77, loss: 5.5534786952193826e-05 2023-01-23 23:56:25.193268: step: 364/77, loss: 5.960463678178485e-09 2023-01-23 23:56:26.541675: step: 368/77, loss: 8.7584754510317e-05 2023-01-23 23:56:27.878282: step: 372/77, loss: 0.00024132276303134859 2023-01-23 23:56:29.202067: step: 376/77, loss: 0.05859140679240227 2023-01-23 23:56:30.514919: step: 380/77, loss: 0.00022311658540274948 2023-01-23 23:56:31.841036: step: 384/77, loss: 0.006534324958920479 2023-01-23 23:56:33.160834: step: 388/77, loss: 0.00010127961286343634 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.9206349206349206, 'r': 0.44274809160305345, 'f1': 0.5979381443298969}, 'slot': {'p': 0.5769230769230769, 'r': 0.01364877161055505, 'f1': 0.026666666666666672}, 'combined': 0.015945017182130587, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.921875, 'r': 0.45038167938931295, 'f1': 0.6051282051282051}, 'slot': {'p': 0.5769230769230769, 'r': 0.01364877161055505, 'f1': 0.026666666666666672}, 'combined': 0.016136752136752138, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.9206349206349206, 'r': 0.44274809160305345, 'f1': 0.5979381443298969}, 'slot': {'p': 0.5769230769230769, 'r': 0.01364877161055505, 'f1': 0.026666666666666672}, 'combined': 0.015945017182130587, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 
'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:58:26.928510: step: 4/77, loss: 7.629332117176091e-07 2023-01-23 23:58:28.244994: step: 8/77, loss: 0.0009695360204204917 2023-01-23 23:58:29.535351: step: 12/77, loss: 0.02970375493168831 2023-01-23 23:58:30.852265: step: 16/77, loss: 1.8267503037350252e-05 2023-01-23 23:58:32.152035: step: 20/77, loss: 1.6878482711035758e-05 2023-01-23 23:58:33.447239: step: 24/77, loss: 6.711154128424823e-06 2023-01-23 23:58:34.707316: step: 28/77, loss: 0.014280336908996105 2023-01-23 23:58:36.049744: step: 32/77, loss: 0.0032120144460350275 2023-01-23 23:58:37.372566: step: 36/77, loss: 0.0006129553075879812 2023-01-23 23:58:38.657523: step: 40/77, loss: 0.06722747534513474 2023-01-23 23:58:39.956296: step: 44/77, loss: 0.00019596870697569102 2023-01-23 23:58:41.254969: step: 48/77, loss: 0.034420061856508255 2023-01-23 23:58:42.536310: step: 52/77, loss: 0.02980979159474373 2023-01-23 23:58:43.842531: step: 56/77, loss: 7.429834658978507e-05 2023-01-23 23:58:45.160488: step: 60/77, loss: 0.0005003040423616767 2023-01-23 23:58:46.435947: step: 64/77, loss: 0.0010485901730135083 2023-01-23 23:58:47.717629: step: 68/77, loss: 0.0007787790964357555 2023-01-23 23:58:49.064436: step: 72/77, loss: 0.003916820976883173 2023-01-23 23:58:50.357808: step: 76/77, loss: 1.855248774518259e-05 2023-01-23 23:58:51.684049: step: 80/77, loss: 3.254367402405478e-05 2023-01-23 23:58:52.964712: step: 84/77, loss: 0.00047225505113601685 2023-01-23 23:58:54.293338: step: 88/77, loss: 0.002391052432358265 2023-01-23 23:58:55.568849: step: 92/77, loss: 0.0017805419629439712 2023-01-23 23:58:56.836785: step: 96/77, loss: 0.0007184812566265464 2023-01-23 
23:58:58.184671: step: 100/77, loss: 0.016742993146181107 2023-01-23 23:58:59.536620: step: 104/77, loss: 0.02255837246775627 2023-01-23 23:59:00.827836: step: 108/77, loss: 0.00768828671425581 2023-01-23 23:59:02.152341: step: 112/77, loss: 0.0008918981766328216 2023-01-23 23:59:03.454955: step: 116/77, loss: 0.003944310825318098 2023-01-23 23:59:04.715197: step: 120/77, loss: 3.8131292967591435e-05 2023-01-23 23:59:06.020119: step: 124/77, loss: 0.019156042486429214 2023-01-23 23:59:07.324490: step: 128/77, loss: 0.00024989162920974195 2023-01-23 23:59:08.641927: step: 132/77, loss: 4.7405908844666556e-05 2023-01-23 23:59:09.906325: step: 136/77, loss: 0.0008982517756521702 2023-01-23 23:59:11.220066: step: 140/77, loss: 0.00123036396689713 2023-01-23 23:59:12.574085: step: 144/77, loss: 0.0003520891477819532 2023-01-23 23:59:13.835503: step: 148/77, loss: 0.0002538433182053268 2023-01-23 23:59:15.096376: step: 152/77, loss: 0.004786377772688866 2023-01-23 23:59:16.427123: step: 156/77, loss: 6.741286779288203e-05 2023-01-23 23:59:17.748539: step: 160/77, loss: 0.0006734931957907975 2023-01-23 23:59:19.044364: step: 164/77, loss: 0.0005380098591558635 2023-01-23 23:59:20.371316: step: 168/77, loss: 0.001128910225816071 2023-01-23 23:59:21.669563: step: 172/77, loss: 0.0006417105323635042 2023-01-23 23:59:22.976561: step: 176/77, loss: 0.011218901723623276 2023-01-23 23:59:24.298075: step: 180/77, loss: 0.00034760363632813096 2023-01-23 23:59:25.659274: step: 184/77, loss: 0.0246910247951746 2023-01-23 23:59:27.020165: step: 188/77, loss: 9.507672803010792e-05 2023-01-23 23:59:28.335693: step: 192/77, loss: 0.00048033008351922035 2023-01-23 23:59:29.667817: step: 196/77, loss: 0.0012093075783923268 2023-01-23 23:59:30.990460: step: 200/77, loss: 9.417146793566644e-05 2023-01-23 23:59:32.319305: step: 204/77, loss: 0.0002030618634307757 2023-01-23 23:59:33.578477: step: 208/77, loss: 1.4598433153878432e-05 2023-01-23 23:59:34.931609: step: 212/77, loss: 0.00048350432189181447 2023-01-23 23:59:36.251035: step: 216/77, loss: 0.0005155607359483838 2023-01-23 23:59:37.583999: step: 220/77, loss: 1.4626096344727557e-05 2023-01-23 23:59:38.856594: step: 224/77, loss: 0.0366053506731987 2023-01-23 23:59:40.120572: step: 228/77, loss: 0.0017824744572862983 2023-01-23 23:59:41.481152: step: 232/77, loss: 6.202932127052918e-06 2023-01-23 23:59:42.872205: step: 236/77, loss: 0.001113256555981934 2023-01-23 23:59:44.162790: step: 240/77, loss: 0.00209731119684875 2023-01-23 23:59:45.465824: step: 244/77, loss: 0.0005989335477352142 2023-01-23 23:59:46.797017: step: 248/77, loss: 4.602641183737433e-06 2023-01-23 23:59:48.151558: step: 252/77, loss: 0.00042141234735026956 2023-01-23 23:59:49.455995: step: 256/77, loss: 1.1831454003186082e-06 2023-01-23 23:59:50.735479: step: 260/77, loss: 0.0005325632519088686 2023-01-23 23:59:52.078459: step: 264/77, loss: 0.08383160829544067 2023-01-23 23:59:53.366089: step: 268/77, loss: 0.0002262178750243038 2023-01-23 23:59:54.638766: step: 272/77, loss: 0.034328069537878036 2023-01-23 23:59:55.949289: step: 276/77, loss: 0.0007600605022162199 2023-01-23 23:59:57.235283: step: 280/77, loss: 1.2372864148346707e-05 2023-01-23 23:59:58.532608: step: 284/77, loss: 0.028865935280919075 2023-01-23 23:59:59.826227: step: 288/77, loss: 0.008949083276093006 2023-01-24 00:00:01.124559: step: 292/77, loss: 0.024472959339618683 2023-01-24 00:00:02.466790: step: 296/77, loss: 0.1657877266407013 2023-01-24 00:00:03.768184: step: 300/77, loss: 0.0002879844105336815 2023-01-24 
00:00:05.077318: step: 304/77, loss: 0.08509407937526703 2023-01-24 00:00:06.416633: step: 308/77, loss: 0.0004706513718701899 2023-01-24 00:00:07.711123: step: 312/77, loss: 0.029056323692202568 2023-01-24 00:00:09.000710: step: 316/77, loss: 0.0041311513632535934 2023-01-24 00:00:10.359910: step: 320/77, loss: 0.03252348303794861 2023-01-24 00:00:11.643951: step: 324/77, loss: 9.75675993686309e-06 2023-01-24 00:00:12.965335: step: 328/77, loss: 0.013601149432361126 2023-01-24 00:00:14.298721: step: 332/77, loss: 8.16871615825221e-05 2023-01-24 00:00:15.573618: step: 336/77, loss: 0.000691729539539665 2023-01-24 00:00:16.891211: step: 340/77, loss: 0.00044653864460997283 2023-01-24 00:00:18.214639: step: 344/77, loss: 0.0033582733012735844 2023-01-24 00:00:19.556787: step: 348/77, loss: 5.8396275562699884e-05 2023-01-24 00:00:20.888748: step: 352/77, loss: 0.0015448533231392503 2023-01-24 00:00:22.208692: step: 356/77, loss: 0.0003613264416344464 2023-01-24 00:00:23.528535: step: 360/77, loss: 5.536605749512091e-05 2023-01-24 00:00:24.817546: step: 364/77, loss: 0.00011568279296625406 2023-01-24 00:00:26.122045: step: 368/77, loss: 7.776810525683686e-05 2023-01-24 00:00:27.419663: step: 372/77, loss: 0.026629647240042686 2023-01-24 00:00:28.705390: step: 376/77, loss: 7.089160499162972e-05 2023-01-24 00:00:30.032308: step: 380/77, loss: 0.00019869825337082148 2023-01-24 00:00:31.364349: step: 384/77, loss: 0.004738472402095795 2023-01-24 00:00:32.669149: step: 388/77, loss: 0.005280292592942715 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050055207949944794, 'epoch': 18} Test Chinese: {'template': {'p': 0.9322033898305084, 'r': 0.4198473282442748, 'f1': 0.5789473684210527}, 'slot': {'p': 0.52, 'r': 0.011828935395814377, 'f1': 0.023131672597864767}, 'combined': 0.013392020977711182, 'epoch': 18} Dev Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050055207949944794, 'epoch': 18} Test Korean: {'template': {'p': 0.9152542372881356, 'r': 0.4122137404580153, 'f1': 0.568421052631579}, 'slot': {'p': 0.52, 'r': 0.011828935395814377, 'f1': 0.023131672597864767}, 'combined': 0.013148529687207344, 'epoch': 18} Dev Russian: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050055207949944794, 'epoch': 18} Test Russian: {'template': {'p': 0.9322033898305084, 'r': 0.4198473282442748, 'f1': 0.5789473684210527}, 'slot': {'p': 0.52, 'r': 0.011828935395814377, 'f1': 0.023131672597864767}, 'combined': 0.013392020977711182, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} 
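Note: the 'combined' field in the evaluation dicts above is consistent with the product of the template F1 and the slot F1; for example, the epoch-18 Test Chinese entry gives 0.5789473684210527 * 0.023131672597864767 ≈ 0.013392020977711182, which matches the logged value. A minimal Python sketch of that relationship (this is not the project's evaluation code; the helper names are illustrative):

    def f1(p, r):
        # standard F1; 0.0 when precision and recall are both 0
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    # figures taken from the epoch-18 "Test Chinese" entry above
    template_f1 = f1(0.9322033898305084, 0.4198473282442748)  # ~0.5789473684210527
    slot_f1     = f1(0.52, 0.011828935395814377)              # ~0.023131672597864767
    combined    = template_f1 * slot_f1                       # ~0.013392020977711182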
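Note: the "Current best result" blocks that follow keep reporting epoch 3. The dev 'combined' score of epochs 19-22 (0.05179909351586346) only ties epoch 3's best, and epochs 18 and 23 fall below it, so the logged behaviour is consistent with best-checkpoint tracking that replaces the stored result only on a strict improvement. A hedged sketch under that assumption (names are illustrative, not taken from train.py):

    best_dev = {"combined": float("-inf"), "epoch": None}

    def maybe_update_best(dev_result, epoch):
        # strict '>' keeps the earlier epoch when a later one merely ties the score
        if dev_result["combined"] > best_dev["combined"]:
            best_dev.update(combined=dev_result["combined"], epoch=epoch)
        return best_dev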
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:02:26.072145: step: 4/77, loss: 0.017067037522792816 2023-01-24 00:02:27.414489: step: 8/77, loss: 0.009045921266078949 2023-01-24 00:02:28.682590: step: 12/77, loss: 0.0011866106651723385 2023-01-24 00:02:29.978536: step: 16/77, loss: 0.0033251740969717503 2023-01-24 00:02:31.235512: step: 20/77, loss: 0.00018421841377858073 2023-01-24 00:02:32.597623: step: 24/77, loss: 0.00010344362817704678 2023-01-24 00:02:33.932898: step: 28/77, loss: 0.0037648696452379227 2023-01-24 00:02:35.277837: step: 32/77, loss: 0.010512863285839558 2023-01-24 00:02:36.618089: step: 36/77, loss: 0.037238556891679764 2023-01-24 00:02:37.862049: step: 40/77, loss: 4.192236883682199e-05 2023-01-24 00:02:39.136539: step: 44/77, loss: 1.012978009384824e-05 2023-01-24 00:02:40.433158: step: 48/77, loss: 0.00015208007243927568 2023-01-24 00:02:41.680059: step: 52/77, loss: 0.0011588814668357372 2023-01-24 00:02:42.992554: step: 56/77, loss: 0.03443307429552078 2023-01-24 00:02:44.318159: step: 60/77, loss: 0.0003580081684049219 2023-01-24 00:02:45.592922: step: 64/77, loss: 4.402750346343964e-05 2023-01-24 00:02:46.899560: step: 68/77, loss: 0.0008609103970229626 2023-01-24 00:02:48.233595: step: 72/77, loss: 9.442742157261819e-05 2023-01-24 00:02:49.494353: step: 76/77, loss: 
0.013628042303025723 2023-01-24 00:02:50.776435: step: 80/77, loss: 0.00020224417676217854 2023-01-24 00:02:52.061753: step: 84/77, loss: 0.024999741464853287 2023-01-24 00:02:53.374667: step: 88/77, loss: 0.0008359450148418546 2023-01-24 00:02:54.697083: step: 92/77, loss: 0.025218121707439423 2023-01-24 00:02:56.014305: step: 96/77, loss: 0.03694126009941101 2023-01-24 00:02:57.330140: step: 100/77, loss: 0.00014883764379192144 2023-01-24 00:02:58.645292: step: 104/77, loss: 0.0005104810697957873 2023-01-24 00:02:59.958426: step: 108/77, loss: 1.234683168149786e-05 2023-01-24 00:03:01.264209: step: 112/77, loss: 0.001363265560939908 2023-01-24 00:03:02.541470: step: 116/77, loss: 0.0007034118170849979 2023-01-24 00:03:03.875631: step: 120/77, loss: 0.0013016803422942758 2023-01-24 00:03:05.146378: step: 124/77, loss: 3.0888643323123688e-06 2023-01-24 00:03:06.428027: step: 128/77, loss: 0.03757849708199501 2023-01-24 00:03:07.713010: step: 132/77, loss: 0.14182689785957336 2023-01-24 00:03:09.003290: step: 136/77, loss: 0.004270021803677082 2023-01-24 00:03:10.308579: step: 140/77, loss: 9.920346201397479e-05 2023-01-24 00:03:11.627878: step: 144/77, loss: 0.036307238042354584 2023-01-24 00:03:12.974113: step: 148/77, loss: 0.0014702532207593322 2023-01-24 00:03:14.305351: step: 152/77, loss: 1.8030320347861561e-07 2023-01-24 00:03:15.670167: step: 156/77, loss: 2.234895328001585e-05 2023-01-24 00:03:16.977821: step: 160/77, loss: 0.010909469798207283 2023-01-24 00:03:18.275515: step: 164/77, loss: 6.514219421660528e-05 2023-01-24 00:03:19.594430: step: 168/77, loss: 0.07781477272510529 2023-01-24 00:03:20.893267: step: 172/77, loss: 0.0005408531869761646 2023-01-24 00:03:22.213872: step: 176/77, loss: 0.003037202637642622 2023-01-24 00:03:23.552807: step: 180/77, loss: 4.200325929559767e-05 2023-01-24 00:03:24.928229: step: 184/77, loss: 0.00032234640093520284 2023-01-24 00:03:26.236840: step: 188/77, loss: 0.023665133863687515 2023-01-24 00:03:27.510004: step: 192/77, loss: 0.009753705002367496 2023-01-24 00:03:28.819227: step: 196/77, loss: 0.00011679470480885357 2023-01-24 00:03:30.155570: step: 200/77, loss: 0.004883726127445698 2023-01-24 00:03:31.511126: step: 204/77, loss: 0.0939313992857933 2023-01-24 00:03:32.847402: step: 208/77, loss: 0.011193937622010708 2023-01-24 00:03:34.191008: step: 212/77, loss: 0.01989990472793579 2023-01-24 00:03:35.477370: step: 216/77, loss: 0.0007058902410790324 2023-01-24 00:03:36.802259: step: 220/77, loss: 0.009262369014322758 2023-01-24 00:03:38.127372: step: 224/77, loss: 0.02305023930966854 2023-01-24 00:03:39.435741: step: 228/77, loss: 9.288202818424907e-06 2023-01-24 00:03:40.769634: step: 232/77, loss: 1.7909143934957683e-05 2023-01-24 00:03:42.077889: step: 236/77, loss: 0.012563599273562431 2023-01-24 00:03:43.412488: step: 240/77, loss: 9.349367246613838e-06 2023-01-24 00:03:44.705393: step: 244/77, loss: 0.011641522869467735 2023-01-24 00:03:46.058198: step: 248/77, loss: 0.0009031022782437503 2023-01-24 00:03:47.434109: step: 252/77, loss: 1.2669736861425918e-05 2023-01-24 00:03:48.735958: step: 256/77, loss: 0.0005724854418076575 2023-01-24 00:03:50.024051: step: 260/77, loss: 0.0295106191188097 2023-01-24 00:03:51.323328: step: 264/77, loss: 0.025614425539970398 2023-01-24 00:03:52.614718: step: 268/77, loss: 3.429138450883329e-05 2023-01-24 00:03:53.896963: step: 272/77, loss: 0.036679305136203766 2023-01-24 00:03:55.151834: step: 276/77, loss: 0.0019726853352040052 2023-01-24 00:03:56.553178: step: 280/77, loss: 
0.005493839271366596 2023-01-24 00:03:57.868588: step: 284/77, loss: 2.5428231310797855e-05 2023-01-24 00:03:59.202388: step: 288/77, loss: 0.0011763731017708778 2023-01-24 00:04:00.529431: step: 292/77, loss: 7.644234756298829e-07 2023-01-24 00:04:01.866530: step: 296/77, loss: 0.0014449841110035777 2023-01-24 00:04:03.189383: step: 300/77, loss: 0.0003704916452988982 2023-01-24 00:04:04.476417: step: 304/77, loss: 0.00026468545547686517 2023-01-24 00:04:05.802981: step: 308/77, loss: 0.013125604018568993 2023-01-24 00:04:07.147162: step: 312/77, loss: 0.012028532102704048 2023-01-24 00:04:08.481567: step: 316/77, loss: 5.268731001706328e-06 2023-01-24 00:04:09.808008: step: 320/77, loss: 4.079467544215731e-05 2023-01-24 00:04:11.138828: step: 324/77, loss: 0.0522034578025341 2023-01-24 00:04:12.478273: step: 328/77, loss: 6.846023097750731e-06 2023-01-24 00:04:13.828671: step: 332/77, loss: 2.2238946257857606e-05 2023-01-24 00:04:15.161951: step: 336/77, loss: 0.0008007865399122238 2023-01-24 00:04:16.457052: step: 340/77, loss: 0.03849714621901512 2023-01-24 00:04:17.791642: step: 344/77, loss: 0.0012156052980571985 2023-01-24 00:04:19.134646: step: 348/77, loss: 0.0004726042097900063 2023-01-24 00:04:20.439159: step: 352/77, loss: 0.00023819933994673193 2023-01-24 00:04:21.729882: step: 356/77, loss: 1.0118911632162053e-05 2023-01-24 00:04:23.103202: step: 360/77, loss: 0.013653255999088287 2023-01-24 00:04:24.418990: step: 364/77, loss: 2.4288812028316897e-07 2023-01-24 00:04:25.730308: step: 368/77, loss: 2.7067086193710566e-05 2023-01-24 00:04:27.029912: step: 372/77, loss: 2.21501959458692e-05 2023-01-24 00:04:28.364380: step: 376/77, loss: 0.000515318475663662 2023-01-24 00:04:29.691301: step: 380/77, loss: 8.078628525254317e-06 2023-01-24 00:04:31.029817: step: 384/77, loss: 0.009522791020572186 2023-01-24 00:04:32.336178: step: 388/77, loss: 0.00013660037075169384 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.5, 'r': 0.012738853503184714, 'f1': 0.02484472049689441}, 'combined': 0.0149575358093548, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.48148148148148145, 'r': 0.011828935395814377, 'f1': 0.023090586145648313}, 'combined': 0.013901475332584188, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.5, 'r': 0.012738853503184714, 'f1': 0.02484472049689441}, 'combined': 0.0149575358093548, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 
'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:06:26.024347: step: 4/77, loss: 0.004518816247582436 2023-01-24 00:06:27.333450: step: 8/77, loss: 8.559234629501589e-06 2023-01-24 00:06:28.646711: step: 12/77, loss: 0.000525409122928977 2023-01-24 00:06:29.966862: step: 16/77, loss: 1.2267551937839016e-05 2023-01-24 00:06:31.271340: step: 20/77, loss: 0.0022193510085344315 2023-01-24 00:06:32.513326: step: 24/77, loss: 0.012789689004421234 2023-01-24 00:06:33.772792: step: 28/77, loss: 3.808999827015214e-05 2023-01-24 00:06:35.072090: step: 32/77, loss: 0.03038734570145607 2023-01-24 00:06:36.364431: step: 36/77, loss: 2.490369661245495e-05 2023-01-24 00:06:37.756424: step: 40/77, loss: 0.00401505921036005 2023-01-24 00:06:39.086423: step: 44/77, loss: 0.0009303762344643474 2023-01-24 00:06:40.340181: step: 48/77, loss: 0.0014327040407806635 2023-01-24 00:06:41.640536: step: 52/77, loss: 2.910674811573699e-05 2023-01-24 00:06:42.945433: step: 56/77, loss: 0.0019094038289040327 2023-01-24 00:06:44.316661: step: 60/77, loss: 
0.009724936448037624 2023-01-24 00:06:45.575795: step: 64/77, loss: 6.951568502699956e-05 2023-01-24 00:06:46.909656: step: 68/77, loss: 0.00016013227286748588 2023-01-24 00:06:48.266581: step: 72/77, loss: 0.048396218568086624 2023-01-24 00:06:49.566020: step: 76/77, loss: 1.948331919265911e-05 2023-01-24 00:06:50.883464: step: 80/77, loss: 4.082077066414058e-05 2023-01-24 00:06:52.228459: step: 84/77, loss: 0.0005087255267426372 2023-01-24 00:06:53.521802: step: 88/77, loss: 0.002552201272919774 2023-01-24 00:06:54.809077: step: 92/77, loss: 4.3950120016233996e-05 2023-01-24 00:06:56.107621: step: 96/77, loss: 0.011920797638595104 2023-01-24 00:06:57.433773: step: 100/77, loss: 0.028989894315600395 2023-01-24 00:06:58.755847: step: 104/77, loss: 0.005524639040231705 2023-01-24 00:07:00.090282: step: 108/77, loss: 3.5203196603106335e-05 2023-01-24 00:07:01.360302: step: 112/77, loss: 0.0001979176013264805 2023-01-24 00:07:02.648128: step: 116/77, loss: 0.05707313120365143 2023-01-24 00:07:04.040684: step: 120/77, loss: 0.00033089410862885416 2023-01-24 00:07:05.342024: step: 124/77, loss: 2.6788911782205105e-05 2023-01-24 00:07:06.632319: step: 128/77, loss: 0.00017451458552386612 2023-01-24 00:07:07.944482: step: 132/77, loss: 7.898695002950262e-06 2023-01-24 00:07:09.221746: step: 136/77, loss: 0.0010085459798574448 2023-01-24 00:07:10.544483: step: 140/77, loss: 1.5973162135196617e-06 2023-01-24 00:07:11.816991: step: 144/77, loss: 0.000650427711661905 2023-01-24 00:07:13.151470: step: 148/77, loss: 1.05796516436385e-06 2023-01-24 00:07:14.442977: step: 152/77, loss: 0.0015735579654574394 2023-01-24 00:07:15.797943: step: 156/77, loss: 0.02899871952831745 2023-01-24 00:07:17.115061: step: 160/77, loss: 0.01198390033096075 2023-01-24 00:07:18.464611: step: 164/77, loss: 0.0010800587479025126 2023-01-24 00:07:19.796145: step: 168/77, loss: 0.0012106273788958788 2023-01-24 00:07:21.076315: step: 172/77, loss: 0.0004172230255790055 2023-01-24 00:07:22.420919: step: 176/77, loss: 0.01801799237728119 2023-01-24 00:07:23.741756: step: 180/77, loss: 0.025919422507286072 2023-01-24 00:07:25.108012: step: 184/77, loss: 8.135867801684071e-07 2023-01-24 00:07:26.424717: step: 188/77, loss: 0.001136435312218964 2023-01-24 00:07:27.718430: step: 192/77, loss: 1.6674499420332722e-05 2023-01-24 00:07:28.973816: step: 196/77, loss: 0.016235962510108948 2023-01-24 00:07:30.273989: step: 200/77, loss: 0.017281893640756607 2023-01-24 00:07:31.613280: step: 204/77, loss: 0.0018460007850080729 2023-01-24 00:07:32.936164: step: 208/77, loss: 0.004925033543258905 2023-01-24 00:07:34.305222: step: 212/77, loss: 5.231866816757247e-05 2023-01-24 00:07:35.646970: step: 216/77, loss: 3.397252612558077e-06 2023-01-24 00:07:36.972633: step: 220/77, loss: 0.00013918301556259394 2023-01-24 00:07:38.280712: step: 224/77, loss: 0.00028344333986751735 2023-01-24 00:07:39.550791: step: 228/77, loss: 1.5092291505425237e-05 2023-01-24 00:07:40.855918: step: 232/77, loss: 0.0009095754357986152 2023-01-24 00:07:42.136011: step: 236/77, loss: 7.0954274633550085e-06 2023-01-24 00:07:43.475941: step: 240/77, loss: 0.01879284903407097 2023-01-24 00:07:44.776414: step: 244/77, loss: 0.03150808438658714 2023-01-24 00:07:46.075296: step: 248/77, loss: 0.029587578028440475 2023-01-24 00:07:47.401616: step: 252/77, loss: 0.003229555208235979 2023-01-24 00:07:48.719519: step: 256/77, loss: 0.0002197189605794847 2023-01-24 00:07:50.009282: step: 260/77, loss: 5.885936502636469e-07 2023-01-24 00:07:51.347754: step: 264/77, loss: 
0.0001560926903039217 2023-01-24 00:07:52.697594: step: 268/77, loss: 0.09169185161590576 2023-01-24 00:07:54.021385: step: 272/77, loss: 0.00018706178525462747 2023-01-24 00:07:55.338298: step: 276/77, loss: 1.0784182450152002e-05 2023-01-24 00:07:56.631033: step: 280/77, loss: 0.00021076208213344216 2023-01-24 00:07:57.868120: step: 284/77, loss: 0.000368105829693377 2023-01-24 00:07:59.176775: step: 288/77, loss: 4.470327041872224e-07 2023-01-24 00:08:00.526030: step: 292/77, loss: 0.03098980151116848 2023-01-24 00:08:01.803887: step: 296/77, loss: 0.014952097088098526 2023-01-24 00:08:03.090057: step: 300/77, loss: 0.0014895956264808774 2023-01-24 00:08:04.442226: step: 304/77, loss: 0.0001381791225867346 2023-01-24 00:08:05.766085: step: 308/77, loss: 3.8397406569856685e-06 2023-01-24 00:08:07.072096: step: 312/77, loss: 0.00017704522178974003 2023-01-24 00:08:08.400711: step: 316/77, loss: 0.06191083788871765 2023-01-24 00:08:09.725340: step: 320/77, loss: 5.371192855818663e-06 2023-01-24 00:08:11.019321: step: 324/77, loss: 1.8029529655905208e-06 2023-01-24 00:08:12.283151: step: 328/77, loss: 4.1384537325939164e-05 2023-01-24 00:08:13.584507: step: 332/77, loss: 0.00034231197787448764 2023-01-24 00:08:14.906673: step: 336/77, loss: 2.9323089165700367e-06 2023-01-24 00:08:16.249035: step: 340/77, loss: 0.00047633511712774634 2023-01-24 00:08:17.538348: step: 344/77, loss: 6.231231054698583e-06 2023-01-24 00:08:18.872201: step: 348/77, loss: 7.328856736421585e-05 2023-01-24 00:08:20.172259: step: 352/77, loss: 0.015188097953796387 2023-01-24 00:08:21.486053: step: 356/77, loss: 0.010481033474206924 2023-01-24 00:08:22.772021: step: 360/77, loss: 0.012685518711805344 2023-01-24 00:08:24.122565: step: 364/77, loss: 0.00010546360135776922 2023-01-24 00:08:25.441957: step: 368/77, loss: 0.1046970933675766 2023-01-24 00:08:26.818500: step: 372/77, loss: 0.004260138608515263 2023-01-24 00:08:28.142097: step: 376/77, loss: 6.853960803709924e-05 2023-01-24 00:08:29.440326: step: 380/77, loss: 0.008298509754240513 2023-01-24 00:08:30.787303: step: 384/77, loss: 1.6012103515095077e-05 2023-01-24 00:08:32.093236: step: 388/77, loss: 0.0005622516036964953 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9230769230769231, 'r': 0.4580152671755725, 'f1': 0.6122448979591837}, 'slot': {'p': 0.5172413793103449, 'r': 0.01364877161055505, 'f1': 0.026595744680851068}, 'combined': 0.016283108988276163, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.921875, 'r': 0.45038167938931295, 'f1': 0.6051282051282051}, 'slot': {'p': 0.5172413793103449, 'r': 0.01364877161055505, 'f1': 0.026595744680851068}, 'combined': 0.016093835242771415, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9230769230769231, 'r': 0.4580152671755725, 'f1': 0.6122448979591837}, 'slot': {'p': 0.5172413793103449, 'r': 0.01364877161055505, 'f1': 
0.026595744680851068}, 'combined': 0.016283108988276163, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:10:26.975172: step: 4/77, loss: 0.00268231681548059 2023-01-24 00:10:28.239029: step: 8/77, loss: 0.010495544411242008 2023-01-24 00:10:29.517812: step: 12/77, loss: 0.0004444315272849053 2023-01-24 00:10:30.866412: step: 16/77, loss: 2.9283808544278145e-05 2023-01-24 00:10:32.162172: step: 20/77, loss: 0.0009628410916775465 2023-01-24 00:10:33.397291: step: 24/77, loss: 7.748473080937401e-07 2023-01-24 00:10:34.663190: step: 28/77, loss: 4.9448035497334786e-06 2023-01-24 00:10:35.979373: step: 32/77, loss: 0.008351600728929043 2023-01-24 00:10:37.309407: step: 36/77, loss: 3.924445263692178e-05 2023-01-24 00:10:38.617846: step: 40/77, loss: 0.0009096739813685417 2023-01-24 
00:10:39.945909: step: 44/77, loss: 0.01073978841304779 2023-01-24 00:10:41.222280: step: 48/77, loss: 0.0011748617980629206 2023-01-24 00:10:42.528908: step: 52/77, loss: 1.230300676979823e-05 2023-01-24 00:10:43.809820: step: 56/77, loss: 0.04393262788653374 2023-01-24 00:10:45.135223: step: 60/77, loss: 0.00020851498993579298 2023-01-24 00:10:46.479951: step: 64/77, loss: 0.0004167587321717292 2023-01-24 00:10:47.791466: step: 68/77, loss: 4.6296310756588355e-05 2023-01-24 00:10:49.096090: step: 72/77, loss: 0.011651339009404182 2023-01-24 00:10:50.409235: step: 76/77, loss: 6.170851702336222e-05 2023-01-24 00:10:51.707712: step: 80/77, loss: 0.00012924282054882497 2023-01-24 00:10:52.996003: step: 84/77, loss: 4.0912304939411115e-06 2023-01-24 00:10:54.290131: step: 88/77, loss: 1.4483674704024452e-06 2023-01-24 00:10:55.623977: step: 92/77, loss: 6.428196502383798e-05 2023-01-24 00:10:56.906697: step: 96/77, loss: 1.910252649395261e-06 2023-01-24 00:10:58.267173: step: 100/77, loss: 0.0006389992777258158 2023-01-24 00:10:59.543283: step: 104/77, loss: 2.9110062314430252e-05 2023-01-24 00:11:00.819176: step: 108/77, loss: 0.02464928850531578 2023-01-24 00:11:02.119742: step: 112/77, loss: 2.1680718873540172e-06 2023-01-24 00:11:03.445590: step: 116/77, loss: 0.0006864761235192418 2023-01-24 00:11:04.773969: step: 120/77, loss: 3.829562444934709e-07 2023-01-24 00:11:06.079826: step: 124/77, loss: 7.570409798063338e-05 2023-01-24 00:11:07.400308: step: 128/77, loss: 0.001121019246056676 2023-01-24 00:11:08.715716: step: 132/77, loss: 1.2084574336768128e-06 2023-01-24 00:11:09.996452: step: 136/77, loss: 1.795521939129685e-06 2023-01-24 00:11:11.312038: step: 140/77, loss: 0.0019349417416378856 2023-01-24 00:11:12.644877: step: 144/77, loss: 0.0006557057495228946 2023-01-24 00:11:13.979267: step: 148/77, loss: 0.0007242461433634162 2023-01-24 00:11:15.287546: step: 152/77, loss: 2.154836147383321e-05 2023-01-24 00:11:16.606528: step: 156/77, loss: 1.4111105883785058e-06 2023-01-24 00:11:17.893681: step: 160/77, loss: 1.416931627318263e-05 2023-01-24 00:11:19.182304: step: 164/77, loss: 0.0006565083749592304 2023-01-24 00:11:20.523929: step: 168/77, loss: 1.5333089322666638e-06 2023-01-24 00:11:21.815711: step: 172/77, loss: 1.4919568457116839e-05 2023-01-24 00:11:23.156276: step: 176/77, loss: 0.02308845706284046 2023-01-24 00:11:24.459500: step: 180/77, loss: 0.0008895195205695927 2023-01-24 00:11:25.743134: step: 184/77, loss: 0.00043443485628813505 2023-01-24 00:11:27.109159: step: 188/77, loss: 0.051694635301828384 2023-01-24 00:11:28.423373: step: 192/77, loss: 0.00227850372903049 2023-01-24 00:11:29.778797: step: 196/77, loss: 0.021429726853966713 2023-01-24 00:11:31.099106: step: 200/77, loss: 0.00030971429077908397 2023-01-24 00:11:32.460715: step: 204/77, loss: 0.0001510155270807445 2023-01-24 00:11:33.760550: step: 208/77, loss: 0.002237622393295169 2023-01-24 00:11:35.067173: step: 212/77, loss: 2.0001190932816826e-05 2023-01-24 00:11:36.390163: step: 216/77, loss: 0.05403360351920128 2023-01-24 00:11:37.679644: step: 220/77, loss: 7.650639599887654e-05 2023-01-24 00:11:39.023574: step: 224/77, loss: 0.0007797401631250978 2023-01-24 00:11:40.328579: step: 228/77, loss: 0.00021848056348972023 2023-01-24 00:11:41.619012: step: 232/77, loss: 0.00012967440125066787 2023-01-24 00:11:42.957235: step: 236/77, loss: 0.002810570877045393 2023-01-24 00:11:44.241305: step: 240/77, loss: 0.002720152959227562 2023-01-24 00:11:45.561257: step: 244/77, loss: 4.8737554607214406e-05 2023-01-24 
00:11:46.861255: step: 248/77, loss: 0.0044198185205459595 2023-01-24 00:11:48.204789: step: 252/77, loss: 1.1010704838554375e-05 2023-01-24 00:11:49.496579: step: 256/77, loss: 0.00017942961130756885 2023-01-24 00:11:50.805563: step: 260/77, loss: 0.02820507250726223 2023-01-24 00:11:52.165604: step: 264/77, loss: 2.5000796085805632e-05 2023-01-24 00:11:53.493497: step: 268/77, loss: 5.47738045497681e-06 2023-01-24 00:11:54.795129: step: 272/77, loss: 0.057624347507953644 2023-01-24 00:11:56.102207: step: 276/77, loss: 0.0057439375668764114 2023-01-24 00:11:57.422294: step: 280/77, loss: 2.388442680967273e-06 2023-01-24 00:11:58.766650: step: 284/77, loss: 0.00021082670718897134 2023-01-24 00:12:00.119399: step: 288/77, loss: 0.00012094304111087695 2023-01-24 00:12:01.462173: step: 292/77, loss: 0.026059003546833992 2023-01-24 00:12:02.769918: step: 296/77, loss: 0.03081570379436016 2023-01-24 00:12:04.044404: step: 300/77, loss: 0.00011321669444441795 2023-01-24 00:12:05.332789: step: 304/77, loss: 0.005827941931784153 2023-01-24 00:12:06.649107: step: 308/77, loss: 0.058896828442811966 2023-01-24 00:12:07.960810: step: 312/77, loss: 0.00017250858945772052 2023-01-24 00:12:09.295432: step: 316/77, loss: 0.002004768932238221 2023-01-24 00:12:10.643584: step: 320/77, loss: 2.2721074856235646e-05 2023-01-24 00:12:11.919795: step: 324/77, loss: 0.0005667175282724202 2023-01-24 00:12:13.235997: step: 328/77, loss: 0.03825288265943527 2023-01-24 00:12:14.562789: step: 332/77, loss: 0.00019563539535738528 2023-01-24 00:12:15.845001: step: 336/77, loss: 5.884109214093769e-06 2023-01-24 00:12:17.193473: step: 340/77, loss: 0.018313970416784286 2023-01-24 00:12:18.496533: step: 344/77, loss: 0.00017678536823950708 2023-01-24 00:12:19.783120: step: 348/77, loss: 1.6817306459415704e-05 2023-01-24 00:12:21.075032: step: 352/77, loss: 0.0017116623930633068 2023-01-24 00:12:22.382807: step: 356/77, loss: 0.00025027766241692007 2023-01-24 00:12:23.688248: step: 360/77, loss: 0.07846976816654205 2023-01-24 00:12:25.037690: step: 364/77, loss: 1.776100589268026e-06 2023-01-24 00:12:26.369744: step: 368/77, loss: 0.0022615143097937107 2023-01-24 00:12:27.665387: step: 372/77, loss: 5.692157856174163e-07 2023-01-24 00:12:29.022639: step: 376/77, loss: 8.089242328424007e-06 2023-01-24 00:12:30.344871: step: 380/77, loss: 6.233382737264037e-05 2023-01-24 00:12:31.634226: step: 384/77, loss: 9.111697727348655e-05 2023-01-24 00:12:32.969200: step: 388/77, loss: 0.00011282552441116422 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 0.8695652173913043, 'r': 0.4580152671755725, 'f1': 0.6}, 'slot': {'p': 0.4838709677419355, 'r': 0.01364877161055505, 'f1': 0.02654867256637168}, 'combined': 0.01592920353982301, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 0.8955223880597015, 'r': 0.4580152671755725, 'f1': 0.6060606060606061}, 'slot': {'p': 0.4827586206896552, 'r': 0.012738853503184714, 'f1': 0.024822695035460994}, 'combined': 0.015044057597249088, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 0.8695652173913043, 'r': 0.4580152671755725, 'f1': 0.6}, 'slot': {'p': 0.4838709677419355, 'r': 0.01364877161055505, 'f1': 0.02654867256637168}, 'combined': 0.01592920353982301, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:14:26.531740: step: 4/77, loss: 0.006674325093626976 2023-01-24 00:14:27.830965: step: 8/77, loss: 0.00012326599971856922 2023-01-24 00:14:29.117817: step: 12/77, loss: 7.927337151159008e-07 2023-01-24 00:14:30.426584: step: 16/77, loss: 1.073012754204683e-05 2023-01-24 00:14:31.766398: step: 20/77, loss: 0.04632338508963585 2023-01-24 00:14:33.051938: step: 24/77, loss: 0.0004419655306264758 
2023-01-24 00:14:34.327538: step: 28/77, loss: 0.0003266305720899254 2023-01-24 00:14:35.609090: step: 32/77, loss: 0.046977244317531586 2023-01-24 00:14:36.915524: step: 36/77, loss: 0.00014411300071515143 2023-01-24 00:14:38.175414: step: 40/77, loss: 0.004513848572969437 2023-01-24 00:14:39.501175: step: 44/77, loss: 0.0028656127396970987 2023-01-24 00:14:40.800713: step: 48/77, loss: 7.888904656283557e-05 2023-01-24 00:14:42.066379: step: 52/77, loss: 0.0019767654594033957 2023-01-24 00:14:43.390565: step: 56/77, loss: 2.720719066928723e-06 2023-01-24 00:14:44.700295: step: 60/77, loss: 9.81954713097366e-07 2023-01-24 00:14:45.995201: step: 64/77, loss: 0.00030149612575769424 2023-01-24 00:14:47.303978: step: 68/77, loss: 1.1771680874517187e-06 2023-01-24 00:14:48.587326: step: 72/77, loss: 5.9117232012795284e-05 2023-01-24 00:14:49.900108: step: 76/77, loss: 0.04461955651640892 2023-01-24 00:14:51.195970: step: 80/77, loss: 0.014770245179533958 2023-01-24 00:14:52.499030: step: 84/77, loss: 7.152552683464819e-08 2023-01-24 00:14:53.820688: step: 88/77, loss: 0.001319216564297676 2023-01-24 00:14:55.112145: step: 92/77, loss: 0.00010210266918875277 2023-01-24 00:14:56.386876: step: 96/77, loss: 4.071623698109761e-05 2023-01-24 00:14:57.736785: step: 100/77, loss: 8.443147089565173e-05 2023-01-24 00:14:59.006802: step: 104/77, loss: 0.009545549750328064 2023-01-24 00:15:00.311744: step: 108/77, loss: 1.9398636140977032e-05 2023-01-24 00:15:01.591744: step: 112/77, loss: 9.834516276896466e-07 2023-01-24 00:15:02.903532: step: 116/77, loss: 0.0006247479468584061 2023-01-24 00:15:04.206255: step: 120/77, loss: 0.0005076077650301158 2023-01-24 00:15:05.547185: step: 124/77, loss: 5.5134229626219167e-08 2023-01-24 00:15:06.813804: step: 128/77, loss: 1.0052502148027997e-05 2023-01-24 00:15:08.149447: step: 132/77, loss: 0.000475154141895473 2023-01-24 00:15:09.465437: step: 136/77, loss: 1.0516861038922798e-05 2023-01-24 00:15:10.736459: step: 140/77, loss: 0.008938970044255257 2023-01-24 00:15:12.066936: step: 144/77, loss: 1.1819629435194656e-05 2023-01-24 00:15:13.389837: step: 148/77, loss: 8.478687050228473e-07 2023-01-24 00:15:14.743196: step: 152/77, loss: 0.004407108761370182 2023-01-24 00:15:16.072670: step: 156/77, loss: 1.6391214785471675e-07 2023-01-24 00:15:17.393564: step: 160/77, loss: 0.05659640207886696 2023-01-24 00:15:18.706541: step: 164/77, loss: 1.0966845138682402e-06 2023-01-24 00:15:20.033901: step: 168/77, loss: 0.0012224003439769149 2023-01-24 00:15:21.314433: step: 172/77, loss: 0.03606301173567772 2023-01-24 00:15:22.678465: step: 176/77, loss: 0.0002228868834208697 2023-01-24 00:15:23.972148: step: 180/77, loss: 0.003944731783121824 2023-01-24 00:15:25.271132: step: 184/77, loss: 0.0011084693251177669 2023-01-24 00:15:26.581128: step: 188/77, loss: 0.00027038625557906926 2023-01-24 00:15:27.900420: step: 192/77, loss: 0.0035924986004829407 2023-01-24 00:15:29.205885: step: 196/77, loss: 2.804172254400328e-06 2023-01-24 00:15:30.561416: step: 200/77, loss: 5.185574991628528e-07 2023-01-24 00:15:31.882777: step: 204/77, loss: 7.971924560479238e-07 2023-01-24 00:15:33.216328: step: 208/77, loss: 9.861079888651147e-06 2023-01-24 00:15:34.540734: step: 212/77, loss: 1.7955117073142901e-06 2023-01-24 00:15:35.866934: step: 216/77, loss: 5.492773561854847e-05 2023-01-24 00:15:37.130089: step: 220/77, loss: 9.138667337538209e-06 2023-01-24 00:15:38.444124: step: 224/77, loss: 0.0010349903022870421 2023-01-24 00:15:39.776005: step: 228/77, loss: 0.00012837017129641026 
2023-01-24 00:15:41.072923: step: 232/77, loss: 0.05692150816321373 2023-01-24 00:15:42.394592: step: 236/77, loss: 0.0003018905990757048 2023-01-24 00:15:43.723439: step: 240/77, loss: 5.213461918174289e-05 2023-01-24 00:15:45.049913: step: 244/77, loss: 0.005417892709374428 2023-01-24 00:15:46.329557: step: 248/77, loss: 2.4722794478293508e-05 2023-01-24 00:15:47.570456: step: 252/77, loss: 9.783964924281463e-05 2023-01-24 00:15:48.869652: step: 256/77, loss: 0.00041096023051068187 2023-01-24 00:15:50.202123: step: 260/77, loss: 0.0002943962754216045 2023-01-24 00:15:51.496313: step: 264/77, loss: 1.968929973372724e-05 2023-01-24 00:15:52.860076: step: 268/77, loss: 3.314549394417554e-05 2023-01-24 00:15:54.198509: step: 272/77, loss: 0.0036741732619702816 2023-01-24 00:15:55.541737: step: 276/77, loss: 0.0003650987346190959 2023-01-24 00:15:56.858351: step: 280/77, loss: 0.007480195723474026 2023-01-24 00:15:58.187871: step: 284/77, loss: 0.005127861630171537 2023-01-24 00:15:59.511239: step: 288/77, loss: 5.181756932870485e-05 2023-01-24 00:16:00.856252: step: 292/77, loss: 0.009548337198793888 2023-01-24 00:16:02.138987: step: 296/77, loss: 1.3648948424815899e-06 2023-01-24 00:16:03.449527: step: 300/77, loss: 0.013185513205826283 2023-01-24 00:16:04.776105: step: 304/77, loss: 6.6154902924608905e-06 2023-01-24 00:16:06.119618: step: 308/77, loss: 0.0001683257578406483 2023-01-24 00:16:07.432625: step: 312/77, loss: 0.006210809573531151 2023-01-24 00:16:08.728326: step: 316/77, loss: 2.7647402021102607e-05 2023-01-24 00:16:10.035981: step: 320/77, loss: 0.00011505853763082996 2023-01-24 00:16:11.349957: step: 324/77, loss: 0.00043252320028841496 2023-01-24 00:16:12.665177: step: 328/77, loss: 8.42785811983049e-06 2023-01-24 00:16:13.975556: step: 332/77, loss: 0.004213067702949047 2023-01-24 00:16:15.306056: step: 336/77, loss: 0.13022495806217194 2023-01-24 00:16:16.625660: step: 340/77, loss: 1.6599018408669508e-06 2023-01-24 00:16:17.971766: step: 344/77, loss: 0.022105010226368904 2023-01-24 00:16:19.262741: step: 348/77, loss: 0.0004306129994802177 2023-01-24 00:16:20.586784: step: 352/77, loss: 7.364667544607073e-05 2023-01-24 00:16:21.865521: step: 356/77, loss: 5.502285057445988e-05 2023-01-24 00:16:23.190437: step: 360/77, loss: 0.004900304600596428 2023-01-24 00:16:24.497892: step: 364/77, loss: 0.0001213712603203021 2023-01-24 00:16:25.791017: step: 368/77, loss: 0.005602479446679354 2023-01-24 00:16:27.124859: step: 372/77, loss: 5.916656391491415e-06 2023-01-24 00:16:28.423787: step: 376/77, loss: 0.0001372866245219484 2023-01-24 00:16:29.647705: step: 380/77, loss: 6.366540037561208e-05 2023-01-24 00:16:30.967319: step: 384/77, loss: 0.0013172589242458344 2023-01-24 00:16:32.343385: step: 388/77, loss: 0.00026950225583277643 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Chinese: {'template': {'p': 0.9333333333333333, 'r': 0.42748091603053434, 'f1': 0.5863874345549738}, 'slot': {'p': 0.5384615384615384, 'r': 0.012738853503184714, 'f1': 0.02488888888888889}, 'combined': 0.014594531704479349, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: 
{'template': {'p': 0.9333333333333333, 'r': 0.42748091603053434, 'f1': 0.5863874345549738}, 'slot': {'p': 0.5384615384615384, 'r': 0.012738853503184714, 'f1': 0.02488888888888889}, 'combined': 0.014594531704479349, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.9333333333333333, 'r': 0.42748091603053434, 'f1': 0.5863874345549738}, 'slot': {'p': 0.5384615384615384, 'r': 0.012738853503184714, 'f1': 0.02488888888888889}, 'combined': 0.014594531704479349, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:18:27.607602: step: 4/77, loss: 1.3477228094416205e-05 2023-01-24 00:18:28.956243: 
step: 8/77, loss: 4.9881804443430156e-05 2023-01-24 00:18:30.223832: step: 12/77, loss: 8.783635712461546e-05 2023-01-24 00:18:31.501180: step: 16/77, loss: 0.0007455704035237432 2023-01-24 00:18:32.830881: step: 20/77, loss: 0.00010077015758724883 2023-01-24 00:18:34.137225: step: 24/77, loss: 2.8788408599211834e-06 2023-01-24 00:18:35.493076: step: 28/77, loss: 4.572925718093757e-06 2023-01-24 00:18:36.787711: step: 32/77, loss: 0.00016793067334219813 2023-01-24 00:18:38.070005: step: 36/77, loss: 0.0005516072269529104 2023-01-24 00:18:39.378273: step: 40/77, loss: 2.97271108138375e-05 2023-01-24 00:18:40.708567: step: 44/77, loss: 4.699544660979882e-06 2023-01-24 00:18:42.033488: step: 48/77, loss: 4.090108632226475e-06 2023-01-24 00:18:43.349600: step: 52/77, loss: 0.000500613241456449 2023-01-24 00:18:44.656686: step: 56/77, loss: 1.2382681688904995e-06 2023-01-24 00:18:45.948301: step: 60/77, loss: 0.009421252645552158 2023-01-24 00:18:47.255384: step: 64/77, loss: 4.246797402629454e-07 2023-01-24 00:18:48.610840: step: 68/77, loss: 3.3110889489762485e-05 2023-01-24 00:18:49.948147: step: 72/77, loss: 0.02121484838426113 2023-01-24 00:18:51.279392: step: 76/77, loss: 3.910715167876333e-05 2023-01-24 00:18:52.538676: step: 80/77, loss: 1.24184143714956e-05 2023-01-24 00:18:53.882480: step: 84/77, loss: 0.000224571893340908 2023-01-24 00:18:55.165942: step: 88/77, loss: 2.570334117990569e-06 2023-01-24 00:18:56.488682: step: 92/77, loss: 0.0002045604633167386 2023-01-24 00:18:57.831441: step: 96/77, loss: 3.816035587078659e-06 2023-01-24 00:18:59.129593: step: 100/77, loss: 1.0916721294051968e-05 2023-01-24 00:19:00.438066: step: 104/77, loss: 0.0005943027208559215 2023-01-24 00:19:01.734913: step: 108/77, loss: 0.044727031141519547 2023-01-24 00:19:03.021300: step: 112/77, loss: 2.0712560910851607e-07 2023-01-24 00:19:04.394833: step: 116/77, loss: 0.023349588736891747 2023-01-24 00:19:05.683040: step: 120/77, loss: 0.000780319853220135 2023-01-24 00:19:06.961474: step: 124/77, loss: 3.182269574608654e-05 2023-01-24 00:19:08.246117: step: 128/77, loss: 1.594416119132802e-07 2023-01-24 00:19:09.595676: step: 132/77, loss: 0.01661229506134987 2023-01-24 00:19:10.881279: step: 136/77, loss: 1.9326394067320507e-06 2023-01-24 00:19:12.223203: step: 140/77, loss: 0.00011061552504543215 2023-01-24 00:19:13.503985: step: 144/77, loss: 2.890803079935722e-05 2023-01-24 00:19:14.843315: step: 148/77, loss: 0.0003261259407736361 2023-01-24 00:19:16.173554: step: 152/77, loss: 0.02550693042576313 2023-01-24 00:19:17.522755: step: 156/77, loss: 2.756704873263516e-07 2023-01-24 00:19:18.858991: step: 160/77, loss: 2.3945110569911776e-06 2023-01-24 00:19:20.224132: step: 164/77, loss: 7.703841902184649e-07 2023-01-24 00:19:21.502231: step: 168/77, loss: 0.00010813689004862681 2023-01-24 00:19:22.840483: step: 172/77, loss: 1.8811510017258115e-05 2023-01-24 00:19:24.177263: step: 176/77, loss: 0.027787035331130028 2023-01-24 00:19:25.483158: step: 180/77, loss: 0.0001837980526033789 2023-01-24 00:19:26.839329: step: 184/77, loss: 0.00012887391494587064 2023-01-24 00:19:28.164486: step: 188/77, loss: 9.238715392712038e-08 2023-01-24 00:19:29.447803: step: 192/77, loss: 0.0004598087689373642 2023-01-24 00:19:30.809584: step: 196/77, loss: 3.9932165236677974e-06 2023-01-24 00:19:32.169544: step: 200/77, loss: 0.00033994330442510545 2023-01-24 00:19:33.451312: step: 204/77, loss: 0.0010090291034430265 2023-01-24 00:19:34.783607: step: 208/77, loss: 8.787318802205846e-05 2023-01-24 00:19:36.135990: step: 
212/77, loss: 7.56958229430893e-07 2023-01-24 00:19:37.502148: step: 216/77, loss: 1.0773379699458019e-06 2023-01-24 00:19:38.826766: step: 220/77, loss: 2.8703432690235786e-05 2023-01-24 00:19:40.167943: step: 224/77, loss: 8.135804137054947e-07 2023-01-24 00:19:41.456749: step: 228/77, loss: 0.0323166660964489 2023-01-24 00:19:42.783742: step: 232/77, loss: 0.0011196022387593985 2023-01-24 00:19:44.127392: step: 236/77, loss: 0.00794243160635233 2023-01-24 00:19:45.461774: step: 240/77, loss: 0.009800415486097336 2023-01-24 00:19:46.781104: step: 244/77, loss: 4.991802029508108e-07 2023-01-24 00:19:48.151753: step: 248/77, loss: 5.501529449247755e-05 2023-01-24 00:19:49.506863: step: 252/77, loss: 1.1173647180839907e-05 2023-01-24 00:19:50.817432: step: 256/77, loss: 0.0006034831749275327 2023-01-24 00:19:52.211340: step: 260/77, loss: 7.432499114656821e-05 2023-01-24 00:19:53.575869: step: 264/77, loss: 3.3635027648415416e-05 2023-01-24 00:19:54.874539: step: 268/77, loss: 2.950415591840283e-07 2023-01-24 00:19:56.200584: step: 272/77, loss: 1.021041680360213e-05 2023-01-24 00:19:57.530777: step: 276/77, loss: 1.0952142019959865e-06 2023-01-24 00:19:58.913543: step: 280/77, loss: 3.1473733542952687e-05 2023-01-24 00:20:00.242061: step: 284/77, loss: 6.344338544295169e-06 2023-01-24 00:20:01.578573: step: 288/77, loss: 1.9024222638108768e-05 2023-01-24 00:20:02.912190: step: 292/77, loss: 3.579040367185371e-06 2023-01-24 00:20:04.242897: step: 296/77, loss: 9.28251029108651e-06 2023-01-24 00:20:05.592210: step: 300/77, loss: 0.026114514097571373 2023-01-24 00:20:06.885517: step: 304/77, loss: 3.993246536992956e-06 2023-01-24 00:20:08.214584: step: 308/77, loss: 1.3411042054656264e-08 2023-01-24 00:20:09.497703: step: 312/77, loss: 6.215876965143252e-06 2023-01-24 00:20:10.753574: step: 316/77, loss: 3.3527200571370486e-07 2023-01-24 00:20:12.096616: step: 320/77, loss: 4.982641712558689e-06 2023-01-24 00:20:13.487146: step: 324/77, loss: 5.602793180514709e-07 2023-01-24 00:20:14.871628: step: 328/77, loss: 0.021335016936063766 2023-01-24 00:20:16.211254: step: 332/77, loss: 0.0012030237121507525 2023-01-24 00:20:17.561641: step: 336/77, loss: 0.00014075601939111948 2023-01-24 00:20:18.901822: step: 340/77, loss: 8.080643965513445e-06 2023-01-24 00:20:20.283551: step: 344/77, loss: 6.399318863259396e-06 2023-01-24 00:20:21.589682: step: 348/77, loss: 0.0007741707959212363 2023-01-24 00:20:22.914380: step: 352/77, loss: 0.00022454277495853603 2023-01-24 00:20:24.248457: step: 356/77, loss: 3.925973942386918e-06 2023-01-24 00:20:25.588261: step: 360/77, loss: 0.0001450856652809307 2023-01-24 00:20:26.907998: step: 364/77, loss: 5.075151420896873e-05 2023-01-24 00:20:28.204245: step: 368/77, loss: 3.1441189207725984e-07 2023-01-24 00:20:29.582150: step: 372/77, loss: 9.246639820048586e-06 2023-01-24 00:20:30.957841: step: 376/77, loss: 0.08529046177864075 2023-01-24 00:20:32.276743: step: 380/77, loss: 3.843951162707526e-06 2023-01-24 00:20:33.578972: step: 384/77, loss: 4.559170520224143e-06 2023-01-24 00:20:34.937710: step: 388/77, loss: 2.82367477666412e-06 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 23} Test Chinese: {'template': {'p': 0.9242424242424242, 'r': 0.46564885496183206, 'f1': 0.6192893401015228}, 'slot': {'p': 0.5, 'r': 
0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.01431246474901297, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 23} Test Korean: {'template': {'p': 0.9230769230769231, 'r': 0.4580152671755725, 'f1': 0.6122448979591837}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.014149659863945578, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 23} Test Russian: {'template': {'p': 0.9242424242424242, 'r': 0.46564885496183206, 'f1': 0.6192893401015228}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.01431246474901297, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 24 command: python train.py --model_name 
template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:22:30.409459: step: 4/77, loss: 0.0012159548932686448 2023-01-24 00:22:31.706302: step: 8/77, loss: 1.1826316949736793e-05 2023-01-24 00:22:32.984122: step: 12/77, loss: 3.315163849038072e-05 2023-01-24 00:22:34.289732: step: 16/77, loss: 3.07842310576234e-06 2023-01-24 00:22:35.613266: step: 20/77, loss: 4.5274249714566395e-05 2023-01-24 00:22:36.875633: step: 24/77, loss: 0.00027546961791813374 2023-01-24 00:22:38.243898: step: 28/77, loss: 0.00967315025627613 2023-01-24 00:22:39.602379: step: 32/77, loss: 8.344623836364917e-08 2023-01-24 00:22:40.912375: step: 36/77, loss: 5.8957184592145495e-06 2023-01-24 00:22:42.276866: step: 40/77, loss: 0.0320592001080513 2023-01-24 00:22:43.603323: step: 44/77, loss: 1.594416829675538e-07 2023-01-24 00:22:44.873212: step: 48/77, loss: 0.0002586401242297143 2023-01-24 00:22:46.229552: step: 52/77, loss: 0.05762874335050583 2023-01-24 00:22:47.546539: step: 56/77, loss: 0.0005221142200753093 2023-01-24 00:22:48.917024: step: 60/77, loss: 0.0007917734910733998 2023-01-24 00:22:50.220789: step: 64/77, loss: 8.27010881039314e-05 2023-01-24 00:22:51.537871: step: 68/77, loss: 0.005319363437592983 2023-01-24 00:22:52.855936: step: 72/77, loss: 0.00047313497634604573 2023-01-24 00:22:54.144382: step: 76/77, loss: 1.842924211814534e-05 2023-01-24 00:22:55.435929: step: 80/77, loss: 0.004623536020517349 2023-01-24 00:22:56.683032: step: 84/77, loss: 0.00847904197871685 2023-01-24 00:22:57.959006: step: 88/77, loss: 0.00033163363696075976 2023-01-24 00:22:59.186539: step: 92/77, loss: 0.0010802025208249688 2023-01-24 00:23:00.497326: step: 96/77, loss: 1.2591187896759948e-06 2023-01-24 00:23:01.816539: step: 100/77, loss: 9.653921006247401e-05 2023-01-24 00:23:03.112111: step: 104/77, loss: 0.0005702600465156138 2023-01-24 00:23:04.440117: step: 108/77, loss: 0.00010552568710409105 2023-01-24 00:23:05.784810: step: 112/77, loss: 1.29695708892541e-05 2023-01-24 00:23:07.035475: step: 116/77, loss: 2.2545989850186743e-05 2023-01-24 00:23:08.374393: step: 120/77, loss: 0.07159332931041718 2023-01-24 00:23:09.712460: step: 124/77, loss: 6.646419933531433e-05 2023-01-24 00:23:11.028512: step: 128/77, loss: 9.499242878518999e-05 2023-01-24 00:23:12.316063: step: 132/77, loss: 9.36659998842515e-05 2023-01-24 00:23:13.642084: step: 136/77, loss: 0.0003815369273070246 2023-01-24 00:23:14.979470: step: 140/77, loss: 5.266933385428274e-06 2023-01-24 00:23:16.350778: step: 144/77, loss: 0.03661287575960159 2023-01-24 00:23:17.699541: step: 148/77, loss: 5.105370655655861e-05 2023-01-24 00:23:19.012478: step: 152/77, loss: 0.00012779705866705626 2023-01-24 00:23:20.352580: step: 156/77, loss: 3.266726707806811e-05 2023-01-24 00:23:21.660218: step: 160/77, loss: 2.987370316986926e-05 2023-01-24 00:23:23.038447: step: 164/77, loss: 0.06484833359718323 2023-01-24 00:23:24.410990: step: 168/77, loss: 0.0031613532919436693 2023-01-24 00:23:25.741737: step: 172/77, loss: 2.6726342184701934e-05 2023-01-24 00:23:27.101906: step: 176/77, loss: 0.005361021962016821 2023-01-24 00:23:28.453630: step: 180/77, loss: 1.2211272405693308e-05 2023-01-24 00:23:29.809874: step: 184/77, loss: 0.0008916446240618825 2023-01-24 00:23:31.148497: step: 188/77, loss: 0.00016377741121686995 2023-01-24 00:23:32.474767: step: 192/77, loss: 0.001382010756060481 2023-01-24 00:23:33.816994: step: 196/77, 
loss: 4.331546278990572e-06 2023-01-24 00:23:35.109343: step: 200/77, loss: 0.0017925102729350328 2023-01-24 00:23:36.409224: step: 204/77, loss: 0.00042643165215849876 2023-01-24 00:23:37.727368: step: 208/77, loss: 0.0020623996388167143 2023-01-24 00:23:39.007184: step: 212/77, loss: 3.0625400540884584e-05 2023-01-24 00:23:40.319125: step: 216/77, loss: 3.4627344575710595e-05 2023-01-24 00:23:41.641614: step: 220/77, loss: 4.327108399593271e-06 2023-01-24 00:23:42.955653: step: 224/77, loss: 5.36033121534274e-06 2023-01-24 00:23:44.236563: step: 228/77, loss: 0.0003056804707739502 2023-01-24 00:23:45.524586: step: 232/77, loss: 1.1797816114267334e-05 2023-01-24 00:23:46.903117: step: 236/77, loss: 1.0462316822668072e-05 2023-01-24 00:23:48.250678: step: 240/77, loss: 0.0001913983578560874 2023-01-24 00:23:49.566182: step: 244/77, loss: 0.005624077748507261 2023-01-24 00:23:50.877488: step: 248/77, loss: 0.0017002577660605311 2023-01-24 00:23:52.159602: step: 252/77, loss: 0.01608699932694435 2023-01-24 00:23:53.442472: step: 256/77, loss: 0.00011465288116596639 2023-01-24 00:23:54.753094: step: 260/77, loss: 0.0004897843464277685 2023-01-24 00:23:56.009240: step: 264/77, loss: 3.771346382563934e-05 2023-01-24 00:23:57.317376: step: 268/77, loss: 8.024965063668787e-05 2023-01-24 00:23:58.646175: step: 272/77, loss: 1.8743507098406553e-05 2023-01-24 00:23:59.968842: step: 276/77, loss: 0.00038851777208037674 2023-01-24 00:24:01.283682: step: 280/77, loss: 0.0003650106955319643 2023-01-24 00:24:02.601054: step: 284/77, loss: 0.00029866196564398706 2023-01-24 00:24:03.882663: step: 288/77, loss: 0.0002978905104100704 2023-01-24 00:24:05.225627: step: 292/77, loss: 6.114787993283244e-06 2023-01-24 00:24:06.572100: step: 296/77, loss: 0.00012048119970131665 2023-01-24 00:24:07.890332: step: 300/77, loss: 0.11712194979190826 2023-01-24 00:24:09.200987: step: 304/77, loss: 0.0046895164996385574 2023-01-24 00:24:10.525688: step: 308/77, loss: 1.209469701279886e-05 2023-01-24 00:24:11.838204: step: 312/77, loss: 6.371148629114032e-06 2023-01-24 00:24:13.167200: step: 316/77, loss: 0.00048257590970024467 2023-01-24 00:24:14.474995: step: 320/77, loss: 1.3525404938263819e-05 2023-01-24 00:24:15.752246: step: 324/77, loss: 9.887629857985303e-06 2023-01-24 00:24:17.016639: step: 328/77, loss: 5.4313310101861134e-05 2023-01-24 00:24:18.361940: step: 332/77, loss: 0.000798575347289443 2023-01-24 00:24:19.666518: step: 336/77, loss: 9.506832157057943e-07 2023-01-24 00:24:20.974982: step: 340/77, loss: 7.241319053719053e-06 2023-01-24 00:24:22.264849: step: 344/77, loss: 9.029993179865414e-07 2023-01-24 00:24:23.600295: step: 348/77, loss: 3.7921424791420577e-06 2023-01-24 00:24:24.905715: step: 352/77, loss: 0.07554040104150772 2023-01-24 00:24:26.218994: step: 356/77, loss: 7.293753878911957e-05 2023-01-24 00:24:27.551230: step: 360/77, loss: 7.862165512051433e-05 2023-01-24 00:24:28.882945: step: 364/77, loss: 3.064231714233756e-05 2023-01-24 00:24:30.187734: step: 368/77, loss: 1.3740112990490161e-05 2023-01-24 00:24:31.468948: step: 372/77, loss: 7.894382724771276e-05 2023-01-24 00:24:32.812952: step: 376/77, loss: 3.303540506749414e-05 2023-01-24 00:24:34.140046: step: 380/77, loss: 2.9992257623234764e-05 2023-01-24 00:24:35.432088: step: 384/77, loss: 8.209863881347701e-05 2023-01-24 00:24:36.711236: step: 388/77, loss: 0.014107540249824524 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Chinese: {'template': {'p': 0.8771929824561403, 'r': 0.3816793893129771, 'f1': 0.5319148936170213}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.012293144208037824, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Korean: {'template': {'p': 0.8771929824561403, 'r': 0.3816793893129771, 'f1': 0.5319148936170213}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.012293144208037824, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Russian: {'template': {'p': 0.8771929824561403, 'r': 0.3816793893129771, 'f1': 0.5319148936170213}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.012293144208037824, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} 
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:26:31.625300: step: 4/77, loss: 1.8078786524711177e-05 2023-01-24 00:26:32.948572: step: 8/77, loss: 0.01451027300208807 2023-01-24 00:26:34.274790: step: 12/77, loss: 0.0024240748025476933 2023-01-24 00:26:35.588786: step: 16/77, loss: 4.846112642553635e-05 2023-01-24 00:26:36.902833: step: 20/77, loss: 4.4428663386497647e-05 2023-01-24 00:26:38.272682: step: 24/77, loss: 0.0004771506355609745 2023-01-24 00:26:39.589081: step: 28/77, loss: 0.00028463450144045055 2023-01-24 00:26:40.913659: step: 32/77, loss: 2.9871722290408798e-05 2023-01-24 00:26:42.290632: step: 36/77, loss: 0.00035358016612008214 2023-01-24 00:26:43.619757: step: 40/77, loss: 0.011880475096404552 2023-01-24 00:26:44.943037: step: 44/77, loss: 9.635615424485877e-05 2023-01-24 00:26:46.241135: step: 48/77, loss: 0.015608715824782848 2023-01-24 00:26:47.525540: step: 52/77, loss: 7.743830792605877e-06 2023-01-24 00:26:48.829890: step: 56/77, loss: 0.013100337237119675 2023-01-24 00:26:50.084675: step: 60/77, loss: 0.02021319791674614 2023-01-24 00:26:51.413322: step: 64/77, loss: 0.0001840683980844915 2023-01-24 00:26:52.729952: step: 68/77, loss: 0.00018551107496023178 2023-01-24 00:26:54.038810: step: 72/77, loss: 3.874457615893334e-05 2023-01-24 00:26:55.285289: step: 76/77, loss: 0.003859947668388486 2023-01-24 00:26:56.584227: step: 80/77, loss: 6.0701422626152635e-05 2023-01-24 00:26:57.878484: step: 84/77, loss: 0.00010562671377556399 2023-01-24 00:26:59.206674: step: 88/77, loss: 0.0003795281518250704 2023-01-24 00:27:00.544984: step: 92/77, loss: 4.508575784711866e-06 2023-01-24 00:27:01.871765: step: 96/77, loss: 7.796460704412311e-05 2023-01-24 00:27:03.239115: step: 100/77, loss: 1.855391019489616e-05 2023-01-24 00:27:04.561365: step: 104/77, loss: 0.0023824882227927446 2023-01-24 00:27:05.865442: step: 108/77, loss: 0.00010295546235283837 2023-01-24 00:27:07.148587: step: 112/77, loss: 0.02050900273025036 2023-01-24 00:27:08.492204: step: 116/77, loss: 3.378879773663357e-05 2023-01-24 00:27:09.844391: step: 120/77, loss: 5.485976726049557e-05 2023-01-24 00:27:11.166114: step: 124/77, loss: 1.6510293789906427e-06 2023-01-24 00:27:12.501922: step: 128/77, loss: 3.83666338166222e-05 2023-01-24 00:27:13.799005: step: 132/77, loss: 4.299523061490618e-05 2023-01-24 00:27:15.112954: step: 136/77, loss: 1.727883682178799e-05 2023-01-24 00:27:16.417485: step: 140/77, loss: 0.0006323217530734837 2023-01-24 00:27:17.754866: step: 144/77, loss: 0.03129790350794792 2023-01-24 00:27:19.030555: step: 148/77, loss: 0.003158903680741787 2023-01-24 00:27:20.397585: step: 152/77, loss: 0.0005364782409742475 2023-01-24 00:27:21.740947: step: 156/77, loss: 0.00014099475811235607 2023-01-24 00:27:23.067492: step: 160/77, loss: 0.01949172280728817 2023-01-24 00:27:24.389599: step: 164/77, loss: 0.007668100763112307 2023-01-24 00:27:25.722291: step: 168/77, loss: 2.051947740255855e-05 2023-01-24 00:27:27.052804: step: 172/77, loss: 0.0004227279277984053 2023-01-24 00:27:28.384390: step: 176/77, loss: 0.008719212375581264 2023-01-24 00:27:29.680966: step: 180/77, loss: 
0.003100724657997489 2023-01-24 00:27:31.013267: step: 184/77, loss: 0.01714274100959301 2023-01-24 00:27:32.360970: step: 188/77, loss: 0.00026967335725203156 2023-01-24 00:27:33.721203: step: 192/77, loss: 2.4471361030009575e-05 2023-01-24 00:27:35.077292: step: 196/77, loss: 0.0002496147935744375 2023-01-24 00:27:36.380286: step: 200/77, loss: 0.0005400096997618675 2023-01-24 00:27:37.698329: step: 204/77, loss: 0.020806198939681053 2023-01-24 00:27:39.031873: step: 208/77, loss: 0.002908297348767519 2023-01-24 00:27:40.341303: step: 212/77, loss: 1.0736946023826022e-05 2023-01-24 00:27:41.671679: step: 216/77, loss: 0.036727242171764374 2023-01-24 00:27:42.957968: step: 220/77, loss: 0.010679392144083977 2023-01-24 00:27:44.357938: step: 224/77, loss: 0.0247371606528759 2023-01-24 00:27:45.679136: step: 228/77, loss: 0.013214082457125187 2023-01-24 00:27:47.006408: step: 232/77, loss: 3.2482425012858585e-05 2023-01-24 00:27:48.359118: step: 236/77, loss: 0.0776718333363533 2023-01-24 00:27:49.732577: step: 240/77, loss: 0.0001316557900281623 2023-01-24 00:27:51.094263: step: 244/77, loss: 1.5604582586092874e-05 2023-01-24 00:27:52.368422: step: 248/77, loss: 3.693843609653413e-05 2023-01-24 00:27:53.731125: step: 252/77, loss: 0.033623889088630676 2023-01-24 00:27:55.054129: step: 256/77, loss: 0.010058862157166004 2023-01-24 00:27:56.385127: step: 260/77, loss: 0.0002979889395646751 2023-01-24 00:27:57.771176: step: 264/77, loss: 5.346520629245788e-05 2023-01-24 00:27:59.078151: step: 268/77, loss: 0.0005596758564934134 2023-01-24 00:28:00.411020: step: 272/77, loss: 9.120593858824577e-06 2023-01-24 00:28:01.694613: step: 276/77, loss: 0.00045637143193744123 2023-01-24 00:28:03.064339: step: 280/77, loss: 0.0001845014630816877 2023-01-24 00:28:04.335238: step: 284/77, loss: 0.00040560748311690986 2023-01-24 00:28:05.658882: step: 288/77, loss: 0.0025985874235630035 2023-01-24 00:28:06.959389: step: 292/77, loss: 0.0003584503720048815 2023-01-24 00:28:08.339441: step: 296/77, loss: 7.811022805981338e-05 2023-01-24 00:28:09.638057: step: 300/77, loss: 7.227884634630755e-05 2023-01-24 00:28:10.933589: step: 304/77, loss: 1.4953435311326757e-05 2023-01-24 00:28:12.274725: step: 308/77, loss: 3.5179625683667837e-06 2023-01-24 00:28:13.573115: step: 312/77, loss: 0.00016858424351084977 2023-01-24 00:28:14.875165: step: 316/77, loss: 0.0001187587040476501 2023-01-24 00:28:16.237300: step: 320/77, loss: 0.01913926936686039 2023-01-24 00:28:17.577161: step: 324/77, loss: 0.00039371493039652705 2023-01-24 00:28:18.927750: step: 328/77, loss: 0.0018512567039579153 2023-01-24 00:28:20.260065: step: 332/77, loss: 2.1880205167690292e-05 2023-01-24 00:28:21.627875: step: 336/77, loss: 0.0002811326121445745 2023-01-24 00:28:22.909164: step: 340/77, loss: 5.7129363995045424e-05 2023-01-24 00:28:24.261968: step: 344/77, loss: 0.0019272958161309361 2023-01-24 00:28:25.641986: step: 348/77, loss: 0.00403275853022933 2023-01-24 00:28:26.948926: step: 352/77, loss: 0.002493165200576186 2023-01-24 00:28:28.284127: step: 356/77, loss: 1.8539773009251803e-05 2023-01-24 00:28:29.678033: step: 360/77, loss: 0.06888003647327423 2023-01-24 00:28:30.993995: step: 364/77, loss: 0.001463544089347124 2023-01-24 00:28:32.303094: step: 368/77, loss: 0.00014727277448400855 2023-01-24 00:28:33.631832: step: 372/77, loss: 0.0008448277367278934 2023-01-24 00:28:35.003878: step: 376/77, loss: 0.00031047200900502503 2023-01-24 00:28:36.367738: step: 380/77, loss: 6.899063009768724e-07 2023-01-24 00:28:37.704206: step: 
384/77, loss: 8.046570769693062e-07 2023-01-24 00:28:39.006213: step: 388/77, loss: 0.001087652170099318 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9253731343283582, 'r': 0.4732824427480916, 'f1': 0.6262626262626263}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.014473625140291806, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9242424242424242, 'r': 0.46564885496183206, 'f1': 0.6192893401015228}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.01431246474901297, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9253731343283582, 'r': 0.4732824427480916, 'f1': 0.6262626262626263}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.014473625140291806, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 
'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:30:34.251212: step: 4/77, loss: 0.0005845409468747675 2023-01-24 00:30:35.581529: step: 8/77, loss: 7.716177606198471e-06 2023-01-24 00:30:36.903567: step: 12/77, loss: 0.018682831898331642 2023-01-24 00:30:38.234537: step: 16/77, loss: 7.593029295094311e-06 2023-01-24 00:30:39.499638: step: 20/77, loss: 1.414090093021514e-06 2023-01-24 00:30:40.824618: step: 24/77, loss: 1.3693821756532998e-06 2023-01-24 00:30:42.121513: step: 28/77, loss: 0.03636294603347778 2023-01-24 00:30:43.466354: step: 32/77, loss: 1.1353353329468518e-05 2023-01-24 00:30:44.756167: step: 36/77, loss: 1.8178689060732722e-06 2023-01-24 00:30:46.105994: step: 40/77, loss: 3.197551222910988e-06 2023-01-24 00:30:47.375883: step: 44/77, loss: 0.0011801602086052299 2023-01-24 00:30:48.699793: step: 48/77, loss: 0.00013486703392118216 2023-01-24 00:30:49.989992: step: 52/77, loss: 0.00027940733707509935 2023-01-24 00:30:51.327224: step: 56/77, loss: 7.922427903395146e-05 2023-01-24 00:30:52.596675: step: 60/77, loss: 0.005133678670972586 2023-01-24 00:30:53.896972: step: 64/77, loss: 0.0019212663173675537 2023-01-24 00:30:55.204594: step: 68/77, loss: 0.0002738984767347574 2023-01-24 00:30:56.531156: step: 72/77, loss: 0.002075678901746869 2023-01-24 00:30:57.800912: step: 76/77, loss: 0.00015379862452391535 2023-01-24 00:30:59.117351: step: 80/77, loss: 7.955257024150342e-05 2023-01-24 00:31:00.460220: step: 84/77, loss: 8.302839705720544e-05 2023-01-24 00:31:01.798310: step: 88/77, loss: 2.8817096335842507e-06 2023-01-24 00:31:03.096632: step: 92/77, loss: 0.0004095395270269364 2023-01-24 00:31:04.397846: step: 96/77, loss: 8.702026548235153e-07 2023-01-24 00:31:05.734627: step: 100/77, loss: 0.030109496787190437 2023-01-24 00:31:07.055161: step: 104/77, loss: 8.553329280402977e-06 2023-01-24 00:31:08.337057: step: 108/77, loss: 0.013677610084414482 2023-01-24 00:31:09.633325: step: 112/77, loss: 0.0006004376336932182 2023-01-24 00:31:10.960160: step: 116/77, loss: 8.59678584674839e-06 2023-01-24 00:31:12.268082: step: 120/77, loss: 0.00037552695721387863 2023-01-24 00:31:13.628540: step: 124/77, loss: 9.507484355708584e-05 2023-01-24 00:31:14.954038: step: 128/77, loss: 0.00028156503685750067 2023-01-24 00:31:16.272736: step: 132/77, loss: 0.00040554223232902586 2023-01-24 00:31:17.555175: step: 136/77, loss: 7.318492862395942e-05 2023-01-24 00:31:18.879573: step: 140/77, loss: 9.517766739008948e-05 2023-01-24 00:31:20.221946: step: 144/77, loss: 0.003778319340199232 2023-01-24 00:31:21.548078: step: 148/77, loss: 0.006978815887123346 2023-01-24 00:31:22.889010: step: 152/77, loss: 3.5166615930393164e-07 2023-01-24 00:31:24.199950: step: 156/77, loss: 5.376308399718255e-05 2023-01-24 00:31:25.534712: step: 160/77, loss: 1.1846309462271165e-06 2023-01-24 00:31:26.873280: step: 164/77, loss: 
3.244573963456787e-05 2023-01-24 00:31:28.205208: step: 168/77, loss: 1.4926770745660178e-05 2023-01-24 00:31:29.545032: step: 172/77, loss: 5.1937960961367935e-05 2023-01-24 00:31:30.851534: step: 176/77, loss: 2.361210499657318e-05 2023-01-24 00:31:32.200013: step: 180/77, loss: 3.657995694084093e-05 2023-01-24 00:31:33.521737: step: 184/77, loss: 0.0023028128780424595 2023-01-24 00:31:34.799048: step: 188/77, loss: 8.001786113709386e-07 2023-01-24 00:31:36.121137: step: 192/77, loss: 0.00041240183054469526 2023-01-24 00:31:37.459807: step: 196/77, loss: 1.0102717169502284e-06 2023-01-24 00:31:38.771904: step: 200/77, loss: 6.346671580104157e-05 2023-01-24 00:31:40.061690: step: 204/77, loss: 1.5809321212145733e-06 2023-01-24 00:31:41.344899: step: 208/77, loss: 3.2718826332711615e-06 2023-01-24 00:31:42.638795: step: 212/77, loss: 4.130280103709083e-06 2023-01-24 00:31:43.939743: step: 216/77, loss: 6.496434707514709e-06 2023-01-24 00:31:45.260143: step: 220/77, loss: 0.03695246949791908 2023-01-24 00:31:46.617130: step: 224/77, loss: 0.0018174505094066262 2023-01-24 00:31:47.915010: step: 228/77, loss: 2.1439096599351615e-05 2023-01-24 00:31:49.228532: step: 232/77, loss: 2.9224374884506688e-05 2023-01-24 00:31:50.554528: step: 236/77, loss: 0.001979271648451686 2023-01-24 00:31:51.914456: step: 240/77, loss: 2.1010566797485808e-07 2023-01-24 00:31:53.225554: step: 244/77, loss: 7.301549231897297e-08 2023-01-24 00:31:54.555355: step: 248/77, loss: 3.650733049198607e-07 2023-01-24 00:31:55.846109: step: 252/77, loss: 6.574868621100904e-06 2023-01-24 00:31:57.165501: step: 256/77, loss: 9.089868399314582e-06 2023-01-24 00:31:58.490940: step: 260/77, loss: 6.109467420856163e-08 2023-01-24 00:31:59.794217: step: 264/77, loss: 0.014889734797179699 2023-01-24 00:32:01.084883: step: 268/77, loss: 4.674082902056398e-06 2023-01-24 00:32:02.384751: step: 272/77, loss: 7.316713890759274e-05 2023-01-24 00:32:03.751980: step: 276/77, loss: 2.4442459107376635e-05 2023-01-24 00:32:05.041357: step: 280/77, loss: 3.446165692366776e-06 2023-01-24 00:32:06.316000: step: 284/77, loss: 0.01793898269534111 2023-01-24 00:32:07.678202: step: 288/77, loss: 0.0002047650923486799 2023-01-24 00:32:08.973010: step: 292/77, loss: 0.001654884428717196 2023-01-24 00:32:10.264259: step: 296/77, loss: 2.056354730939347e-07 2023-01-24 00:32:11.580565: step: 300/77, loss: 4.407174856169149e-05 2023-01-24 00:32:12.938637: step: 304/77, loss: 1.5867699403315783e-05 2023-01-24 00:32:14.240005: step: 308/77, loss: 0.00010468468099134043 2023-01-24 00:32:15.562485: step: 312/77, loss: 0.00015533588884864002 2023-01-24 00:32:16.877872: step: 316/77, loss: 3.3252330467803404e-05 2023-01-24 00:32:18.175790: step: 320/77, loss: 0.017645152285695076 2023-01-24 00:32:19.511830: step: 324/77, loss: 0.008587392047047615 2023-01-24 00:32:20.866103: step: 328/77, loss: 2.6197356419288553e-05 2023-01-24 00:32:22.180042: step: 332/77, loss: 0.0016763238236308098 2023-01-24 00:32:23.507834: step: 336/77, loss: 5.572942427534144e-07 2023-01-24 00:32:24.787243: step: 340/77, loss: 3.2335131550098595e-07 2023-01-24 00:32:26.082518: step: 344/77, loss: 1.4901160305669237e-09 2023-01-24 00:32:27.371843: step: 348/77, loss: 0.00472618080675602 2023-01-24 00:32:28.693635: step: 352/77, loss: 6.518932787002996e-05 2023-01-24 00:32:30.013482: step: 356/77, loss: 2.8163009346826584e-07 2023-01-24 00:32:31.357111: step: 360/77, loss: 4.072131559951231e-05 2023-01-24 00:32:32.666306: step: 364/77, loss: 5.950074773863889e-05 2023-01-24 
00:32:34.028244: step: 368/77, loss: 1.0579780251873672e-07 2023-01-24 00:32:35.324238: step: 372/77, loss: 3.1063103961059824e-05 2023-01-24 00:32:36.672020: step: 376/77, loss: 6.884224603709299e-07 2023-01-24 00:32:37.991723: step: 380/77, loss: 0.005597976036369801 2023-01-24 00:32:39.299793: step: 384/77, loss: 5.24139468325302e-05 2023-01-24 00:32:40.589395: step: 388/77, loss: 6.258482443399771e-08 ================================================== Loss: 0.002 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Chinese: {'template': {'p': 0.9344262295081968, 'r': 0.4351145038167939, 'f1': 0.59375}, 'slot': {'p': 0.5909090909090909, 'r': 0.011828935395814377, 'f1': 0.023193577163247096}, 'combined': 0.013771186440677964, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Korean: {'template': {'p': 0.9344262295081968, 'r': 0.4351145038167939, 'f1': 0.59375}, 'slot': {'p': 0.5909090909090909, 'r': 0.011828935395814377, 'f1': 0.023193577163247096}, 'combined': 0.013771186440677964, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Russian: {'template': {'p': 0.9344262295081968, 'r': 0.4351145038167939, 'f1': 0.59375}, 'slot': {'p': 0.5909090909090909, 'r': 0.011828935395814377, 'f1': 0.023193577163247096}, 'combined': 0.013771186440677964, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': 
{'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:34:35.479471: step: 4/77, loss: 5.606117611023365e-06 2023-01-24 00:34:36.793757: step: 8/77, loss: 8.432649337919429e-05 2023-01-24 00:34:38.086269: step: 12/77, loss: 0.00898031610995531 2023-01-24 00:34:39.411396: step: 16/77, loss: 0.001969903940334916 2023-01-24 00:34:40.750141: step: 20/77, loss: 6.214072072907584e-06 2023-01-24 00:34:42.048212: step: 24/77, loss: 0.00011349977285135537 2023-01-24 00:34:43.372652: step: 28/77, loss: 3.203712992672081e-07 2023-01-24 00:34:44.695227: step: 32/77, loss: 0.0015820408007130027 2023-01-24 00:34:46.057245: step: 36/77, loss: 8.64265459199487e-08 2023-01-24 00:34:47.341307: step: 40/77, loss: 8.586096555518452e-06 2023-01-24 00:34:48.674294: step: 44/77, loss: 0.018632622435688972 2023-01-24 00:34:50.017975: step: 48/77, loss: 6.8772255872318055e-06 2023-01-24 00:34:51.342782: step: 52/77, loss: 1.6801041056169197e-05 2023-01-24 00:34:52.708098: step: 56/77, loss: 1.35445168325532e-06 2023-01-24 00:34:53.993139: step: 60/77, loss: 2.0160439362371108e-06 2023-01-24 00:34:55.347691: step: 64/77, loss: 3.139088221359998e-05 2023-01-24 00:34:56.631305: step: 68/77, loss: 7.599579276984514e-08 2023-01-24 00:34:57.936616: step: 72/77, loss: 5.960463678178485e-09 2023-01-24 00:34:59.268712: step: 76/77, loss: 4.649100162623654e-07 2023-01-24 00:35:00.608086: step: 80/77, loss: 8.607782547187526e-06 2023-01-24 00:35:01.945322: step: 84/77, loss: 4.1276010165347543e-07 2023-01-24 00:35:03.237338: step: 88/77, loss: 1.9534702460077824e-06 2023-01-24 00:35:04.565852: step: 92/77, loss: 3.3809201340773143e-06 2023-01-24 00:35:05.896395: step: 96/77, loss: 0.000383280887035653 2023-01-24 00:35:07.252023: step: 100/77, loss: 0.04520416259765625 2023-01-24 00:35:08.570345: step: 104/77, loss: 3.577503957785666e-05 2023-01-24 00:35:09.917624: step: 108/77, loss: 3.874299281392268e-08 2023-01-24 00:35:11.256408: step: 112/77, loss: 2.9057110850772006e-07 2023-01-24 00:35:12.578744: step: 116/77, loss: 0.0004997196956537664 2023-01-24 00:35:13.922092: step: 120/77, loss: 4.0233111064935656e-08 2023-01-24 00:35:15.216664: step: 124/77, loss: 1.4305092577160394e-07 2023-01-24 00:35:16.555974: step: 128/77, loss: 3.3963064197450876e-05 2023-01-24 00:35:17.882487: step: 132/77, loss: 0.0001803104387363419 2023-01-24 00:35:19.178892: step: 136/77, loss: 0.023600829765200615 2023-01-24 00:35:20.466264: step: 140/77, loss: 0.0324283204972744 2023-01-24 00:35:21.787324: step: 144/77, loss: 4.0875565900933e-05 2023-01-24 00:35:23.108649: 
step: 148/77, loss: 3.6556259146891534e-05 2023-01-24 00:35:24.457563: step: 152/77, loss: 1.1175855973988291e-07 2023-01-24 00:35:25.776841: step: 156/77, loss: 1.71362657397367e-07 2023-01-24 00:35:27.073420: step: 160/77, loss: 0.004206412006169558 2023-01-24 00:35:28.431113: step: 164/77, loss: 3.8100883102742955e-05 2023-01-24 00:35:29.753443: step: 168/77, loss: 5.173112731426954e-05 2023-01-24 00:35:31.109769: step: 172/77, loss: 0.0023015725892037153 2023-01-24 00:35:32.374651: step: 176/77, loss: 1.4766195590709685e-06 2023-01-24 00:35:33.702648: step: 180/77, loss: 5.628646977129392e-05 2023-01-24 00:35:35.002666: step: 184/77, loss: 0.003803855739533901 2023-01-24 00:35:36.327073: step: 188/77, loss: 6.612636298086727e-06 2023-01-24 00:35:37.677401: step: 192/77, loss: 2.401980964350514e-06 2023-01-24 00:35:39.012905: step: 196/77, loss: 2.813032779158675e-06 2023-01-24 00:35:40.330225: step: 200/77, loss: 0.00033058272674679756 2023-01-24 00:35:41.696821: step: 204/77, loss: 2.8712543098663446e-06 2023-01-24 00:35:43.051651: step: 208/77, loss: 3.3868632272060495e-06 2023-01-24 00:35:44.380938: step: 212/77, loss: 7.646017365914304e-06 2023-01-24 00:35:45.748700: step: 216/77, loss: 5.230267561273649e-07 2023-01-24 00:35:47.029151: step: 220/77, loss: 0.0002331840805709362 2023-01-24 00:35:48.350915: step: 224/77, loss: 1.4259716181186377e-06 2023-01-24 00:35:49.726121: step: 228/77, loss: 0.002828009892255068 2023-01-24 00:35:51.063231: step: 232/77, loss: 1.7716438378556632e-06 2023-01-24 00:35:52.428780: step: 236/77, loss: 4.002767673227936e-05 2023-01-24 00:35:53.702135: step: 240/77, loss: 4.567943960864795e-06 2023-01-24 00:35:54.983946: step: 244/77, loss: 2.5778825829547714e-07 2023-01-24 00:35:56.276850: step: 248/77, loss: 3.5877167192666093e-06 2023-01-24 00:35:57.591791: step: 252/77, loss: 3.363920041010715e-05 2023-01-24 00:35:58.876942: step: 256/77, loss: 0.001751170726493001 2023-01-24 00:36:00.175964: step: 260/77, loss: 3.6384628856467316e-06 2023-01-24 00:36:01.528614: step: 264/77, loss: 1.1056458788516466e-06 2023-01-24 00:36:02.852444: step: 268/77, loss: 9.072668945009355e-06 2023-01-24 00:36:04.216222: step: 272/77, loss: 6.171829591039568e-05 2023-01-24 00:36:05.588364: step: 276/77, loss: 0.052831538021564484 2023-01-24 00:36:06.934981: step: 280/77, loss: 5.888020677957684e-05 2023-01-24 00:36:08.264508: step: 284/77, loss: 0.0285626370459795 2023-01-24 00:36:09.593899: step: 288/77, loss: 0.004710773937404156 2023-01-24 00:36:10.912800: step: 292/77, loss: 0.018306363373994827 2023-01-24 00:36:12.205035: step: 296/77, loss: 0.0022880875039845705 2023-01-24 00:36:13.561984: step: 300/77, loss: 8.508421274200373e-07 2023-01-24 00:36:14.887470: step: 304/77, loss: 3.084105264861137e-05 2023-01-24 00:36:16.154616: step: 308/77, loss: 0.00011660241580102593 2023-01-24 00:36:17.472877: step: 312/77, loss: 0.004058020189404488 2023-01-24 00:36:18.777147: step: 316/77, loss: 6.645774988101039e-07 2023-01-24 00:36:20.099222: step: 320/77, loss: 0.06051657348871231 2023-01-24 00:36:21.417286: step: 324/77, loss: 2.8312189925827624e-08 2023-01-24 00:36:22.736288: step: 328/77, loss: 1.4622408343711868e-05 2023-01-24 00:36:24.087271: step: 332/77, loss: 2.007118609981262e-06 2023-01-24 00:36:25.452387: step: 336/77, loss: 1.2787851119355764e-05 2023-01-24 00:36:26.746772: step: 340/77, loss: 9.536724121517182e-08 2023-01-24 00:36:28.043380: step: 344/77, loss: 1.1309793990221806e-06 2023-01-24 00:36:29.389584: step: 348/77, loss: 7.3547039391996805e-06 
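
The per-language blocks in each epoch summary report precision, recall, and F1 at the template level and at the slot level, plus a 'combined' figure. The logged values are consistent with the usual F1 = 2pr/(p+r) at each level and with 'combined' being the product of the two F1 scores; the short sketch below is illustrative only (it is not code from train.py) and simply reproduces the epoch-23 Dev numbers under that assumption.

def f1(p, r):
    # Harmonic mean of precision and recall; defined as 0 when both are 0.
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

def combined_score(template, slot):
    # The logged 'combined' field is consistent with template-F1 times slot-F1.
    return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])

template = {"p": 1.0, "r": 0.5666666666666667}   # Dev template scores, epoch 23
slot = {"p": 0.5, "r": 0.03780718336483932}      # Dev slot scores, epoch 23
print(f1(template["p"], template["r"]))          # ~0.7234042553191489
print(f1(slot["p"], slot["r"]))                  # ~0.07029876977152899
print(combined_score(template, slot))            # ~0.05085442919642522
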
2023-01-24 00:36:30.713514: step: 352/77, loss: 1.5257948007274535e-06 2023-01-24 00:36:32.070231: step: 356/77, loss: 0.06010865792632103 2023-01-24 00:36:33.325843: step: 360/77, loss: 2.6253237592754886e-06 2023-01-24 00:36:34.621186: step: 364/77, loss: 8.09173161542276e-06 2023-01-24 00:36:35.960798: step: 368/77, loss: 0.010575935244560242 2023-01-24 00:36:37.296916: step: 372/77, loss: 2.2738091502105817e-05 2023-01-24 00:36:38.629664: step: 376/77, loss: 8.940689610881236e-08 2023-01-24 00:36:39.946810: step: 380/77, loss: 2.5374167762493016e-06 2023-01-24 00:36:41.293926: step: 384/77, loss: 0.002553819213062525 2023-01-24 00:36:42.571928: step: 388/77, loss: 0.0005925609730184078 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Chinese: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014430465953119392, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Korean: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014430465953119392, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Russian: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014430465953119392, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 
'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:38:37.769799: step: 4/77, loss: 0.027176707983016968 2023-01-24 00:38:39.085067: step: 8/77, loss: 2.357594348723069e-05 2023-01-24 00:38:40.363197: step: 12/77, loss: 1.2739908470393857e-06 2023-01-24 00:38:41.646214: step: 16/77, loss: 5.902528300794074e-06 2023-01-24 00:38:42.956400: step: 20/77, loss: 1.3411042054656264e-08 2023-01-24 00:38:44.294487: step: 24/77, loss: 0.03589042276144028 2023-01-24 00:38:45.588244: step: 28/77, loss: 3.278254467886654e-08 2023-01-24 00:38:46.871980: step: 32/77, loss: 0.0012277706991881132 2023-01-24 00:38:48.207124: step: 36/77, loss: 0.0007992468308657408 2023-01-24 00:38:49.547708: step: 40/77, loss: 1.8924379219242837e-07 2023-01-24 00:38:50.854621: step: 44/77, loss: 1.7195046666529379e-06 2023-01-24 00:38:52.203445: step: 48/77, loss: 1.925296055560466e-05 2023-01-24 00:38:53.505549: step: 52/77, loss: 5.960464122267695e-09 2023-01-24 00:38:54.777741: step: 56/77, loss: 1.776973294909112e-05 2023-01-24 00:38:56.124229: step: 60/77, loss: 4.7385313450831745e-07 2023-01-24 00:38:57.447456: step: 64/77, loss: 0.005373673513531685 2023-01-24 00:38:58.743306: step: 68/77, loss: 3.814671458712837e-07 2023-01-24 00:39:00.036961: step: 72/77, loss: 3.012846946148784e-06 2023-01-24 00:39:01.325356: step: 76/77, loss: 0.023529857397079468 2023-01-24 00:39:02.622029: step: 80/77, loss: 5.0663917505744394e-08 2023-01-24 00:39:03.951551: step: 84/77, loss: 0.0036892006173729897 2023-01-24 00:39:05.242614: step: 88/77, loss: 5.421810328698484e-06 2023-01-24 00:39:06.613879: step: 92/77, loss: 0.00023501695250160992 2023-01-24 00:39:08.001011: step: 96/77, loss: 0.0010119794169440866 2023-01-24 00:39:09.343758: step: 100/77, loss: 0.000184997174073942 2023-01-24 00:39:10.659762: step: 104/77, loss: 8.692131814314052e-05 2023-01-24 00:39:11.987169: step: 108/77, loss: 1.566055743751349e-06 2023-01-24 00:39:13.282378: step: 112/77, loss: 0.011820207349956036 2023-01-24 00:39:14.633758: step: 116/77, loss: 0.0020299663301557302 2023-01-24 00:39:15.980905: step: 120/77, loss: 3.847132575174328e-06 2023-01-24 00:39:17.350703: step: 124/77, loss: 4.768290011725185e-07 2023-01-24 00:39:18.720362: step: 128/77, loss: 
1.5795211538716103e-07 2023-01-24 00:39:20.050757: step: 132/77, loss: 0.005068625323474407 2023-01-24 00:39:21.357508: step: 136/77, loss: 0.028464682400226593 2023-01-24 00:39:22.646759: step: 140/77, loss: 3.08748240058776e-05 2023-01-24 00:39:23.914994: step: 144/77, loss: 5.006713763577864e-07 2023-01-24 00:39:25.225990: step: 148/77, loss: 2.1415771698229946e-05 2023-01-24 00:39:26.566793: step: 152/77, loss: 1.928160145325819e-06 2023-01-24 00:39:27.883624: step: 156/77, loss: 4.453421934158541e-05 2023-01-24 00:39:29.185724: step: 160/77, loss: 0.000179176073288545 2023-01-24 00:39:30.503078: step: 164/77, loss: 4.336169183716265e-07 2023-01-24 00:39:31.807771: step: 168/77, loss: 6.029274663887918e-05 2023-01-24 00:39:33.137804: step: 172/77, loss: 0.014012141153216362 2023-01-24 00:39:34.455210: step: 176/77, loss: 3.4449938084435416e-06 2023-01-24 00:39:35.741138: step: 180/77, loss: 3.4434592635079753e-06 2023-01-24 00:39:37.107380: step: 184/77, loss: 0.0042570470832288265 2023-01-24 00:39:38.446343: step: 188/77, loss: 0.03931383043527603 2023-01-24 00:39:39.719329: step: 192/77, loss: 0.0006372120114974678 2023-01-24 00:39:41.020648: step: 196/77, loss: 0.007489933166652918 2023-01-24 00:39:42.287187: step: 200/77, loss: 0.00017490240861661732 2023-01-24 00:39:43.580145: step: 204/77, loss: 0.0037117195315659046 2023-01-24 00:39:44.898638: step: 208/77, loss: 1.7183883755933493e-05 2023-01-24 00:39:46.207838: step: 212/77, loss: 0.003626827849075198 2023-01-24 00:39:47.516859: step: 216/77, loss: 8.484267709718551e-06 2023-01-24 00:39:48.854861: step: 220/77, loss: 2.6822064569387294e-08 2023-01-24 00:39:50.211687: step: 224/77, loss: 0.00013494711311068386 2023-01-24 00:39:51.523522: step: 228/77, loss: 9.595980827725725e-07 2023-01-24 00:39:52.869332: step: 232/77, loss: 0.10228412598371506 2023-01-24 00:39:54.187717: step: 236/77, loss: 0.0014023756375536323 2023-01-24 00:39:55.504257: step: 240/77, loss: 1.2811026863346342e-05 2023-01-24 00:39:56.873317: step: 244/77, loss: 1.1586070286284667e-05 2023-01-24 00:39:58.132573: step: 248/77, loss: 1.8640672578840167e-06 2023-01-24 00:39:59.480371: step: 252/77, loss: 1.558184521854855e-05 2023-01-24 00:40:00.784262: step: 256/77, loss: 0.0018802760168910027 2023-01-24 00:40:02.126783: step: 260/77, loss: 4.5580063670058735e-06 2023-01-24 00:40:03.446921: step: 264/77, loss: 9.280101949116215e-06 2023-01-24 00:40:04.734009: step: 268/77, loss: 0.0014139283448457718 2023-01-24 00:40:06.075036: step: 272/77, loss: 2.8619801014428958e-05 2023-01-24 00:40:07.447506: step: 276/77, loss: 0.009071122854948044 2023-01-24 00:40:08.771825: step: 280/77, loss: 5.535672971745953e-05 2023-01-24 00:40:10.099489: step: 284/77, loss: 0.00023126896121539176 2023-01-24 00:40:11.382524: step: 288/77, loss: 1.0298275810782798e-05 2023-01-24 00:40:12.678083: step: 292/77, loss: 7.255918171722442e-05 2023-01-24 00:40:14.037653: step: 296/77, loss: 0.00014489045133814216 2023-01-24 00:40:15.307308: step: 300/77, loss: 3.173933009747998e-07 2023-01-24 00:40:16.595584: step: 304/77, loss: 4.8189587687375024e-05 2023-01-24 00:40:17.941196: step: 308/77, loss: 6.347854650812224e-07 2023-01-24 00:40:19.272741: step: 312/77, loss: 0.007390583399683237 2023-01-24 00:40:20.589474: step: 316/77, loss: 1.857395182014443e-05 2023-01-24 00:40:21.898866: step: 320/77, loss: 0.0323256254196167 2023-01-24 00:40:23.203171: step: 324/77, loss: 7.146695861592889e-05 2023-01-24 00:40:24.490930: step: 328/77, loss: 1.5746916687930934e-05 2023-01-24 00:40:25.806768: 
step: 332/77, loss: 5.342927579476964e-06 2023-01-24 00:40:27.174273: step: 336/77, loss: 0.0007421516347676516 2023-01-24 00:40:28.463404: step: 340/77, loss: 9.998560699386871e-07 2023-01-24 00:40:29.839711: step: 344/77, loss: 2.6507834718358936e-06 2023-01-24 00:40:31.136048: step: 348/77, loss: 0.00045299509656615555 2023-01-24 00:40:32.460046: step: 352/77, loss: 2.3236254492076114e-05 2023-01-24 00:40:33.754513: step: 356/77, loss: 0.14836445450782776 2023-01-24 00:40:35.078706: step: 360/77, loss: 6.130066321929917e-05 2023-01-24 00:40:36.366849: step: 364/77, loss: 2.449561725370586e-05 2023-01-24 00:40:37.705765: step: 368/77, loss: 2.849341217370238e-05 2023-01-24 00:40:39.085561: step: 372/77, loss: 1.4483533732345677e-06 2023-01-24 00:40:40.400526: step: 376/77, loss: 0.003930346108973026 2023-01-24 00:40:41.717644: step: 380/77, loss: 5.199554834689479e-06 2023-01-24 00:40:43.019269: step: 384/77, loss: 5.274257273413241e-05 2023-01-24 00:40:44.349248: step: 388/77, loss: 3.4195909393019974e-06 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Chinese: {'template': {'p': 0.8939393939393939, 'r': 0.45038167938931295, 'f1': 0.598984771573604}, 'slot': {'p': 0.41379310344827586, 'r': 0.01091901728844404, 'f1': 0.021276595744680854}, 'combined': 0.012744356841991576, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Korean: {'template': {'p': 0.8939393939393939, 'r': 0.45038167938931295, 'f1': 0.598984771573604}, 'slot': {'p': 0.41379310344827586, 'r': 0.01091901728844404, 'f1': 0.021276595744680854}, 'combined': 0.012744356841991576, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Russian: {'template': {'p': 0.8939393939393939, 'r': 0.45038167938931295, 'f1': 0.598984771573604}, 'slot': {'p': 0.41379310344827586, 'r': 0.01091901728844404, 'f1': 0.021276595744680854}, 'combined': 0.012744356841991576, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': 
{'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:42:39.482048: step: 4/77, loss: 7.253940111695556e-06 2023-01-24 00:42:40.867478: step: 8/77, loss: 3.473104879958555e-05 2023-01-24 00:42:42.227482: step: 12/77, loss: 0.01304363552480936 2023-01-24 00:42:43.512785: step: 16/77, loss: 1.4995683159213513e-05 2023-01-24 00:42:44.860777: step: 20/77, loss: 2.4330889573320746e-05 2023-01-24 00:42:46.179192: step: 24/77, loss: 0.0013938556658104062 2023-01-24 00:42:47.463320: step: 28/77, loss: 8.665451787237544e-06 2023-01-24 00:42:48.799238: step: 32/77, loss: 0.04268426075577736 2023-01-24 00:42:50.167746: step: 36/77, loss: 1.2397526006679982e-05 2023-01-24 00:42:51.507444: step: 40/77, loss: 0.000455087807495147 2023-01-24 00:42:52.781490: step: 44/77, loss: 0.0007864607032388449 2023-01-24 00:42:54.055061: step: 48/77, loss: 0.0001235969248227775 2023-01-24 00:42:55.360145: step: 52/77, loss: 1.1711987326634699e-06 2023-01-24 00:42:56.740834: step: 56/77, loss: 2.0414560708559293e-07 2023-01-24 00:42:58.038543: step: 60/77, loss: 2.1368396119214594e-05 2023-01-24 00:42:59.336476: step: 64/77, loss: 3.2661637305864133e-06 2023-01-24 00:43:00.654981: step: 68/77, loss: 1.2516939307261055e-07 2023-01-24 00:43:01.980234: step: 72/77, loss: 8.197230636142194e-05 2023-01-24 00:43:03.312495: step: 76/77, loss: 1.4051466905584675e-06 2023-01-24 00:43:04.674173: step: 80/77, loss: 6.705506194748523e-08 2023-01-24 00:43:06.014461: step: 84/77, loss: 1.0430811769879256e-08 2023-01-24 00:43:07.339244: step: 88/77, loss: 0.0014793449081480503 2023-01-24 00:43:08.676235: step: 92/77, loss: 0.0017137302784249187 2023-01-24 00:43:09.976252: step: 96/77, loss: 2.363282510486897e-05 2023-01-24 00:43:11.257044: step: 100/77, loss: 1.1309737146802945e-06 2023-01-24 00:43:12.561399: step: 104/77, loss: 7.13046529199346e-06 2023-01-24 00:43:13.938348: step: 108/77, loss: 0.06501419842243195 
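
Each epoch closes with a single "Loss:" figure and a "Current best result" block that has stayed pinned to epoch 3. A plausible reading, sketched below with hypothetical names (none of this is taken from train.py), is that the epoch loss is the mean of the logged step losses and that the best-result block is replaced only when a later epoch strictly improves the dev 'combined' score; that would also explain why later epochs that merely tie epoch 3 (combined 0.05179909351586346) leave it unchanged.

from statistics import mean

def end_of_epoch(step_losses, dev_result, best_so_far):
    # Print the averaged epoch loss, then keep whichever dev result has the
    # higher 'combined' score; ties keep the earlier epoch.
    print(f"Loss: {mean(step_losses):.3f}")
    if best_so_far is None or dev_result["combined"] > best_so_far["combined"]:
        best_so_far = dev_result
    return best_so_far

best = {"combined": 0.05179909351586346, "epoch": 3}       # carried since epoch 3
best = end_of_epoch(
    step_losses=[7.25e-06, 1.3e-02, 2.4e-05],              # placeholder step losses
    dev_result={"combined": 0.048482119404105226, "epoch": 29},
    best_so_far=best,
)
print(best["epoch"])                                       # still 3
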
2023-01-24 00:43:15.272794: step: 112/77, loss: 1.5586274457746185e-06 2023-01-24 00:43:16.563994: step: 116/77, loss: 2.2008064206602285e-06 2023-01-24 00:43:17.856887: step: 120/77, loss: 0.00013937058974988759 2023-01-24 00:43:19.142341: step: 124/77, loss: 1.0892539421547554e-06 2023-01-24 00:43:20.437168: step: 128/77, loss: 0.01585729420185089 2023-01-24 00:43:21.735269: step: 132/77, loss: 6.720363217027625e-07 2023-01-24 00:43:23.011526: step: 136/77, loss: 0.00378251145593822 2023-01-24 00:43:24.313066: step: 140/77, loss: 3.5358402783458587e-06 2023-01-24 00:43:25.587223: step: 144/77, loss: 3.8056539779063314e-05 2023-01-24 00:43:26.920563: step: 148/77, loss: 1.3648892718265415e-06 2023-01-24 00:43:28.218723: step: 152/77, loss: 4.371733666630462e-06 2023-01-24 00:43:29.532659: step: 156/77, loss: 1.1600843208725564e-05 2023-01-24 00:43:30.844741: step: 160/77, loss: 0.00023744924692437053 2023-01-24 00:43:32.185147: step: 164/77, loss: 1.0906782335950993e-05 2023-01-24 00:43:33.523368: step: 168/77, loss: 0.0010377811267971992 2023-01-24 00:43:34.833604: step: 172/77, loss: 0.0004167997103650123 2023-01-24 00:43:36.179653: step: 176/77, loss: 5.140875032338954e-07 2023-01-24 00:43:37.470871: step: 180/77, loss: 6.854525480548546e-08 2023-01-24 00:43:38.783730: step: 184/77, loss: 4.440526595317351e-07 2023-01-24 00:43:40.097012: step: 188/77, loss: 8.821267556413659e-07 2023-01-24 00:43:41.458687: step: 192/77, loss: 3.393060978851281e-05 2023-01-24 00:43:42.827245: step: 196/77, loss: 1.320510000368813e-05 2023-01-24 00:43:44.144637: step: 200/77, loss: 0.0030558668076992035 2023-01-24 00:43:45.461831: step: 204/77, loss: 0.0003135878941975534 2023-01-24 00:43:46.827323: step: 208/77, loss: 0.29668128490448 2023-01-24 00:43:48.160305: step: 212/77, loss: 6.300913810264319e-05 2023-01-24 00:43:49.465917: step: 216/77, loss: 2.0934267013217323e-05 2023-01-24 00:43:50.766100: step: 220/77, loss: 1.5049633930175332e-06 2023-01-24 00:43:52.056573: step: 224/77, loss: 0.004783345386385918 2023-01-24 00:43:53.334636: step: 228/77, loss: 0.00034404834150336683 2023-01-24 00:43:54.641003: step: 232/77, loss: 2.16644457395887e-06 2023-01-24 00:43:55.952654: step: 236/77, loss: 1.6405801943619736e-06 2023-01-24 00:43:57.300149: step: 240/77, loss: 0.0533568300306797 2023-01-24 00:43:58.627378: step: 244/77, loss: 0.0034404934849590063 2023-01-24 00:43:59.973097: step: 248/77, loss: 0.01579485647380352 2023-01-24 00:44:01.272644: step: 252/77, loss: 4.902420869257185e-07 2023-01-24 00:44:02.581572: step: 256/77, loss: 9.106691140914336e-05 2023-01-24 00:44:03.918557: step: 260/77, loss: 1.725894253468141e-05 2023-01-24 00:44:05.240230: step: 264/77, loss: 0.0009840124985203147 2023-01-24 00:44:06.601240: step: 268/77, loss: 0.0015173020074144006 2023-01-24 00:44:07.944979: step: 272/77, loss: 6.512515392387286e-05 2023-01-24 00:44:09.268703: step: 276/77, loss: 0.016170240938663483 2023-01-24 00:44:10.609680: step: 280/77, loss: 0.00011477198859211057 2023-01-24 00:44:11.937899: step: 284/77, loss: 0.0687544196844101 2023-01-24 00:44:13.240618: step: 288/77, loss: 0.016865387558937073 2023-01-24 00:44:14.562993: step: 292/77, loss: 7.856273441575468e-05 2023-01-24 00:44:15.863844: step: 296/77, loss: 5.081226959191554e-07 2023-01-24 00:44:17.190979: step: 300/77, loss: 0.05468401312828064 2023-01-24 00:44:18.509725: step: 304/77, loss: 0.00026558851823210716 2023-01-24 00:44:19.833643: step: 308/77, loss: 0.022894341498613358 2023-01-24 00:44:21.146963: step: 312/77, loss: 
2023-01-24 00:44:22.447187: step: 316/77, loss: 6.428937012969982e-06
2023-01-24 00:44:23.781268: step: 320/77, loss: 0.0047365231439471245
2023-01-24 00:44:25.129820: step: 324/77, loss: 0.0038206009194254875
2023-01-24 00:44:26.471157: step: 328/77, loss: 0.0006751363398507237
2023-01-24 00:44:27.767495: step: 332/77, loss: 5.191058335185517e-06
2023-01-24 00:44:29.059750: step: 336/77, loss: 1.8258346244692802e-05
2023-01-24 00:44:30.392164: step: 340/77, loss: 0.013415530323982239
2023-01-24 00:44:31.699050: step: 344/77, loss: 3.3673438792902743e-06
2023-01-24 00:44:33.021445: step: 348/77, loss: 5.558072189160157e-07
2023-01-24 00:44:34.348161: step: 352/77, loss: 2.9575996904895874e-06
2023-01-24 00:44:35.678771: step: 356/77, loss: 0.0006695285555906594
2023-01-24 00:44:36.949248: step: 360/77, loss: 0.0005699301254935563
2023-01-24 00:44:38.282747: step: 364/77, loss: 2.152216802642215e-05
2023-01-24 00:44:39.673604: step: 368/77, loss: 0.15161128342151642
2023-01-24 00:44:40.983082: step: 372/77, loss: 2.96686303045135e-05
2023-01-24 00:44:42.332829: step: 376/77, loss: 0.001383065595291555
2023-01-24 00:44:43.676246: step: 380/77, loss: 2.3879963919171132e-05
2023-01-24 00:44:44.961939: step: 384/77, loss: 0.00949044805020094
2023-01-24 00:44:46.297394: step: 388/77, loss: 2.4197840957640437e-06
==================================================
Loss: 0.009
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.56, 'r': 0.012738853503184714, 'f1': 0.02491103202846975}, 'combined': 0.014456142938801098, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Korean: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.5416666666666666, 'r': 0.011828935395814377, 'f1': 0.023152270703472838}, 'combined': 0.013435514605124132, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Russian: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.56, 'r': 0.012738853503184714, 'f1': 0.02491103202846975}, 'combined': 0.014456142938801098, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3}
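Note on the score dicts above: the numbers are consistent with the standard precision/recall/F1 definitions, with the 'combined' field matching the product of the template F1 and the slot F1. A minimal sketch under that assumption (the helper name f1 is hypothetical, not taken from train.py), reproducing the epoch-29 "Dev Chinese" entry:

def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Template and slot F1 for the epoch-29 "Dev Chinese" entry above.
template_f1 = f1(1.0, 0.5666666666666667)    # 0.7234042553191489
slot_f1 = f1(0.5, 0.035916824196597356)      # 0.0670194003527337

# 'combined' in the log matches template_f1 * slot_f1.
print(template_f1 * slot_f1)                 # 0.048482119404105226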
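The "Current best result" block still reports epoch 3 even after epoch 29, which is consistent with the best checkpoint being the one with the highest dev 'combined' score. A minimal sketch under that assumption (the update_best helper and best dict are hypothetical):

best = {'combined': float('-inf'), 'epoch': None}

def update_best(dev_combined: float, epoch: int) -> None:
    # Keep the epoch whose dev combined score is the highest seen so far.
    if dev_combined > best['combined']:
        best['combined'] = dev_combined
        best['epoch'] = epoch

# Epoch 3 (dev combined 0.0518...) is not displaced by epoch 29 (0.0485...),
# so the log keeps reporting epoch 3 as the current best.
update_best(0.05179909351586346, 3)
update_best(0.048482119404105226, 29)
print(best['epoch'])  # 3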