Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trans_rep.weight: torch.Size([1024, 2048]) >>> trans_rep.bias: torch.Size([1024]) >>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768]) >>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Etiplate.layers.0.weight: 
torch.Size([450, 768]) >>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6]) >>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> 
completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([4]) >>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> 
overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) n_trainable_params: 582185936, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:24:23.194544: step: 4/77, loss: 1.0566054582595825 2023-01-23 22:24:24.440178: step: 8/77, loss: 1.0537757873535156 2023-01-23 22:24:25.720034: step: 12/77, loss: 1.0559892654418945 2023-01-23 22:24:27.052726: step: 16/77, loss: 1.0583343505859375 2023-01-23 22:24:28.359239: step: 20/77, loss: 1.0633549690246582 2023-01-23 22:24:29.704223: step: 24/77, loss: 1.0405185222625732 2023-01-23 22:24:30.979635: step: 28/77, loss: 1.0492514371871948 2023-01-23 22:24:32.299375: step: 32/77, loss: 1.0514004230499268 2023-01-23 22:24:33.601338: step: 36/77, loss: 1.0344562530517578 2023-01-23 22:24:34.928640: step: 40/77, loss: 1.0398215055465698 2023-01-23 22:24:36.255526: step: 44/77, loss: 1.018258810043335 2023-01-23 22:24:37.543656: step: 48/77, loss: 1.0067038536071777 2023-01-23 22:24:38.871609: step: 52/77, loss: 1.0088309049606323 2023-01-23 22:24:40.189301: step: 56/77, loss: 0.9948985576629639 2023-01-23 22:24:41.456424: step: 60/77, loss: 0.9983891248703003 2023-01-23 22:24:42.772978: step: 64/77, loss: 0.9850597977638245 2023-01-23 22:24:44.061901: step: 68/77, loss: 0.95908522605896 2023-01-23 22:24:45.363295: step: 72/77, loss: 0.9487576484680176 2023-01-23 22:24:46.690649: step: 76/77, loss: 0.9316009879112244 2023-01-23 22:24:48.016697: step: 80/77, loss: 0.9227492213249207 2023-01-23 22:24:49.338079: step: 84/77, loss: 0.9227690696716309 2023-01-23 22:24:50.616950: step: 88/77, loss: 0.8968497514724731 2023-01-23 22:24:51.908636: step: 92/77, loss: 0.8681602478027344 2023-01-23 22:24:53.179707: step: 96/77, loss: 0.8769820928573608 2023-01-23 22:24:54.481940: step: 100/77, loss: 0.881488025188446 2023-01-23 22:24:55.718759: step: 104/77, loss: 0.8428722620010376 2023-01-23 22:24:57.052276: step: 108/77, loss: 0.7961598634719849 2023-01-23 22:24:58.336221: step: 112/77, loss: 0.8437957763671875 2023-01-23 22:24:59.654553: step: 116/77, loss: 0.7791297435760498 2023-01-23 22:25:01.031895: step: 120/77, loss: 0.7618874907493591 2023-01-23 22:25:02.345677: step: 124/77, loss: 0.7741999626159668 2023-01-23 22:25:03.605563: step: 128/77, loss: 0.7475306987762451 2023-01-23 22:25:04.898936: step: 132/77, loss: 0.7159825563430786 2023-01-23 22:25:06.162750: step: 136/77, loss: 0.6631938815116882 2023-01-23 22:25:07.514757: step: 140/77, loss: 0.7139487266540527 2023-01-23 22:25:08.815677: step: 144/77, loss: 0.6545838713645935 2023-01-23 22:25:10.086748: step: 148/77, loss: 0.6513230800628662 2023-01-23 22:25:11.432093: step: 152/77, loss: 0.6200884580612183 2023-01-23 22:25:12.802567: step: 156/77, loss: 0.5634316205978394 2023-01-23 22:25:14.112889: step: 160/77, loss: 0.6044880747795105 2023-01-23 22:25:15.432987: step: 164/77, loss: 0.5412224531173706 2023-01-23 22:25:16.718465: step: 168/77, loss: 0.5273445844650269 2023-01-23 22:25:18.026468: step: 172/77, loss: 0.4999113082885742 2023-01-23 22:25:19.301451: step: 176/77, loss: 0.4900851547718048 2023-01-23 22:25:20.655361: step: 180/77, loss: 0.44046568870544434 2023-01-23 22:25:21.934173: step: 184/77, loss: 0.40524929761886597 2023-01-23 22:25:23.265791: step: 188/77, loss: 0.35899320244789124 2023-01-23 22:25:24.555717: step: 192/77, loss: 0.4494326412677765 2023-01-23 22:25:25.855780: step: 196/77, loss: 0.3220992684364319 2023-01-23 22:25:27.147019: step: 200/77, loss: 0.3402412533760071 2023-01-23 22:25:28.443174: step: 204/77, loss: 0.4437170624732971 2023-01-23 22:25:29.775799: step: 
208/77, loss: 0.24657674133777618 2023-01-23 22:25:31.092145: step: 212/77, loss: 0.2253047227859497 2023-01-23 22:25:32.374668: step: 216/77, loss: 0.2559199631214142 2023-01-23 22:25:33.654900: step: 220/77, loss: 0.2264961451292038 2023-01-23 22:25:34.983206: step: 224/77, loss: 0.23335587978363037 2023-01-23 22:25:36.288012: step: 228/77, loss: 0.23723739385604858 2023-01-23 22:25:37.590667: step: 232/77, loss: 0.20965644717216492 2023-01-23 22:25:38.876093: step: 236/77, loss: 0.27283912897109985 2023-01-23 22:25:40.136578: step: 240/77, loss: 0.119234099984169 2023-01-23 22:25:41.424080: step: 244/77, loss: 0.3087769150733948 2023-01-23 22:25:42.755219: step: 248/77, loss: 0.1239112913608551 2023-01-23 22:25:44.095227: step: 252/77, loss: 0.11339374631643295 2023-01-23 22:25:45.412306: step: 256/77, loss: 0.15533994138240814 2023-01-23 22:25:46.746337: step: 260/77, loss: 0.11168913543224335 2023-01-23 22:25:48.006790: step: 264/77, loss: 0.08665478974580765 2023-01-23 22:25:49.263920: step: 268/77, loss: 0.06983567774295807 2023-01-23 22:25:50.575215: step: 272/77, loss: 0.32832252979278564 2023-01-23 22:25:51.882669: step: 276/77, loss: 0.1965167075395584 2023-01-23 22:25:53.157595: step: 280/77, loss: 0.08166693896055222 2023-01-23 22:25:54.464980: step: 284/77, loss: 0.04782284051179886 2023-01-23 22:25:55.750560: step: 288/77, loss: 0.34814804792404175 2023-01-23 22:25:57.029323: step: 292/77, loss: 0.043856192380189896 2023-01-23 22:25:58.357246: step: 296/77, loss: 0.09605462104082108 2023-01-23 22:25:59.615605: step: 300/77, loss: 0.24693399667739868 2023-01-23 22:26:00.898525: step: 304/77, loss: 0.06090881675481796 2023-01-23 22:26:02.174441: step: 308/77, loss: 0.08499729633331299 2023-01-23 22:26:03.444230: step: 312/77, loss: 0.027350492775440216 2023-01-23 22:26:04.790253: step: 316/77, loss: 0.05126441642642021 2023-01-23 22:26:06.121268: step: 320/77, loss: 0.10123664885759354 2023-01-23 22:26:07.439816: step: 324/77, loss: 0.06947410106658936 2023-01-23 22:26:08.757225: step: 328/77, loss: 0.10531488060951233 2023-01-23 22:26:10.042823: step: 332/77, loss: 0.27845829725265503 2023-01-23 22:26:11.331912: step: 336/77, loss: 0.15196259319782257 2023-01-23 22:26:12.615496: step: 340/77, loss: 0.24861951172351837 2023-01-23 22:26:13.897290: step: 344/77, loss: 0.05084856599569321 2023-01-23 22:26:15.208688: step: 348/77, loss: 0.04008103907108307 2023-01-23 22:26:16.522590: step: 352/77, loss: 0.09764869511127472 2023-01-23 22:26:17.861144: step: 356/77, loss: 0.07287060469388962 2023-01-23 22:26:19.122243: step: 360/77, loss: 0.07838761061429977 2023-01-23 22:26:20.439028: step: 364/77, loss: 0.10594262182712555 2023-01-23 22:26:21.766995: step: 368/77, loss: 0.18141813576221466 2023-01-23 22:26:23.113905: step: 372/77, loss: 0.04820623993873596 2023-01-23 22:26:24.388054: step: 376/77, loss: 0.0767727792263031 2023-01-23 22:26:25.705016: step: 380/77, loss: 0.14602497220039368 2023-01-23 22:26:27.017940: step: 384/77, loss: 0.060212016105651855 2023-01-23 22:26:28.338893: step: 388/77, loss: 0.12397897988557816 ================================================== Loss: 0.484 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 
'epoch': 0} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:28:25.536068: step: 4/77, loss: 0.08628113567829132 2023-01-23 22:28:26.852732: step: 8/77, loss: 0.19332587718963623 2023-01-23 22:28:28.141015: step: 12/77, loss: 0.12840916216373444 2023-01-23 22:28:29.425011: step: 16/77, loss: 0.09008502960205078 2023-01-23 22:28:30.682980: step: 20/77, loss: 0.11896339058876038 2023-01-23 22:28:31.963545: step: 24/77, loss: 0.0842742994427681 2023-01-23 22:28:33.238278: step: 28/77, loss: 0.07516887784004211 2023-01-23 22:28:34.563755: step: 32/77, loss: 0.17610064148902893 2023-01-23 22:28:35.912676: step: 36/77, loss: 0.09164276719093323 2023-01-23 22:28:37.193637: step: 40/77, loss: 0.23728494346141815 2023-01-23 22:28:38.492401: step: 44/77, loss: 0.05573999136686325 2023-01-23 22:28:39.784012: step: 48/77, loss: 0.05871153622865677 2023-01-23 22:28:41.044842: step: 52/77, loss: 0.31232115626335144 2023-01-23 22:28:42.338617: step: 56/77, loss: 0.10275619477033615 2023-01-23 22:28:43.618138: step: 60/77, loss: 0.0885278731584549 2023-01-23 22:28:44.913584: step: 64/77, loss: 0.1984003782272339 2023-01-23 22:28:46.215238: step: 68/77, 
loss: 0.33665239810943604 2023-01-23 22:28:47.558437: step: 72/77, loss: 0.2795385718345642 2023-01-23 22:28:48.837248: step: 76/77, loss: 0.1644812524318695 2023-01-23 22:28:50.123339: step: 80/77, loss: 0.22791095077991486 2023-01-23 22:28:51.412035: step: 84/77, loss: 0.07969798147678375 2023-01-23 22:28:52.729807: step: 88/77, loss: 0.07784508913755417 2023-01-23 22:28:53.991469: step: 92/77, loss: 0.07529832422733307 2023-01-23 22:28:55.277190: step: 96/77, loss: 0.04053203761577606 2023-01-23 22:28:56.567723: step: 100/77, loss: 0.04332693666219711 2023-01-23 22:28:57.893582: step: 104/77, loss: 0.12364348769187927 2023-01-23 22:28:59.228005: step: 108/77, loss: 0.11022159457206726 2023-01-23 22:29:00.527046: step: 112/77, loss: 0.041119933128356934 2023-01-23 22:29:01.814512: step: 116/77, loss: 0.09332107752561569 2023-01-23 22:29:03.136127: step: 120/77, loss: 0.08409717679023743 2023-01-23 22:29:04.457915: step: 124/77, loss: 0.09579865634441376 2023-01-23 22:29:05.753877: step: 128/77, loss: 0.2133885622024536 2023-01-23 22:29:07.068463: step: 132/77, loss: 0.10296496748924255 2023-01-23 22:29:08.372402: step: 136/77, loss: 0.13981102406978607 2023-01-23 22:29:09.676058: step: 140/77, loss: 0.27962273359298706 2023-01-23 22:29:11.000840: step: 144/77, loss: 0.1111413836479187 2023-01-23 22:29:12.286012: step: 148/77, loss: 0.08607277274131775 2023-01-23 22:29:13.538624: step: 152/77, loss: 0.22137989103794098 2023-01-23 22:29:14.860391: step: 156/77, loss: 0.11324742436408997 2023-01-23 22:29:16.188432: step: 160/77, loss: 0.16135777533054352 2023-01-23 22:29:17.471369: step: 164/77, loss: 0.10355418920516968 2023-01-23 22:29:18.778253: step: 168/77, loss: 0.20681661367416382 2023-01-23 22:29:20.137250: step: 172/77, loss: 0.09634518623352051 2023-01-23 22:29:21.421351: step: 176/77, loss: 0.04597388207912445 2023-01-23 22:29:22.730258: step: 180/77, loss: 0.0687372162938118 2023-01-23 22:29:24.044598: step: 184/77, loss: 0.08381231129169464 2023-01-23 22:29:25.357273: step: 188/77, loss: 0.055870767682790756 2023-01-23 22:29:26.604047: step: 192/77, loss: 0.11192315816879272 2023-01-23 22:29:27.951401: step: 196/77, loss: 0.0834076777100563 2023-01-23 22:29:29.240937: step: 200/77, loss: 0.06754839420318604 2023-01-23 22:29:30.478517: step: 204/77, loss: 0.06366132944822311 2023-01-23 22:29:31.781574: step: 208/77, loss: 0.08090916275978088 2023-01-23 22:29:33.091138: step: 212/77, loss: 0.051006052643060684 2023-01-23 22:29:34.441412: step: 216/77, loss: 0.048985555768013 2023-01-23 22:29:35.745125: step: 220/77, loss: 0.04030236601829529 2023-01-23 22:29:36.979086: step: 224/77, loss: 0.10942952334880829 2023-01-23 22:29:38.323644: step: 228/77, loss: 0.05575104430317879 2023-01-23 22:29:39.582913: step: 232/77, loss: 0.057492777705192566 2023-01-23 22:29:40.912124: step: 236/77, loss: 0.12040974199771881 2023-01-23 22:29:42.277898: step: 240/77, loss: 0.10147053003311157 2023-01-23 22:29:43.523898: step: 244/77, loss: 0.15351510047912598 2023-01-23 22:29:44.832934: step: 248/77, loss: 0.2754939794540405 2023-01-23 22:29:46.129890: step: 252/77, loss: 0.08110547065734863 2023-01-23 22:29:47.416081: step: 256/77, loss: 0.11938928067684174 2023-01-23 22:29:48.780041: step: 260/77, loss: 0.0844007134437561 2023-01-23 22:29:50.122951: step: 264/77, loss: 0.20228734612464905 2023-01-23 22:29:51.428416: step: 268/77, loss: 0.19483360648155212 2023-01-23 22:29:52.711033: step: 272/77, loss: 0.09005934000015259 2023-01-23 22:29:54.014276: step: 276/77, loss: 0.10872792452573776 
2023-01-23 22:29:55.292130: step: 280/77, loss: 0.035562627017498016 2023-01-23 22:29:56.587144: step: 284/77, loss: 0.06048338860273361 2023-01-23 22:29:57.887158: step: 288/77, loss: 0.02235712856054306 2023-01-23 22:29:59.204784: step: 292/77, loss: 0.06341751664876938 2023-01-23 22:30:00.527365: step: 296/77, loss: 0.019283972680568695 2023-01-23 22:30:01.832957: step: 300/77, loss: 0.14400897920131683 2023-01-23 22:30:03.094220: step: 304/77, loss: 0.24055436253547668 2023-01-23 22:30:04.430363: step: 308/77, loss: 0.14787493646144867 2023-01-23 22:30:05.738512: step: 312/77, loss: 0.18950051069259644 2023-01-23 22:30:06.987909: step: 316/77, loss: 0.19316518306732178 2023-01-23 22:30:08.353444: step: 320/77, loss: 0.03579552844166756 2023-01-23 22:30:09.662153: step: 324/77, loss: 0.17878445982933044 2023-01-23 22:30:10.964822: step: 328/77, loss: 0.09883566200733185 2023-01-23 22:30:12.268451: step: 332/77, loss: 0.07325631380081177 2023-01-23 22:30:13.590420: step: 336/77, loss: 0.10381695628166199 2023-01-23 22:30:14.854425: step: 340/77, loss: 0.03180614113807678 2023-01-23 22:30:16.197726: step: 344/77, loss: 0.0712357610464096 2023-01-23 22:30:17.476828: step: 348/77, loss: 0.1428757905960083 2023-01-23 22:30:18.763261: step: 352/77, loss: 0.048817943781614304 2023-01-23 22:30:20.062640: step: 356/77, loss: 0.03985461965203285 2023-01-23 22:30:21.372922: step: 360/77, loss: 0.08410288393497467 2023-01-23 22:30:22.711958: step: 364/77, loss: 0.18013249337673187 2023-01-23 22:30:23.994481: step: 368/77, loss: 0.08539636433124542 2023-01-23 22:30:25.352989: step: 372/77, loss: 0.08473024517297745 2023-01-23 22:30:26.630565: step: 376/77, loss: 0.06411333382129669 2023-01-23 22:30:27.936833: step: 380/77, loss: 0.2369568943977356 2023-01-23 22:30:29.254345: step: 384/77, loss: 0.18589796125888824 2023-01-23 22:30:30.537908: step: 388/77, loss: 0.04241977632045746 ================================================== Loss: 0.116 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 
'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:32:10.829732: step: 4/77, loss: 0.05408244580030441 2023-01-23 22:32:12.172386: step: 8/77, loss: 0.08903515338897705 2023-01-23 22:32:13.493333: step: 12/77, loss: 0.10017208009958267 2023-01-23 22:32:14.816407: step: 16/77, loss: 0.2349729984998703 2023-01-23 22:32:16.120754: step: 20/77, loss: 0.09357035160064697 2023-01-23 22:32:17.430620: step: 24/77, loss: 0.05663056671619415 2023-01-23 22:32:18.718947: step: 28/77, loss: 0.027106977999210358 2023-01-23 22:32:19.993541: step: 32/77, loss: 0.051700297743082047 2023-01-23 22:32:21.288269: step: 36/77, loss: 0.16336293518543243 2023-01-23 22:32:22.613818: step: 40/77, loss: 0.1838156133890152 2023-01-23 22:32:23.917637: step: 44/77, loss: 0.08277752995491028 2023-01-23 22:32:25.226957: step: 48/77, loss: 0.04305151477456093 2023-01-23 22:32:26.508153: step: 52/77, loss: 0.08392839878797531 2023-01-23 22:32:27.789747: step: 56/77, loss: 0.12962347269058228 2023-01-23 22:32:29.102361: step: 60/77, loss: 0.013288728892803192 2023-01-23 22:32:30.373892: step: 64/77, loss: 0.08217846602201462 2023-01-23 22:32:31.655751: step: 68/77, loss: 0.02144557423889637 2023-01-23 22:32:32.998299: step: 72/77, loss: 0.027972429990768433 2023-01-23 22:32:34.259707: step: 76/77, loss: 0.058149661868810654 2023-01-23 22:32:35.494101: step: 80/77, loss: 0.07532479614019394 2023-01-23 22:32:36.768869: step: 84/77, loss: 0.03448043763637543 2023-01-23 22:32:38.065215: step: 88/77, loss: 0.03737421706318855 2023-01-23 22:32:39.377798: step: 92/77, loss: 0.041130442172288895 2023-01-23 22:32:40.710739: step: 96/77, loss: 0.07225271314382553 2023-01-23 22:32:42.025613: step: 100/77, loss: 0.0408330075442791 2023-01-23 22:32:43.373110: step: 104/77, loss: 0.04161320626735687 2023-01-23 22:32:44.664687: step: 108/77, loss: 0.07850818336009979 2023-01-23 22:32:45.963998: step: 112/77, loss: 0.08138226717710495 2023-01-23 22:32:47.309350: step: 116/77, loss: 0.10798808187246323 2023-01-23 22:32:48.639521: step: 120/77, loss: 0.10373760759830475 2023-01-23 22:32:49.985933: step: 124/77, loss: 0.033469058573246 2023-01-23 22:32:51.304139: step: 128/77, loss: 0.0710071548819542 2023-01-23 22:32:52.625916: step: 132/77, loss: 0.023496082052588463 2023-01-23 22:32:53.913651: step: 136/77, loss: 0.05800343304872513 2023-01-23 22:32:55.213580: step: 140/77, loss: 0.06916903704404831 2023-01-23 
22:32:56.503982: step: 144/77, loss: 0.10620550066232681 2023-01-23 22:32:57.783393: step: 148/77, loss: 0.038168005645275116 2023-01-23 22:32:59.057506: step: 152/77, loss: 0.028475288301706314 2023-01-23 22:33:00.356123: step: 156/77, loss: 0.0343252494931221 2023-01-23 22:33:01.638376: step: 160/77, loss: 0.06465528160333633 2023-01-23 22:33:02.912706: step: 164/77, loss: 0.015443643555045128 2023-01-23 22:33:04.238750: step: 168/77, loss: 0.024469029158353806 2023-01-23 22:33:05.538512: step: 172/77, loss: 0.0772266834974289 2023-01-23 22:33:06.868785: step: 176/77, loss: 0.07612720876932144 2023-01-23 22:33:08.187738: step: 180/77, loss: 0.01303707342594862 2023-01-23 22:33:09.506612: step: 184/77, loss: 0.028561269864439964 2023-01-23 22:33:10.837749: step: 188/77, loss: 0.03664770722389221 2023-01-23 22:33:12.196845: step: 192/77, loss: 0.009424678049981594 2023-01-23 22:33:13.503506: step: 196/77, loss: 0.09547540545463562 2023-01-23 22:33:14.723858: step: 200/77, loss: 0.11896775662899017 2023-01-23 22:33:16.021193: step: 204/77, loss: 0.09404917806386948 2023-01-23 22:33:17.371939: step: 208/77, loss: 0.08699438720941544 2023-01-23 22:33:18.628160: step: 212/77, loss: 0.028635360300540924 2023-01-23 22:33:19.909634: step: 216/77, loss: 0.017172960564494133 2023-01-23 22:33:21.230036: step: 220/77, loss: 0.06895173341035843 2023-01-23 22:33:22.553270: step: 224/77, loss: 0.10485464334487915 2023-01-23 22:33:23.846959: step: 228/77, loss: 0.08182030916213989 2023-01-23 22:33:25.176420: step: 232/77, loss: 0.021310996264219284 2023-01-23 22:33:26.444700: step: 236/77, loss: 0.05425971746444702 2023-01-23 22:33:27.752648: step: 240/77, loss: 0.05239848420023918 2023-01-23 22:33:29.126247: step: 244/77, loss: 0.01816706918179989 2023-01-23 22:33:30.445649: step: 248/77, loss: 0.016700895503163338 2023-01-23 22:33:31.742002: step: 252/77, loss: 0.05829840153455734 2023-01-23 22:33:33.034829: step: 256/77, loss: 0.01977475732564926 2023-01-23 22:33:34.339082: step: 260/77, loss: 0.01967601850628853 2023-01-23 22:33:35.628175: step: 264/77, loss: 0.11919214576482773 2023-01-23 22:33:36.941740: step: 268/77, loss: 0.04131526127457619 2023-01-23 22:33:38.241053: step: 272/77, loss: 0.006939525716006756 2023-01-23 22:33:39.580288: step: 276/77, loss: 0.0067922016605734825 2023-01-23 22:33:40.888197: step: 280/77, loss: 0.04663277789950371 2023-01-23 22:33:42.154328: step: 284/77, loss: 0.035179682075977325 2023-01-23 22:33:43.459096: step: 288/77, loss: 0.15610793232917786 2023-01-23 22:33:44.733926: step: 292/77, loss: 0.0425829254090786 2023-01-23 22:33:46.017837: step: 296/77, loss: 0.013804452493786812 2023-01-23 22:33:47.299005: step: 300/77, loss: 0.02332937717437744 2023-01-23 22:33:48.578236: step: 304/77, loss: 0.028147635981440544 2023-01-23 22:33:49.885131: step: 308/77, loss: 0.012575867585837841 2023-01-23 22:33:51.231004: step: 312/77, loss: 0.010541887953877449 2023-01-23 22:33:52.531959: step: 316/77, loss: 0.046181872487068176 2023-01-23 22:33:53.838331: step: 320/77, loss: 0.1516311913728714 2023-01-23 22:33:55.135369: step: 324/77, loss: 0.059479713439941406 2023-01-23 22:33:56.430518: step: 328/77, loss: 0.038284484297037125 2023-01-23 22:33:57.778960: step: 332/77, loss: 0.014213360846042633 2023-01-23 22:33:59.097408: step: 336/77, loss: 0.04028288275003433 2023-01-23 22:34:00.384125: step: 340/77, loss: 0.018666526302695274 2023-01-23 22:34:01.724435: step: 344/77, loss: 0.03541828691959381 2023-01-23 22:34:03.023540: step: 348/77, loss: 0.02697652578353882 2023-01-23 
22:34:04.284746: step: 352/77, loss: 0.04431845247745514 2023-01-23 22:34:05.589244: step: 356/77, loss: 0.020619817078113556 2023-01-23 22:34:06.878822: step: 360/77, loss: 0.05683548003435135 2023-01-23 22:34:08.132868: step: 364/77, loss: 0.026009604334831238 2023-01-23 22:34:09.440091: step: 368/77, loss: 0.022610411047935486 2023-01-23 22:34:10.694478: step: 372/77, loss: 0.01979699730873108 2023-01-23 22:34:11.974434: step: 376/77, loss: 0.016157492995262146 2023-01-23 22:34:13.301584: step: 380/77, loss: 0.033022940158843994 2023-01-23 22:34:14.580413: step: 384/77, loss: 0.03987208753824234 2023-01-23 22:34:15.954844: step: 388/77, loss: 0.028012875467538834 ================================================== Loss: 0.055 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Russian: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
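The per-epoch result dictionaries above report template and slot precision/recall/F1 plus a 'combined' score. Judging from the logged numbers, each 'f1' is the usual harmonic mean 2pr/(p+r), and 'combined' equals the product of the template F1 and the slot F1 (e.g. 0.7368421052631579 * 0.07029876977152899 = 0.05179909351586346 for the epoch-2 Dev Chinese entry). The short Python sketch below recomputes that entry to illustrate the relationship; it is an inference from the logged values, not the project's actual scoring code, and the helper names are hypothetical.

def f1(p, r):
    # Standard F1: harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(result):
    # 'combined' in these logs appears to be template F1 times slot F1
    # (assumption inferred from the logged values, not taken from train.py).
    return result['template']['f1'] * result['slot']['f1']

# Worked check against the epoch-2 "Dev Chinese" entry logged above.
dev_chinese_epoch2 = {
    'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579},
    'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899},
    'combined': 0.05179909351586346,
    'epoch': 2,
}
assert abs(f1(1.0, 0.5833333333333334) - dev_chinese_epoch2['template']['f1']) < 1e-9
assert abs(f1(0.5, 0.03780718336483932) - dev_chinese_epoch2['slot']['f1']) < 1e-9
assert abs(combined_score(dev_chinese_epoch2) - dev_chinese_epoch2['combined']) < 1e-9

If the same relationship holds throughout, the zero 'combined' values in epoch 1 follow directly from the zero template and slot F1 scores, and a 'New best ... model' message corresponds to an improvement in this product on the dev split for that language.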
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:36:15.342258: step: 4/77, loss: 0.019014548510313034 2023-01-23 22:36:16.586737: step: 8/77, loss: 0.0735415667295456 2023-01-23 22:36:17.888830: step: 12/77, loss: 0.04280354827642441 2023-01-23 22:36:19.218982: step: 16/77, loss: 0.029063032940030098 2023-01-23 22:36:20.489342: step: 20/77, loss: 0.010564737021923065 2023-01-23 22:36:21.832167: step: 24/77, loss: 0.021256964653730392 2023-01-23 22:36:23.150943: step: 28/77, loss: 0.030867785215377808 2023-01-23 22:36:24.457565: step: 32/77, loss: 0.026070473715662956 2023-01-23 22:36:25.729247: step: 36/77, loss: 0.005676222033798695 2023-01-23 22:36:27.022043: step: 40/77, loss: 0.0662388727068901 2023-01-23 22:36:28.290534: step: 44/77, loss: 0.01055849902331829 2023-01-23 22:36:29.518526: step: 48/77, loss: 0.056679219007492065 2023-01-23 22:36:30.793057: step: 52/77, loss: 0.00822590570896864 2023-01-23 22:36:32.110305: step: 56/77, loss: 0.10689514875411987 2023-01-23 22:36:33.483173: step: 60/77, loss: 0.02256307564675808 2023-01-23 22:36:34.771320: step: 64/77, loss: 0.07043624669313431 2023-01-23 22:36:36.103673: step: 68/77, loss: 0.04915996640920639 2023-01-23 22:36:37.402507: step: 72/77, loss: 0.048030249774456024 2023-01-23 22:36:38.730805: step: 76/77, loss: 0.0073328884318470955 2023-01-23 22:36:39.988823: step: 80/77, loss: 0.017796359956264496 2023-01-23 22:36:41.315509: step: 84/77, loss: 
0.00830540619790554 2023-01-23 22:36:42.663866: step: 88/77, loss: 0.04534914344549179 2023-01-23 22:36:43.960897: step: 92/77, loss: 0.0539388582110405 2023-01-23 22:36:45.292682: step: 96/77, loss: 0.03533366695046425 2023-01-23 22:36:46.576705: step: 100/77, loss: 0.02420997992157936 2023-01-23 22:36:47.898515: step: 104/77, loss: 0.00585087900981307 2023-01-23 22:36:49.199260: step: 108/77, loss: 0.014047197997570038 2023-01-23 22:36:50.487343: step: 112/77, loss: 0.0512496754527092 2023-01-23 22:36:51.820687: step: 116/77, loss: 0.049538418650627136 2023-01-23 22:36:53.116104: step: 120/77, loss: 0.03853524476289749 2023-01-23 22:36:54.420380: step: 124/77, loss: 0.04238808527588844 2023-01-23 22:36:55.737162: step: 128/77, loss: 0.05831409990787506 2023-01-23 22:36:57.051403: step: 132/77, loss: 0.06485219299793243 2023-01-23 22:36:58.338051: step: 136/77, loss: 0.04830478876829147 2023-01-23 22:36:59.681177: step: 140/77, loss: 0.016403838992118835 2023-01-23 22:37:00.976852: step: 144/77, loss: 0.022437868639826775 2023-01-23 22:37:02.266517: step: 148/77, loss: 0.13560843467712402 2023-01-23 22:37:03.582470: step: 152/77, loss: 0.009262963198125362 2023-01-23 22:37:04.911404: step: 156/77, loss: 0.06167292222380638 2023-01-23 22:37:06.228569: step: 160/77, loss: 0.053442005068063736 2023-01-23 22:37:07.536181: step: 164/77, loss: 0.05808179825544357 2023-01-23 22:37:08.815737: step: 168/77, loss: 0.0050662122666835785 2023-01-23 22:37:10.131211: step: 172/77, loss: 0.01068087387830019 2023-01-23 22:37:11.450339: step: 176/77, loss: 0.055597029626369476 2023-01-23 22:37:12.824618: step: 180/77, loss: 0.017954226583242416 2023-01-23 22:37:14.131614: step: 184/77, loss: 0.024529630318284035 2023-01-23 22:37:15.456938: step: 188/77, loss: 0.015078309923410416 2023-01-23 22:37:16.756927: step: 192/77, loss: 0.061848364770412445 2023-01-23 22:37:18.092450: step: 196/77, loss: 0.01075062993913889 2023-01-23 22:37:19.410157: step: 200/77, loss: 0.005327839404344559 2023-01-23 22:37:20.720192: step: 204/77, loss: 0.02629464492201805 2023-01-23 22:37:22.014195: step: 208/77, loss: 0.011574016883969307 2023-01-23 22:37:23.324597: step: 212/77, loss: 0.012719389982521534 2023-01-23 22:37:24.631710: step: 216/77, loss: 0.007977721281349659 2023-01-23 22:37:25.954480: step: 220/77, loss: 0.006969841662794352 2023-01-23 22:37:27.257367: step: 224/77, loss: 0.013916095718741417 2023-01-23 22:37:28.557590: step: 228/77, loss: 0.015030771493911743 2023-01-23 22:37:29.866268: step: 232/77, loss: 0.031597938388586044 2023-01-23 22:37:31.174385: step: 236/77, loss: 0.011530094780027866 2023-01-23 22:37:32.469251: step: 240/77, loss: 0.030616004019975662 2023-01-23 22:37:33.745825: step: 244/77, loss: 0.006037105806171894 2023-01-23 22:37:34.997981: step: 248/77, loss: 0.09851083904504776 2023-01-23 22:37:36.312217: step: 252/77, loss: 0.10384593158960342 2023-01-23 22:37:37.599991: step: 256/77, loss: 0.009898346848785877 2023-01-23 22:37:38.880578: step: 260/77, loss: 0.04109061509370804 2023-01-23 22:37:40.196597: step: 264/77, loss: 0.0414285734295845 2023-01-23 22:37:41.486252: step: 268/77, loss: 0.021689075976610184 2023-01-23 22:37:42.797960: step: 272/77, loss: 0.034337542951107025 2023-01-23 22:37:44.077779: step: 276/77, loss: 0.004574917256832123 2023-01-23 22:37:45.383936: step: 280/77, loss: 0.006308156065642834 2023-01-23 22:37:46.684469: step: 284/77, loss: 0.016056323423981667 2023-01-23 22:37:47.966815: step: 288/77, loss: 0.053165268152952194 2023-01-23 22:37:49.251359: step: 292/77, 
loss: 0.014298057183623314 2023-01-23 22:37:50.534937: step: 296/77, loss: 0.005691438913345337 2023-01-23 22:37:51.854300: step: 300/77, loss: 0.4969761371612549 2023-01-23 22:37:53.139763: step: 304/77, loss: 0.011333977803587914 2023-01-23 22:37:54.459286: step: 308/77, loss: 0.06957821547985077 2023-01-23 22:37:55.762107: step: 312/77, loss: 0.06074652075767517 2023-01-23 22:37:57.030281: step: 316/77, loss: 0.03135789930820465 2023-01-23 22:37:58.338116: step: 320/77, loss: 0.05328745394945145 2023-01-23 22:37:59.620891: step: 324/77, loss: 0.033600203692913055 2023-01-23 22:38:00.895081: step: 328/77, loss: 0.008021079003810883 2023-01-23 22:38:02.206999: step: 332/77, loss: 0.048412859439849854 2023-01-23 22:38:03.533204: step: 336/77, loss: 0.004309589043259621 2023-01-23 22:38:04.880265: step: 340/77, loss: 0.022743750363588333 2023-01-23 22:38:06.206520: step: 344/77, loss: 0.005848175846040249 2023-01-23 22:38:07.546794: step: 348/77, loss: 0.05571676418185234 2023-01-23 22:38:08.785951: step: 352/77, loss: 0.016229940578341484 2023-01-23 22:38:10.135589: step: 356/77, loss: 0.056965842843055725 2023-01-23 22:38:11.428738: step: 360/77, loss: 0.0034566251561045647 2023-01-23 22:38:12.758825: step: 364/77, loss: 0.015656542032957077 2023-01-23 22:38:14.069075: step: 368/77, loss: 0.023984838277101517 2023-01-23 22:38:15.378422: step: 372/77, loss: 0.03030308522284031 2023-01-23 22:38:16.681675: step: 376/77, loss: 0.01274532824754715 2023-01-23 22:38:17.944041: step: 380/77, loss: 0.01196263451129198 2023-01-23 22:38:19.213448: step: 384/77, loss: 0.044877029955387115 2023-01-23 22:38:20.554613: step: 388/77, loss: 0.0034370715729892254 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 3} Test Chinese: {'template': {'p': 0.9571428571428572, 'r': 0.5, 'f1': 0.6568627450980392}, 'slot': {'p': 0.6153846153846154, 'r': 0.014375561545372867, 'f1': 0.028094820017559263}, 'combined': 0.01845444059976932, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 3} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.5074626865671642, 'f1': 0.6634146341463415}, 'slot': {'p': 0.6296296296296297, 'r': 0.015274034141958671, 'f1': 0.02982456140350877}, 'combined': 0.01978605049208387, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 3} Test Russian: {'template': {'p': 0.9577464788732394, 'r': 0.5074626865671642, 'f1': 0.6634146341463415}, 'slot': {'p': 0.5769230769230769, 'r': 0.013477088948787063, 'f1': 0.02633889376646181}, 'combined': 0.017473607571896616, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 
0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 3} Test for Russian: {'template': {'p': 0.9577464788732394, 'r': 0.5074626865671642, 'f1': 0.6634146341463415}, 'slot': {'p': 0.5769230769230769, 'r': 0.013477088948787063, 'f1': 0.02633889376646181}, 'combined': 0.017473607571896616, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:40:07.256168: step: 4/77, loss: 0.24395133554935455 2023-01-23 22:40:08.518939: step: 8/77, loss: 0.042564891278743744 2023-01-23 22:40:09.813184: step: 12/77, loss: 0.009077097289264202 2023-01-23 22:40:11.109455: step: 16/77, loss: 0.038285721093416214 2023-01-23 22:40:12.437104: step: 20/77, loss: 0.0023387169931083918 2023-01-23 22:40:13.750324: step: 24/77, loss: 0.06897217780351639 2023-01-23 22:40:15.016477: step: 28/77, loss: 0.002033111173659563 2023-01-23 22:40:16.329677: step: 32/77, loss: 0.0338224321603775 2023-01-23 22:40:17.607313: step: 36/77, loss: 0.05583617091178894 2023-01-23 22:40:18.912845: step: 40/77, loss: 0.0054557062685489655 2023-01-23 22:40:20.226299: step: 44/77, loss: 0.06645769625902176 2023-01-23 22:40:21.531009: step: 48/77, loss: 0.03576688468456268 2023-01-23 22:40:22.879623: step: 52/77, loss: 0.05891217663884163 2023-01-23 22:40:24.210706: step: 56/77, loss: 0.010125461965799332 2023-01-23 22:40:25.470491: step: 60/77, loss: 0.06767314672470093 2023-01-23 22:40:26.782838: step: 64/77, loss: 0.03810075297951698 2023-01-23 22:40:28.125907: step: 68/77, loss: 0.015344061888754368 2023-01-23 22:40:29.474249: step: 72/77, loss: 0.02123776264488697 2023-01-23 
22:40:30.783483: step: 76/77, loss: 0.015418780036270618 2023-01-23 22:40:32.091506: step: 80/77, loss: 0.009558499790728092 2023-01-23 22:40:33.454776: step: 84/77, loss: 0.03018416091799736 2023-01-23 22:40:34.697462: step: 88/77, loss: 0.03399305045604706 2023-01-23 22:40:36.016270: step: 92/77, loss: 0.11518257856369019 2023-01-23 22:40:37.299188: step: 96/77, loss: 0.048495933413505554 2023-01-23 22:40:38.638597: step: 100/77, loss: 0.0012406861642375588 2023-01-23 22:40:39.918171: step: 104/77, loss: 0.038416363298892975 2023-01-23 22:40:41.214027: step: 108/77, loss: 0.06289390474557877 2023-01-23 22:40:42.561413: step: 112/77, loss: 0.00570332445204258 2023-01-23 22:40:43.890063: step: 116/77, loss: 0.0008757521864026785 2023-01-23 22:40:45.210496: step: 120/77, loss: 0.03343869373202324 2023-01-23 22:40:46.542696: step: 124/77, loss: 0.010786929167807102 2023-01-23 22:40:47.883517: step: 128/77, loss: 0.01748708076775074 2023-01-23 22:40:49.215297: step: 132/77, loss: 0.028637580573558807 2023-01-23 22:40:50.518640: step: 136/77, loss: 0.04966537281870842 2023-01-23 22:40:51.810284: step: 140/77, loss: 0.04209902510046959 2023-01-23 22:40:53.099101: step: 144/77, loss: 0.035914335399866104 2023-01-23 22:40:54.381895: step: 148/77, loss: 0.044010721147060394 2023-01-23 22:40:55.682282: step: 152/77, loss: 0.013883043080568314 2023-01-23 22:40:56.995012: step: 156/77, loss: 0.021576108410954475 2023-01-23 22:40:58.354533: step: 160/77, loss: 0.0158415324985981 2023-01-23 22:40:59.680849: step: 164/77, loss: 0.20719078183174133 2023-01-23 22:41:01.012299: step: 168/77, loss: 0.0026145929004997015 2023-01-23 22:41:02.350048: step: 172/77, loss: 0.011889268644154072 2023-01-23 22:41:03.601907: step: 176/77, loss: 0.07418306171894073 2023-01-23 22:41:04.912018: step: 180/77, loss: 0.04796472564339638 2023-01-23 22:41:06.214456: step: 184/77, loss: 0.13556009531021118 2023-01-23 22:41:07.485330: step: 188/77, loss: 0.008009164594113827 2023-01-23 22:41:08.860275: step: 192/77, loss: 0.03636176884174347 2023-01-23 22:41:10.192757: step: 196/77, loss: 0.0731358677148819 2023-01-23 22:41:11.515072: step: 200/77, loss: 0.06386769562959671 2023-01-23 22:41:12.887597: step: 204/77, loss: 0.009611038491129875 2023-01-23 22:41:14.204826: step: 208/77, loss: 0.012586880475282669 2023-01-23 22:41:15.507602: step: 212/77, loss: 0.010348862037062645 2023-01-23 22:41:16.862749: step: 216/77, loss: 0.01834016479551792 2023-01-23 22:41:18.175429: step: 220/77, loss: 0.05512729287147522 2023-01-23 22:41:19.485868: step: 224/77, loss: 0.019282493740320206 2023-01-23 22:41:20.818339: step: 228/77, loss: 0.01623130775988102 2023-01-23 22:41:22.107315: step: 232/77, loss: 0.006039030849933624 2023-01-23 22:41:23.433043: step: 236/77, loss: 0.010130537673830986 2023-01-23 22:41:24.762141: step: 240/77, loss: 0.00915328785777092 2023-01-23 22:41:26.019499: step: 244/77, loss: 0.019957851618528366 2023-01-23 22:41:27.377019: step: 248/77, loss: 0.037982597947120667 2023-01-23 22:41:28.696511: step: 252/77, loss: 0.0435798242688179 2023-01-23 22:41:29.996275: step: 256/77, loss: 0.022857429459691048 2023-01-23 22:41:31.305670: step: 260/77, loss: 0.003933214582502842 2023-01-23 22:41:32.638949: step: 264/77, loss: 0.04226960241794586 2023-01-23 22:41:33.929256: step: 268/77, loss: 0.0034107593819499016 2023-01-23 22:41:35.257094: step: 272/77, loss: 0.0026267259381711483 2023-01-23 22:41:36.567544: step: 276/77, loss: 0.008702527731657028 2023-01-23 22:41:37.818534: step: 280/77, loss: 0.020024804398417473 
2023-01-23 22:41:39.148855: step: 284/77, loss: 0.01928548887372017 2023-01-23 22:41:40.447864: step: 288/77, loss: 0.048891812562942505 2023-01-23 22:41:41.759494: step: 292/77, loss: 0.02976638823747635 2023-01-23 22:41:43.085794: step: 296/77, loss: 0.004443894140422344 2023-01-23 22:41:44.373649: step: 300/77, loss: 0.011118062771856785 2023-01-23 22:41:45.632725: step: 304/77, loss: 0.046159714460372925 2023-01-23 22:41:46.935326: step: 308/77, loss: 0.011610114015638828 2023-01-23 22:41:48.199298: step: 312/77, loss: 0.022255709394812584 2023-01-23 22:41:49.470608: step: 316/77, loss: 0.017545828595757484 2023-01-23 22:41:50.788487: step: 320/77, loss: 0.024126345291733742 2023-01-23 22:41:52.116047: step: 324/77, loss: 0.012279022485017776 2023-01-23 22:41:53.345784: step: 328/77, loss: 0.031908974051475525 2023-01-23 22:41:54.696173: step: 332/77, loss: 0.007514918223023415 2023-01-23 22:41:55.975396: step: 336/77, loss: 0.04214081913232803 2023-01-23 22:41:57.300277: step: 340/77, loss: 0.014605971053242683 2023-01-23 22:41:58.620421: step: 344/77, loss: 0.056578878313302994 2023-01-23 22:41:59.918647: step: 348/77, loss: 0.07047766447067261 2023-01-23 22:42:01.208701: step: 352/77, loss: 0.016497528180480003 2023-01-23 22:42:02.551790: step: 356/77, loss: 0.02124914340674877 2023-01-23 22:42:03.850090: step: 360/77, loss: 0.038115061819553375 2023-01-23 22:42:05.143041: step: 364/77, loss: 0.012755529955029488 2023-01-23 22:42:06.419348: step: 368/77, loss: 0.008632929995656013 2023-01-23 22:42:07.700566: step: 372/77, loss: 0.015460866503417492 2023-01-23 22:42:08.965621: step: 376/77, loss: 0.014853318221867085 2023-01-23 22:42:10.270863: step: 380/77, loss: 0.02519097365438938 2023-01-23 22:42:11.581561: step: 384/77, loss: 0.009393636137247086 2023-01-23 22:42:12.934692: step: 388/77, loss: 0.005673499777913094 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9855072463768116, 'r': 0.5074626865671642, 'f1': 0.6699507389162562}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.013031756194657506, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.9855072463768116, 'r': 0.5074626865671642, 'f1': 0.6699507389162562}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.013031756194657506, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 
'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:44:01.047779: step: 4/77, loss: 0.0173548124730587 2023-01-23 22:44:02.341107: step: 8/77, loss: 0.01768217794597149 2023-01-23 22:44:03.659952: step: 12/77, loss: 0.05099344998598099 2023-01-23 22:44:04.973122: step: 16/77, loss: 0.00381911126896739 2023-01-23 22:44:06.269594: step: 20/77, loss: 0.026492225006222725 2023-01-23 22:44:07.541547: step: 24/77, loss: 0.004659312777221203 2023-01-23 22:44:08.822450: step: 28/77, loss: 0.005350128747522831 2023-01-23 22:44:10.167521: step: 32/77, loss: 0.15542210638523102 2023-01-23 22:44:11.504677: step: 36/77, loss: 0.0018877113470807672 2023-01-23 22:44:12.790708: step: 40/77, loss: 0.012737736105918884 2023-01-23 22:44:14.102236: step: 44/77, loss: 0.1805691421031952 2023-01-23 22:44:15.415094: step: 48/77, loss: 0.004664191044867039 2023-01-23 22:44:16.763287: step: 52/77, loss: 0.020634517073631287 2023-01-23 22:44:18.104531: step: 56/77, loss: 0.07404866814613342 2023-01-23 22:44:19.464782: step: 60/77, loss: 0.029311828315258026 2023-01-23 22:44:20.795654: step: 64/77, loss: 0.01757962442934513 
2023-01-23 22:44:22.100934: step: 68/77, loss: 0.01078371237963438 2023-01-23 22:44:23.468574: step: 72/77, loss: 0.05148717761039734 2023-01-23 22:44:24.789408: step: 76/77, loss: 0.09286247938871384 2023-01-23 22:44:26.090856: step: 80/77, loss: 0.03365876525640488 2023-01-23 22:44:27.361007: step: 84/77, loss: 0.0045639462769031525 2023-01-23 22:44:28.697324: step: 88/77, loss: 0.08936309069395065 2023-01-23 22:44:30.010534: step: 92/77, loss: 0.040674157440662384 2023-01-23 22:44:31.362111: step: 96/77, loss: 0.013063786551356316 2023-01-23 22:44:32.684567: step: 100/77, loss: 0.018701188266277313 2023-01-23 22:44:33.995708: step: 104/77, loss: 0.016493361443281174 2023-01-23 22:44:35.317922: step: 108/77, loss: 0.010979781858623028 2023-01-23 22:44:36.612855: step: 112/77, loss: 0.010761067271232605 2023-01-23 22:44:37.940417: step: 116/77, loss: 0.002823087153956294 2023-01-23 22:44:39.268668: step: 120/77, loss: 0.01579410396516323 2023-01-23 22:44:40.581617: step: 124/77, loss: 0.017261408269405365 2023-01-23 22:44:41.931832: step: 128/77, loss: 0.0017213891260325909 2023-01-23 22:44:43.219134: step: 132/77, loss: 0.16353674232959747 2023-01-23 22:44:44.525620: step: 136/77, loss: 0.0066542369313538074 2023-01-23 22:44:45.835509: step: 140/77, loss: 0.015729660168290138 2023-01-23 22:44:47.159011: step: 144/77, loss: 0.04757007211446762 2023-01-23 22:44:48.440857: step: 148/77, loss: 0.0190782081335783 2023-01-23 22:44:49.757194: step: 152/77, loss: 0.019761735573410988 2023-01-23 22:44:51.018839: step: 156/77, loss: 0.16782283782958984 2023-01-23 22:44:52.290351: step: 160/77, loss: 0.03685488924384117 2023-01-23 22:44:53.636893: step: 164/77, loss: 0.026806870475411415 2023-01-23 22:44:55.009890: step: 168/77, loss: 0.09175720065832138 2023-01-23 22:44:56.371862: step: 172/77, loss: 0.04123001918196678 2023-01-23 22:44:57.658102: step: 176/77, loss: 0.01284695416688919 2023-01-23 22:44:59.022198: step: 180/77, loss: 0.01674005389213562 2023-01-23 22:45:00.341908: step: 184/77, loss: 0.0624990351498127 2023-01-23 22:45:01.652822: step: 188/77, loss: 0.01610327512025833 2023-01-23 22:45:02.951323: step: 192/77, loss: 0.10640712827444077 2023-01-23 22:45:04.241211: step: 196/77, loss: 0.02318374626338482 2023-01-23 22:45:05.524047: step: 200/77, loss: 0.00702270632609725 2023-01-23 22:45:06.805766: step: 204/77, loss: 0.06984404474496841 2023-01-23 22:45:08.125989: step: 208/77, loss: 0.04751594364643097 2023-01-23 22:45:09.456959: step: 212/77, loss: 0.0018362406408414245 2023-01-23 22:45:10.743313: step: 216/77, loss: 0.03405271843075752 2023-01-23 22:45:12.089579: step: 220/77, loss: 0.02549353428184986 2023-01-23 22:45:13.392558: step: 224/77, loss: 0.08385471999645233 2023-01-23 22:45:14.723452: step: 228/77, loss: 0.0008324494701810181 2023-01-23 22:45:16.067562: step: 232/77, loss: 0.012937519699335098 2023-01-23 22:45:17.346885: step: 236/77, loss: 0.000671170768328011 2023-01-23 22:45:18.705395: step: 240/77, loss: 0.007344944402575493 2023-01-23 22:45:20.062982: step: 244/77, loss: 0.0027922093868255615 2023-01-23 22:45:21.399841: step: 248/77, loss: 0.025115441530942917 2023-01-23 22:45:22.765126: step: 252/77, loss: 0.02576635032892227 2023-01-23 22:45:24.070618: step: 256/77, loss: 0.01716598868370056 2023-01-23 22:45:25.369461: step: 260/77, loss: 0.04405806213617325 2023-01-23 22:45:26.645899: step: 264/77, loss: 0.08885978162288666 2023-01-23 22:45:28.040131: step: 268/77, loss: 0.011347277089953423 2023-01-23 22:45:29.376674: step: 272/77, loss: 
0.0019689826294779778 2023-01-23 22:45:30.759557: step: 276/77, loss: 0.004658084828406572 2023-01-23 22:45:32.115583: step: 280/77, loss: 0.09460316598415375 2023-01-23 22:45:33.428624: step: 284/77, loss: 0.0011397113557904959 2023-01-23 22:45:34.746243: step: 288/77, loss: 0.021794844418764114 2023-01-23 22:45:36.037518: step: 292/77, loss: 0.008161050267517567 2023-01-23 22:45:37.364638: step: 296/77, loss: 0.03600213676691055 2023-01-23 22:45:38.692252: step: 300/77, loss: 0.018938438966870308 2023-01-23 22:45:40.006755: step: 304/77, loss: 0.006088805850595236 2023-01-23 22:45:41.316399: step: 308/77, loss: 0.008946132846176624 2023-01-23 22:45:42.595714: step: 312/77, loss: 0.009896229021251202 2023-01-23 22:45:43.927678: step: 316/77, loss: 0.02469654195010662 2023-01-23 22:45:45.283196: step: 320/77, loss: 0.0005184942274354398 2023-01-23 22:45:46.614319: step: 324/77, loss: 0.03663252666592598 2023-01-23 22:45:47.914989: step: 328/77, loss: 0.005019849166274071 2023-01-23 22:45:49.187657: step: 332/77, loss: 0.0011025880230590701 2023-01-23 22:45:50.480389: step: 336/77, loss: 0.0429910384118557 2023-01-23 22:45:51.757457: step: 340/77, loss: 0.028678392991423607 2023-01-23 22:45:53.048784: step: 344/77, loss: 0.04023951292037964 2023-01-23 22:45:54.350208: step: 348/77, loss: 0.006216683890670538 2023-01-23 22:45:55.660733: step: 352/77, loss: 0.01433058362454176 2023-01-23 22:45:56.940030: step: 356/77, loss: 0.016391603276133537 2023-01-23 22:45:58.241689: step: 360/77, loss: 0.021528517827391624 2023-01-23 22:45:59.638200: step: 364/77, loss: 0.005811501760035753 2023-01-23 22:46:00.951317: step: 368/77, loss: 0.0071782199665904045 2023-01-23 22:46:02.256906: step: 372/77, loss: 0.01076382864266634 2023-01-23 22:46:03.607764: step: 376/77, loss: 0.0397380106151104 2023-01-23 22:46:04.911180: step: 380/77, loss: 0.017241651192307472 2023-01-23 22:46:06.188528: step: 384/77, loss: 0.010189219377934933 2023-01-23 22:46:07.523735: step: 388/77, loss: 0.07457630336284637 ================================================== Loss: 0.031 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 5} Test Chinese: {'template': {'p': 0.8690476190476191, 'r': 0.5447761194029851, 'f1': 0.6697247706422019}, 'slot': {'p': 0.475, 'r': 0.017070979335130278, 'f1': 0.03295750216825672}, 'combined': 0.022072455580575606, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 5} Test Korean: {'template': {'p': 0.8765432098765432, 'r': 0.5298507462686567, 'f1': 0.6604651162790698}, 'slot': {'p': 0.5277777777777778, 'r': 0.017070979335130278, 'f1': 0.03307223672758921}, 'combined': 0.02184305867589613, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 5} Test Russian: {'template': {'p': 0.9, 'r': 0.5373134328358209, 'f1': 0.6728971962616822}, 'slot': {'p': 0.5135135135135135, 'r': 0.017070979335130278, 'f1': 0.03304347826086956}, 'combined': 0.022234863876472975, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 
0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:47:48.811414: step: 4/77, loss: 0.043663907796144485 2023-01-23 22:47:50.081775: step: 8/77, loss: 0.005766963120549917 2023-01-23 22:47:51.338030: step: 12/77, loss: 0.00684958603233099 2023-01-23 22:47:52.581633: step: 16/77, loss: 0.020765742287039757 2023-01-23 22:47:53.887569: step: 20/77, loss: 0.04183043912053108 2023-01-23 22:47:55.221321: step: 24/77, loss: 0.11025308072566986 2023-01-23 22:47:56.517075: step: 28/77, loss: 0.035530585795640945 2023-01-23 22:47:57.852527: step: 32/77, loss: 0.08741918206214905 2023-01-23 22:47:59.169027: step: 36/77, loss: 0.07130548357963562 2023-01-23 22:48:00.436622: step: 40/77, loss: 0.02018367126584053 2023-01-23 22:48:01.714956: step: 44/77, loss: 0.042092032730579376 2023-01-23 22:48:03.051039: step: 48/77, loss: 0.017223650589585304 2023-01-23 22:48:04.346126: step: 52/77, loss: 0.009587359614670277 2023-01-23 22:48:05.599144: step: 56/77, loss: 0.016000591218471527 
2023-01-23 22:48:06.892545: step: 60/77, loss: 0.01175003033131361 2023-01-23 22:48:08.192756: step: 64/77, loss: 0.025416970252990723 2023-01-23 22:48:09.519910: step: 68/77, loss: 0.022085528820753098 2023-01-23 22:48:10.887235: step: 72/77, loss: 0.09108749032020569 2023-01-23 22:48:12.212903: step: 76/77, loss: 0.032236192375421524 2023-01-23 22:48:13.520164: step: 80/77, loss: 0.011872172355651855 2023-01-23 22:48:14.813967: step: 84/77, loss: 0.00896656047552824 2023-01-23 22:48:16.137353: step: 88/77, loss: 0.06494638323783875 2023-01-23 22:48:17.416270: step: 92/77, loss: 0.041225992143154144 2023-01-23 22:48:18.693246: step: 96/77, loss: 0.02453225664794445 2023-01-23 22:48:20.000195: step: 100/77, loss: 0.09559760242700577 2023-01-23 22:48:21.280983: step: 104/77, loss: 0.015864849090576172 2023-01-23 22:48:22.557274: step: 108/77, loss: 0.013929332606494427 2023-01-23 22:48:23.866314: step: 112/77, loss: 0.023366829380393028 2023-01-23 22:48:25.135697: step: 116/77, loss: 0.09240493923425674 2023-01-23 22:48:26.447074: step: 120/77, loss: 0.021885264664888382 2023-01-23 22:48:27.740551: step: 124/77, loss: 0.001103585003875196 2023-01-23 22:48:29.048686: step: 128/77, loss: 0.01976066455245018 2023-01-23 22:48:30.394378: step: 132/77, loss: 0.00035930052399635315 2023-01-23 22:48:31.691548: step: 136/77, loss: 0.025959907099604607 2023-01-23 22:48:33.029999: step: 140/77, loss: 0.001159863080829382 2023-01-23 22:48:34.289181: step: 144/77, loss: 0.01121261902153492 2023-01-23 22:48:35.580560: step: 148/77, loss: 0.03004949912428856 2023-01-23 22:48:36.924572: step: 152/77, loss: 0.0026157714892178774 2023-01-23 22:48:38.281222: step: 156/77, loss: 0.003008859930559993 2023-01-23 22:48:39.585351: step: 160/77, loss: 0.014713558368384838 2023-01-23 22:48:40.873915: step: 164/77, loss: 0.053925808519124985 2023-01-23 22:48:42.187388: step: 168/77, loss: 0.054232362657785416 2023-01-23 22:48:43.475269: step: 172/77, loss: 0.008975783362984657 2023-01-23 22:48:44.795575: step: 176/77, loss: 0.0080420495942235 2023-01-23 22:48:46.112916: step: 180/77, loss: 0.026775937527418137 2023-01-23 22:48:47.425133: step: 184/77, loss: 0.0018847854807972908 2023-01-23 22:48:48.763656: step: 188/77, loss: 0.017605334520339966 2023-01-23 22:48:50.061667: step: 192/77, loss: 0.007907466031610966 2023-01-23 22:48:51.402536: step: 196/77, loss: 0.01664300262928009 2023-01-23 22:48:52.728092: step: 200/77, loss: 0.030148349702358246 2023-01-23 22:48:54.009506: step: 204/77, loss: 0.03769388794898987 2023-01-23 22:48:55.286360: step: 208/77, loss: 0.06115224212408066 2023-01-23 22:48:56.581570: step: 212/77, loss: 0.003218221478164196 2023-01-23 22:48:57.925459: step: 216/77, loss: 0.0035331116523593664 2023-01-23 22:48:59.188788: step: 220/77, loss: 0.04016996547579765 2023-01-23 22:49:00.516126: step: 224/77, loss: 0.01222273800522089 2023-01-23 22:49:01.844520: step: 228/77, loss: 0.002561622764915228 2023-01-23 22:49:03.135671: step: 232/77, loss: 0.019069360569119453 2023-01-23 22:49:04.436088: step: 236/77, loss: 0.004311387427151203 2023-01-23 22:49:05.720855: step: 240/77, loss: 0.035350359976291656 2023-01-23 22:49:07.040167: step: 244/77, loss: 0.061739981174468994 2023-01-23 22:49:08.370171: step: 248/77, loss: 0.07043258100748062 2023-01-23 22:49:09.688160: step: 252/77, loss: 0.023818643763661385 2023-01-23 22:49:11.015606: step: 256/77, loss: 0.025439318269491196 2023-01-23 22:49:12.350668: step: 260/77, loss: 0.002321291249245405 2023-01-23 22:49:13.634422: step: 264/77, loss: 
0.011257769539952278 2023-01-23 22:49:14.973614: step: 268/77, loss: 0.017980799078941345 2023-01-23 22:49:16.283332: step: 272/77, loss: 0.021800994873046875 2023-01-23 22:49:17.595063: step: 276/77, loss: 0.005132108926773071 2023-01-23 22:49:18.947090: step: 280/77, loss: 0.062424689531326294 2023-01-23 22:49:20.288794: step: 284/77, loss: 0.06942526996135712 2023-01-23 22:49:21.565075: step: 288/77, loss: 0.015416646376252174 2023-01-23 22:49:22.879682: step: 292/77, loss: 0.02062246948480606 2023-01-23 22:49:24.202418: step: 296/77, loss: 0.014743135310709476 2023-01-23 22:49:25.508707: step: 300/77, loss: 0.005518500227481127 2023-01-23 22:49:26.809788: step: 304/77, loss: 0.003828242188319564 2023-01-23 22:49:28.113283: step: 308/77, loss: 0.01607200689613819 2023-01-23 22:49:29.440784: step: 312/77, loss: 0.007805492728948593 2023-01-23 22:49:30.784513: step: 316/77, loss: 0.05071113258600235 2023-01-23 22:49:32.099951: step: 320/77, loss: 0.020300591364502907 2023-01-23 22:49:33.431368: step: 324/77, loss: 0.006258299574255943 2023-01-23 22:49:34.767704: step: 328/77, loss: 0.12374333292245865 2023-01-23 22:49:36.089603: step: 332/77, loss: 0.019471395760774612 2023-01-23 22:49:37.435123: step: 336/77, loss: 0.013056513853371143 2023-01-23 22:49:38.808142: step: 340/77, loss: 0.00616362364962697 2023-01-23 22:49:40.098046: step: 344/77, loss: 0.005581936798989773 2023-01-23 22:49:41.407585: step: 348/77, loss: 0.027472928166389465 2023-01-23 22:49:42.778934: step: 352/77, loss: 0.021108750253915787 2023-01-23 22:49:44.133724: step: 356/77, loss: 0.006503445096313953 2023-01-23 22:49:45.418843: step: 360/77, loss: 0.0004131326568312943 2023-01-23 22:49:46.764280: step: 364/77, loss: 0.01506958156824112 2023-01-23 22:49:48.043585: step: 368/77, loss: 0.00557693699374795 2023-01-23 22:49:49.396777: step: 372/77, loss: 0.03858831524848938 2023-01-23 22:49:50.754240: step: 376/77, loss: 0.006581415422260761 2023-01-23 22:49:52.092715: step: 380/77, loss: 0.029814526438713074 2023-01-23 22:49:53.421754: step: 384/77, loss: 0.009821525774896145 2023-01-23 22:49:54.750897: step: 388/77, loss: 0.0022138720378279686 ================================================== Loss: 0.026 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5298507462686567, 'f1': 0.6761904761904761}, 'slot': {'p': 0.6129032258064516, 'r': 0.017070979335130278, 'f1': 0.033216783216783216}, 'combined': 0.022460872460872458, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.6129032258064516, 'r': 0.017070979335130278, 'f1': 0.033216783216783216}, 'combined': 0.022875709196463912, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Russian: {'template': {'p': 0.935064935064935, 'r': 0.5373134328358209, 'f1': 0.6824644549763033}, 'slot': {'p': 0.6129032258064516, 'r': 0.017070979335130278, 'f1': 0.033216783216783216}, 'combined': 
0.022669273854107976, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:51:35.695943: step: 4/77, loss: 0.00011637747229542583 2023-01-23 22:51:37.002818: step: 8/77, loss: 0.0368674173951149 2023-01-23 22:51:38.280292: step: 12/77, loss: 0.02475127950310707 2023-01-23 22:51:39.614182: step: 16/77, loss: 0.01712360978126526 2023-01-23 22:51:40.910807: step: 20/77, loss: 0.012559721246361732 2023-01-23 22:51:42.227217: step: 24/77, loss: 0.00196480518206954 2023-01-23 22:51:43.620808: step: 28/77, loss: 0.005120502784848213 2023-01-23 22:51:44.964661: step: 32/77, loss: 0.10957054793834686 2023-01-23 22:51:46.290603: step: 36/77, loss: 0.01413258071988821 2023-01-23 22:51:47.577617: step: 40/77, loss: 0.014781583100557327 2023-01-23 22:51:48.854178: step: 44/77, loss: 0.02707362174987793 2023-01-23 22:51:50.153281: step: 48/77, loss: 
0.015094796195626259 2023-01-23 22:51:51.435283: step: 52/77, loss: 0.009382286109030247 2023-01-23 22:51:52.758966: step: 56/77, loss: 0.00782015547156334 2023-01-23 22:51:54.034332: step: 60/77, loss: 0.0029509845189750195 2023-01-23 22:51:55.327076: step: 64/77, loss: 0.0021931114606559277 2023-01-23 22:51:56.598624: step: 68/77, loss: 0.047177016735076904 2023-01-23 22:51:57.903371: step: 72/77, loss: 0.039223603904247284 2023-01-23 22:51:59.248224: step: 76/77, loss: 0.16369330883026123 2023-01-23 22:52:00.473092: step: 80/77, loss: 0.0793350338935852 2023-01-23 22:52:01.776898: step: 84/77, loss: 0.09823714941740036 2023-01-23 22:52:03.078826: step: 88/77, loss: 0.03174787759780884 2023-01-23 22:52:04.425650: step: 92/77, loss: 0.00029889249708503485 2023-01-23 22:52:05.726018: step: 96/77, loss: 0.007706539239734411 2023-01-23 22:52:07.026205: step: 100/77, loss: 0.06518713384866714 2023-01-23 22:52:08.275327: step: 104/77, loss: 0.023982468992471695 2023-01-23 22:52:09.558388: step: 108/77, loss: 0.007779199630022049 2023-01-23 22:52:10.853027: step: 112/77, loss: 0.021028827875852585 2023-01-23 22:52:12.191223: step: 116/77, loss: 0.012213161215186119 2023-01-23 22:52:13.482076: step: 120/77, loss: 5.075455555925146e-05 2023-01-23 22:52:14.800345: step: 124/77, loss: 0.022922176867723465 2023-01-23 22:52:16.145587: step: 128/77, loss: 0.004472864791750908 2023-01-23 22:52:17.471558: step: 132/77, loss: 0.011102437973022461 2023-01-23 22:52:18.795802: step: 136/77, loss: 0.0611458458006382 2023-01-23 22:52:20.093863: step: 140/77, loss: 2.13903040275909e-05 2023-01-23 22:52:21.366768: step: 144/77, loss: 0.011816516518592834 2023-01-23 22:52:22.643997: step: 148/77, loss: 0.037825245410203934 2023-01-23 22:52:23.915151: step: 152/77, loss: 0.029810432344675064 2023-01-23 22:52:25.204857: step: 156/77, loss: 0.015813257545232773 2023-01-23 22:52:26.533264: step: 160/77, loss: 0.025386273860931396 2023-01-23 22:52:27.857608: step: 164/77, loss: 0.12720629572868347 2023-01-23 22:52:29.203588: step: 168/77, loss: 0.022320548072457314 2023-01-23 22:52:30.510145: step: 172/77, loss: 0.006334016565233469 2023-01-23 22:52:31.814739: step: 176/77, loss: 0.01868707872927189 2023-01-23 22:52:33.166117: step: 180/77, loss: 0.021900024265050888 2023-01-23 22:52:34.488156: step: 184/77, loss: 0.01332312822341919 2023-01-23 22:52:35.824043: step: 188/77, loss: 0.0190892331302166 2023-01-23 22:52:37.146931: step: 192/77, loss: 0.053591687232255936 2023-01-23 22:52:38.392366: step: 196/77, loss: 0.03223322704434395 2023-01-23 22:52:39.777248: step: 200/77, loss: 0.012321964837610722 2023-01-23 22:52:41.091701: step: 204/77, loss: 0.05096723884344101 2023-01-23 22:52:42.395538: step: 208/77, loss: 0.012583065778017044 2023-01-23 22:52:43.704202: step: 212/77, loss: 0.22834046185016632 2023-01-23 22:52:45.003585: step: 216/77, loss: 0.006164837162941694 2023-01-23 22:52:46.277185: step: 220/77, loss: 0.04228643327951431 2023-01-23 22:52:47.578580: step: 224/77, loss: 0.014050657860934734 2023-01-23 22:52:48.868866: step: 228/77, loss: 0.029714161530137062 2023-01-23 22:52:50.198378: step: 232/77, loss: 0.01477883756160736 2023-01-23 22:52:51.536072: step: 236/77, loss: 0.030889861285686493 2023-01-23 22:52:52.842519: step: 240/77, loss: 0.014133702963590622 2023-01-23 22:52:54.187411: step: 244/77, loss: 0.0030390899628400803 2023-01-23 22:52:55.495851: step: 248/77, loss: 0.00498980050906539 2023-01-23 22:52:56.789156: step: 252/77, loss: 0.031213590875267982 2023-01-23 22:52:58.058108: step: 
256/77, loss: 0.004602812230587006 2023-01-23 22:52:59.364193: step: 260/77, loss: 0.04868490993976593 2023-01-23 22:53:00.659749: step: 264/77, loss: 0.0032033356837928295 2023-01-23 22:53:01.963763: step: 268/77, loss: 0.024175122380256653 2023-01-23 22:53:03.267641: step: 272/77, loss: 0.011258386075496674 2023-01-23 22:53:04.589073: step: 276/77, loss: 0.0016684290021657944 2023-01-23 22:53:05.893336: step: 280/77, loss: 0.000780187314376235 2023-01-23 22:53:07.213865: step: 284/77, loss: 0.009816921316087246 2023-01-23 22:53:08.530753: step: 288/77, loss: 0.005105116404592991 2023-01-23 22:53:09.827009: step: 292/77, loss: 0.013287038542330265 2023-01-23 22:53:11.150564: step: 296/77, loss: 0.025341173633933067 2023-01-23 22:53:12.470213: step: 300/77, loss: 0.003929181955754757 2023-01-23 22:53:13.791010: step: 304/77, loss: 0.017698541283607483 2023-01-23 22:53:15.066144: step: 308/77, loss: 0.022486358880996704 2023-01-23 22:53:16.363394: step: 312/77, loss: 0.003166760317981243 2023-01-23 22:53:17.671829: step: 316/77, loss: 0.009733240120112896 2023-01-23 22:53:18.990197: step: 320/77, loss: 0.003576915245503187 2023-01-23 22:53:20.280384: step: 324/77, loss: 0.039995431900024414 2023-01-23 22:53:21.596381: step: 328/77, loss: 0.06405875086784363 2023-01-23 22:53:22.904870: step: 332/77, loss: 0.03697868436574936 2023-01-23 22:53:24.178390: step: 336/77, loss: 8.15482489997521e-05 2023-01-23 22:53:25.495824: step: 340/77, loss: 0.0017763026989996433 2023-01-23 22:53:26.806382: step: 344/77, loss: 0.0001419158943463117 2023-01-23 22:53:28.109030: step: 348/77, loss: 0.0014657576102763414 2023-01-23 22:53:29.372472: step: 352/77, loss: 0.009428405202925205 2023-01-23 22:53:30.666896: step: 356/77, loss: 0.06886311620473862 2023-01-23 22:53:31.942265: step: 360/77, loss: 0.03953403979539871 2023-01-23 22:53:33.233410: step: 364/77, loss: 0.026917902752757072 2023-01-23 22:53:34.556175: step: 368/77, loss: 0.017108548432588577 2023-01-23 22:53:35.884478: step: 372/77, loss: 0.006558586843311787 2023-01-23 22:53:37.175548: step: 376/77, loss: 0.004070008639246225 2023-01-23 22:53:38.457855: step: 380/77, loss: 0.011937337927520275 2023-01-23 22:53:39.763762: step: 384/77, loss: 0.025983836501836777 2023-01-23 22:53:41.079904: step: 388/77, loss: 0.013943709433078766 ================================================== Loss: 0.026 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9024390243902439, 'r': 0.5522388059701493, 'f1': 0.6851851851851852}, 'slot': {'p': 0.42857142857142855, 'r': 0.018867924528301886, 'f1': 0.03614457831325301}, 'combined': 0.024765729585006693, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.925, 'r': 0.5522388059701493, 'f1': 0.6915887850467289}, 'slot': {'p': 0.45652173913043476, 'r': 0.018867924528301886, 'f1': 0.0362381363244176}, 'combined': 0.025061888672961705, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.925, 'r': 0.5522388059701493, 'f1': 
0.6915887850467289}, 'slot': {'p': 0.4666666666666667, 'r': 0.018867924528301886, 'f1': 0.03626943005181347}, 'combined': 0.025083531063870994, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:55:21.560129: step: 4/77, loss: 0.04400216042995453 2023-01-23 22:55:22.863447: step: 8/77, loss: 0.0165130402892828 2023-01-23 22:55:24.140866: step: 12/77, loss: 0.011227885261178017 2023-01-23 22:55:25.470876: step: 16/77, loss: 0.012780023738741875 2023-01-23 22:55:26.776254: step: 20/77, loss: 0.007339693605899811 2023-01-23 22:55:28.052340: step: 24/77, loss: 0.018071576952934265 2023-01-23 22:55:29.331107: step: 28/77, loss: 0.011834011413156986 2023-01-23 22:55:30.646099: step: 32/77, loss: 0.04743792116641998 2023-01-23 22:55:31.923247: step: 36/77, loss: 0.054762475192546844 2023-01-23 22:55:33.178293: step: 40/77, loss: 
0.025785135105252266 2023-01-23 22:55:34.467547: step: 44/77, loss: 0.004111032467335463 2023-01-23 22:55:35.757643: step: 48/77, loss: 0.04216965287923813 2023-01-23 22:55:37.095483: step: 52/77, loss: 0.00041233195224776864 2023-01-23 22:55:38.392099: step: 56/77, loss: 0.0029579598922282457 2023-01-23 22:55:39.703808: step: 60/77, loss: 0.007977910339832306 2023-01-23 22:55:41.027232: step: 64/77, loss: 0.026709450408816338 2023-01-23 22:55:42.362615: step: 68/77, loss: 0.10443099588155746 2023-01-23 22:55:43.721463: step: 72/77, loss: 0.0038933041505515575 2023-01-23 22:55:44.963388: step: 76/77, loss: 0.009387347847223282 2023-01-23 22:55:46.275921: step: 80/77, loss: 0.0030997178982943296 2023-01-23 22:55:47.561693: step: 84/77, loss: 0.006862640380859375 2023-01-23 22:55:48.863381: step: 88/77, loss: 0.008558643981814384 2023-01-23 22:55:50.162821: step: 92/77, loss: 0.004854598548263311 2023-01-23 22:55:51.462606: step: 96/77, loss: 0.029413817450404167 2023-01-23 22:55:52.763091: step: 100/77, loss: 0.019570060074329376 2023-01-23 22:55:54.087914: step: 104/77, loss: 0.00334225594997406 2023-01-23 22:55:55.355160: step: 108/77, loss: 0.006600016728043556 2023-01-23 22:55:56.655705: step: 112/77, loss: 0.057152628898620605 2023-01-23 22:55:57.972460: step: 116/77, loss: 0.05850166454911232 2023-01-23 22:55:59.221816: step: 120/77, loss: 0.026945604011416435 2023-01-23 22:56:00.525688: step: 124/77, loss: 0.002308004070073366 2023-01-23 22:56:01.822822: step: 128/77, loss: 0.030956268310546875 2023-01-23 22:56:03.160404: step: 132/77, loss: 0.004936728626489639 2023-01-23 22:56:04.469972: step: 136/77, loss: 0.021307937800884247 2023-01-23 22:56:05.750839: step: 140/77, loss: 0.07446351647377014 2023-01-23 22:56:07.059103: step: 144/77, loss: 0.002486872486770153 2023-01-23 22:56:08.334430: step: 148/77, loss: 0.0009867411572486162 2023-01-23 22:56:09.659386: step: 152/77, loss: 0.0032195388339459896 2023-01-23 22:56:10.970924: step: 156/77, loss: 0.03516755253076553 2023-01-23 22:56:12.315386: step: 160/77, loss: 0.1058875247836113 2023-01-23 22:56:13.609181: step: 164/77, loss: 0.034278132021427155 2023-01-23 22:56:14.920123: step: 168/77, loss: 0.008507543243467808 2023-01-23 22:56:16.220732: step: 172/77, loss: 0.0005227087531238794 2023-01-23 22:56:17.538809: step: 176/77, loss: 0.03415210545063019 2023-01-23 22:56:18.841193: step: 180/77, loss: 0.003491913666948676 2023-01-23 22:56:20.102896: step: 184/77, loss: 0.00460367975756526 2023-01-23 22:56:21.399389: step: 188/77, loss: 0.01312391459941864 2023-01-23 22:56:22.701889: step: 192/77, loss: 0.060950737446546555 2023-01-23 22:56:24.001292: step: 196/77, loss: 0.0038686206098645926 2023-01-23 22:56:25.345884: step: 200/77, loss: 0.011776718311011791 2023-01-23 22:56:26.636587: step: 204/77, loss: 0.06949067115783691 2023-01-23 22:56:27.930688: step: 208/77, loss: 0.004097940865904093 2023-01-23 22:56:29.267970: step: 212/77, loss: 0.043237391859292984 2023-01-23 22:56:30.566118: step: 216/77, loss: 0.04034322500228882 2023-01-23 22:56:31.862231: step: 220/77, loss: 0.00045904243597760797 2023-01-23 22:56:33.148136: step: 224/77, loss: 0.01494511030614376 2023-01-23 22:56:34.464722: step: 228/77, loss: 0.0987260490655899 2023-01-23 22:56:35.743311: step: 232/77, loss: 0.0036450622137635946 2023-01-23 22:56:37.067172: step: 236/77, loss: 0.004664699546992779 2023-01-23 22:56:38.403202: step: 240/77, loss: 0.023832963779568672 2023-01-23 22:56:39.715744: step: 244/77, loss: 0.02122790366411209 2023-01-23 22:56:41.061747: step: 
248/77, loss: 0.0848722830414772 2023-01-23 22:56:42.420275: step: 252/77, loss: 0.00020455481717363 2023-01-23 22:56:43.716639: step: 256/77, loss: 0.023084642365574837 2023-01-23 22:56:45.019075: step: 260/77, loss: 0.039016205817461014 2023-01-23 22:56:46.327062: step: 264/77, loss: 0.02520732954144478 2023-01-23 22:56:47.629327: step: 268/77, loss: 0.0230961162596941 2023-01-23 22:56:48.943533: step: 272/77, loss: 0.00813551526516676 2023-01-23 22:56:50.241569: step: 276/77, loss: 0.001504933345131576 2023-01-23 22:56:51.543140: step: 280/77, loss: 0.0649275928735733 2023-01-23 22:56:52.851512: step: 284/77, loss: 0.009410201571881771 2023-01-23 22:56:54.188066: step: 288/77, loss: 0.004082283470779657 2023-01-23 22:56:55.514015: step: 292/77, loss: 0.002070082351565361 2023-01-23 22:56:56.879220: step: 296/77, loss: 0.03177042677998543 2023-01-23 22:56:58.211816: step: 300/77, loss: 0.10870395600795746 2023-01-23 22:56:59.542088: step: 304/77, loss: 0.0013557918136939406 2023-01-23 22:57:00.852124: step: 308/77, loss: 0.019374998286366463 2023-01-23 22:57:02.167285: step: 312/77, loss: 0.009799455292522907 2023-01-23 22:57:03.473499: step: 316/77, loss: 0.026309311389923096 2023-01-23 22:57:04.835635: step: 320/77, loss: 0.010569714941084385 2023-01-23 22:57:06.115617: step: 324/77, loss: 0.018344147130846977 2023-01-23 22:57:07.418790: step: 328/77, loss: 0.022369852289557457 2023-01-23 22:57:08.690824: step: 332/77, loss: 0.03339612856507301 2023-01-23 22:57:09.988336: step: 336/77, loss: 0.0063545554876327515 2023-01-23 22:57:11.244770: step: 340/77, loss: 0.004273498430848122 2023-01-23 22:57:12.497620: step: 344/77, loss: 0.030692612752318382 2023-01-23 22:57:13.789596: step: 348/77, loss: 0.015207581222057343 2023-01-23 22:57:15.078017: step: 352/77, loss: 0.005335003137588501 2023-01-23 22:57:16.352212: step: 356/77, loss: 0.018144914880394936 2023-01-23 22:57:17.661778: step: 360/77, loss: 0.016128433868288994 2023-01-23 22:57:18.935532: step: 364/77, loss: 0.005331959575414658 2023-01-23 22:57:20.267797: step: 368/77, loss: 0.019862277433276176 2023-01-23 22:57:21.591254: step: 372/77, loss: 0.00042862031841650605 2023-01-23 22:57:22.865710: step: 376/77, loss: 0.015438038855791092 2023-01-23 22:57:24.170089: step: 380/77, loss: 0.02398861199617386 2023-01-23 22:57:25.477449: step: 384/77, loss: 0.0012583807110786438 2023-01-23 22:57:26.830771: step: 388/77, loss: 0.04697339981794357 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 1.0, 'r': 0.48507462686567165, 'f1': 0.6532663316582915}, 'slot': {'p': 0.6666666666666666, 'r': 0.016172506738544475, 'f1': 0.03157894736842106}, 'combined': 0.020629463104998684, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 1.0, 'r': 0.47761194029850745, 'f1': 0.6464646464646464}, 'slot': {'p': 0.6896551724137931, 'r': 0.017969451931716084, 'f1': 0.03502626970227671}, 'combined': 0.022643245060057667, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 
'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 1.0, 'r': 0.47761194029850745, 'f1': 0.6464646464646464}, 'slot': {'p': 0.6923076923076923, 'r': 0.016172506738544475, 'f1': 0.03160667251975418}, 'combined': 0.02043259637640674, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:59:07.561739: step: 4/77, loss: 0.004059008788317442 2023-01-23 22:59:08.876565: step: 8/77, loss: 0.008530894294381142 2023-01-23 22:59:10.130797: step: 12/77, loss: 0.08699976652860641 2023-01-23 22:59:11.399506: step: 16/77, loss: 0.004244130104780197 2023-01-23 22:59:12.756115: step: 20/77, loss: 0.01405458152294159 2023-01-23 22:59:14.058405: step: 24/77, loss: 0.06256169825792313 2023-01-23 22:59:15.338127: step: 28/77, loss: 0.003683938877657056 2023-01-23 22:59:16.647782: step: 32/77, loss: 0.00039898944669403136 2023-01-23 
22:59:17.943602: step: 36/77, loss: 0.019498568028211594 2023-01-23 22:59:19.193372: step: 40/77, loss: 0.007669855374842882 2023-01-23 22:59:20.539471: step: 44/77, loss: 0.0028932204004377127 2023-01-23 22:59:21.825104: step: 48/77, loss: 0.005956342443823814 2023-01-23 22:59:23.126673: step: 52/77, loss: 0.003917335532605648 2023-01-23 22:59:24.470433: step: 56/77, loss: 0.0020870023872703314 2023-01-23 22:59:25.790001: step: 60/77, loss: 0.037281736731529236 2023-01-23 22:59:27.097914: step: 64/77, loss: 0.006114881951361895 2023-01-23 22:59:28.388007: step: 68/77, loss: 0.00022297426767181605 2023-01-23 22:59:29.735903: step: 72/77, loss: 0.060727570205926895 2023-01-23 22:59:31.062463: step: 76/77, loss: 0.04985472559928894 2023-01-23 22:59:32.378622: step: 80/77, loss: 0.001316759968176484 2023-01-23 22:59:33.678890: step: 84/77, loss: 0.028501683846116066 2023-01-23 22:59:34.976045: step: 88/77, loss: 0.05243585258722305 2023-01-23 22:59:36.340484: step: 92/77, loss: 0.038167450577020645 2023-01-23 22:59:37.655506: step: 96/77, loss: 0.13844504952430725 2023-01-23 22:59:38.967805: step: 100/77, loss: 0.014699107967317104 2023-01-23 22:59:40.292929: step: 104/77, loss: 0.00032180227572098374 2023-01-23 22:59:41.616156: step: 108/77, loss: 0.011475548148155212 2023-01-23 22:59:42.874494: step: 112/77, loss: 0.012297457084059715 2023-01-23 22:59:44.153976: step: 116/77, loss: 0.021710721775889397 2023-01-23 22:59:45.479167: step: 120/77, loss: 0.001743190223351121 2023-01-23 22:59:46.794089: step: 124/77, loss: 0.007135982625186443 2023-01-23 22:59:48.099358: step: 128/77, loss: 0.011817889288067818 2023-01-23 22:59:49.379019: step: 132/77, loss: 0.001370633952319622 2023-01-23 22:59:50.745959: step: 136/77, loss: 0.00016879255417734385 2023-01-23 22:59:52.012028: step: 140/77, loss: 0.0038989256136119366 2023-01-23 22:59:53.277441: step: 144/77, loss: 0.04668494313955307 2023-01-23 22:59:54.610474: step: 148/77, loss: 0.005964198615401983 2023-01-23 22:59:55.888949: step: 152/77, loss: 0.01362668164074421 2023-01-23 22:59:57.170043: step: 156/77, loss: 0.008104304783046246 2023-01-23 22:59:58.453472: step: 160/77, loss: 0.02379458025097847 2023-01-23 22:59:59.740439: step: 164/77, loss: 0.027938585728406906 2023-01-23 23:00:01.054934: step: 168/77, loss: 0.005503904074430466 2023-01-23 23:00:02.344938: step: 172/77, loss: 0.006314740050584078 2023-01-23 23:00:03.656192: step: 176/77, loss: 0.006072746589779854 2023-01-23 23:00:04.999380: step: 180/77, loss: 0.02708953619003296 2023-01-23 23:00:06.336459: step: 184/77, loss: 0.134894460439682 2023-01-23 23:00:07.659506: step: 188/77, loss: 0.01490715704858303 2023-01-23 23:00:08.939329: step: 192/77, loss: 0.027591338381171227 2023-01-23 23:00:10.216592: step: 196/77, loss: 0.018336333334445953 2023-01-23 23:00:11.501104: step: 200/77, loss: 0.0029937070794403553 2023-01-23 23:00:12.848256: step: 204/77, loss: 0.061283551156520844 2023-01-23 23:00:14.145662: step: 208/77, loss: 0.01410150621086359 2023-01-23 23:00:15.449702: step: 212/77, loss: 0.015505759045481682 2023-01-23 23:00:16.752579: step: 216/77, loss: 0.020994171500205994 2023-01-23 23:00:18.125284: step: 220/77, loss: 0.0008524280274286866 2023-01-23 23:00:19.392434: step: 224/77, loss: 0.024159442633390427 2023-01-23 23:00:20.721074: step: 228/77, loss: 0.04099274054169655 2023-01-23 23:00:22.034427: step: 232/77, loss: 0.013209857046604156 2023-01-23 23:00:23.346286: step: 236/77, loss: 0.01315020676702261 2023-01-23 23:00:24.664674: step: 240/77, loss: 
0.07746905833482742 2023-01-23 23:00:25.952330: step: 244/77, loss: 0.02136453054845333 2023-01-23 23:00:27.232331: step: 248/77, loss: 0.010054754093289375 2023-01-23 23:00:28.544380: step: 252/77, loss: 0.001023219432681799 2023-01-23 23:00:29.820694: step: 256/77, loss: 0.005565670784562826 2023-01-23 23:00:31.129566: step: 260/77, loss: 8.016253559617326e-06 2023-01-23 23:00:32.389966: step: 264/77, loss: 0.015554104000329971 2023-01-23 23:00:33.714696: step: 268/77, loss: 0.0036264844238758087 2023-01-23 23:00:35.067602: step: 272/77, loss: 0.0014844255056232214 2023-01-23 23:00:36.375317: step: 276/77, loss: 0.001290302723646164 2023-01-23 23:00:37.652797: step: 280/77, loss: 0.0001928550045704469 2023-01-23 23:00:39.003678: step: 284/77, loss: 0.008350521326065063 2023-01-23 23:00:40.323635: step: 288/77, loss: 0.05311926081776619 2023-01-23 23:00:41.660716: step: 292/77, loss: 0.013220196589827538 2023-01-23 23:00:42.992168: step: 296/77, loss: 0.4162817895412445 2023-01-23 23:00:44.288747: step: 300/77, loss: 0.038585688918828964 2023-01-23 23:00:45.592844: step: 304/77, loss: 0.0023205061443150043 2023-01-23 23:00:46.894697: step: 308/77, loss: 0.004533206578344107 2023-01-23 23:00:48.200135: step: 312/77, loss: 0.006979439407587051 2023-01-23 23:00:49.509159: step: 316/77, loss: 0.00823113601654768 2023-01-23 23:00:50.828990: step: 320/77, loss: 0.0049030412919819355 2023-01-23 23:00:52.194269: step: 324/77, loss: 0.001947331242263317 2023-01-23 23:00:53.485473: step: 328/77, loss: 0.010946845635771751 2023-01-23 23:00:54.808815: step: 332/77, loss: 0.07835371792316437 2023-01-23 23:00:56.157303: step: 336/77, loss: 0.016691043972969055 2023-01-23 23:00:57.446287: step: 340/77, loss: 0.023241087794303894 2023-01-23 23:00:58.740495: step: 344/77, loss: 0.00034647007123567164 2023-01-23 23:01:00.036138: step: 348/77, loss: 0.0037985360249876976 2023-01-23 23:01:01.358907: step: 352/77, loss: 0.04664488509297371 2023-01-23 23:01:02.696011: step: 356/77, loss: 0.004729550331830978 2023-01-23 23:01:04.010950: step: 360/77, loss: 0.005308020394295454 2023-01-23 23:01:05.331725: step: 364/77, loss: 0.006872281432151794 2023-01-23 23:01:06.646169: step: 368/77, loss: 0.046257421374320984 2023-01-23 23:01:07.945710: step: 372/77, loss: 0.0035345409996807575 2023-01-23 23:01:09.248697: step: 376/77, loss: 0.0029179975390434265 2023-01-23 23:01:10.586120: step: 380/77, loss: 0.003412567311897874 2023-01-23 23:01:11.961528: step: 384/77, loss: 0.001803032704629004 2023-01-23 23:01:13.303213: step: 388/77, loss: 0.0013890385162085295 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9857142857142858, 'r': 0.5149253731343284, 'f1': 0.6764705882352942}, 'slot': {'p': 0.6451612903225806, 'r': 0.017969451931716084, 'f1': 0.03496503496503497}, 'combined': 0.023652817770464834, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.9848484848484849, 'r': 0.48507462686567165, 'f1': 0.65}, 'slot': {'p': 0.6451612903225806, 'r': 0.017969451931716084, 'f1': 0.03496503496503497}, 'combined': 0.02272727272727273, 'epoch': 9} Dev Russian: 
{'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.9850746268656716, 'r': 0.4925373134328358, 'f1': 0.6567164179104477}, 'slot': {'p': 0.6451612903225806, 'r': 0.017969451931716084, 'f1': 0.03496503496503497}, 'combined': 0.02296211251435132, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:02:53.669807: step: 4/77, loss: 0.004019314423203468 2023-01-23 23:02:54.997527: step: 8/77, loss: 0.0018244950333610177 2023-01-23 23:02:56.312102: step: 12/77, loss: 0.013065481558442116 2023-01-23 23:02:57.619250: step: 16/77, loss: 0.030329227447509766 2023-01-23 23:02:58.964430: step: 20/77, loss: 0.008612009696662426 2023-01-23 23:03:00.261710: step: 24/77, loss: 0.00204270682297647 2023-01-23 23:03:01.537573: step: 28/77, 
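The evaluation dictionaries above follow a fixed pattern: each 'f1' is consistent with the usual harmonic mean of 'p' and 'r', and each 'combined' score matches the product of the template F1 and the slot F1 (for the epoch-9 Dev blocks, 0.7368421052631579 × 0.07029876977152899 ≈ 0.05179909351586346). The minimal sketch below recomputes those figures under that assumption; the helper name and layout are illustrative and are not taken from train.py.

```python
# Illustrative re-computation of the eval dictionaries logged above.
# ASSUMPTION (not taken from train.py): 'f1' is the harmonic mean of p and r,
# and 'combined' is template_f1 * slot_f1. Inputs are copied from the
# epoch-9 "Dev Chinese" block.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall; 0.0 when both are 0."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_p, template_r = 1.0, 0.5833333333333334
slot_p, slot_r = 0.5, 0.03780718336483932

template_f1 = f1(template_p, template_r)   # ~0.7368421052631579
slot_f1 = f1(slot_p, slot_r)               # ~0.07029876977152899
combined = template_f1 * slot_f1           # ~0.05179909351586346

print({"template_f1": template_f1, "slot_f1": slot_f1, "combined": combined})
```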
loss: 0.002712585497647524 2023-01-23 23:03:02.836355: step: 32/77, loss: 0.003777566133067012 2023-01-23 23:03:04.155415: step: 36/77, loss: 0.0516265407204628 2023-01-23 23:03:05.422734: step: 40/77, loss: 0.006463993806391954 2023-01-23 23:03:06.746298: step: 44/77, loss: 0.012646461836993694 2023-01-23 23:03:07.984080: step: 48/77, loss: 0.011762295849621296 2023-01-23 23:03:09.263578: step: 52/77, loss: 0.0002011667238548398 2023-01-23 23:03:10.571111: step: 56/77, loss: 0.00680114608258009 2023-01-23 23:03:11.914428: step: 60/77, loss: 0.029793288558721542 2023-01-23 23:03:13.225011: step: 64/77, loss: 0.0047124773263931274 2023-01-23 23:03:14.516964: step: 68/77, loss: 0.009396566078066826 2023-01-23 23:03:15.846768: step: 72/77, loss: 0.0017562780994921923 2023-01-23 23:03:17.187329: step: 76/77, loss: 0.01609233021736145 2023-01-23 23:03:18.460921: step: 80/77, loss: 0.0014276818837970495 2023-01-23 23:03:19.755758: step: 84/77, loss: 0.00016602581308688968 2023-01-23 23:03:21.052278: step: 88/77, loss: 0.0024550859816372395 2023-01-23 23:03:22.320623: step: 92/77, loss: 0.008442103862762451 2023-01-23 23:03:23.619332: step: 96/77, loss: 0.001850472646765411 2023-01-23 23:03:24.893714: step: 100/77, loss: 0.00015385696315206587 2023-01-23 23:03:26.207202: step: 104/77, loss: 0.0010777512798085809 2023-01-23 23:03:27.481966: step: 108/77, loss: 0.003965794574469328 2023-01-23 23:03:28.793626: step: 112/77, loss: 0.00920006912201643 2023-01-23 23:03:30.095638: step: 116/77, loss: 9.757414227351546e-05 2023-01-23 23:03:31.428884: step: 120/77, loss: 0.004842578433454037 2023-01-23 23:03:32.739598: step: 124/77, loss: 0.0011513237841427326 2023-01-23 23:03:34.091076: step: 128/77, loss: 0.03656904026865959 2023-01-23 23:03:35.398789: step: 132/77, loss: 0.006750887259840965 2023-01-23 23:03:36.689734: step: 136/77, loss: 0.002498416928574443 2023-01-23 23:03:38.004101: step: 140/77, loss: 0.002844251925125718 2023-01-23 23:03:39.274776: step: 144/77, loss: 0.005857444833964109 2023-01-23 23:03:40.556574: step: 148/77, loss: 0.0807381123304367 2023-01-23 23:03:41.909722: step: 152/77, loss: 0.0003607768740039319 2023-01-23 23:03:43.189510: step: 156/77, loss: 0.0107691939920187 2023-01-23 23:03:44.478900: step: 160/77, loss: 0.00013894679432269186 2023-01-23 23:03:45.748590: step: 164/77, loss: 0.00034505908843129873 2023-01-23 23:03:47.051228: step: 168/77, loss: 0.047398604452610016 2023-01-23 23:03:48.302043: step: 172/77, loss: 0.0016510799759998918 2023-01-23 23:03:49.653975: step: 176/77, loss: 0.01577381230890751 2023-01-23 23:03:50.988589: step: 180/77, loss: 0.004901238717138767 2023-01-23 23:03:52.282542: step: 184/77, loss: 0.04541084170341492 2023-01-23 23:03:53.624330: step: 188/77, loss: 0.06697969138622284 2023-01-23 23:03:54.938995: step: 192/77, loss: 0.0028941608034074306 2023-01-23 23:03:56.256790: step: 196/77, loss: 0.00033571728272363544 2023-01-23 23:03:57.555642: step: 200/77, loss: 0.00047282129526138306 2023-01-23 23:03:58.831105: step: 204/77, loss: 0.00794710498303175 2023-01-23 23:04:00.125638: step: 208/77, loss: 0.022247185930609703 2023-01-23 23:04:01.415103: step: 212/77, loss: 0.03522004187107086 2023-01-23 23:04:02.683305: step: 216/77, loss: 0.005268169101327658 2023-01-23 23:04:03.971456: step: 220/77, loss: 0.008936934173107147 2023-01-23 23:04:05.302940: step: 224/77, loss: 0.003135459730401635 2023-01-23 23:04:06.622585: step: 228/77, loss: 0.007926437072455883 2023-01-23 23:04:07.943717: step: 232/77, loss: 0.010014479048550129 2023-01-23 
23:04:09.309505: step: 236/77, loss: 0.0014004079857841134 2023-01-23 23:04:10.594738: step: 240/77, loss: 0.006368785165250301 2023-01-23 23:04:11.903305: step: 244/77, loss: 0.01043130923062563 2023-01-23 23:04:13.217988: step: 248/77, loss: 4.2198873416054994e-05 2023-01-23 23:04:14.550773: step: 252/77, loss: 0.03743477910757065 2023-01-23 23:04:15.852791: step: 256/77, loss: 0.011894501745700836 2023-01-23 23:04:17.134323: step: 260/77, loss: 0.010070500895380974 2023-01-23 23:04:18.459797: step: 264/77, loss: 0.0018026400357484818 2023-01-23 23:04:19.797505: step: 268/77, loss: 0.0027212868444621563 2023-01-23 23:04:21.116898: step: 272/77, loss: 0.0017126111779361963 2023-01-23 23:04:22.365530: step: 276/77, loss: 0.00834929384291172 2023-01-23 23:04:23.648512: step: 280/77, loss: 0.03722648695111275 2023-01-23 23:04:24.970525: step: 284/77, loss: 0.011383737437427044 2023-01-23 23:04:26.300224: step: 288/77, loss: 0.026112789288163185 2023-01-23 23:04:27.613590: step: 292/77, loss: 0.003575179958716035 2023-01-23 23:04:28.911083: step: 296/77, loss: 0.012457441538572311 2023-01-23 23:04:30.256086: step: 300/77, loss: 0.0018797186203300953 2023-01-23 23:04:31.577624: step: 304/77, loss: 0.03331933543086052 2023-01-23 23:04:32.845017: step: 308/77, loss: 0.04128976911306381 2023-01-23 23:04:34.155215: step: 312/77, loss: 0.0009756057406775653 2023-01-23 23:04:35.457027: step: 316/77, loss: 0.006699277553707361 2023-01-23 23:04:36.796596: step: 320/77, loss: 0.0006561750778928399 2023-01-23 23:04:38.101865: step: 324/77, loss: 0.006393713876605034 2023-01-23 23:04:39.443414: step: 328/77, loss: 0.00039606340578757226 2023-01-23 23:04:40.729925: step: 332/77, loss: 0.033246491104364395 2023-01-23 23:04:42.107611: step: 336/77, loss: 0.03028215281665325 2023-01-23 23:04:43.405517: step: 340/77, loss: 0.03625636175274849 2023-01-23 23:04:44.666951: step: 344/77, loss: 7.374807319138199e-05 2023-01-23 23:04:45.936260: step: 348/77, loss: 0.01907702349126339 2023-01-23 23:04:47.232715: step: 352/77, loss: 0.003939451649785042 2023-01-23 23:04:48.526507: step: 356/77, loss: 0.038318932056427 2023-01-23 23:04:49.876144: step: 360/77, loss: 0.016218364238739014 2023-01-23 23:04:51.221913: step: 364/77, loss: 0.01653050258755684 2023-01-23 23:04:52.551123: step: 368/77, loss: 0.0006744784768670797 2023-01-23 23:04:53.864046: step: 372/77, loss: 0.013431889936327934 2023-01-23 23:04:55.169084: step: 376/77, loss: 0.11695510149002075 2023-01-23 23:04:56.462181: step: 380/77, loss: 0.006448196247220039 2023-01-23 23:04:57.795963: step: 384/77, loss: 0.02021513134241104 2023-01-23 23:04:59.109881: step: 388/77, loss: 0.010459277778863907 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 10} Test Chinese: {'template': {'p': 1.0, 'r': 0.5373134328358209, 'f1': 0.6990291262135923}, 'slot': {'p': 0.5945945945945946, 'r': 0.019766397124887692, 'f1': 0.03826086956521739}, 'combined': 0.02674546222034614, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 10} Test Korean: {'template': {'p': 1.0, 'r': 0.5373134328358209, 'f1': 0.6990291262135923}, 'slot': {'p': 0.6, 'r': 0.018867924528301886, 'f1': 
0.036585365853658534}, 'combined': 0.02557423632488752, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 10} Test Russian: {'template': {'p': 1.0, 'r': 0.5298507462686567, 'f1': 0.6926829268292682}, 'slot': {'p': 0.6176470588235294, 'r': 0.018867924528301886, 'f1': 0.036617262423714034}, 'combined': 0.025364152508133623, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:06:40.104835: step: 4/77, loss: 0.004519676323980093 2023-01-23 23:06:41.430084: step: 8/77, loss: 0.010373960249125957 2023-01-23 23:06:42.764008: step: 12/77, loss: 0.006382956635206938 2023-01-23 23:06:44.055006: step: 16/77, loss: 0.02072090096771717 2023-01-23 23:06:45.356862: step: 20/77, loss: 
0.007683451287448406 2023-01-23 23:06:46.638414: step: 24/77, loss: 0.031069735065102577 2023-01-23 23:06:47.912232: step: 28/77, loss: 9.526885696686804e-05 2023-01-23 23:06:49.200956: step: 32/77, loss: 0.0003794231452047825 2023-01-23 23:06:50.542815: step: 36/77, loss: 0.002033552620559931 2023-01-23 23:06:51.803215: step: 40/77, loss: 0.016326041892170906 2023-01-23 23:06:53.110908: step: 44/77, loss: 0.019845878705382347 2023-01-23 23:06:54.441107: step: 48/77, loss: 0.000536456354893744 2023-01-23 23:06:55.780722: step: 52/77, loss: 0.0061861746944487095 2023-01-23 23:06:57.103818: step: 56/77, loss: 0.00497963884845376 2023-01-23 23:06:58.432189: step: 60/77, loss: 0.01730026677250862 2023-01-23 23:06:59.779560: step: 64/77, loss: 0.001555685419589281 2023-01-23 23:07:01.106634: step: 68/77, loss: 0.030430197715759277 2023-01-23 23:07:02.405207: step: 72/77, loss: 0.005852745845913887 2023-01-23 23:07:03.730971: step: 76/77, loss: 3.0581431929022074e-05 2023-01-23 23:07:04.990044: step: 80/77, loss: 0.008818567730486393 2023-01-23 23:07:06.281844: step: 84/77, loss: 0.010246563702821732 2023-01-23 23:07:07.575138: step: 88/77, loss: 0.13901685178279877 2023-01-23 23:07:08.904682: step: 92/77, loss: 0.0018688386771827936 2023-01-23 23:07:10.207253: step: 96/77, loss: 0.03900507837533951 2023-01-23 23:07:11.536087: step: 100/77, loss: 0.005112081300467253 2023-01-23 23:07:12.892277: step: 104/77, loss: 0.016174569725990295 2023-01-23 23:07:14.168996: step: 108/77, loss: 0.0031978185288608074 2023-01-23 23:07:15.497757: step: 112/77, loss: 0.007094390690326691 2023-01-23 23:07:16.859583: step: 116/77, loss: 0.007978803478181362 2023-01-23 23:07:18.137552: step: 120/77, loss: 0.010525790974497795 2023-01-23 23:07:19.411217: step: 124/77, loss: 0.009775063954293728 2023-01-23 23:07:20.702295: step: 128/77, loss: 0.004989398177713156 2023-01-23 23:07:21.963817: step: 132/77, loss: 0.002119546290487051 2023-01-23 23:07:23.259832: step: 136/77, loss: 0.004756305366754532 2023-01-23 23:07:24.577176: step: 140/77, loss: 0.0028865262866020203 2023-01-23 23:07:25.903723: step: 144/77, loss: 0.004879987332969904 2023-01-23 23:07:27.216704: step: 148/77, loss: 0.002608640119433403 2023-01-23 23:07:28.462568: step: 152/77, loss: 0.016980547457933426 2023-01-23 23:07:29.798516: step: 156/77, loss: 0.0318780392408371 2023-01-23 23:07:31.093324: step: 160/77, loss: 0.010919311083853245 2023-01-23 23:07:32.418714: step: 164/77, loss: 0.02614099346101284 2023-01-23 23:07:33.732167: step: 168/77, loss: 0.0016549699939787388 2023-01-23 23:07:35.048334: step: 172/77, loss: 0.00027187608066014946 2023-01-23 23:07:36.335146: step: 176/77, loss: 0.015034875832498074 2023-01-23 23:07:37.649519: step: 180/77, loss: 0.03275790065526962 2023-01-23 23:07:38.949181: step: 184/77, loss: 1.341791357845068e-05 2023-01-23 23:07:40.250917: step: 188/77, loss: 0.0011008772999048233 2023-01-23 23:07:41.574985: step: 192/77, loss: 0.05528085306286812 2023-01-23 23:07:42.899889: step: 196/77, loss: 0.007917086593806744 2023-01-23 23:07:44.212235: step: 200/77, loss: 0.027716726064682007 2023-01-23 23:07:45.509793: step: 204/77, loss: 0.010608302429318428 2023-01-23 23:07:46.819180: step: 208/77, loss: 0.007032268680632114 2023-01-23 23:07:48.128268: step: 212/77, loss: 0.004656591452658176 2023-01-23 23:07:49.396053: step: 216/77, loss: 0.00042243345524184406 2023-01-23 23:07:50.709255: step: 220/77, loss: 0.0030772360041737556 2023-01-23 23:07:52.039703: step: 224/77, loss: 0.0013640880351886153 2023-01-23 
23:07:53.342534: step: 228/77, loss: 0.016188669949769974 2023-01-23 23:07:54.607514: step: 232/77, loss: 0.00018536573043093085 2023-01-23 23:07:55.873679: step: 236/77, loss: 0.026044102385640144 2023-01-23 23:07:57.215348: step: 240/77, loss: 0.001282632234506309 2023-01-23 23:07:58.545023: step: 244/77, loss: 0.013146881945431232 2023-01-23 23:07:59.851076: step: 248/77, loss: 0.005962767638266087 2023-01-23 23:08:01.161128: step: 252/77, loss: 0.001891864463686943 2023-01-23 23:08:02.488952: step: 256/77, loss: 0.03969700634479523 2023-01-23 23:08:03.757838: step: 260/77, loss: 0.03692417964339256 2023-01-23 23:08:05.055316: step: 264/77, loss: 0.00492922542616725 2023-01-23 23:08:06.378733: step: 268/77, loss: 0.012601925060153008 2023-01-23 23:08:07.676331: step: 272/77, loss: 0.018331564962863922 2023-01-23 23:08:08.977064: step: 276/77, loss: 0.001236966927535832 2023-01-23 23:08:10.295480: step: 280/77, loss: 0.009361336007714272 2023-01-23 23:08:11.601638: step: 284/77, loss: 0.029684297740459442 2023-01-23 23:08:12.906232: step: 288/77, loss: 0.0017025243723765016 2023-01-23 23:08:14.221076: step: 292/77, loss: 0.018443405628204346 2023-01-23 23:08:15.516996: step: 296/77, loss: 0.0002197076682932675 2023-01-23 23:08:16.808102: step: 300/77, loss: 0.07794710993766785 2023-01-23 23:08:18.115732: step: 304/77, loss: 0.0007880099583417177 2023-01-23 23:08:19.448174: step: 308/77, loss: 0.02216692827641964 2023-01-23 23:08:20.750087: step: 312/77, loss: 0.05256933346390724 2023-01-23 23:08:22.076395: step: 316/77, loss: 0.046504754573106766 2023-01-23 23:08:23.417010: step: 320/77, loss: 0.016857344657182693 2023-01-23 23:08:24.710304: step: 324/77, loss: 0.013420961797237396 2023-01-23 23:08:25.996702: step: 328/77, loss: 0.0002451570762787014 2023-01-23 23:08:27.302420: step: 332/77, loss: 0.04551130160689354 2023-01-23 23:08:28.606405: step: 336/77, loss: 0.0486028753221035 2023-01-23 23:08:29.911737: step: 340/77, loss: 0.0007528806454502046 2023-01-23 23:08:31.194524: step: 344/77, loss: 0.004098730627447367 2023-01-23 23:08:32.496416: step: 348/77, loss: 0.040485743433237076 2023-01-23 23:08:33.762888: step: 352/77, loss: 0.00328265642747283 2023-01-23 23:08:35.025217: step: 356/77, loss: 0.0029611773788928986 2023-01-23 23:08:36.315766: step: 360/77, loss: 0.009117941372096539 2023-01-23 23:08:37.611888: step: 364/77, loss: 0.024599190801382065 2023-01-23 23:08:38.891676: step: 368/77, loss: 0.002736852038651705 2023-01-23 23:08:40.175336: step: 372/77, loss: 0.011240647174417973 2023-01-23 23:08:41.549999: step: 376/77, loss: 0.026055969297885895 2023-01-23 23:08:42.853441: step: 380/77, loss: 0.0010039397748187184 2023-01-23 23:08:44.166851: step: 384/77, loss: 0.0022746820468455553 2023-01-23 23:08:45.420012: step: 388/77, loss: 0.031541526317596436 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9863013698630136, 'r': 0.5373134328358209, 'f1': 0.6956521739130435}, 'slot': {'p': 0.5405405405405406, 'r': 0.017969451931716084, 'f1': 0.034782608695652174}, 'combined': 0.024196597353497166, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 
11} Test Korean: {'template': {'p': 0.9861111111111112, 'r': 0.5298507462686567, 'f1': 0.6893203883495146}, 'slot': {'p': 0.5405405405405406, 'r': 0.017969451931716084, 'f1': 0.034782608695652174}, 'combined': 0.02397636133389616, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9861111111111112, 'r': 0.5298507462686567, 'f1': 0.6893203883495146}, 'slot': {'p': 0.5555555555555556, 'r': 0.017969451931716084, 'f1': 0.03481288076588338}, 'combined': 0.02399722848910408, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:10:25.991076: step: 4/77, loss: 0.0033823195844888687 2023-01-23 23:10:27.311258: step: 8/77, loss: 0.006067562848329544 2023-01-23 23:10:28.608611: step: 12/77, loss: 
0.004152682609856129 2023-01-23 23:10:29.871693: step: 16/77, loss: 0.011536704376339912 2023-01-23 23:10:31.158232: step: 20/77, loss: 0.03795253485441208 2023-01-23 23:10:32.444867: step: 24/77, loss: 0.0029349522665143013 2023-01-23 23:10:33.735603: step: 28/77, loss: 0.04994878172874451 2023-01-23 23:10:35.030910: step: 32/77, loss: 0.00848055724054575 2023-01-23 23:10:36.381426: step: 36/77, loss: 0.00016331578081008047 2023-01-23 23:10:37.662245: step: 40/77, loss: 0.006215088535100222 2023-01-23 23:10:39.010425: step: 44/77, loss: 0.0010558516951277852 2023-01-23 23:10:40.333964: step: 48/77, loss: 0.032813701778650284 2023-01-23 23:10:41.645946: step: 52/77, loss: 0.0005068772588856518 2023-01-23 23:10:42.924002: step: 56/77, loss: 0.02557031437754631 2023-01-23 23:10:44.216055: step: 60/77, loss: 0.0047400458715856075 2023-01-23 23:10:45.561381: step: 64/77, loss: 0.009990248829126358 2023-01-23 23:10:46.800439: step: 68/77, loss: 0.03629093989729881 2023-01-23 23:10:48.130481: step: 72/77, loss: 0.04313212260603905 2023-01-23 23:10:49.490740: step: 76/77, loss: 0.008982475847005844 2023-01-23 23:10:50.756282: step: 80/77, loss: 0.0032843714579939842 2023-01-23 23:10:52.116598: step: 84/77, loss: 3.893310349667445e-05 2023-01-23 23:10:53.472886: step: 88/77, loss: 0.003442638088017702 2023-01-23 23:10:54.786202: step: 92/77, loss: 0.0004835306026507169 2023-01-23 23:10:56.082287: step: 96/77, loss: 0.0011237069265916944 2023-01-23 23:10:57.372706: step: 100/77, loss: 0.002386020962148905 2023-01-23 23:10:58.679094: step: 104/77, loss: 0.006551727186888456 2023-01-23 23:10:59.973609: step: 108/77, loss: 0.01057566050440073 2023-01-23 23:11:01.265654: step: 112/77, loss: 0.013844773173332214 2023-01-23 23:11:02.570860: step: 116/77, loss: 0.001763289445079863 2023-01-23 23:11:03.860403: step: 120/77, loss: 0.050311699509620667 2023-01-23 23:11:05.173839: step: 124/77, loss: 0.010518746450543404 2023-01-23 23:11:06.463177: step: 128/77, loss: 0.12409986555576324 2023-01-23 23:11:07.800803: step: 132/77, loss: 0.0004061115032527596 2023-01-23 23:11:09.107402: step: 136/77, loss: 0.001003948738798499 2023-01-23 23:11:10.427019: step: 140/77, loss: 0.0005174941616132855 2023-01-23 23:11:11.724896: step: 144/77, loss: 0.006630543153733015 2023-01-23 23:11:13.102853: step: 148/77, loss: 0.029026571661233902 2023-01-23 23:11:14.430760: step: 152/77, loss: 0.04973183572292328 2023-01-23 23:11:15.708506: step: 156/77, loss: 0.0009474740945734084 2023-01-23 23:11:17.016674: step: 160/77, loss: 0.0030602377373725176 2023-01-23 23:11:18.365243: step: 164/77, loss: 0.059144243597984314 2023-01-23 23:11:19.716384: step: 168/77, loss: 0.009352781809866428 2023-01-23 23:11:20.997795: step: 172/77, loss: 0.004000375512987375 2023-01-23 23:11:22.304793: step: 176/77, loss: 0.03193020820617676 2023-01-23 23:11:23.612669: step: 180/77, loss: 0.0019103833474218845 2023-01-23 23:11:24.875785: step: 184/77, loss: 0.0012183418730273843 2023-01-23 23:11:26.162446: step: 188/77, loss: 0.005255671218037605 2023-01-23 23:11:27.474629: step: 192/77, loss: 0.04504666104912758 2023-01-23 23:11:28.793509: step: 196/77, loss: 0.022459331899881363 2023-01-23 23:11:30.118411: step: 200/77, loss: 0.0003197941405232996 2023-01-23 23:11:31.457436: step: 204/77, loss: 0.0009358513634651899 2023-01-23 23:11:32.748444: step: 208/77, loss: 0.010235392488539219 2023-01-23 23:11:34.055096: step: 212/77, loss: 0.0063028484582901 2023-01-23 23:11:35.370491: step: 216/77, loss: 0.0019760727882385254 2023-01-23 23:11:36.669172: 
step: 220/77, loss: 0.0003104644129052758 2023-01-23 23:11:38.022407: step: 224/77, loss: 0.000779816647991538 2023-01-23 23:11:39.391446: step: 228/77, loss: 0.001094711828045547 2023-01-23 23:11:40.718413: step: 232/77, loss: 0.0022505666129291058 2023-01-23 23:11:41.971284: step: 236/77, loss: 0.004705403000116348 2023-01-23 23:11:43.309439: step: 240/77, loss: 6.31944349152036e-05 2023-01-23 23:11:44.597522: step: 244/77, loss: 0.007898539304733276 2023-01-23 23:11:45.887776: step: 248/77, loss: 0.0002214372652815655 2023-01-23 23:11:47.166044: step: 252/77, loss: 0.0009268993744626641 2023-01-23 23:11:48.493079: step: 256/77, loss: 0.0010784551268443465 2023-01-23 23:11:49.776284: step: 260/77, loss: 0.001904657343402505 2023-01-23 23:11:50.998260: step: 264/77, loss: 0.05391979590058327 2023-01-23 23:11:52.295270: step: 268/77, loss: 0.016363475471735 2023-01-23 23:11:53.605058: step: 272/77, loss: 2.981504849230987e-06 2023-01-23 23:11:54.932586: step: 276/77, loss: 4.402042395668104e-05 2023-01-23 23:11:56.225061: step: 280/77, loss: 0.0005392417078837752 2023-01-23 23:11:57.500549: step: 284/77, loss: 0.0011079860851168633 2023-01-23 23:11:58.804174: step: 288/77, loss: 0.21042890846729279 2023-01-23 23:12:00.076234: step: 292/77, loss: 0.06970375776290894 2023-01-23 23:12:01.419195: step: 296/77, loss: 0.014009656384587288 2023-01-23 23:12:02.781588: step: 300/77, loss: 0.00030892904032953084 2023-01-23 23:12:04.080475: step: 304/77, loss: 0.017677268013358116 2023-01-23 23:12:05.348313: step: 308/77, loss: 0.09594322741031647 2023-01-23 23:12:06.659741: step: 312/77, loss: 0.0032438847701996565 2023-01-23 23:12:07.900345: step: 316/77, loss: 0.004886825103312731 2023-01-23 23:12:09.217201: step: 320/77, loss: 0.0006108014495112002 2023-01-23 23:12:10.479458: step: 324/77, loss: 0.06285342574119568 2023-01-23 23:12:11.778245: step: 328/77, loss: 0.00013249233597889543 2023-01-23 23:12:13.062558: step: 332/77, loss: 0.0010816589929163456 2023-01-23 23:12:14.391354: step: 336/77, loss: 0.006212495267391205 2023-01-23 23:12:15.680025: step: 340/77, loss: 0.0132598252967 2023-01-23 23:12:16.965288: step: 344/77, loss: 0.012078355997800827 2023-01-23 23:12:18.238224: step: 348/77, loss: 0.006435449235141277 2023-01-23 23:12:19.510713: step: 352/77, loss: 0.017631251364946365 2023-01-23 23:12:20.845544: step: 356/77, loss: 0.0931030884385109 2023-01-23 23:12:22.162395: step: 360/77, loss: 0.04081280156970024 2023-01-23 23:12:23.417025: step: 364/77, loss: 7.617353548994288e-05 2023-01-23 23:12:24.740087: step: 368/77, loss: 0.0042196400463581085 2023-01-23 23:12:26.060219: step: 372/77, loss: 0.0030183957424014807 2023-01-23 23:12:27.336295: step: 376/77, loss: 0.00011321428610244766 2023-01-23 23:12:28.604246: step: 380/77, loss: 0.011876048520207405 2023-01-23 23:12:29.886370: step: 384/77, loss: 0.00406003650277853 2023-01-23 23:12:31.224186: step: 388/77, loss: 0.08133828639984131 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.5238095238095238, 'r': 0.019766397124887692, 'f1': 0.0380952380952381}, 'combined': 0.026235399820305483, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Korean: {'template': {'p': 0.9230769230769231, 'r': 0.5373134328358209, 'f1': 0.6792452830188679}, 'slot': {'p': 0.5365853658536586, 'r': 0.019766397124887692, 'f1': 0.038128249566724434}, 'combined': 0.025898433667963766, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5238095238095238, 'r': 0.019766397124887692, 'f1': 0.0380952380952381}, 'combined': 0.026122448979591834, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 
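Each epoch block closes with a 'Loss:' summary (0.017 for epoch 12 above). A plausible reading, not confirmed against train.py, is that this figure is simply the mean of the per-step losses printed for that epoch, rounded to three decimals. The sketch below shows how it could be recomputed from the raw log text; the regular expression and function name are hypothetical and only mirror the 'step: N/77, loss: VALUE' records seen in this log (the two-record sample reuses the first two epoch-12 steps).

```python
# Hypothetical helper for summarising one epoch of this log.
# ASSUMPTION: the per-epoch "Loss: x.xxx" line is the mean of that epoch's
# step losses, rounded to three decimals; this is inferred, not confirmed.
import re
from statistics import mean

STEP_RE = re.compile(r"step: \d+/\d+, loss: ([0-9.eE+-]+)")

def epoch_mean_loss(log_text: str) -> float:
    """Average every 'loss:' value found in the given chunk of log text."""
    losses = [float(m.group(1)) for m in STEP_RE.finditer(log_text)]
    return round(mean(losses), 3)

sample = ("2023-01-23 23:10:25.991076: step: 4/77, loss: 0.0033823195844888687 "
          "2023-01-23 23:10:27.311258: step: 8/77, loss: 0.006067562848329544")
print(epoch_mean_loss(sample))  # 0.005 for this two-step excerpt
```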
--learning_rate 2e-4 2023-01-23 23:14:12.425359: step: 4/77, loss: 0.0003183061198797077 2023-01-23 23:14:13.716331: step: 8/77, loss: 0.040560282766819 2023-01-23 23:14:15.019757: step: 12/77, loss: 0.04178724065423012 2023-01-23 23:14:16.293801: step: 16/77, loss: 0.01956675946712494 2023-01-23 23:14:17.553484: step: 20/77, loss: 0.06916426122188568 2023-01-23 23:14:18.884335: step: 24/77, loss: 6.770234449504642e-06 2023-01-23 23:14:20.151959: step: 28/77, loss: 0.00825644563883543 2023-01-23 23:14:21.493896: step: 32/77, loss: 0.00011275661381660029 2023-01-23 23:14:22.774866: step: 36/77, loss: 0.035464927554130554 2023-01-23 23:14:24.040292: step: 40/77, loss: 0.0007295497925952077 2023-01-23 23:14:25.300703: step: 44/77, loss: 7.648255996173248e-05 2023-01-23 23:14:26.584355: step: 48/77, loss: 0.005698735825717449 2023-01-23 23:14:27.902713: step: 52/77, loss: 0.00267980108037591 2023-01-23 23:14:29.208621: step: 56/77, loss: 0.004135094583034515 2023-01-23 23:14:30.500795: step: 60/77, loss: 0.0002119252021657303 2023-01-23 23:14:31.784789: step: 64/77, loss: 3.185726745869033e-05 2023-01-23 23:14:33.088206: step: 68/77, loss: 0.04375888407230377 2023-01-23 23:14:34.420852: step: 72/77, loss: 3.7466426874743775e-05 2023-01-23 23:14:35.750708: step: 76/77, loss: 1.2116726793465205e-05 2023-01-23 23:14:37.062749: step: 80/77, loss: 0.0002926643064711243 2023-01-23 23:14:38.362568: step: 84/77, loss: 0.005509909242391586 2023-01-23 23:14:39.717168: step: 88/77, loss: 0.02041170559823513 2023-01-23 23:14:41.015322: step: 92/77, loss: 0.0026900265365839005 2023-01-23 23:14:42.282961: step: 96/77, loss: 0.008952000178396702 2023-01-23 23:14:43.598997: step: 100/77, loss: 1.0245260455121752e-05 2023-01-23 23:14:44.897742: step: 104/77, loss: 4.525334134086734e-06 2023-01-23 23:14:46.165517: step: 108/77, loss: 4.0762461139820516e-05 2023-01-23 23:14:47.441351: step: 112/77, loss: 0.00012925347255077213 2023-01-23 23:14:48.718123: step: 116/77, loss: 0.033975325524806976 2023-01-23 23:14:50.019586: step: 120/77, loss: 0.002418851014226675 2023-01-23 23:14:51.315393: step: 124/77, loss: 0.03567567840218544 2023-01-23 23:14:52.582493: step: 128/77, loss: 0.0027435519732534885 2023-01-23 23:14:53.835844: step: 132/77, loss: 0.011629242449998856 2023-01-23 23:14:55.102287: step: 136/77, loss: 0.0009894848335534334 2023-01-23 23:14:56.412408: step: 140/77, loss: 0.007758474443107843 2023-01-23 23:14:57.708263: step: 144/77, loss: 0.012338060885667801 2023-01-23 23:14:59.038117: step: 148/77, loss: 0.008329820819199085 2023-01-23 23:15:00.307457: step: 152/77, loss: 0.0021699117496609688 2023-01-23 23:15:01.630180: step: 156/77, loss: 0.00035712175304070115 2023-01-23 23:15:02.942298: step: 160/77, loss: 0.001182187581434846 2023-01-23 23:15:04.230093: step: 164/77, loss: 0.22069135308265686 2023-01-23 23:15:05.528442: step: 168/77, loss: 0.0002262511698063463 2023-01-23 23:15:06.860092: step: 172/77, loss: 0.03320368751883507 2023-01-23 23:15:08.184489: step: 176/77, loss: 0.00016430506366305053 2023-01-23 23:15:09.510567: step: 180/77, loss: 0.017425213009119034 2023-01-23 23:15:10.845943: step: 184/77, loss: 0.0005045488360337913 2023-01-23 23:15:12.176699: step: 188/77, loss: 0.0009198979823850095 2023-01-23 23:15:13.524073: step: 192/77, loss: 0.004155697301030159 2023-01-23 23:15:14.821875: step: 196/77, loss: 0.009954680688679218 2023-01-23 23:15:16.135861: step: 200/77, loss: 0.035099174827337265 2023-01-23 23:15:17.444684: step: 204/77, loss: 0.010139863938093185 2023-01-23 
23:15:18.754399: step: 208/77, loss: 0.018720045685768127 2023-01-23 23:15:20.107332: step: 212/77, loss: 0.017439253628253937 2023-01-23 23:15:21.425210: step: 216/77, loss: 0.01844675838947296 2023-01-23 23:15:22.752808: step: 220/77, loss: 0.0011854919139295816 2023-01-23 23:15:24.064575: step: 224/77, loss: 0.006082172971218824 2023-01-23 23:15:25.390571: step: 228/77, loss: 0.004856535699218512 2023-01-23 23:15:26.704193: step: 232/77, loss: 0.00015938098658807576 2023-01-23 23:15:27.986639: step: 236/77, loss: 0.0014069630997255445 2023-01-23 23:15:29.300873: step: 240/77, loss: 9.374695946462452e-05 2023-01-23 23:15:30.575121: step: 244/77, loss: 0.01892547309398651 2023-01-23 23:15:31.903173: step: 248/77, loss: 0.0004414403811097145 2023-01-23 23:15:33.280589: step: 252/77, loss: 0.1034957766532898 2023-01-23 23:15:34.604566: step: 256/77, loss: 2.6847968911170028e-05 2023-01-23 23:15:35.951493: step: 260/77, loss: 0.05694052577018738 2023-01-23 23:15:37.296811: step: 264/77, loss: 0.0010568362195044756 2023-01-23 23:15:38.598507: step: 268/77, loss: 0.008948981761932373 2023-01-23 23:15:39.892903: step: 272/77, loss: 0.0004628953174687922 2023-01-23 23:15:41.216356: step: 276/77, loss: 0.0007866702508181334 2023-01-23 23:15:42.565850: step: 280/77, loss: 0.001445968635380268 2023-01-23 23:15:43.865940: step: 284/77, loss: 0.0001449974370189011 2023-01-23 23:15:45.157656: step: 288/77, loss: 0.0007071525906212628 2023-01-23 23:15:46.410660: step: 292/77, loss: 7.404296047752723e-05 2023-01-23 23:15:47.731853: step: 296/77, loss: 0.008954092860221863 2023-01-23 23:15:49.049568: step: 300/77, loss: 0.004929321818053722 2023-01-23 23:15:50.387822: step: 304/77, loss: 0.0034438367001712322 2023-01-23 23:15:51.646382: step: 308/77, loss: 0.0035439524799585342 2023-01-23 23:15:52.964863: step: 312/77, loss: 0.013492235913872719 2023-01-23 23:15:54.305142: step: 316/77, loss: 0.00016831423272378743 2023-01-23 23:15:55.619040: step: 320/77, loss: 0.07191120833158493 2023-01-23 23:15:56.918060: step: 324/77, loss: 0.0024667498655617237 2023-01-23 23:15:58.222122: step: 328/77, loss: 0.003168292809277773 2023-01-23 23:15:59.563349: step: 332/77, loss: 5.0638423999771476e-05 2023-01-23 23:16:00.910436: step: 336/77, loss: 0.005427079740911722 2023-01-23 23:16:02.165398: step: 340/77, loss: 0.12284014374017715 2023-01-23 23:16:03.397178: step: 344/77, loss: 0.012342492118477821 2023-01-23 23:16:04.686513: step: 348/77, loss: 0.0013616869691759348 2023-01-23 23:16:05.999534: step: 352/77, loss: 3.9185226341942325e-05 2023-01-23 23:16:07.328596: step: 356/77, loss: 0.04308632016181946 2023-01-23 23:16:08.654595: step: 360/77, loss: 0.004393482580780983 2023-01-23 23:16:10.026958: step: 364/77, loss: 0.0007518457132391632 2023-01-23 23:16:11.321943: step: 368/77, loss: 0.010810460895299911 2023-01-23 23:16:12.637044: step: 372/77, loss: 0.0010192915797233582 2023-01-23 23:16:13.969486: step: 376/77, loss: 2.2996693587629125e-05 2023-01-23 23:16:15.307461: step: 380/77, loss: 4.207800520816818e-05 2023-01-23 23:16:16.607419: step: 384/77, loss: 0.002860223175957799 2023-01-23 23:16:17.877409: step: 388/77, loss: 0.00014594709500670433 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 
0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5625, 'r': 0.016172506738544475, 'f1': 0.031441048034934506}, 'combined': 0.02155957579538366, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5625, 'r': 0.016172506738544475, 'f1': 0.031441048034934506}, 'combined': 0.02155957579538366, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.59375, 'r': 0.017070979335130278, 'f1': 0.03318777292576419}, 'combined': 0.0227573300062383, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 14 command: python train.py --model_name template 
--xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:17:58.365922: step: 4/77, loss: 0.0025070586707443 2023-01-23 23:17:59.643531: step: 8/77, loss: 0.0009089748491533101 2023-01-23 23:18:00.995147: step: 12/77, loss: 0.002720245625823736 2023-01-23 23:18:02.267574: step: 16/77, loss: 0.00035778438905254006 2023-01-23 23:18:03.583873: step: 20/77, loss: 0.0068440865725278854 2023-01-23 23:18:04.883439: step: 24/77, loss: 0.007065152749419212 2023-01-23 23:18:06.214284: step: 28/77, loss: 0.00010182852565776557 2023-01-23 23:18:07.497765: step: 32/77, loss: 0.0007257150718942285 2023-01-23 23:18:08.818263: step: 36/77, loss: 0.0004841696354560554 2023-01-23 23:18:10.112770: step: 40/77, loss: 0.00044304801849648356 2023-01-23 23:18:11.359437: step: 44/77, loss: 2.9887085474911146e-05 2023-01-23 23:18:12.672920: step: 48/77, loss: 0.04122829809784889 2023-01-23 23:18:13.992697: step: 52/77, loss: 1.3771279554930516e-05 2023-01-23 23:18:15.293290: step: 56/77, loss: 0.027065474539995193 2023-01-23 23:18:16.604736: step: 60/77, loss: 0.02150728926062584 2023-01-23 23:18:17.929073: step: 64/77, loss: 0.0026987313758581877 2023-01-23 23:18:19.238027: step: 68/77, loss: 9.448805940337479e-06 2023-01-23 23:18:20.562043: step: 72/77, loss: 0.031193898990750313 2023-01-23 23:18:21.823210: step: 76/77, loss: 0.0018287431448698044 2023-01-23 23:18:23.121932: step: 80/77, loss: 0.0008180844597518444 2023-01-23 23:18:24.410498: step: 84/77, loss: 0.006824632175266743 2023-01-23 23:18:25.704692: step: 88/77, loss: 0.004198823124170303 2023-01-23 23:18:27.022724: step: 92/77, loss: 0.05608784034848213 2023-01-23 23:18:28.307186: step: 96/77, loss: 0.003223490435630083 2023-01-23 23:18:29.639149: step: 100/77, loss: 0.0006620727945119143 2023-01-23 23:18:30.906927: step: 104/77, loss: 0.0002506496093701571 2023-01-23 23:18:32.165763: step: 108/77, loss: 0.0017600820865482092 2023-01-23 23:18:33.513903: step: 112/77, loss: 0.0040322570130229 2023-01-23 23:18:34.862527: step: 116/77, loss: 9.456691623199731e-05 2023-01-23 23:18:36.192428: step: 120/77, loss: 0.032587483525276184 2023-01-23 23:18:37.468087: step: 124/77, loss: 0.0019689786713570356 2023-01-23 23:18:38.789409: step: 128/77, loss: 0.0011962441494688392 2023-01-23 23:18:40.029871: step: 132/77, loss: 0.0035147222224622965 2023-01-23 23:18:41.310081: step: 136/77, loss: 0.0013859305763617158 2023-01-23 23:18:42.683203: step: 140/77, loss: 1.1076562259404454e-05 2023-01-23 23:18:43.970432: step: 144/77, loss: 0.0012191644636914134 2023-01-23 23:18:45.245462: step: 148/77, loss: 0.0005226008943282068 2023-01-23 23:18:46.520101: step: 152/77, loss: 0.010527782142162323 2023-01-23 23:18:47.850837: step: 156/77, loss: 0.03147884085774422 2023-01-23 23:18:49.139376: step: 160/77, loss: 0.02116863988339901 2023-01-23 23:18:50.483947: step: 164/77, loss: 0.0002335396275157109 2023-01-23 23:18:51.801324: step: 168/77, loss: 1.208573485200759e-05 2023-01-23 23:18:53.110024: step: 172/77, loss: 0.00584996584802866 2023-01-23 23:18:54.427372: step: 176/77, loss: 0.01264292374253273 2023-01-23 23:18:55.755259: step: 180/77, loss: 0.00016159848019015044 2023-01-23 23:18:57.047539: step: 184/77, loss: 4.650797927752137e-05 2023-01-23 23:18:58.301393: step: 188/77, loss: 0.0141700254753232 2023-01-23 23:18:59.572210: step: 192/77, loss: 0.0003191042342223227 2023-01-23 23:19:00.833254: step: 196/77, loss: 
0.0017583910375833511 2023-01-23 23:19:02.111339: step: 200/77, loss: 0.00024490864598192275 2023-01-23 23:19:03.403134: step: 204/77, loss: 0.0005271589034236968 2023-01-23 23:19:04.705196: step: 208/77, loss: 0.004894105717539787 2023-01-23 23:19:05.971676: step: 212/77, loss: 0.04371624067425728 2023-01-23 23:19:07.237836: step: 216/77, loss: 0.011503464542329311 2023-01-23 23:19:08.510171: step: 220/77, loss: 0.037097468972206116 2023-01-23 23:19:09.825855: step: 224/77, loss: 0.07253731042146683 2023-01-23 23:19:11.130641: step: 228/77, loss: 0.032016571611166 2023-01-23 23:19:12.480656: step: 232/77, loss: 0.0006164589431136847 2023-01-23 23:19:13.813408: step: 236/77, loss: 0.002216715831309557 2023-01-23 23:19:15.166717: step: 240/77, loss: 0.024763930588960648 2023-01-23 23:19:16.417087: step: 244/77, loss: 0.003464376088231802 2023-01-23 23:19:17.710045: step: 248/77, loss: 1.4835750334896147e-05 2023-01-23 23:19:18.967407: step: 252/77, loss: 0.0022979378700256348 2023-01-23 23:19:20.284584: step: 256/77, loss: 0.0008279864559881389 2023-01-23 23:19:21.580131: step: 260/77, loss: 0.0003143183421343565 2023-01-23 23:19:22.863622: step: 264/77, loss: 1.83588908839738e-05 2023-01-23 23:19:24.113816: step: 268/77, loss: 0.006365937180817127 2023-01-23 23:19:25.394762: step: 272/77, loss: 0.0013630648609250784 2023-01-23 23:19:26.701924: step: 276/77, loss: 0.00014090965851210058 2023-01-23 23:19:28.048384: step: 280/77, loss: 0.002885822206735611 2023-01-23 23:19:29.365812: step: 284/77, loss: 0.01567690260708332 2023-01-23 23:19:30.685264: step: 288/77, loss: 0.0012530626263469458 2023-01-23 23:19:31.966977: step: 292/77, loss: 0.002671103226020932 2023-01-23 23:19:33.290537: step: 296/77, loss: 0.0026012749876827 2023-01-23 23:19:34.539378: step: 300/77, loss: 0.03614744171500206 2023-01-23 23:19:35.807958: step: 304/77, loss: 1.5982883269316517e-05 2023-01-23 23:19:37.145967: step: 308/77, loss: 0.03939162939786911 2023-01-23 23:19:38.441336: step: 312/77, loss: 0.006850957404822111 2023-01-23 23:19:39.760333: step: 316/77, loss: 7.452460704371333e-05 2023-01-23 23:19:41.064356: step: 320/77, loss: 0.008703973144292831 2023-01-23 23:19:42.421849: step: 324/77, loss: 0.004363438580185175 2023-01-23 23:19:43.748435: step: 328/77, loss: 0.00017505805590189993 2023-01-23 23:19:45.057845: step: 332/77, loss: 0.0018185640219599009 2023-01-23 23:19:46.378808: step: 336/77, loss: 0.01590447686612606 2023-01-23 23:19:47.716609: step: 340/77, loss: 0.02160751074552536 2023-01-23 23:19:48.991468: step: 344/77, loss: 0.03739278391003609 2023-01-23 23:19:50.329513: step: 348/77, loss: 0.004457756876945496 2023-01-23 23:19:51.618222: step: 352/77, loss: 0.03349829837679863 2023-01-23 23:19:52.957091: step: 356/77, loss: 0.0016237336676567793 2023-01-23 23:19:54.220589: step: 360/77, loss: 0.006631201598793268 2023-01-23 23:19:55.501201: step: 364/77, loss: 0.0008796528563834727 2023-01-23 23:19:56.760755: step: 368/77, loss: 0.004901571664959192 2023-01-23 23:19:58.078180: step: 372/77, loss: 0.0009514682460576296 2023-01-23 23:19:59.346573: step: 376/77, loss: 0.001806218409910798 2023-01-23 23:20:00.692844: step: 380/77, loss: 0.004638840444386005 2023-01-23 23:20:01.986577: step: 384/77, loss: 0.0008214544504880905 2023-01-23 23:20:03.265335: step: 388/77, loss: 2.1455241949297488e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Chinese: {'template': {'p': 0.972972972972973, 'r': 0.5373134328358209, 'f1': 0.6923076923076924}, 'slot': {'p': 0.6, 'r': 0.0215633423180593, 'f1': 0.04163052905464007}, 'combined': 0.028821135499366206, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Korean: {'template': {'p': 0.972972972972973, 'r': 0.5373134328358209, 'f1': 0.6923076923076924}, 'slot': {'p': 0.6153846153846154, 'r': 0.0215633423180593, 'f1': 0.04166666666666667}, 'combined': 0.028846153846153855, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Russian: {'template': {'p': 0.972972972972973, 'r': 0.5373134328358209, 'f1': 0.6923076923076924}, 'slot': {'p': 0.6, 'r': 0.0215633423180593, 'f1': 0.04163052905464007}, 'combined': 0.028821135499366206, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:21:43.752187: step: 4/77, loss: 0.0339067205786705 2023-01-23 23:21:45.018959: step: 8/77, loss: 0.000985633465461433 2023-01-23 23:21:46.354688: step: 12/77, loss: 0.005644312594085932 2023-01-23 23:21:47.694654: step: 16/77, loss: 0.02237703837454319 2023-01-23 23:21:48.979038: step: 20/77, loss: 0.00017622807354200631 2023-01-23 23:21:50.270510: step: 24/77, loss: 3.434408790781163e-05 2023-01-23 23:21:51.525368: step: 28/77, loss: 0.0021593275014311075 2023-01-23 23:21:52.863378: step: 32/77, loss: 0.002325713401660323 2023-01-23 23:21:54.140153: step: 36/77, loss: 0.0001473966403864324 2023-01-23 23:21:55.424922: step: 40/77, loss: 0.00011630407243501395 2023-01-23 23:21:56.774154: step: 44/77, loss: 0.0056089069694280624 2023-01-23 23:21:58.103990: step: 48/77, loss: 0.0008513731881976128 2023-01-23 23:21:59.400198: step: 52/77, loss: 0.028125649318099022 2023-01-23 23:22:00.703053: step: 56/77, loss: 0.07658828794956207 2023-01-23 23:22:02.031934: step: 60/77, loss: 0.0002985998580697924 2023-01-23 23:22:03.383443: step: 64/77, loss: 0.01489199511706829 2023-01-23 23:22:04.699751: step: 68/77, loss: 0.02099183388054371 2023-01-23 23:22:05.992915: step: 72/77, loss: 1.1802884728240315e-05 2023-01-23 23:22:07.296880: step: 76/77, loss: 0.029980359598994255 2023-01-23 23:22:08.546595: step: 80/77, loss: 0.00036515307147055864 2023-01-23 23:22:09.850126: step: 84/77, loss: 0.028227414935827255 2023-01-23 23:22:11.160625: step: 88/77, loss: 0.0003867174091283232 2023-01-23 23:22:12.496281: step: 92/77, loss: 0.0001741455344017595 2023-01-23 23:22:13.805733: step: 96/77, loss: 0.019959811121225357 2023-01-23 23:22:15.108208: step: 100/77, loss: 3.03964097838616e-05 2023-01-23 23:22:16.367582: step: 104/77, loss: 0.002722999081015587 2023-01-23 23:22:17.657373: step: 108/77, loss: 0.004611394368112087 2023-01-23 23:22:18.955616: step: 112/77, loss: 0.0010619304375723004 2023-01-23 23:22:20.248809: step: 116/77, loss: 0.004179549403488636 2023-01-23 23:22:21.557675: step: 120/77, loss: 4.156599607085809e-05 2023-01-23 23:22:22.864129: step: 124/77, loss: 8.118282130453736e-05 2023-01-23 23:22:24.130203: step: 128/77, loss: 0.001088459393940866 2023-01-23 23:22:25.448851: step: 132/77, loss: 0.0002421422686893493 2023-01-23 23:22:26.778679: step: 136/77, loss: 6.279639637796208e-05 2023-01-23 23:22:28.077683: step: 140/77, loss: 0.008729882538318634 2023-01-23 23:22:29.364292: step: 144/77, loss: 0.0124210761860013 2023-01-23 23:22:30.645815: step: 148/77, loss: 0.0011566146276891232 2023-01-23 23:22:31.993558: step: 152/77, loss: 3.217463381588459e-05 2023-01-23 23:22:33.303017: step: 156/77, loss: 0.011010373942553997 2023-01-23 23:22:34.625458: step: 160/77, loss: 5.280670848151203e-06 2023-01-23 23:22:35.898477: step: 164/77, loss: 0.001265925238840282 2023-01-23 23:22:37.212277: step: 168/77, loss: 0.004644365515559912 2023-01-23 23:22:38.540788: step: 172/77, loss: 0.0004617736558429897 2023-01-23 23:22:39.784192: step: 176/77, loss: 0.0005241798353381455 2023-01-23 23:22:41.040423: step: 180/77, loss: 0.0013182410039007664 2023-01-23 23:22:42.346647: step: 184/77, loss: 2.7897960535483435e-05 
2023-01-23 23:22:43.659719: step: 188/77, loss: 0.0011652555549517274 2023-01-23 23:22:45.020061: step: 192/77, loss: 0.03303450718522072 2023-01-23 23:22:46.336774: step: 196/77, loss: 0.00035298606962896883 2023-01-23 23:22:47.643681: step: 200/77, loss: 0.0004992393078282475 2023-01-23 23:22:48.897360: step: 204/77, loss: 0.0003858382988255471 2023-01-23 23:22:50.190891: step: 208/77, loss: 0.00010792753892019391 2023-01-23 23:22:51.467682: step: 212/77, loss: 0.007372554857283831 2023-01-23 23:22:52.764559: step: 216/77, loss: 0.03849584981799126 2023-01-23 23:22:54.078881: step: 220/77, loss: 0.023224491626024246 2023-01-23 23:22:55.376935: step: 224/77, loss: 0.02985999919474125 2023-01-23 23:22:56.714266: step: 228/77, loss: 0.019635101780295372 2023-01-23 23:22:58.039126: step: 232/77, loss: 0.040794577449560165 2023-01-23 23:22:59.306663: step: 236/77, loss: 0.00278811389580369 2023-01-23 23:23:00.630472: step: 240/77, loss: 0.0011358339106664062 2023-01-23 23:23:01.943429: step: 244/77, loss: 0.0002098227705573663 2023-01-23 23:23:03.251360: step: 248/77, loss: 0.004070743452757597 2023-01-23 23:23:04.586365: step: 252/77, loss: 0.00046853855019435287 2023-01-23 23:23:05.905453: step: 256/77, loss: 0.004174651578068733 2023-01-23 23:23:07.203080: step: 260/77, loss: 0.04534554481506348 2023-01-23 23:23:08.483089: step: 264/77, loss: 0.0013716259272769094 2023-01-23 23:23:09.814229: step: 268/77, loss: 0.008802559226751328 2023-01-23 23:23:11.116390: step: 272/77, loss: 0.0013205144787207246 2023-01-23 23:23:12.433596: step: 276/77, loss: 0.002434749389067292 2023-01-23 23:23:13.795682: step: 280/77, loss: 0.0026853452436625957 2023-01-23 23:23:15.080321: step: 284/77, loss: 0.01034157257527113 2023-01-23 23:23:16.381540: step: 288/77, loss: 0.0013206511503085494 2023-01-23 23:23:17.667061: step: 292/77, loss: 0.0029643403831869364 2023-01-23 23:23:18.947627: step: 296/77, loss: 0.0017079674871638417 2023-01-23 23:23:20.254668: step: 300/77, loss: 0.002972458256408572 2023-01-23 23:23:21.631914: step: 304/77, loss: 0.00101885583717376 2023-01-23 23:23:22.941275: step: 308/77, loss: 0.005774295423179865 2023-01-23 23:23:24.285123: step: 312/77, loss: 0.05470731854438782 2023-01-23 23:23:25.569649: step: 316/77, loss: 0.0007659116527065635 2023-01-23 23:23:26.898504: step: 320/77, loss: 0.001400307402946055 2023-01-23 23:23:28.220171: step: 324/77, loss: 0.04949542135000229 2023-01-23 23:23:29.526374: step: 328/77, loss: 0.0005832273163832724 2023-01-23 23:23:30.836912: step: 332/77, loss: 0.000931663264054805 2023-01-23 23:23:32.170525: step: 336/77, loss: 8.43062880448997e-05 2023-01-23 23:23:33.470767: step: 340/77, loss: 0.00026131156482733786 2023-01-23 23:23:34.765915: step: 344/77, loss: 1.9478706235531718e-05 2023-01-23 23:23:36.110086: step: 348/77, loss: 8.584909664932638e-05 2023-01-23 23:23:37.372757: step: 352/77, loss: 0.000725393183529377 2023-01-23 23:23:38.656372: step: 356/77, loss: 0.03045252338051796 2023-01-23 23:23:39.962012: step: 360/77, loss: 0.0006746129947714508 2023-01-23 23:23:41.269993: step: 364/77, loss: 1.7055519492714666e-05 2023-01-23 23:23:42.567852: step: 368/77, loss: 0.06524211168289185 2023-01-23 23:23:43.908677: step: 372/77, loss: 0.007068153936415911 2023-01-23 23:23:45.201366: step: 376/77, loss: 0.0007754460093565285 2023-01-23 23:23:46.485503: step: 380/77, loss: 0.005678238812834024 2023-01-23 23:23:47.812422: step: 384/77, loss: 0.007888175547122955 2023-01-23 23:23:49.114605: step: 388/77, loss: 0.03222333639860153 
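(Reading the evaluation dicts that follow: `template` and `slot` each report standard precision/recall/F1, and the printed `combined` value matches the product of the template F1 and the slot F1, e.g. 0.7368421… × 0.0702987… ≈ 0.0517990…. The sketch below reproduces those numbers; the function names are mine and the formula is inferred from the printed values, not taken from the training code.)

```python
def f1(p: float, r: float) -> float:
    """Standard F1 from precision and recall."""
    return 2 * p * r / (p + r) if (p + r) else 0.0

def combined_score(template: dict, slot: dict) -> float:
    """'combined' in this log equals template F1 * slot F1 (inferred)."""
    return template["f1"] * slot["f1"]

# Example using the Dev figures printed for this epoch:
template = {"p": 1.0, "r": 0.5833333333333334}
template["f1"] = f1(template["p"], template["r"])   # ~0.7368421052631579
slot = {"p": 0.5, "r": 0.03780718336483932}
slot["f1"] = f1(slot["p"], slot["r"])               # ~0.07029876977152899
print(combined_score(template, slot))               # ~0.05179909351586346
```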
================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5223880597014925, 'f1': 0.6796116504854368}, 'slot': {'p': 0.5666666666666667, 'r': 0.015274034141958671, 'f1': 0.029746281714785654}, 'combined': 0.02021591961199025, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5223880597014925, 'f1': 0.6796116504854368}, 'slot': {'p': 0.5806451612903226, 'r': 0.016172506738544475, 'f1': 0.03146853146853148}, 'combined': 0.021386380609681586, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5223880597014925, 'f1': 0.6796116504854368}, 'slot': {'p': 0.5806451612903226, 'r': 0.016172506738544475, 'f1': 0.03146853146853148}, 'combined': 0.021386380609681586, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 
0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:25:29.723182: step: 4/77, loss: 0.05094633251428604 2023-01-23 23:25:30.971174: step: 8/77, loss: 0.016016079112887383 2023-01-23 23:25:32.342982: step: 12/77, loss: 9.06312998267822e-05 2023-01-23 23:25:33.625706: step: 16/77, loss: 0.004453817382454872 2023-01-23 23:25:34.893877: step: 20/77, loss: 0.05539938434958458 2023-01-23 23:25:36.197135: step: 24/77, loss: 0.012972598895430565 2023-01-23 23:25:37.513030: step: 28/77, loss: 0.0011476046638563275 2023-01-23 23:25:38.849740: step: 32/77, loss: 0.004908998031169176 2023-01-23 23:25:40.156902: step: 36/77, loss: 0.004188721068203449 2023-01-23 23:25:41.494923: step: 40/77, loss: 0.16545259952545166 2023-01-23 23:25:42.804228: step: 44/77, loss: 0.031235119327902794 2023-01-23 23:25:44.113223: step: 48/77, loss: 0.004235537722706795 2023-01-23 23:25:45.447515: step: 52/77, loss: 0.0013708991464227438 2023-01-23 23:25:46.760047: step: 56/77, loss: 0.0005379368085414171 2023-01-23 23:25:48.031368: step: 60/77, loss: 0.016450677067041397 2023-01-23 23:25:49.310116: step: 64/77, loss: 0.00257531157694757 2023-01-23 23:25:50.575937: step: 68/77, loss: 0.0007961698574945331 2023-01-23 23:25:51.893874: step: 72/77, loss: 0.0009506650967523456 2023-01-23 23:25:53.228716: step: 76/77, loss: 1.0695758646761533e-05 2023-01-23 23:25:54.542007: step: 80/77, loss: 0.02750951610505581 2023-01-23 23:25:55.875824: step: 84/77, loss: 0.00876179151237011 2023-01-23 23:25:57.192764: step: 88/77, loss: 0.004473384935408831 2023-01-23 23:25:58.528577: step: 92/77, loss: 0.018638458102941513 2023-01-23 23:25:59.805308: step: 96/77, loss: 0.020320996642112732 2023-01-23 23:26:01.126822: step: 100/77, loss: 0.00042116676922887564 2023-01-23 23:26:02.512215: step: 104/77, loss: 0.013269875198602676 2023-01-23 23:26:03.859921: step: 108/77, loss: 0.0019854374695569277 2023-01-23 23:26:05.145817: step: 112/77, loss: 0.061921678483486176 2023-01-23 23:26:06.479226: step: 116/77, loss: 0.07140583544969559 2023-01-23 23:26:07.795886: step: 120/77, loss: 0.0031149161513894796 2023-01-23 23:26:09.116311: step: 124/77, loss: 0.0018452100921422243 2023-01-23 23:26:10.421120: step: 128/77, loss: 2.8724532967316918e-05 2023-01-23 23:26:11.708622: step: 132/77, loss: 0.019663723185658455 2023-01-23 23:26:13.060290: step: 136/77, loss: 0.007577202282845974 2023-01-23 23:26:14.305152: step: 140/77, loss: 2.6508414521231316e-06 2023-01-23 23:26:15.597177: step: 144/77, loss: 3.179639679729007e-05 2023-01-23 23:26:16.864110: step: 148/77, loss: 0.0009142406051978469 2023-01-23 23:26:18.197543: step: 152/77, loss: 0.06840498745441437 2023-01-23 23:26:19.483081: step: 156/77, loss: 1.7841080989455804e-05 2023-01-23 23:26:20.782662: step: 160/77, loss: 0.03614654392004013 2023-01-23 23:26:22.046693: step: 164/77, loss: 0.0021394919604063034 2023-01-23 23:26:23.337813: step: 168/77, loss: 8.524881195626222e-06 2023-01-23 23:26:24.615426: step: 172/77, loss: 0.0006625548703595996 2023-01-23 23:26:25.884517: step: 176/77, loss: 
0.000172696789377369 2023-01-23 23:26:27.166584: step: 180/77, loss: 0.00027654992300085723 2023-01-23 23:26:28.457167: step: 184/77, loss: 0.003033407498151064 2023-01-23 23:26:29.803624: step: 188/77, loss: 0.003654744243249297 2023-01-23 23:26:31.129107: step: 192/77, loss: 0.000612216885201633 2023-01-23 23:26:32.408488: step: 196/77, loss: 0.004389288369566202 2023-01-23 23:26:33.654128: step: 200/77, loss: 0.0975179523229599 2023-01-23 23:26:34.954286: step: 204/77, loss: 4.933265518047847e-05 2023-01-23 23:26:36.274366: step: 208/77, loss: 0.0010515564354136586 2023-01-23 23:26:37.548586: step: 212/77, loss: 0.05458948016166687 2023-01-23 23:26:38.869688: step: 216/77, loss: 0.007991497404873371 2023-01-23 23:26:40.119987: step: 220/77, loss: 0.0030231704004108906 2023-01-23 23:26:41.419180: step: 224/77, loss: 0.012408047914505005 2023-01-23 23:26:42.760003: step: 228/77, loss: 0.0007940920768305659 2023-01-23 23:26:44.086649: step: 232/77, loss: 0.0036044989246875048 2023-01-23 23:26:45.403989: step: 236/77, loss: 0.012100227177143097 2023-01-23 23:26:46.714887: step: 240/77, loss: 0.01591656729578972 2023-01-23 23:26:48.051080: step: 244/77, loss: 9.944305929820985e-05 2023-01-23 23:26:49.359677: step: 248/77, loss: 0.03312674164772034 2023-01-23 23:26:50.680325: step: 252/77, loss: 0.00335933780297637 2023-01-23 23:26:51.982414: step: 256/77, loss: 0.002072680974379182 2023-01-23 23:26:53.244338: step: 260/77, loss: 0.005116648506373167 2023-01-23 23:26:54.492869: step: 264/77, loss: 0.0030165542848408222 2023-01-23 23:26:55.783610: step: 268/77, loss: 0.0020216633565723896 2023-01-23 23:26:57.081582: step: 272/77, loss: 0.04804634675383568 2023-01-23 23:26:58.355838: step: 276/77, loss: 0.013111795298755169 2023-01-23 23:26:59.736824: step: 280/77, loss: 0.00015685016114730388 2023-01-23 23:27:01.077816: step: 284/77, loss: 4.8443831474287435e-05 2023-01-23 23:27:02.403125: step: 288/77, loss: 0.0008266958757303655 2023-01-23 23:27:03.702731: step: 292/77, loss: 0.013310312293469906 2023-01-23 23:27:05.009279: step: 296/77, loss: 0.007367967162281275 2023-01-23 23:27:06.331777: step: 300/77, loss: 0.0004364282067399472 2023-01-23 23:27:07.662273: step: 304/77, loss: 6.982243576203473e-06 2023-01-23 23:27:08.955323: step: 308/77, loss: 0.0018932627281174064 2023-01-23 23:27:10.247018: step: 312/77, loss: 0.009672918356955051 2023-01-23 23:27:11.577082: step: 316/77, loss: 0.0009078417788259685 2023-01-23 23:27:12.871356: step: 320/77, loss: 2.581998887762893e-05 2023-01-23 23:27:14.173212: step: 324/77, loss: 3.023641329491511e-05 2023-01-23 23:27:15.457566: step: 328/77, loss: 0.05564001575112343 2023-01-23 23:27:16.778555: step: 332/77, loss: 0.03747791051864624 2023-01-23 23:27:18.063624: step: 336/77, loss: 0.00712593412026763 2023-01-23 23:27:19.397082: step: 340/77, loss: 0.0008041571127250791 2023-01-23 23:27:20.722906: step: 344/77, loss: 0.0017460859380662441 2023-01-23 23:27:22.026578: step: 348/77, loss: 0.022884294390678406 2023-01-23 23:27:23.363654: step: 352/77, loss: 0.0002562448207754642 2023-01-23 23:27:24.667470: step: 356/77, loss: 0.0006773895001970232 2023-01-23 23:27:25.981752: step: 360/77, loss: 0.0025339152198284864 2023-01-23 23:27:27.311530: step: 364/77, loss: 0.00035924420808441937 2023-01-23 23:27:28.644796: step: 368/77, loss: 0.01046315673738718 2023-01-23 23:27:29.987228: step: 372/77, loss: 0.011791563592851162 2023-01-23 23:27:31.302647: step: 376/77, loss: 0.004284712485969067 2023-01-23 23:27:32.623253: step: 380/77, loss: 
0.028272613883018494 2023-01-23 23:27:33.945725: step: 384/77, loss: 0.0030928582418709993 2023-01-23 23:27:35.251028: step: 388/77, loss: 3.754132194444537e-05 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.948051948051948, 'r': 0.5447761194029851, 'f1': 0.6919431279620855}, 'slot': {'p': 0.5714285714285714, 'r': 0.0215633423180593, 'f1': 0.04155844155844156}, 'combined': 0.028756078045177578, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.948051948051948, 'r': 0.5447761194029851, 'f1': 0.6919431279620855}, 'slot': {'p': 0.5714285714285714, 'r': 0.0215633423180593, 'f1': 0.04155844155844156}, 'combined': 0.028756078045177578, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.948051948051948, 'r': 0.5447761194029851, 'f1': 0.6919431279620855}, 'slot': {'p': 0.5641025641025641, 'r': 0.019766397124887692, 'f1': 0.03819444444444445}, 'combined': 0.02642838335966299, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:29:16.097732: step: 4/77, loss: 4.712946247309446e-05 2023-01-23 23:29:17.412327: step: 8/77, loss: 0.002000941429287195 2023-01-23 23:29:18.716610: step: 12/77, loss: 5.53809786651982e-06 2023-01-23 23:29:20.019800: step: 16/77, loss: 0.001969966571778059 2023-01-23 23:29:21.308424: step: 20/77, loss: 9.836013305175584e-06 2023-01-23 23:29:22.587989: step: 24/77, loss: 6.016323368385201e-06 2023-01-23 23:29:23.905814: step: 28/77, loss: 0.0003786985471379012 2023-01-23 23:29:25.225936: step: 32/77, loss: 0.008694401942193508 2023-01-23 23:29:26.514415: step: 36/77, loss: 0.0004617235972546041 2023-01-23 23:29:27.817613: step: 40/77, loss: 0.0008308365941047668 2023-01-23 23:29:29.079186: step: 44/77, loss: 0.0004890845157206059 2023-01-23 23:29:30.428332: step: 48/77, loss: 0.0028582194354385138 2023-01-23 23:29:31.729244: step: 52/77, loss: 5.671056351275183e-05 2023-01-23 23:29:33.021353: step: 56/77, loss: 0.005951540544629097 2023-01-23 23:29:34.318199: step: 60/77, loss: 1.183111976388318e-06 2023-01-23 23:29:35.637216: step: 64/77, loss: 0.0020196991972625256 2023-01-23 23:29:36.960780: step: 68/77, loss: 0.002741363365203142 2023-01-23 23:29:38.266188: step: 72/77, loss: 0.00334263127297163 2023-01-23 23:29:39.534150: step: 76/77, loss: 0.0001288486091652885 2023-01-23 23:29:40.853096: step: 80/77, loss: 0.11691176891326904 2023-01-23 23:29:42.159180: step: 84/77, loss: 0.002223538001999259 2023-01-23 23:29:43.454928: step: 88/77, loss: 0.011185433715581894 2023-01-23 23:29:44.752553: step: 92/77, loss: 6.673255847999826e-05 2023-01-23 23:29:46.079043: step: 96/77, loss: 0.01403880026191473 2023-01-23 23:29:47.376902: step: 100/77, loss: 0.03029460832476616 2023-01-23 23:29:48.631984: step: 104/77, loss: 0.00014710072719026357 2023-01-23 23:29:49.888972: step: 108/77, loss: 0.00015252029697876424 2023-01-23 23:29:51.147736: step: 112/77, loss: 0.01632443442940712 2023-01-23 23:29:52.473024: step: 116/77, loss: 0.00362035003490746 2023-01-23 23:29:53.799408: step: 120/77, loss: 0.00628438638523221 2023-01-23 23:29:55.134611: step: 124/77, loss: 0.00045070049236528575 2023-01-23 23:29:56.429420: step: 128/77, loss: 0.00020078939269296825 2023-01-23 23:29:57.771709: step: 132/77, loss: 0.002593017416074872 2023-01-23 23:29:59.026453: step: 136/77, loss: 1.6041687558754347e-05 2023-01-23 23:30:00.410886: step: 140/77, loss: 0.0005025569698773324 2023-01-23 23:30:01.744231: step: 144/77, loss: 0.0067786527797579765 2023-01-23 23:30:03.082835: step: 148/77, loss: 0.0007784969639033079 2023-01-23 23:30:04.442962: step: 152/77, loss: 0.00020644588221330196 2023-01-23 23:30:05.752068: step: 156/77, loss: 0.0005968852783553302 2023-01-23 23:30:07.062213: step: 160/77, loss: 0.0005378980422392488 2023-01-23 
23:30:08.343901: step: 164/77, loss: 0.008257530629634857 2023-01-23 23:30:09.714362: step: 168/77, loss: 0.006465138401836157 2023-01-23 23:30:10.994522: step: 172/77, loss: 5.838445576955564e-05 2023-01-23 23:30:12.287378: step: 176/77, loss: 0.06327299028635025 2023-01-23 23:30:13.575417: step: 180/77, loss: 0.000856110651511699 2023-01-23 23:30:14.875685: step: 184/77, loss: 0.0012292754836380482 2023-01-23 23:30:16.179720: step: 188/77, loss: 0.0006952299736440182 2023-01-23 23:30:17.484038: step: 192/77, loss: 1.1477966836537234e-05 2023-01-23 23:30:18.815371: step: 196/77, loss: 0.0002503438445273787 2023-01-23 23:30:20.149570: step: 200/77, loss: 0.017838360741734505 2023-01-23 23:30:21.455313: step: 204/77, loss: 0.0012066513299942017 2023-01-23 23:30:22.712573: step: 208/77, loss: 0.0015220101922750473 2023-01-23 23:30:24.035221: step: 212/77, loss: 0.0004304148897062987 2023-01-23 23:30:25.334148: step: 216/77, loss: 0.0018209205009043217 2023-01-23 23:30:26.671878: step: 220/77, loss: 0.03783602640032768 2023-01-23 23:30:27.977080: step: 224/77, loss: 0.0006194835295900702 2023-01-23 23:30:29.293640: step: 228/77, loss: 0.005525450222194195 2023-01-23 23:30:30.549429: step: 232/77, loss: 0.00037290374166332185 2023-01-23 23:30:31.883130: step: 236/77, loss: 0.00038193512591533363 2023-01-23 23:30:33.159203: step: 240/77, loss: 0.00047047025873325765 2023-01-23 23:30:34.461103: step: 244/77, loss: 0.004478194285184145 2023-01-23 23:30:35.757576: step: 248/77, loss: 0.0015100076561793685 2023-01-23 23:30:37.079472: step: 252/77, loss: 0.0014932039193809032 2023-01-23 23:30:38.399633: step: 256/77, loss: 0.00242552999407053 2023-01-23 23:30:39.682115: step: 260/77, loss: 0.0003645646502263844 2023-01-23 23:30:41.060613: step: 264/77, loss: 2.7907240109925624e-06 2023-01-23 23:30:42.391067: step: 268/77, loss: 0.005610452964901924 2023-01-23 23:30:43.720221: step: 272/77, loss: 0.0010953948367387056 2023-01-23 23:30:45.005818: step: 276/77, loss: 0.059296153485774994 2023-01-23 23:30:46.323060: step: 280/77, loss: 0.0027845720760524273 2023-01-23 23:30:47.662546: step: 284/77, loss: 0.02690986916422844 2023-01-23 23:30:48.930448: step: 288/77, loss: 0.007932419888675213 2023-01-23 23:30:50.252610: step: 292/77, loss: 7.73666615714319e-05 2023-01-23 23:30:51.593040: step: 296/77, loss: 0.0003109094104729593 2023-01-23 23:30:52.871801: step: 300/77, loss: 0.16546620428562164 2023-01-23 23:30:54.171630: step: 304/77, loss: 0.0034834558609873056 2023-01-23 23:30:55.429708: step: 308/77, loss: 0.0013043548678979278 2023-01-23 23:30:56.717803: step: 312/77, loss: 0.0002218848094344139 2023-01-23 23:30:58.035512: step: 316/77, loss: 0.3924459218978882 2023-01-23 23:30:59.375629: step: 320/77, loss: 0.0009078291477635503 2023-01-23 23:31:00.683176: step: 324/77, loss: 0.0002599633007775992 2023-01-23 23:31:02.036541: step: 328/77, loss: 0.0053293174132704735 2023-01-23 23:31:03.334683: step: 332/77, loss: 0.00010220670083072037 2023-01-23 23:31:04.648032: step: 336/77, loss: 0.03909461200237274 2023-01-23 23:31:05.937044: step: 340/77, loss: 0.0007392231491394341 2023-01-23 23:31:07.319992: step: 344/77, loss: 0.003251375164836645 2023-01-23 23:31:08.620940: step: 348/77, loss: 6.195087917149067e-05 2023-01-23 23:31:09.930130: step: 352/77, loss: 0.0010424887295812368 2023-01-23 23:31:11.201020: step: 356/77, loss: 0.0004965576226823032 2023-01-23 23:31:12.493852: step: 360/77, loss: 0.005341102834790945 2023-01-23 23:31:13.777899: step: 364/77, loss: 0.0003410697099752724 2023-01-23 
23:31:15.093048: step: 368/77, loss: 0.00011480034299893305 2023-01-23 23:31:16.432305: step: 372/77, loss: 0.0009775557555258274 2023-01-23 23:31:17.796196: step: 376/77, loss: 0.02208123356103897 2023-01-23 23:31:19.096567: step: 380/77, loss: 0.21449889242649078 2023-01-23 23:31:20.387693: step: 384/77, loss: 0.008393362164497375 2023-01-23 23:31:21.682653: step: 388/77, loss: 0.036982741206884384 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03213610586011342, 'f1': 0.06039076376554175}, 'combined': 0.04201096609776818, 'epoch': 17} Test Chinese: {'template': {'p': 0.9014084507042254, 'r': 0.47761194029850745, 'f1': 0.6243902439024391}, 'slot': {'p': 0.4594594594594595, 'r': 0.015274034141958671, 'f1': 0.02956521739130435}, 'combined': 0.01846023329798516, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03213610586011342, 'f1': 0.06039076376554175}, 'combined': 0.04201096609776818, 'epoch': 17} Test Korean: {'template': {'p': 0.9014084507042254, 'r': 0.47761194029850745, 'f1': 0.6243902439024391}, 'slot': {'p': 0.47368421052631576, 'r': 0.016172506738544475, 'f1': 0.03127715030408341}, 'combined': 0.019529147506939886, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03213610586011342, 'f1': 0.06039076376554175}, 'combined': 0.04201096609776818, 'epoch': 17} Test Russian: {'template': {'p': 0.9154929577464789, 'r': 0.48507462686567165, 'f1': 0.6341463414634146}, 'slot': {'p': 0.4722222222222222, 'r': 0.015274034141958671, 'f1': 0.029590948651000874}, 'combined': 0.01876499182746397, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 
'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:33:02.287186: step: 4/77, loss: 0.004367659334093332 2023-01-23 23:33:03.603664: step: 8/77, loss: 0.01038439478725195 2023-01-23 23:33:04.891568: step: 12/77, loss: 2.6343550416640937e-05 2023-01-23 23:33:06.165546: step: 16/77, loss: 0.0012721351813524961 2023-01-23 23:33:07.434119: step: 20/77, loss: 0.03590906783938408 2023-01-23 23:33:08.744172: step: 24/77, loss: 0.001787964254617691 2023-01-23 23:33:10.025892: step: 28/77, loss: 9.45565443544183e-06 2023-01-23 23:33:11.343478: step: 32/77, loss: 0.002850484335795045 2023-01-23 23:33:12.623617: step: 36/77, loss: 0.027393924072384834 2023-01-23 23:33:13.940992: step: 40/77, loss: 0.003990638069808483 2023-01-23 23:33:15.251741: step: 44/77, loss: 2.9969392926432192e-05 2023-01-23 23:33:16.521622: step: 48/77, loss: 0.00023694556148257107 2023-01-23 23:33:17.831273: step: 52/77, loss: 0.0016057910397648811 2023-01-23 23:33:19.118375: step: 56/77, loss: 7.71491540945135e-05 2023-01-23 23:33:20.408221: step: 60/77, loss: 0.0019408424850553274 2023-01-23 23:33:21.712966: step: 64/77, loss: 2.2850603272672743e-05 2023-01-23 23:33:23.015110: step: 68/77, loss: 0.003779355902224779 2023-01-23 23:33:24.352419: step: 72/77, loss: 0.001685172668658197 2023-01-23 23:33:25.696060: step: 76/77, loss: 0.0012687535490840673 2023-01-23 23:33:27.039941: step: 80/77, loss: 0.008859056048095226 2023-01-23 23:33:28.338912: step: 84/77, loss: 0.04166802018880844 2023-01-23 23:33:29.635123: step: 88/77, loss: 0.023681601509451866 2023-01-23 23:33:30.934782: step: 92/77, loss: 0.002396229188889265 2023-01-23 23:33:32.240502: step: 96/77, loss: 0.0005476967780850828 2023-01-23 23:33:33.574893: step: 100/77, loss: 0.010149845853447914 2023-01-23 23:33:34.915761: step: 104/77, loss: 4.5273111027199775e-05 2023-01-23 23:33:36.208056: step: 108/77, loss: 0.0003784815198741853 2023-01-23 23:33:37.455978: step: 112/77, loss: 0.0028103527147322893 2023-01-23 23:33:38.763856: step: 116/77, loss: 0.0007598382653668523 2023-01-23 23:33:40.074499: step: 120/77, loss: 0.0029842997901141644 2023-01-23 23:33:41.331360: step: 124/77, loss: 0.00048490293556824327 2023-01-23 23:33:42.662914: step: 128/77, loss: 0.002160892356187105 2023-01-23 23:33:43.974644: step: 132/77, loss: 0.002475053770467639 2023-01-23 23:33:45.293475: step: 136/77, loss: 0.00031813987880013883 2023-01-23 23:33:46.554518: step: 140/77, loss: 1.4521887351293117e-05 2023-01-23 23:33:47.816435: step: 144/77, loss: 0.0018738629296422005 2023-01-23 23:33:49.132409: step: 148/77, loss: 0.0006999174365773797 2023-01-23 23:33:50.446072: step: 152/77, 
loss: 0.006983298808336258 2023-01-23 23:33:51.744123: step: 156/77, loss: 0.0010001725750043988 2023-01-23 23:33:53.020812: step: 160/77, loss: 0.0005602678284049034 2023-01-23 23:33:54.334680: step: 164/77, loss: 0.04589071869850159 2023-01-23 23:33:55.634545: step: 168/77, loss: 0.0003480600717011839 2023-01-23 23:33:56.935185: step: 172/77, loss: 7.911981811048463e-06 2023-01-23 23:33:58.252649: step: 176/77, loss: 0.004396666772663593 2023-01-23 23:33:59.537297: step: 180/77, loss: 0.03344809636473656 2023-01-23 23:34:00.873095: step: 184/77, loss: 0.0002191819075960666 2023-01-23 23:34:02.223924: step: 188/77, loss: 0.0014253761619329453 2023-01-23 23:34:03.543921: step: 192/77, loss: 0.05388971418142319 2023-01-23 23:34:04.913541: step: 196/77, loss: 0.001033472130075097 2023-01-23 23:34:06.233612: step: 200/77, loss: 0.0002074778894893825 2023-01-23 23:34:07.521903: step: 204/77, loss: 0.001877657719887793 2023-01-23 23:34:08.803115: step: 208/77, loss: 0.001743351574987173 2023-01-23 23:34:10.096322: step: 212/77, loss: 0.00012812843488063663 2023-01-23 23:34:11.369862: step: 216/77, loss: 0.0215502567589283 2023-01-23 23:34:12.677397: step: 220/77, loss: 3.8145283269841457e-06 2023-01-23 23:34:14.002949: step: 224/77, loss: 0.0010601211106404662 2023-01-23 23:34:15.300834: step: 228/77, loss: 0.0022998847998678684 2023-01-23 23:34:16.577364: step: 232/77, loss: 0.007071727886795998 2023-01-23 23:34:17.908713: step: 236/77, loss: 0.00010882413334911689 2023-01-23 23:34:19.237400: step: 240/77, loss: 0.0014115388039499521 2023-01-23 23:34:20.519509: step: 244/77, loss: 0.035735324025154114 2023-01-23 23:34:21.764662: step: 248/77, loss: 1.12286361400038e-05 2023-01-23 23:34:23.079931: step: 252/77, loss: 0.04284053295850754 2023-01-23 23:34:24.461495: step: 256/77, loss: 0.0025267975870519876 2023-01-23 23:34:25.765959: step: 260/77, loss: 0.0003095833817496896 2023-01-23 23:34:27.074445: step: 264/77, loss: 0.004611496813595295 2023-01-23 23:34:28.334186: step: 268/77, loss: 0.0005131922662258148 2023-01-23 23:34:29.665872: step: 272/77, loss: 0.005772388074547052 2023-01-23 23:34:31.003094: step: 276/77, loss: 0.001374881248921156 2023-01-23 23:34:32.307980: step: 280/77, loss: 0.0005107524339109659 2023-01-23 23:34:33.601920: step: 284/77, loss: 0.0014153217198327184 2023-01-23 23:34:34.923608: step: 288/77, loss: 0.00014606077456846833 2023-01-23 23:34:36.212629: step: 292/77, loss: 0.01991288922727108 2023-01-23 23:34:37.517615: step: 296/77, loss: 0.016153793781995773 2023-01-23 23:34:38.850464: step: 300/77, loss: 0.07304572314023972 2023-01-23 23:34:40.140903: step: 304/77, loss: 0.00026726553915068507 2023-01-23 23:34:41.457914: step: 308/77, loss: 0.002561915433034301 2023-01-23 23:34:42.795537: step: 312/77, loss: 0.0008868585573509336 2023-01-23 23:34:44.134265: step: 316/77, loss: 0.0014801579527556896 2023-01-23 23:34:45.418421: step: 320/77, loss: 0.000655533978715539 2023-01-23 23:34:46.668622: step: 324/77, loss: 0.002514325315132737 2023-01-23 23:34:47.955532: step: 328/77, loss: 5.803751719213324e-06 2023-01-23 23:34:49.243935: step: 332/77, loss: 0.00031647857395000756 2023-01-23 23:34:50.530379: step: 336/77, loss: 0.0026181014254689217 2023-01-23 23:34:51.854830: step: 340/77, loss: 0.00027506123296916485 2023-01-23 23:34:53.135103: step: 344/77, loss: 0.003296251641586423 2023-01-23 23:34:54.466362: step: 348/77, loss: 0.0004255310632288456 2023-01-23 23:34:55.756029: step: 352/77, loss: 0.06913476437330246 2023-01-23 23:34:57.037266: step: 356/77, loss: 
0.0009715624619275331 2023-01-23 23:34:58.296471: step: 360/77, loss: 0.000134461690322496 2023-01-23 23:34:59.614833: step: 364/77, loss: 4.3554922740440816e-05 2023-01-23 23:35:00.883378: step: 368/77, loss: 8.07677861303091e-05 2023-01-23 23:35:02.211213: step: 372/77, loss: 0.02088063955307007 2023-01-23 23:35:03.504851: step: 376/77, loss: 0.001944305724464357 2023-01-23 23:35:04.854884: step: 380/77, loss: 8.24704966362333e-06 2023-01-23 23:35:06.155734: step: 384/77, loss: 9.012554073706269e-05 2023-01-23 23:35:07.459070: step: 388/77, loss: 0.01869414746761322 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.9452054794520548, 'r': 0.5149253731343284, 'f1': 0.6666666666666667}, 'slot': {'p': 0.5405405405405406, 'r': 0.017969451931716084, 'f1': 0.034782608695652174}, 'combined': 0.023188405797101453, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.9583333333333334, 'r': 0.5149253731343284, 'f1': 0.6699029126213593}, 'slot': {'p': 0.5277777777777778, 'r': 0.017070979335130278, 'f1': 0.03307223672758921}, 'combined': 0.0221551877107151, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5149253731343284, 'f1': 0.6699029126213593}, 'slot': {'p': 0.5277777777777778, 'r': 0.017070979335130278, 'f1': 0.03307223672758921}, 'combined': 0.0221551877107151, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 
0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:36:48.027597: step: 4/77, loss: 0.00010956196638289839 2023-01-23 23:36:49.356508: step: 8/77, loss: 0.00674779387190938 2023-01-23 23:36:50.708741: step: 12/77, loss: 0.0008990313508547843 2023-01-23 23:36:52.026644: step: 16/77, loss: 0.001597610767930746 2023-01-23 23:36:53.363433: step: 20/77, loss: 0.08414094150066376 2023-01-23 23:36:54.657957: step: 24/77, loss: 0.00018882961012423038 2023-01-23 23:36:55.936616: step: 28/77, loss: 0.052721038460731506 2023-01-23 23:36:57.226868: step: 32/77, loss: 0.0020808421541005373 2023-01-23 23:36:58.525653: step: 36/77, loss: 0.0009288503788411617 2023-01-23 23:36:59.829381: step: 40/77, loss: 1.3552657037507743e-05 2023-01-23 23:37:01.129461: step: 44/77, loss: 0.000139696552651003 2023-01-23 23:37:02.436350: step: 48/77, loss: 0.0003846232430078089 2023-01-23 23:37:03.762214: step: 52/77, loss: 7.299314893316478e-05 2023-01-23 23:37:05.039834: step: 56/77, loss: 0.03577619045972824 2023-01-23 23:37:06.304251: step: 60/77, loss: 1.30085572891403e-06 2023-01-23 23:37:07.643374: step: 64/77, loss: 0.00017460936214774847 2023-01-23 23:37:08.929811: step: 68/77, loss: 0.00021287830895744264 2023-01-23 23:37:10.238965: step: 72/77, loss: 0.02898317575454712 2023-01-23 23:37:11.571483: step: 76/77, loss: 0.10362723469734192 2023-01-23 23:37:12.883127: step: 80/77, loss: 0.00018808335880748928 2023-01-23 23:37:14.211072: step: 84/77, loss: 3.3553280900378013e-06 2023-01-23 23:37:15.527738: step: 88/77, loss: 0.000569433846976608 2023-01-23 23:37:16.829260: step: 92/77, loss: 0.00043255172204226255 2023-01-23 23:37:18.139769: step: 96/77, loss: 0.0009702723473310471 2023-01-23 23:37:19.425291: step: 100/77, loss: 0.0018793041817843914 2023-01-23 23:37:20.717191: step: 104/77, loss: 8.680361497681588e-05 2023-01-23 23:37:22.021247: step: 108/77, loss: 0.0003911007952410728 2023-01-23 23:37:23.265953: step: 112/77, loss: 0.00037496176082640886 2023-01-23 23:37:24.549214: step: 116/77, loss: 0.042494677007198334 2023-01-23 23:37:25.846600: step: 120/77, loss: 0.0002248030068585649 2023-01-23 23:37:27.140276: step: 124/77, loss: 0.0025414375122636557 2023-01-23 23:37:28.421133: step: 128/77, loss: 0.041003961116075516 2023-01-23 23:37:29.749593: step: 132/77, loss: 0.04007624834775925 2023-01-23 23:37:31.041176: step: 136/77, loss: 0.004495857283473015 2023-01-23 23:37:32.369367: 
step: 140/77, loss: 0.0015246759867295623 2023-01-23 23:37:33.671692: step: 144/77, loss: 0.00025638059014454484 2023-01-23 23:37:34.992580: step: 148/77, loss: 0.0013521756045520306 2023-01-23 23:37:36.268930: step: 152/77, loss: 9.346644947072491e-05 2023-01-23 23:37:37.597884: step: 156/77, loss: 0.00020250340458005667 2023-01-23 23:37:38.922208: step: 160/77, loss: 0.033732958137989044 2023-01-23 23:37:40.266318: step: 164/77, loss: 0.0006303130066953599 2023-01-23 23:37:41.563537: step: 168/77, loss: 0.00043823482701554894 2023-01-23 23:37:42.880794: step: 172/77, loss: 4.336075289756991e-05 2023-01-23 23:37:44.150339: step: 176/77, loss: 0.00014787666441407055 2023-01-23 23:37:45.501513: step: 180/77, loss: 4.3609088606899604e-05 2023-01-23 23:37:46.754080: step: 184/77, loss: 0.0050060986541211605 2023-01-23 23:37:48.035485: step: 188/77, loss: 0.0015303846448659897 2023-01-23 23:37:49.339988: step: 192/77, loss: 5.7042638218263164e-05 2023-01-23 23:37:50.658384: step: 196/77, loss: 0.02419361099600792 2023-01-23 23:37:51.981068: step: 200/77, loss: 0.0019897716119885445 2023-01-23 23:37:53.279599: step: 204/77, loss: 0.015149969607591629 2023-01-23 23:37:54.644241: step: 208/77, loss: 0.0032350337132811546 2023-01-23 23:37:55.959403: step: 212/77, loss: 0.0019988366402685642 2023-01-23 23:37:57.296538: step: 216/77, loss: 0.012999859638512135 2023-01-23 23:37:58.592550: step: 220/77, loss: 0.0004743538156617433 2023-01-23 23:37:59.912352: step: 224/77, loss: 0.0030647912062704563 2023-01-23 23:38:01.247642: step: 228/77, loss: 0.0001289759238716215 2023-01-23 23:38:02.558818: step: 232/77, loss: 4.184803401585668e-05 2023-01-23 23:38:03.858505: step: 236/77, loss: 0.006597783882170916 2023-01-23 23:38:05.151524: step: 240/77, loss: 3.632171137724072e-05 2023-01-23 23:38:06.439301: step: 244/77, loss: 0.021282393485307693 2023-01-23 23:38:07.726748: step: 248/77, loss: 1.4173661838867702e-05 2023-01-23 23:38:09.052274: step: 252/77, loss: 0.008453577756881714 2023-01-23 23:38:10.374920: step: 256/77, loss: 0.03167552500963211 2023-01-23 23:38:11.680387: step: 260/77, loss: 5.700542897102423e-05 2023-01-23 23:38:12.998497: step: 264/77, loss: 3.3676496968837455e-07 2023-01-23 23:38:14.322318: step: 268/77, loss: 6.962824045331217e-06 2023-01-23 23:38:15.625394: step: 272/77, loss: 6.902157474542037e-05 2023-01-23 23:38:16.941058: step: 276/77, loss: 0.004239629954099655 2023-01-23 23:38:18.294174: step: 280/77, loss: 5.466717266244814e-05 2023-01-23 23:38:19.592913: step: 284/77, loss: 0.0015965744387358427 2023-01-23 23:38:20.915556: step: 288/77, loss: 0.0020482433028519154 2023-01-23 23:38:22.215174: step: 292/77, loss: 2.950424686787301e-07 2023-01-23 23:38:23.574043: step: 296/77, loss: 0.00022944994270801544 2023-01-23 23:38:24.894090: step: 300/77, loss: 0.0002290060801897198 2023-01-23 23:38:26.179149: step: 304/77, loss: 5.758761290053371e-06 2023-01-23 23:38:27.493567: step: 308/77, loss: 0.0002715633891057223 2023-01-23 23:38:28.738869: step: 312/77, loss: 5.757574399467558e-06 2023-01-23 23:38:30.057147: step: 316/77, loss: 0.007970299571752548 2023-01-23 23:38:31.354320: step: 320/77, loss: 4.443063517101109e-06 2023-01-23 23:38:32.687523: step: 324/77, loss: 0.007511195261031389 2023-01-23 23:38:33.963634: step: 328/77, loss: 1.9043177417188417e-06 2023-01-23 23:38:35.306407: step: 332/77, loss: 0.08256176859140396 2023-01-23 23:38:36.610041: step: 336/77, loss: 4.316621561883949e-05 2023-01-23 23:38:37.950678: step: 340/77, loss: 7.719926361460239e-05 2023-01-23 
23:38:39.315604: step: 344/77, loss: 2.679053068277426e-05 2023-01-23 23:38:40.647936: step: 348/77, loss: 0.0007886448875069618 2023-01-23 23:38:41.961984: step: 352/77, loss: 0.018656939268112183 2023-01-23 23:38:43.274400: step: 356/77, loss: 8.270098987850361e-07 2023-01-23 23:38:44.636260: step: 360/77, loss: 0.0008322189096361399 2023-01-23 23:38:45.948876: step: 364/77, loss: 0.0003038544673472643 2023-01-23 23:38:47.287721: step: 368/77, loss: 0.0002964224258903414 2023-01-23 23:38:48.560057: step: 372/77, loss: 8.963259460870177e-05 2023-01-23 23:38:49.878792: step: 376/77, loss: 0.0005112845101393759 2023-01-23 23:38:51.160615: step: 380/77, loss: 1.5869254639255814e-05 2023-01-23 23:38:52.429215: step: 384/77, loss: 0.0074441516771912575 2023-01-23 23:38:53.751946: step: 388/77, loss: 8.359365892829373e-06 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.96, 'r': 0.5373134328358209, 'f1': 0.6889952153110048}, 'slot': {'p': 0.5526315789473685, 'r': 0.018867924528301886, 'f1': 0.036490008688097306}, 'combined': 0.02514144139275604, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.96, 'r': 0.5373134328358209, 'f1': 0.6889952153110048}, 'slot': {'p': 0.5641025641025641, 'r': 0.019766397124887692, 'f1': 0.03819444444444445}, 'combined': 0.026315789473684216, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.972972972972973, 'r': 0.5373134328358209, 'f1': 0.6923076923076924}, 'slot': {'p': 0.5526315789473685, 'r': 0.018867924528301886, 'f1': 0.036490008688097306}, 'combined': 0.025262313707144293, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 
'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:40:35.062303: step: 4/77, loss: 0.0005232529947534204 2023-01-23 23:40:36.328561: step: 8/77, loss: 0.0008650250383652747 2023-01-23 23:40:37.698139: step: 12/77, loss: 0.00010305993782822043 2023-01-23 23:40:38.986316: step: 16/77, loss: 2.065046464849729e-05 2023-01-23 23:40:40.254392: step: 20/77, loss: 0.0001110277371481061 2023-01-23 23:40:41.534540: step: 24/77, loss: 0.00022450985852628946 2023-01-23 23:40:42.899374: step: 28/77, loss: 0.0008693195995874703 2023-01-23 23:40:44.225864: step: 32/77, loss: 0.009220481850206852 2023-01-23 23:40:45.502379: step: 36/77, loss: 0.000709481246303767 2023-01-23 23:40:46.855917: step: 40/77, loss: 6.52720991638489e-05 2023-01-23 23:40:48.150773: step: 44/77, loss: 0.00014434370677918196 2023-01-23 23:40:49.424206: step: 48/77, loss: 0.0001451051648473367 2023-01-23 23:40:50.733103: step: 52/77, loss: 0.005191781558096409 2023-01-23 23:40:52.029193: step: 56/77, loss: 0.0004353547701612115 2023-01-23 23:40:53.359364: step: 60/77, loss: 0.0043207453563809395 2023-01-23 23:40:54.691979: step: 64/77, loss: 7.72473358665593e-05 2023-01-23 23:40:55.960037: step: 68/77, loss: 0.00032503451802767813 2023-01-23 23:40:57.321974: step: 72/77, loss: 0.006325506139546633 2023-01-23 23:40:58.601411: step: 76/77, loss: 0.0026867722626775503 2023-01-23 23:40:59.910503: step: 80/77, loss: 0.049152620136737823 2023-01-23 23:41:01.200428: step: 84/77, loss: 0.00011464212002465501 2023-01-23 23:41:02.474586: step: 88/77, loss: 0.026460302993655205 2023-01-23 23:41:03.840540: step: 92/77, loss: 0.00032751355320215225 2023-01-23 23:41:05.136536: step: 96/77, loss: 0.11882049590349197 2023-01-23 23:41:06.469586: step: 100/77, loss: 0.0011056429939344525 2023-01-23 23:41:07.767740: step: 104/77, loss: 0.0001578621449880302 2023-01-23 23:41:09.119744: step: 108/77, loss: 9.804853107198142e-07 2023-01-23 23:41:10.378058: step: 112/77, loss: 5.513399514711637e-07 2023-01-23 23:41:11.663650: step: 116/77, loss: 4.7314893890870735e-05 2023-01-23 23:41:12.953855: step: 120/77, loss: 0.011877724900841713 2023-01-23 23:41:14.272614: step: 124/77, loss: 
1.4156088923300558e-07 2023-01-23 23:41:15.611037: step: 128/77, loss: 0.00023786452948115766 2023-01-23 23:41:16.946200: step: 132/77, loss: 0.0008837380446493626 2023-01-23 23:41:18.254151: step: 136/77, loss: 5.608001447399147e-05 2023-01-23 23:41:19.518639: step: 140/77, loss: 2.1426265448099002e-05 2023-01-23 23:41:20.784627: step: 144/77, loss: 0.030281029641628265 2023-01-23 23:41:22.088510: step: 148/77, loss: 0.03355312719941139 2023-01-23 23:41:23.385701: step: 152/77, loss: 0.0007835748256184161 2023-01-23 23:41:24.727823: step: 156/77, loss: 0.00023194306413643062 2023-01-23 23:41:26.021935: step: 160/77, loss: 2.2264994186116382e-05 2023-01-23 23:41:27.362736: step: 164/77, loss: 0.0006950413808226585 2023-01-23 23:41:28.667440: step: 168/77, loss: 0.004247922450304031 2023-01-23 23:41:29.996300: step: 172/77, loss: 0.00030433133360929787 2023-01-23 23:41:31.325925: step: 176/77, loss: 0.0017913315678015351 2023-01-23 23:41:32.672078: step: 180/77, loss: 0.026901597157120705 2023-01-23 23:41:34.014620: step: 184/77, loss: 0.024439463391900063 2023-01-23 23:41:35.288376: step: 188/77, loss: 8.106151767606207e-07 2023-01-23 23:41:36.611794: step: 192/77, loss: 0.0004149790620431304 2023-01-23 23:41:37.914463: step: 196/77, loss: 0.00034722022246569395 2023-01-23 23:41:39.199365: step: 200/77, loss: 1.1565061868168414e-05 2023-01-23 23:41:40.525438: step: 204/77, loss: 0.023462709039449692 2023-01-23 23:41:41.831192: step: 208/77, loss: 0.009737570770084858 2023-01-23 23:41:43.127946: step: 212/77, loss: 3.904085019712511e-07 2023-01-23 23:41:44.436159: step: 216/77, loss: 0.14411501586437225 2023-01-23 23:41:45.780739: step: 220/77, loss: 0.02945737913250923 2023-01-23 23:41:47.037061: step: 224/77, loss: 0.0024472614750266075 2023-01-23 23:41:48.357971: step: 228/77, loss: 0.02036593109369278 2023-01-23 23:41:49.628177: step: 232/77, loss: 4.921863728668541e-05 2023-01-23 23:41:50.902618: step: 236/77, loss: 3.2393309084000066e-06 2023-01-23 23:41:52.200521: step: 240/77, loss: 0.00900073628872633 2023-01-23 23:41:53.524532: step: 244/77, loss: 0.019549444317817688 2023-01-23 23:41:54.839818: step: 248/77, loss: 0.021410422399640083 2023-01-23 23:41:56.165823: step: 252/77, loss: 0.0005116247921250761 2023-01-23 23:41:57.437285: step: 256/77, loss: 0.029065445065498352 2023-01-23 23:41:58.766526: step: 260/77, loss: 2.5479623218416236e-05 2023-01-23 23:42:00.084686: step: 264/77, loss: 0.005156747531145811 2023-01-23 23:42:01.396275: step: 268/77, loss: 0.06347062438726425 2023-01-23 23:42:02.681034: step: 272/77, loss: 0.011640047654509544 2023-01-23 23:42:03.943840: step: 276/77, loss: 0.0010201624827459455 2023-01-23 23:42:05.266778: step: 280/77, loss: 0.0004513526218943298 2023-01-23 23:42:06.559035: step: 284/77, loss: 0.03519264608621597 2023-01-23 23:42:07.878960: step: 288/77, loss: 0.0007706336909905076 2023-01-23 23:42:09.138065: step: 292/77, loss: 0.0003431455115787685 2023-01-23 23:42:10.404204: step: 296/77, loss: 0.0010055579477921128 2023-01-23 23:42:11.699465: step: 300/77, loss: 0.005652496591210365 2023-01-23 23:42:13.045027: step: 304/77, loss: 3.254468902014196e-05 2023-01-23 23:42:14.379295: step: 308/77, loss: 0.0005605981568805873 2023-01-23 23:42:15.697842: step: 312/77, loss: 0.0028895533178001642 2023-01-23 23:42:16.972152: step: 316/77, loss: 0.0020729885436594486 2023-01-23 23:42:18.273652: step: 320/77, loss: 0.0057830954901874065 2023-01-23 23:42:19.544678: step: 324/77, loss: 6.959859456401318e-05 2023-01-23 23:42:20.835724: step: 328/77, loss: 
0.001899764989502728 2023-01-23 23:42:22.177213: step: 332/77, loss: 0.04220954701304436 2023-01-23 23:42:23.483634: step: 336/77, loss: 0.000656688294839114 2023-01-23 23:42:24.817106: step: 340/77, loss: 0.0028479767497628927 2023-01-23 23:42:26.180292: step: 344/77, loss: 0.00893397070467472 2023-01-23 23:42:27.448620: step: 348/77, loss: 0.0005671484395861626 2023-01-23 23:42:28.686086: step: 352/77, loss: 0.00010676322563085705 2023-01-23 23:42:29.993148: step: 356/77, loss: 0.00015144373173825443 2023-01-23 23:42:31.297079: step: 360/77, loss: 0.0004946649423800409 2023-01-23 23:42:32.613346: step: 364/77, loss: 4.4703462265260896e-08 2023-01-23 23:42:33.869173: step: 368/77, loss: 0.025740794837474823 2023-01-23 23:42:35.172147: step: 372/77, loss: 0.011371146887540817 2023-01-23 23:42:36.514345: step: 376/77, loss: 0.00037400436121970415 2023-01-23 23:42:37.816378: step: 380/77, loss: 0.03912031278014183 2023-01-23 23:42:39.153592: step: 384/77, loss: 0.0008152026566676795 2023-01-23 23:42:40.465531: step: 388/77, loss: 2.2828007786301896e-06 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5149253731343284, 'f1': 0.6699029126213593}, 'slot': {'p': 0.6774193548387096, 'r': 0.018867924528301886, 'f1': 0.03671328671328671}, 'combined': 0.02459433770113382, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5074626865671642, 'f1': 0.6666666666666667}, 'slot': {'p': 0.65625, 'r': 0.018867924528301886, 'f1': 0.03668122270742358}, 'combined': 0.02445414847161572, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5, 'f1': 0.6600985221674878}, 'slot': {'p': 0.6875, 'r': 0.019766397124887692, 'f1': 0.03842794759825328}, 'combined': 0.025366231419536648, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:44:21.608405: step: 4/77, loss: 0.004129846580326557 2023-01-23 23:44:22.892430: step: 8/77, loss: 0.0007028129184618592 2023-01-23 23:44:24.211369: step: 12/77, loss: 0.004232016392052174 2023-01-23 23:44:25.537454: step: 16/77, loss: 3.35503245878499e-05 2023-01-23 23:44:26.841359: step: 20/77, loss: 0.00023565557785332203 2023-01-23 23:44:28.140493: step: 24/77, loss: 0.007366999518126249 2023-01-23 23:44:29.442304: step: 28/77, loss: 0.0024342378601431847 2023-01-23 23:44:30.714861: step: 32/77, loss: 0.005616952199488878 2023-01-23 23:44:32.015900: step: 36/77, loss: 0.0001240503042936325 2023-01-23 23:44:33.322399: step: 40/77, loss: 0.0004758648865390569 2023-01-23 23:44:34.606751: step: 44/77, loss: 0.0020777047611773014 2023-01-23 23:44:35.920419: step: 48/77, loss: 0.0003121124464087188 2023-01-23 23:44:37.270465: step: 52/77, loss: 6.964681961107999e-05 2023-01-23 23:44:38.579234: step: 56/77, loss: 0.018555521965026855 2023-01-23 23:44:39.901185: step: 60/77, loss: 0.01895550638437271 2023-01-23 23:44:41.172610: step: 64/77, loss: 0.00019170627638231963 2023-01-23 23:44:42.515567: step: 68/77, loss: 0.0745316594839096 2023-01-23 23:44:43.844176: step: 72/77, loss: 0.000593360688071698 2023-01-23 23:44:45.183958: step: 76/77, loss: 0.00029611896025016904 2023-01-23 23:44:46.467973: step: 80/77, loss: 0.03908059000968933 2023-01-23 23:44:47.821628: step: 84/77, loss: 0.006917833350598812 2023-01-23 23:44:49.073152: step: 88/77, loss: 0.00373476161621511 2023-01-23 23:44:50.324335: step: 92/77, loss: 2.4437852630398993e-07 2023-01-23 23:44:51.617893: step: 96/77, loss: 5.9011843404732645e-05 2023-01-23 23:44:52.924531: step: 100/77, loss: 0.0007140662637539208 2023-01-23 23:44:54.217922: step: 104/77, loss: 4.786644785781391e-05 2023-01-23 23:44:55.530530: step: 108/77, loss: 0.0005600241129286587 2023-01-23 23:44:56.852020: step: 112/77, loss: 
0.00032644468592479825 2023-01-23 23:44:58.166944: step: 116/77, loss: 0.011107344180345535 2023-01-23 23:44:59.468763: step: 120/77, loss: 3.209639316992252e-06 2023-01-23 23:45:00.763466: step: 124/77, loss: 0.0001459840714232996 2023-01-23 23:45:02.073998: step: 128/77, loss: 0.0024846673477441072 2023-01-23 23:45:03.320909: step: 132/77, loss: 1.0132655461347895e-06 2023-01-23 23:45:04.623311: step: 136/77, loss: 2.9131737392162904e-05 2023-01-23 23:45:05.874617: step: 140/77, loss: 0.00012506675557233393 2023-01-23 23:45:07.216067: step: 144/77, loss: 0.0036581424064934254 2023-01-23 23:45:08.490564: step: 148/77, loss: 0.0006087854853831232 2023-01-23 23:45:09.745621: step: 152/77, loss: 9.502156171947718e-05 2023-01-23 23:45:11.000033: step: 156/77, loss: 0.00010151314927497879 2023-01-23 23:45:12.292892: step: 160/77, loss: 0.00010394515265943483 2023-01-23 23:45:13.604121: step: 164/77, loss: 3.256982745369896e-05 2023-01-23 23:45:14.893116: step: 168/77, loss: 0.0004069388669449836 2023-01-23 23:45:16.166185: step: 172/77, loss: 0.0004313273529987782 2023-01-23 23:45:17.433977: step: 176/77, loss: 0.07065204530954361 2023-01-23 23:45:18.731359: step: 180/77, loss: 0.003883373225107789 2023-01-23 23:45:20.000045: step: 184/77, loss: 0.021618608385324478 2023-01-23 23:45:21.321694: step: 188/77, loss: 0.009264899417757988 2023-01-23 23:45:22.631226: step: 192/77, loss: 0.021695788949728012 2023-01-23 23:45:23.965025: step: 196/77, loss: 0.07280417531728745 2023-01-23 23:45:25.317503: step: 200/77, loss: 5.32822850800585e-05 2023-01-23 23:45:26.624043: step: 204/77, loss: 0.004343980457633734 2023-01-23 23:45:27.962158: step: 208/77, loss: 1.6370775483665057e-05 2023-01-23 23:45:29.289379: step: 212/77, loss: 0.01640847697854042 2023-01-23 23:45:30.606390: step: 216/77, loss: 1.6044676158344373e-05 2023-01-23 23:45:31.896659: step: 220/77, loss: 0.0017321545165032148 2023-01-23 23:45:33.279185: step: 224/77, loss: 0.0006852812948636711 2023-01-23 23:45:34.591227: step: 228/77, loss: 0.004491701722145081 2023-01-23 23:45:35.900417: step: 232/77, loss: 1.5705261375842383e-06 2023-01-23 23:45:37.193111: step: 236/77, loss: 5.962876002740813e-06 2023-01-23 23:45:38.499855: step: 240/77, loss: 0.02237839251756668 2023-01-23 23:45:39.840282: step: 244/77, loss: 0.0005027443403378129 2023-01-23 23:45:41.186166: step: 248/77, loss: 7.748595010070858e-08 2023-01-23 23:45:42.500652: step: 252/77, loss: 0.020615598186850548 2023-01-23 23:45:43.788286: step: 256/77, loss: 0.06497868150472641 2023-01-23 23:45:45.098324: step: 260/77, loss: 1.5785826690262184e-05 2023-01-23 23:45:46.410487: step: 264/77, loss: 0.0005293970461934805 2023-01-23 23:45:47.685697: step: 268/77, loss: 0.015436092391610146 2023-01-23 23:45:49.024350: step: 272/77, loss: 0.008673092350363731 2023-01-23 23:45:50.351473: step: 276/77, loss: 0.0017115159425884485 2023-01-23 23:45:51.654765: step: 280/77, loss: 0.01140614878386259 2023-01-23 23:45:52.975485: step: 284/77, loss: 0.009037042036652565 2023-01-23 23:45:54.322378: step: 288/77, loss: 0.003911172971129417 2023-01-23 23:45:55.611554: step: 292/77, loss: 0.0076984260231256485 2023-01-23 23:45:56.951699: step: 296/77, loss: 1.9967444586654892e-07 2023-01-23 23:45:58.253306: step: 300/77, loss: 0.0007964045507833362 2023-01-23 23:45:59.578231: step: 304/77, loss: 0.0035107415169477463 2023-01-23 23:46:00.896569: step: 308/77, loss: 2.278218516948982e-06 2023-01-23 23:46:02.221135: step: 312/77, loss: 0.0024887826293706894 2023-01-23 23:46:03.520986: step: 316/77, 
loss: 0.04732801765203476 2023-01-23 23:46:04.783159: step: 320/77, loss: 0.0021155159920454025 2023-01-23 23:46:06.084852: step: 324/77, loss: 2.9067188734188676e-05 2023-01-23 23:46:07.387585: step: 328/77, loss: 0.0012147235684096813 2023-01-23 23:46:08.676723: step: 332/77, loss: 7.692816325288732e-06 2023-01-23 23:46:09.986820: step: 336/77, loss: 0.015529229305684566 2023-01-23 23:46:11.338511: step: 340/77, loss: 8.99490260053426e-06 2023-01-23 23:46:12.600923: step: 344/77, loss: 0.0006836125976406038 2023-01-23 23:46:13.915807: step: 348/77, loss: 0.005854771938174963 2023-01-23 23:46:15.193302: step: 352/77, loss: 0.0006406373577192426 2023-01-23 23:46:16.482589: step: 356/77, loss: 0.009297902695834637 2023-01-23 23:46:17.746377: step: 360/77, loss: 0.015907153487205505 2023-01-23 23:46:19.057546: step: 364/77, loss: 1.087783516595664e-07 2023-01-23 23:46:20.353433: step: 368/77, loss: 0.09170454740524292 2023-01-23 23:46:21.647095: step: 372/77, loss: 0.0004183561832178384 2023-01-23 23:46:22.922774: step: 376/77, loss: 0.000553747231606394 2023-01-23 23:46:24.238756: step: 380/77, loss: 0.002163755940273404 2023-01-23 23:46:25.539564: step: 384/77, loss: 5.881304605281912e-05 2023-01-23 23:46:26.889200: step: 388/77, loss: 0.0024923363234847784 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 1.0, 'r': 0.5298507462686567, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5333333333333333, 'r': 0.014375561545372867, 'f1': 0.02799650043744532}, 'combined': 0.019392697863986515, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 1.0, 'r': 0.5149253731343284, 'f1': 0.6798029556650247}, 'slot': {'p': 0.5517241379310345, 'r': 0.014375561545372867, 'f1': 0.028021015761821366}, 'combined': 0.01904876933562241, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 1.0, 'r': 0.5149253731343284, 'f1': 0.6798029556650247}, 'slot': {'p': 0.5666666666666667, 'r': 0.015274034141958671, 'f1': 0.029746281714785654}, 'combined': 0.020221610229755767, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 
0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:48:07.943418: step: 4/77, loss: 0.0003015650436282158 2023-01-23 23:48:09.234009: step: 8/77, loss: 0.0005008360603824258 2023-01-23 23:48:10.552885: step: 12/77, loss: 0.04127756133675575 2023-01-23 23:48:11.882325: step: 16/77, loss: 0.00023727216466795653 2023-01-23 23:48:13.141211: step: 20/77, loss: 5.645280907629058e-06 2023-01-23 23:48:14.398582: step: 24/77, loss: 0.0038216097746044397 2023-01-23 23:48:15.741282: step: 28/77, loss: 0.0019458006136119366 2023-01-23 23:48:17.075954: step: 32/77, loss: 0.0010453497525304556 2023-01-23 23:48:18.358588: step: 36/77, loss: 3.918975437500194e-07 2023-01-23 23:48:19.615454: step: 40/77, loss: 1.2546540801849915e-06 2023-01-23 23:48:20.928997: step: 44/77, loss: 0.00035715149715542793 2023-01-23 23:48:22.262418: step: 48/77, loss: 0.04893004894256592 2023-01-23 23:48:23.556399: step: 52/77, loss: 2.16505691241764e-06 2023-01-23 23:48:24.841342: step: 56/77, loss: 0.00048624962801113725 2023-01-23 23:48:26.099039: step: 60/77, loss: 0.05392635613679886 2023-01-23 23:48:27.402988: step: 64/77, loss: 0.0016569013241678476 2023-01-23 23:48:28.718921: step: 68/77, loss: 4.06145081797149e-05 2023-01-23 23:48:29.980090: step: 72/77, loss: 0.02751801535487175 2023-01-23 23:48:31.271776: step: 76/77, loss: 0.00032470986479893327 2023-01-23 23:48:32.528484: step: 80/77, loss: 0.04265865311026573 2023-01-23 23:48:33.840553: step: 84/77, loss: 0.02686918154358864 2023-01-23 23:48:35.151273: step: 88/77, loss: 5.758063343819231e-05 2023-01-23 23:48:36.433923: step: 92/77, loss: 0.005514861550182104 2023-01-23 23:48:37.805408: step: 96/77, loss: 0.023688755929470062 2023-01-23 23:48:39.129053: step: 100/77, loss: 
0.005752614699304104 2023-01-23 23:48:40.470506: step: 104/77, loss: 0.00012414446973707527 2023-01-23 23:48:41.784754: step: 108/77, loss: 0.006811483763158321 2023-01-23 23:48:43.062205: step: 112/77, loss: 4.321331914525217e-08 2023-01-23 23:48:44.321187: step: 116/77, loss: 0.006708010099828243 2023-01-23 23:48:45.662092: step: 120/77, loss: 0.002154033165425062 2023-01-23 23:48:46.985032: step: 124/77, loss: 0.002488675992935896 2023-01-23 23:48:48.274715: step: 128/77, loss: 2.1963771814625943e-06 2023-01-23 23:48:49.605350: step: 132/77, loss: 0.0014622567687183619 2023-01-23 23:48:50.870549: step: 136/77, loss: 0.000186900157132186 2023-01-23 23:48:52.151120: step: 140/77, loss: 0.008909492753446102 2023-01-23 23:48:53.460701: step: 144/77, loss: 0.0005386116681620479 2023-01-23 23:48:54.757363: step: 148/77, loss: 2.1678852135664783e-05 2023-01-23 23:48:56.059299: step: 152/77, loss: 0.0004342886677477509 2023-01-23 23:48:57.358599: step: 156/77, loss: 2.448557279421948e-05 2023-01-23 23:48:58.659237: step: 160/77, loss: 5.50402000953909e-05 2023-01-23 23:48:59.962890: step: 164/77, loss: 0.00042298168409615755 2023-01-23 23:49:01.239915: step: 168/77, loss: 0.00025966332759708166 2023-01-23 23:49:02.636363: step: 172/77, loss: 2.4172995836124755e-05 2023-01-23 23:49:03.929008: step: 176/77, loss: 0.05907929316163063 2023-01-23 23:49:05.187289: step: 180/77, loss: 0.0020319067407399416 2023-01-23 23:49:06.502849: step: 184/77, loss: 0.0001989850279642269 2023-01-23 23:49:07.836048: step: 188/77, loss: 3.735092468559742e-05 2023-01-23 23:49:09.140493: step: 192/77, loss: 0.02960916794836521 2023-01-23 23:49:10.475131: step: 196/77, loss: 5.357793634175323e-05 2023-01-23 23:49:11.762330: step: 200/77, loss: 0.0369393453001976 2023-01-23 23:49:13.125197: step: 204/77, loss: 0.0030806665308773518 2023-01-23 23:49:14.424473: step: 208/77, loss: 0.0004855840525124222 2023-01-23 23:49:15.723889: step: 212/77, loss: 0.012386923655867577 2023-01-23 23:49:17.041543: step: 216/77, loss: 0.0008802711381576955 2023-01-23 23:49:18.305528: step: 220/77, loss: 0.013728022575378418 2023-01-23 23:49:19.603595: step: 224/77, loss: 0.0034249231684952974 2023-01-23 23:49:20.901645: step: 228/77, loss: 2.330183269805275e-05 2023-01-23 23:49:22.207849: step: 232/77, loss: 0.005138559266924858 2023-01-23 23:49:23.507986: step: 236/77, loss: 0.0002731800777837634 2023-01-23 23:49:24.808309: step: 240/77, loss: 0.14706991612911224 2023-01-23 23:49:26.121028: step: 244/77, loss: 0.0003597013419494033 2023-01-23 23:49:27.380019: step: 248/77, loss: 2.2830377929494716e-05 2023-01-23 23:49:28.717426: step: 252/77, loss: 3.3556109428900527e-06 2023-01-23 23:49:30.008492: step: 256/77, loss: 6.024046342645306e-06 2023-01-23 23:49:31.309630: step: 260/77, loss: 0.0003622551157604903 2023-01-23 23:49:32.665975: step: 264/77, loss: 1.505012647839976e-07 2023-01-23 23:49:33.984037: step: 268/77, loss: 1.9880082618328743e-05 2023-01-23 23:49:35.309188: step: 272/77, loss: 0.000368515495210886 2023-01-23 23:49:36.609412: step: 276/77, loss: 0.000189960454008542 2023-01-23 23:49:37.899325: step: 280/77, loss: 0.05055554211139679 2023-01-23 23:49:39.214224: step: 284/77, loss: 1.7881333747027384e-07 2023-01-23 23:49:40.499022: step: 288/77, loss: 0.0001934354950208217 2023-01-23 23:49:41.891199: step: 292/77, loss: 0.00022537648328579962 2023-01-23 23:49:43.197635: step: 296/77, loss: 1.8508553694118746e-05 2023-01-23 23:49:44.529336: step: 300/77, loss: 0.00031571733416058123 2023-01-23 23:49:45.863058: step: 304/77, 
loss: 0.0006485034828074276 2023-01-23 23:49:47.193385: step: 308/77, loss: 0.05046778544783592 2023-01-23 23:49:48.479240: step: 312/77, loss: 0.006176185794174671 2023-01-23 23:49:49.789792: step: 316/77, loss: 0.02317003905773163 2023-01-23 23:49:51.074314: step: 320/77, loss: 0.00012699222133960575 2023-01-23 23:49:52.392134: step: 324/77, loss: 4.8745387175586075e-05 2023-01-23 23:49:53.738714: step: 328/77, loss: 0.01755366101861 2023-01-23 23:49:55.053097: step: 332/77, loss: 0.029475772753357887 2023-01-23 23:49:56.375769: step: 336/77, loss: 0.002560092369094491 2023-01-23 23:49:57.653353: step: 340/77, loss: 1.1003810868714936e-05 2023-01-23 23:49:58.998619: step: 344/77, loss: 1.8088969682139577e-06 2023-01-23 23:50:00.322806: step: 348/77, loss: 0.0017633186653256416 2023-01-23 23:50:01.682987: step: 352/77, loss: 2.831220058396866e-08 2023-01-23 23:50:02.966836: step: 356/77, loss: 6.697871867800131e-05 2023-01-23 23:50:04.277930: step: 360/77, loss: 1.5294324839487672e-05 2023-01-23 23:50:05.633517: step: 364/77, loss: 0.02993430197238922 2023-01-23 23:50:06.937595: step: 368/77, loss: 0.00010248806211166084 2023-01-23 23:50:08.266592: step: 372/77, loss: 0.00020304956706240773 2023-01-23 23:50:09.555308: step: 376/77, loss: 0.049604982137680054 2023-01-23 23:50:10.866367: step: 380/77, loss: 7.629324727531639e-07 2023-01-23 23:50:12.168094: step: 384/77, loss: 0.0034173850435763597 2023-01-23 23:50:13.476061: step: 388/77, loss: 0.022433260455727577 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5298507462686567, 'f1': 0.6761904761904761}, 'slot': {'p': 0.5384615384615384, 'r': 0.018867924528301886, 'f1': 0.036458333333333336}, 'combined': 0.024652777777777777, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: {'template': {'p': 0.9230769230769231, 'r': 0.5373134328358209, 'f1': 0.6792452830188679}, 'slot': {'p': 0.4883720930232558, 'r': 0.018867924528301886, 'f1': 0.03633217993079585}, 'combined': 0.024678461839785858, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.935064935064935, 'r': 0.5373134328358209, 'f1': 0.6824644549763033}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.02485993678144383, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 
'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:51:54.564987: step: 4/77, loss: 4.329281728132628e-05 2023-01-23 23:51:55.830096: step: 8/77, loss: 0.00022017282026354223 2023-01-23 23:51:57.120490: step: 12/77, loss: 0.026746530085802078 2023-01-23 23:51:58.449756: step: 16/77, loss: 0.005114070139825344 2023-01-23 23:51:59.755259: step: 20/77, loss: 0.0007694660453125834 2023-01-23 23:52:01.046736: step: 24/77, loss: 0.0004395963333081454 2023-01-23 23:52:02.350410: step: 28/77, loss: 1.1439164154580794e-05 2023-01-23 23:52:03.687627: step: 32/77, loss: 0.03675878047943115 2023-01-23 23:52:04.912532: step: 36/77, loss: 0.05024024471640587 2023-01-23 23:52:06.227559: step: 40/77, loss: 0.006016051862388849 2023-01-23 23:52:07.548435: step: 44/77, loss: 0.003913963213562965 2023-01-23 23:52:08.837718: step: 48/77, loss: 1.0461508281878196e-05 2023-01-23 23:52:10.124525: step: 52/77, loss: 0.0005447589792311192 2023-01-23 23:52:11.414308: step: 56/77, loss: 0.00017727080557961017 2023-01-23 23:52:12.726782: step: 60/77, loss: 7.68654717830941e-06 2023-01-23 23:52:14.039643: step: 64/77, loss: 0.003572908928617835 2023-01-23 23:52:15.366473: step: 68/77, loss: 0.0016043151263147593 2023-01-23 23:52:16.660069: step: 72/77, loss: 0.0016125947004184127 2023-01-23 23:52:17.990637: step: 76/77, loss: 0.004037613980472088 2023-01-23 23:52:19.330544: step: 80/77, loss: 0.0036225919611752033 2023-01-23 23:52:20.620596: step: 84/77, loss: 0.0006270355661399662 2023-01-23 23:52:21.934238: step: 88/77, loss: 9.059033800440375e-06 2023-01-23 
23:52:23.224917: step: 92/77, loss: 0.0002212631079601124 2023-01-23 23:52:24.525588: step: 96/77, loss: 1.0681213097996078e-05 2023-01-23 23:52:25.816029: step: 100/77, loss: 0.004466784652322531 2023-01-23 23:52:27.112055: step: 104/77, loss: 0.0010201940312981606 2023-01-23 23:52:28.367294: step: 108/77, loss: 0.0004146641003899276 2023-01-23 23:52:29.692851: step: 112/77, loss: 0.00012005192547803745 2023-01-23 23:52:30.998598: step: 116/77, loss: 5.523490472114645e-06 2023-01-23 23:52:32.306903: step: 120/77, loss: 0.0006090780370868742 2023-01-23 23:52:33.573027: step: 124/77, loss: 0.017412561923265457 2023-01-23 23:52:34.884614: step: 128/77, loss: 0.0031904433853924274 2023-01-23 23:52:36.192879: step: 132/77, loss: 0.0004236635286360979 2023-01-23 23:52:37.532949: step: 136/77, loss: 2.662978477019351e-05 2023-01-23 23:52:38.831636: step: 140/77, loss: 0.006381732877343893 2023-01-23 23:52:40.111013: step: 144/77, loss: 0.0012269781436771154 2023-01-23 23:52:41.413073: step: 148/77, loss: 0.0001025956153171137 2023-01-23 23:52:42.769648: step: 152/77, loss: 6.080747880332638e-06 2023-01-23 23:52:44.053699: step: 156/77, loss: 0.000544823007658124 2023-01-23 23:52:45.350033: step: 160/77, loss: 0.00039470160845667124 2023-01-23 23:52:46.633003: step: 164/77, loss: 0.0035439336206763983 2023-01-23 23:52:47.946563: step: 168/77, loss: 0.001191399642266333 2023-01-23 23:52:49.221678: step: 172/77, loss: 6.601181894438923e-07 2023-01-23 23:52:50.563811: step: 176/77, loss: 1.5660202734579798e-06 2023-01-23 23:52:51.894248: step: 180/77, loss: 0.00018104282207787037 2023-01-23 23:52:53.247741: step: 184/77, loss: 0.0001800114259822294 2023-01-23 23:52:54.570388: step: 188/77, loss: 5.2132389100734144e-05 2023-01-23 23:52:55.861007: step: 192/77, loss: 0.0001612513151485473 2023-01-23 23:52:57.200905: step: 196/77, loss: 0.000883870234247297 2023-01-23 23:52:58.490381: step: 200/77, loss: 0.00014345537056215107 2023-01-23 23:52:59.780321: step: 204/77, loss: 5.223417247179896e-06 2023-01-23 23:53:01.083118: step: 208/77, loss: 0.0039792186580598354 2023-01-23 23:53:02.426271: step: 212/77, loss: 0.00014408319839276373 2023-01-23 23:53:03.734255: step: 216/77, loss: 6.049819057807326e-05 2023-01-23 23:53:05.028219: step: 220/77, loss: 4.052621079608798e-06 2023-01-23 23:53:06.306022: step: 224/77, loss: 0.0001223248546011746 2023-01-23 23:53:07.612920: step: 228/77, loss: 0.0027216023299843073 2023-01-23 23:53:08.893880: step: 232/77, loss: 0.033498615026474 2023-01-23 23:53:10.162884: step: 236/77, loss: 6.296906212810427e-05 2023-01-23 23:53:11.470879: step: 240/77, loss: 8.04692263045581e-06 2023-01-23 23:53:12.770383: step: 244/77, loss: 0.0003942615003325045 2023-01-23 23:53:14.087083: step: 248/77, loss: 0.0004979265504516661 2023-01-23 23:53:15.356168: step: 252/77, loss: 0.00012309358862694353 2023-01-23 23:53:16.721660: step: 256/77, loss: 0.08111999183893204 2023-01-23 23:53:18.019474: step: 260/77, loss: 2.6105428787559504e-06 2023-01-23 23:53:19.301131: step: 264/77, loss: 2.9434100724756718e-05 2023-01-23 23:53:20.571365: step: 268/77, loss: 0.002525086048990488 2023-01-23 23:53:21.869879: step: 272/77, loss: 2.8563626983668655e-06 2023-01-23 23:53:23.189896: step: 276/77, loss: 2.3602433429914527e-06 2023-01-23 23:53:24.489310: step: 280/77, loss: 1.1607835403992794e-06 2023-01-23 23:53:25.776074: step: 284/77, loss: 0.0001664245210122317 2023-01-23 23:53:27.080830: step: 288/77, loss: 7.075824396451935e-05 2023-01-23 23:53:28.365389: step: 292/77, loss: 
7.216061931103468e-06 2023-01-23 23:53:29.687952: step: 296/77, loss: 0.0018026909092441201 2023-01-23 23:53:31.034360: step: 300/77, loss: 0.008671221323311329 2023-01-23 23:53:32.350478: step: 304/77, loss: 9.186310489894822e-05 2023-01-23 23:53:33.667906: step: 308/77, loss: 9.05151591723552e-06 2023-01-23 23:53:34.948434: step: 312/77, loss: 0.037061866372823715 2023-01-23 23:53:36.232307: step: 316/77, loss: 0.03523270785808563 2023-01-23 23:53:37.567829: step: 320/77, loss: 0.00010251326602883637 2023-01-23 23:53:38.875772: step: 324/77, loss: 0.00019953730225097388 2023-01-23 23:53:40.125690: step: 328/77, loss: 4.9880240112543106e-05 2023-01-23 23:53:41.437475: step: 332/77, loss: 0.0003752804477699101 2023-01-23 23:53:42.749286: step: 336/77, loss: 7.744783943053335e-05 2023-01-23 23:53:44.053697: step: 340/77, loss: 0.0009344042628072202 2023-01-23 23:53:45.354833: step: 344/77, loss: 0.011867690831422806 2023-01-23 23:53:46.664734: step: 348/77, loss: 0.058403074741363525 2023-01-23 23:53:47.946524: step: 352/77, loss: 0.00012268997670616955 2023-01-23 23:53:49.278397: step: 356/77, loss: 0.0006498557631857693 2023-01-23 23:53:50.586303: step: 360/77, loss: 0.008733597584068775 2023-01-23 23:53:51.920761: step: 364/77, loss: 6.70292429276742e-05 2023-01-23 23:53:53.243295: step: 368/77, loss: 7.543620085925795e-06 2023-01-23 23:53:54.552593: step: 372/77, loss: 0.00015152778360061347 2023-01-23 23:53:55.837940: step: 376/77, loss: 0.00035861300420947373 2023-01-23 23:53:57.180481: step: 380/77, loss: 0.002343467902392149 2023-01-23 23:53:58.510606: step: 384/77, loss: 6.884265530970879e-07 2023-01-23 23:53:59.798122: step: 388/77, loss: 0.018556831404566765 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5405405405405406, 'r': 0.017969451931716084, 'f1': 0.034782608695652174}, 'combined': 0.02385093167701863, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Korean: {'template': {'p': 0.9466666666666667, 'r': 0.5298507462686567, 'f1': 0.6794258373205742}, 'slot': {'p': 0.5277777777777778, 'r': 0.017070979335130278, 'f1': 0.03307223672758921}, 'combined': 0.022470132130706543, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5526315789473685, 'r': 0.018867924528301886, 'f1': 0.036490008688097306}, 'combined': 0.02502172024326672, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 
0.0}, 'combined': 0.0, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:55:40.903649: step: 4/77, loss: 0.01800728775560856 2023-01-23 23:55:42.271508: step: 8/77, loss: 0.00031166861299425364 2023-01-23 23:55:43.633234: step: 12/77, loss: 3.561168705346063e-05 2023-01-23 23:55:44.929342: step: 16/77, loss: 9.953811286322889e-07 2023-01-23 23:55:46.257591: step: 20/77, loss: 3.455784099060111e-05 2023-01-23 23:55:47.560861: step: 24/77, loss: 0.05289270728826523 2023-01-23 23:55:48.899144: step: 28/77, loss: 0.05813051760196686 2023-01-23 23:55:50.208769: step: 32/77, loss: 0.0003971444966737181 2023-01-23 23:55:51.533827: step: 36/77, loss: 0.000515043968334794 2023-01-23 23:55:52.824255: step: 40/77, loss: 0.0005164144677110016 2023-01-23 23:55:54.109474: step: 44/77, loss: 0.00014868365542497486 2023-01-23 23:55:55.386411: step: 48/77, loss: 9.044520993484184e-06 2023-01-23 23:55:56.671195: step: 52/77, loss: 0.0019840134773403406 2023-01-23 23:55:58.014225: step: 56/77, loss: 0.0015603199135512114 2023-01-23 23:55:59.322146: step: 60/77, loss: 3.0100272851996124e-07 2023-01-23 23:56:00.703020: step: 64/77, loss: 0.004665685817599297 2023-01-23 23:56:02.029816: step: 68/77, loss: 0.0009288692381232977 2023-01-23 23:56:03.342110: step: 72/77, loss: 2.654560375958681e-05 2023-01-23 23:56:04.621825: step: 76/77, loss: 0.00770430825650692 
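Note on the score dictionaries in these epoch summaries (this is a reading of the logged numbers, not of train.py itself): every f1 value is the harmonic mean of its own p and r, and every 'combined' value is the product of the template f1 and the slot f1. For example, the dev summary that repeats in the epochs above gives 0.7368421052631579 * 0.07029876977152899 = 0.05179909351586346, and the epoch-24 Chinese test row gives 0.6859903381642511 * 0.036458333333333336 = 0.025010064412238325. A minimal sketch that reproduces these relations follows; the function names are illustrative and are not taken from train.py:

    # Sketch of the relations observed in the logged score dicts.
    # Names are illustrative only; they do not come from train.py.

    def f1_score(p: float, r: float) -> float:
        # Harmonic mean of precision and recall; taken as 0 when p + r == 0.
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    def combined_score(template_f1: float, slot_f1: float) -> float:
        # Every logged 'combined' field equals template f1 * slot f1.
        return template_f1 * slot_f1

    # Recurring dev summary from the epochs above:
    template_f1 = f1_score(1.0, 0.5833333333333334)    # ~0.736842, as logged
    slot_f1 = f1_score(0.5, 0.03780718336483932)       # ~0.070299, as logged
    print(combined_score(template_f1, slot_f1))        # ~0.051799, the logged 'combined'

The same product relation holds for the Test and Sample rows as well. The per-epoch "Loss: ..." line is presumably a summary (likely the mean) of the step losses printed above it, though the exact reduction is not shown in the log.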
2023-01-23 23:56:05.863070: step: 80/77, loss: 0.0023559420369565487 2023-01-23 23:56:07.126725: step: 84/77, loss: 0.1431427150964737 2023-01-23 23:56:08.434239: step: 88/77, loss: 5.917061571381055e-05 2023-01-23 23:56:09.731607: step: 92/77, loss: 0.0010026609525084496 2023-01-23 23:56:11.033380: step: 96/77, loss: 0.0008562597795389593 2023-01-23 23:56:12.301145: step: 100/77, loss: 1.6152119997059344e-06 2023-01-23 23:56:13.628339: step: 104/77, loss: 2.2409134544432163e-05 2023-01-23 23:56:14.955790: step: 108/77, loss: 0.0005694585852324963 2023-01-23 23:56:16.265022: step: 112/77, loss: 4.734771209768951e-05 2023-01-23 23:56:17.596720: step: 116/77, loss: 0.00010205221769865602 2023-01-23 23:56:18.901622: step: 120/77, loss: 0.021125314757227898 2023-01-23 23:56:20.204895: step: 124/77, loss: 0.0005711138946935534 2023-01-23 23:56:21.501183: step: 128/77, loss: 3.6221754271537066e-05 2023-01-23 23:56:22.849994: step: 132/77, loss: 4.6742330596316606e-05 2023-01-23 23:56:24.142158: step: 136/77, loss: 1.1689064194797538e-05 2023-01-23 23:56:25.442211: step: 140/77, loss: 0.0037418147549033165 2023-01-23 23:56:26.783121: step: 144/77, loss: 0.02389315329492092 2023-01-23 23:56:28.134505: step: 148/77, loss: 1.0398875019745901e-05 2023-01-23 23:56:29.438951: step: 152/77, loss: 0.0007807943620719016 2023-01-23 23:56:30.758416: step: 156/77, loss: 0.027726897969841957 2023-01-23 23:56:32.042485: step: 160/77, loss: 0.004366937559098005 2023-01-23 23:56:33.398501: step: 164/77, loss: 0.033863894641399384 2023-01-23 23:56:34.647611: step: 168/77, loss: 0.0015756358625367284 2023-01-23 23:56:36.000430: step: 172/77, loss: 0.0041992259211838245 2023-01-23 23:56:37.266091: step: 176/77, loss: 0.012798842042684555 2023-01-23 23:56:38.542676: step: 180/77, loss: 0.0006098680896684527 2023-01-23 23:56:39.841204: step: 184/77, loss: 0.09311774373054504 2023-01-23 23:56:41.202485: step: 188/77, loss: 0.002651342423632741 2023-01-23 23:56:42.516724: step: 192/77, loss: 4.315126716392115e-05 2023-01-23 23:56:43.860508: step: 196/77, loss: 0.0016577377682551742 2023-01-23 23:56:45.155349: step: 200/77, loss: 4.362039180705324e-05 2023-01-23 23:56:46.465457: step: 204/77, loss: 0.0006896284176036716 2023-01-23 23:56:47.751160: step: 208/77, loss: 0.00015805228031240404 2023-01-23 23:56:49.034535: step: 212/77, loss: 0.0008956255624070764 2023-01-23 23:56:50.266677: step: 216/77, loss: 2.9472585083567537e-06 2023-01-23 23:56:51.572801: step: 220/77, loss: 0.008746463805437088 2023-01-23 23:56:52.898547: step: 224/77, loss: 2.413982542748272e-07 2023-01-23 23:56:54.231324: step: 228/77, loss: 2.847241921699606e-05 2023-01-23 23:56:55.544201: step: 232/77, loss: 0.00012537377187982202 2023-01-23 23:56:56.897505: step: 236/77, loss: 1.7284813793594367e-06 2023-01-23 23:56:58.243508: step: 240/77, loss: 0.005231163930147886 2023-01-23 23:56:59.567346: step: 244/77, loss: 7.504272070946172e-05 2023-01-23 23:57:00.867153: step: 248/77, loss: 0.0022762068547308445 2023-01-23 23:57:02.105593: step: 252/77, loss: 6.481864147644956e-07 2023-01-23 23:57:03.418025: step: 256/77, loss: 6.56557085676468e-06 2023-01-23 23:57:04.705174: step: 260/77, loss: 8.091215590866341e-07 2023-01-23 23:57:06.040822: step: 264/77, loss: 0.0018453343072906137 2023-01-23 23:57:07.356026: step: 268/77, loss: 0.0010375462006777525 2023-01-23 23:57:08.677087: step: 272/77, loss: 0.0003331643238198012 2023-01-23 23:57:09.945862: step: 276/77, loss: 0.01020114403218031 2023-01-23 23:57:11.236291: step: 280/77, loss: 
2.0280707758502103e-05 2023-01-23 23:57:12.566487: step: 284/77, loss: 2.369290086789988e-05 2023-01-23 23:57:13.883043: step: 288/77, loss: 5.972215149085969e-05 2023-01-23 23:57:15.175818: step: 292/77, loss: 0.0002550583449192345 2023-01-23 23:57:16.470816: step: 296/77, loss: 0.00032209386699832976 2023-01-23 23:57:17.761807: step: 300/77, loss: 0.008743447251617908 2023-01-23 23:57:19.088785: step: 304/77, loss: 0.004930737894028425 2023-01-23 23:57:20.384134: step: 308/77, loss: 0.0005963391740806401 2023-01-23 23:57:21.669405: step: 312/77, loss: 0.0011195436818525195 2023-01-23 23:57:22.959260: step: 316/77, loss: 0.0007906003156676888 2023-01-23 23:57:24.296530: step: 320/77, loss: 0.000907824607565999 2023-01-23 23:57:25.614466: step: 324/77, loss: 0.00027822371339425445 2023-01-23 23:57:26.939157: step: 328/77, loss: 7.88813122198917e-06 2023-01-23 23:57:28.225096: step: 332/77, loss: 0.0008845935808494687 2023-01-23 23:57:29.481986: step: 336/77, loss: 0.003997663501650095 2023-01-23 23:57:30.779093: step: 340/77, loss: 0.009393981657922268 2023-01-23 23:57:32.120594: step: 344/77, loss: 1.8462516891304404e-05 2023-01-23 23:57:33.427200: step: 348/77, loss: 0.002342846244573593 2023-01-23 23:57:34.730930: step: 352/77, loss: 0.00029463417013175786 2023-01-23 23:57:36.041837: step: 356/77, loss: 6.728203516104259e-06 2023-01-23 23:57:37.326521: step: 360/77, loss: 7.0464707278006244e-06 2023-01-23 23:57:38.581769: step: 364/77, loss: 6.384744665410835e-06 2023-01-23 23:57:39.906639: step: 368/77, loss: 0.00016121372755151242 2023-01-23 23:57:41.181852: step: 372/77, loss: 0.0008221327443607152 2023-01-23 23:57:42.490569: step: 376/77, loss: 1.3854460121365264e-05 2023-01-23 23:57:43.818775: step: 380/77, loss: 0.0015989854000508785 2023-01-23 23:57:45.121335: step: 384/77, loss: 3.710380269694724e-07 2023-01-23 23:57:46.425959: step: 388/77, loss: 0.0377693772315979 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Chinese: {'template': {'p': 0.9726027397260274, 'r': 0.5298507462686567, 'f1': 0.6859903381642511}, 'slot': {'p': 0.5384615384615384, 'r': 0.018867924528301886, 'f1': 0.036458333333333336}, 'combined': 0.025010064412238325, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Korean: {'template': {'p': 0.9583333333333334, 'r': 0.5149253731343284, 'f1': 0.6699029126213593}, 'slot': {'p': 0.5384615384615384, 'r': 0.018867924528301886, 'f1': 0.036458333333333336}, 'combined': 0.02442354368932039, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Russian: {'template': {'p': 0.958904109589041, 'r': 0.5223880597014925, 'f1': 0.6763285024154589}, 'slot': {'p': 0.5384615384615384, 'r': 0.018867924528301886, 'f1': 0.036458333333333336}, 'combined': 0.024657809983896942, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: 
{'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:59:27.364364: step: 4/77, loss: 0.00019989734573755413 2023-01-23 23:59:28.686002: step: 8/77, loss: 0.00016956939361989498 2023-01-23 23:59:29.952353: step: 12/77, loss: 0.00010101838415721431 2023-01-23 23:59:31.233673: step: 16/77, loss: 2.011552851399756e-06 2023-01-23 23:59:32.531712: step: 20/77, loss: 2.4170629330910742e-05 2023-01-23 23:59:33.843899: step: 24/77, loss: 1.9858913219650276e-05 2023-01-23 23:59:35.096167: step: 28/77, loss: 2.0115439838264138e-06 2023-01-23 23:59:36.398443: step: 32/77, loss: 9.859095371211879e-06 2023-01-23 23:59:37.726488: step: 36/77, loss: 6.314497295534238e-05 2023-01-23 23:59:39.042684: step: 40/77, loss: 0.0018583748023957014 2023-01-23 23:59:40.313770: step: 44/77, loss: 5.1027829613303766e-05 2023-01-23 23:59:41.610166: step: 48/77, loss: 2.4957269033620832e-06 2023-01-23 23:59:42.938194: step: 52/77, loss: 3.938041118090041e-06 2023-01-23 23:59:44.268606: step: 56/77, loss: 1.0922369710897328e-06 2023-01-23 23:59:45.614905: step: 60/77, loss: 
0.01869729720056057 2023-01-23 23:59:46.902790: step: 64/77, loss: 1.926662207551999e-06 2023-01-23 23:59:48.198139: step: 68/77, loss: 7.571773312520236e-05 2023-01-23 23:59:49.524601: step: 72/77, loss: 0.0001227992179337889 2023-01-23 23:59:50.802451: step: 76/77, loss: 0.00013048169785179198 2023-01-23 23:59:52.120820: step: 80/77, loss: 4.857740236730024e-07 2023-01-23 23:59:53.430826: step: 84/77, loss: 0.00011595044634304941 2023-01-23 23:59:54.694910: step: 88/77, loss: 5.274962404655525e-07 2023-01-23 23:59:55.994117: step: 92/77, loss: 0.0015729828737676144 2023-01-23 23:59:57.289839: step: 96/77, loss: 0.0002714064030442387 2023-01-23 23:59:58.622074: step: 100/77, loss: 2.099433231705916e-06 2023-01-23 23:59:59.937020: step: 104/77, loss: 3.167681825289037e-06 2023-01-24 00:00:01.279690: step: 108/77, loss: 6.566444426425733e-06 2023-01-24 00:00:02.553659: step: 112/77, loss: 0.00035041532828472555 2023-01-24 00:00:03.842420: step: 116/77, loss: 4.5660151954507455e-05 2023-01-24 00:00:05.169353: step: 120/77, loss: 7.048171255519264e-07 2023-01-24 00:00:06.429871: step: 124/77, loss: 2.510757440177258e-06 2023-01-24 00:00:07.724276: step: 128/77, loss: 2.2947726563415927e-07 2023-01-24 00:00:09.031405: step: 132/77, loss: 0.00018129732052329928 2023-01-24 00:00:10.355157: step: 136/77, loss: 0.01591060496866703 2023-01-24 00:00:11.670371: step: 140/77, loss: 1.1553951480891556e-05 2023-01-24 00:00:12.935329: step: 144/77, loss: 0.03363659605383873 2023-01-24 00:00:14.227003: step: 148/77, loss: 0.0003402983129490167 2023-01-24 00:00:15.540197: step: 152/77, loss: 2.4763119654380716e-05 2023-01-24 00:00:16.815282: step: 156/77, loss: 7.888831532909535e-06 2023-01-24 00:00:18.149722: step: 160/77, loss: 1.748649447108619e-05 2023-01-24 00:00:19.418115: step: 164/77, loss: 0.00025645800633355975 2023-01-24 00:00:20.771013: step: 168/77, loss: 6.515035056509078e-05 2023-01-24 00:00:22.089218: step: 172/77, loss: 3.0088311177678406e-05 2023-01-24 00:00:23.404271: step: 176/77, loss: 0.0021862066350877285 2023-01-24 00:00:24.658432: step: 180/77, loss: 0.021061912178993225 2023-01-24 00:00:26.002649: step: 184/77, loss: 9.166287782136351e-06 2023-01-24 00:00:27.307181: step: 188/77, loss: 8.791878644842654e-05 2023-01-24 00:00:28.633537: step: 192/77, loss: 0.0008699322934262455 2023-01-24 00:00:29.998344: step: 196/77, loss: 1.5573326891171746e-05 2023-01-24 00:00:31.318315: step: 200/77, loss: 0.0001874137669801712 2023-01-24 00:00:32.603116: step: 204/77, loss: 0.0031759031116962433 2023-01-24 00:00:33.916133: step: 208/77, loss: 2.25968560698675e-05 2023-01-24 00:00:35.163538: step: 212/77, loss: 3.1723657230031677e-06 2023-01-24 00:00:36.437670: step: 216/77, loss: 0.0007224121945910156 2023-01-24 00:00:37.709037: step: 220/77, loss: 4.0004897527978756e-06 2023-01-24 00:00:39.033508: step: 224/77, loss: 8.537290705135092e-05 2023-01-24 00:00:40.353391: step: 228/77, loss: 0.016224876046180725 2023-01-24 00:00:41.629030: step: 232/77, loss: 8.661108950036578e-06 2023-01-24 00:00:42.893090: step: 236/77, loss: 0.0001592586631886661 2023-01-24 00:00:44.223722: step: 240/77, loss: 7.482444198103622e-05 2023-01-24 00:00:45.459953: step: 244/77, loss: 3.312555418233387e-05 2023-01-24 00:00:46.749275: step: 248/77, loss: 0.048145100474357605 2023-01-24 00:00:48.025345: step: 252/77, loss: 0.005470104981213808 2023-01-24 00:00:49.333583: step: 256/77, loss: 0.00039273095899261534 2023-01-24 00:00:50.662546: step: 260/77, loss: 0.001016460475511849 2023-01-24 00:00:51.958250: step: 
264/77, loss: 2.205238843089319e-06 2023-01-24 00:00:53.270617: step: 268/77, loss: 5.840338417328894e-06 2023-01-24 00:00:54.607321: step: 272/77, loss: 5.447593593999045e-06 2023-01-24 00:00:55.948400: step: 276/77, loss: 1.457311100239167e-05 2023-01-24 00:00:57.224641: step: 280/77, loss: 3.234842370147817e-05 2023-01-24 00:00:58.558972: step: 284/77, loss: 5.5307500588241965e-06 2023-01-24 00:00:59.892301: step: 288/77, loss: 0.011254596523940563 2023-01-24 00:01:01.224333: step: 292/77, loss: 1.9097829863312654e-05 2023-01-24 00:01:02.566766: step: 296/77, loss: 0.002273230580613017 2023-01-24 00:01:03.845637: step: 300/77, loss: 0.0008224258781410754 2023-01-24 00:01:05.103841: step: 304/77, loss: 5.630550003843382e-05 2023-01-24 00:01:06.382469: step: 308/77, loss: 0.0006396897952072322 2023-01-24 00:01:07.588258: step: 312/77, loss: 0.029791001230478287 2023-01-24 00:01:08.906185: step: 316/77, loss: 1.3331845366337802e-05 2023-01-24 00:01:10.196503: step: 320/77, loss: 0.0009355137008242309 2023-01-24 00:01:11.496266: step: 324/77, loss: 0.0004965240368619561 2023-01-24 00:01:12.823197: step: 328/77, loss: 0.0001969095173990354 2023-01-24 00:01:14.099983: step: 332/77, loss: 3.2388783438364044e-05 2023-01-24 00:01:15.419919: step: 336/77, loss: 0.031088722869753838 2023-01-24 00:01:16.774207: step: 340/77, loss: 0.07551313936710358 2023-01-24 00:01:18.089026: step: 344/77, loss: 0.00021745124831795692 2023-01-24 00:01:19.373760: step: 348/77, loss: 6.191056854731869e-06 2023-01-24 00:01:20.688055: step: 352/77, loss: 0.0001629363396205008 2023-01-24 00:01:22.040006: step: 356/77, loss: 2.5176310373353772e-05 2023-01-24 00:01:23.364274: step: 360/77, loss: 5.35976723767817e-05 2023-01-24 00:01:24.677683: step: 364/77, loss: 3.601461139624007e-05 2023-01-24 00:01:26.032148: step: 368/77, loss: 0.005792475305497646 2023-01-24 00:01:27.322402: step: 372/77, loss: 0.0003093659470323473 2023-01-24 00:01:28.623013: step: 376/77, loss: 0.014475381933152676 2023-01-24 00:01:29.931749: step: 380/77, loss: 0.018493305891752243 2023-01-24 00:01:31.265181: step: 384/77, loss: 6.823409057687968e-05 2023-01-24 00:01:32.584357: step: 388/77, loss: 2.8444370400393382e-05 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5116279069767442, 'r': 0.019766397124887692, 'f1': 0.03806228373702422}, 'combined': 0.026099851705388033, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.4888888888888889, 'r': 0.019766397124887692, 'f1': 0.0379965457685665}, 'combined': 0.02605477424130274, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.4888888888888889, 'r': 0.019766397124887692, 
'f1': 0.0379965457685665}, 'combined': 0.02605477424130274, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:03:13.254898: step: 4/77, loss: 0.007430397905409336 2023-01-24 00:03:14.591303: step: 8/77, loss: 3.7073266412335215e-06 2023-01-24 00:03:15.906545: step: 12/77, loss: 0.00017145395395345986 2023-01-24 00:03:17.195532: step: 16/77, loss: 2.0266714273020625e-05 2023-01-24 00:03:18.515182: step: 20/77, loss: 9.06505920283962e-06 2023-01-24 00:03:19.817516: step: 24/77, loss: 0.0007317511481232941 2023-01-24 00:03:21.114227: step: 28/77, loss: 0.0009495350532233715 2023-01-24 00:03:22.448303: step: 32/77, loss: 0.0004258228000253439 2023-01-24 00:03:23.753098: step: 36/77, loss: 0.0002683318161871284 2023-01-24 00:03:25.078434: step: 40/77, loss: 1.2264461474842392e-05 2023-01-24 00:03:26.319290: step: 44/77, loss: 
8.771388093009591e-05 2023-01-24 00:03:27.581870: step: 48/77, loss: 0.002332504605874419 2023-01-24 00:03:28.906597: step: 52/77, loss: 0.010463829152286053 2023-01-24 00:03:30.246896: step: 56/77, loss: 7.15622809366323e-05 2023-01-24 00:03:31.551954: step: 60/77, loss: 0.0021188717801123857 2023-01-24 00:03:32.871004: step: 64/77, loss: 0.0003772116033360362 2023-01-24 00:03:34.132467: step: 68/77, loss: 0.0013899505138397217 2023-01-24 00:03:35.457674: step: 72/77, loss: 0.010037485510110855 2023-01-24 00:03:36.800282: step: 76/77, loss: 0.0036808447912335396 2023-01-24 00:03:38.070015: step: 80/77, loss: 0.00031674507772549987 2023-01-24 00:03:39.408021: step: 84/77, loss: 0.0002259061875520274 2023-01-24 00:03:40.680019: step: 88/77, loss: 0.005058180540800095 2023-01-24 00:03:41.961613: step: 92/77, loss: 0.0035927272401750088 2023-01-24 00:03:43.294388: step: 96/77, loss: 0.0003780303231906146 2023-01-24 00:03:44.563439: step: 100/77, loss: 0.056739721447229385 2023-01-24 00:03:45.855465: step: 104/77, loss: 0.027822664007544518 2023-01-24 00:03:47.155828: step: 108/77, loss: 0.009071031585335732 2023-01-24 00:03:48.466798: step: 112/77, loss: 0.0011930856853723526 2023-01-24 00:03:49.784000: step: 116/77, loss: 1.883709046524018e-05 2023-01-24 00:03:51.138805: step: 120/77, loss: 0.0004183150304015726 2023-01-24 00:03:52.482586: step: 124/77, loss: 0.0018396122613921762 2023-01-24 00:03:53.769999: step: 128/77, loss: 2.9145189728296828e-06 2023-01-24 00:03:55.024339: step: 132/77, loss: 0.0014383037341758609 2023-01-24 00:03:56.257711: step: 136/77, loss: 4.455409339243488e-07 2023-01-24 00:03:57.599060: step: 140/77, loss: 0.0012948352377861738 2023-01-24 00:03:58.907291: step: 144/77, loss: 0.034051429480314255 2023-01-24 00:04:00.206574: step: 148/77, loss: 0.00012335414066910744 2023-01-24 00:04:01.553342: step: 152/77, loss: 5.0850761908805e-05 2023-01-24 00:04:02.833634: step: 156/77, loss: 9.871408110484481e-05 2023-01-24 00:04:04.142140: step: 160/77, loss: 8.625948794360738e-06 2023-01-24 00:04:05.470694: step: 164/77, loss: 5.438213975139661e-06 2023-01-24 00:04:06.760448: step: 168/77, loss: 2.664577550604008e-05 2023-01-24 00:04:08.025892: step: 172/77, loss: 0.02862611413002014 2023-01-24 00:04:09.349799: step: 176/77, loss: 0.0001641131821088493 2023-01-24 00:04:10.647315: step: 180/77, loss: 0.0002279715408803895 2023-01-24 00:04:11.967095: step: 184/77, loss: 0.003732960671186447 2023-01-24 00:04:13.255071: step: 188/77, loss: 4.4081476517021656e-05 2023-01-24 00:04:14.584783: step: 192/77, loss: 0.00012288712605368346 2023-01-24 00:04:15.942225: step: 196/77, loss: 0.04758511483669281 2023-01-24 00:04:17.234349: step: 200/77, loss: 0.00033601955510675907 2023-01-24 00:04:18.534147: step: 204/77, loss: 2.661273356352467e-06 2023-01-24 00:04:19.849563: step: 208/77, loss: 0.00040764236473478377 2023-01-24 00:04:21.130277: step: 212/77, loss: 0.0009218723280355334 2023-01-24 00:04:22.396888: step: 216/77, loss: 0.0031653456389904022 2023-01-24 00:04:23.774053: step: 220/77, loss: 0.0006369269103743136 2023-01-24 00:04:25.041500: step: 224/77, loss: 0.0004652494681067765 2023-01-24 00:04:26.384272: step: 228/77, loss: 1.1316476957290433e-05 2023-01-24 00:04:27.723070: step: 232/77, loss: 0.000484424497699365 2023-01-24 00:04:29.046339: step: 236/77, loss: 0.00010217508679488674 2023-01-24 00:04:30.341187: step: 240/77, loss: 8.024393900996074e-05 2023-01-24 00:04:31.640603: step: 244/77, loss: 0.00034927070373669267 2023-01-24 00:04:32.905884: step: 248/77, loss: 
0.0011456592474132776 2023-01-24 00:04:34.224189: step: 252/77, loss: 0.00012689371942542493 2023-01-24 00:04:35.549926: step: 256/77, loss: 5.80046144023072e-05 2023-01-24 00:04:36.886780: step: 260/77, loss: 0.00023313306155614555 2023-01-24 00:04:38.226136: step: 264/77, loss: 1.313894972554408e-05 2023-01-24 00:04:39.544000: step: 268/77, loss: 0.08034791797399521 2023-01-24 00:04:40.831010: step: 272/77, loss: 7.919698691694066e-05 2023-01-24 00:04:42.166938: step: 276/77, loss: 0.00041778976446948946 2023-01-24 00:04:43.458792: step: 280/77, loss: 0.00893727969378233 2023-01-24 00:04:44.767677: step: 284/77, loss: 0.011848215013742447 2023-01-24 00:04:46.051623: step: 288/77, loss: 0.004302650224417448 2023-01-24 00:04:47.373546: step: 292/77, loss: 0.00017654645489528775 2023-01-24 00:04:48.674286: step: 296/77, loss: 1.5690379768784624e-06 2023-01-24 00:04:49.965444: step: 300/77, loss: 0.006918806582689285 2023-01-24 00:04:51.268029: step: 304/77, loss: 9.462075922783697e-07 2023-01-24 00:04:52.559139: step: 308/77, loss: 0.0036865954753011465 2023-01-24 00:04:53.884232: step: 312/77, loss: 0.000789219920989126 2023-01-24 00:04:55.218566: step: 316/77, loss: 0.003209709422662854 2023-01-24 00:04:56.449367: step: 320/77, loss: 0.03584851697087288 2023-01-24 00:04:57.709059: step: 324/77, loss: 1.1696663932525553e-05 2023-01-24 00:04:59.022289: step: 328/77, loss: 3.522487304508104e-06 2023-01-24 00:05:00.314256: step: 332/77, loss: 8.103526488412172e-05 2023-01-24 00:05:01.659039: step: 336/77, loss: 0.034114085137844086 2023-01-24 00:05:02.999129: step: 340/77, loss: 0.00014881583047099411 2023-01-24 00:05:04.279481: step: 344/77, loss: 4.456402621144662e-06 2023-01-24 00:05:05.587138: step: 348/77, loss: 3.7652584978786763e-06 2023-01-24 00:05:06.917035: step: 352/77, loss: 9.143738134298474e-05 2023-01-24 00:05:08.227892: step: 356/77, loss: 2.854807462426834e-06 2023-01-24 00:05:09.569292: step: 360/77, loss: 0.016171233728528023 2023-01-24 00:05:10.880113: step: 364/77, loss: 0.00020946790755260736 2023-01-24 00:05:12.240795: step: 368/77, loss: 2.257371079394943e-06 2023-01-24 00:05:13.536838: step: 372/77, loss: 1.5824274441911257e-06 2023-01-24 00:05:14.861329: step: 376/77, loss: 0.01277543418109417 2023-01-24 00:05:16.162374: step: 380/77, loss: 1.7895966948344721e-06 2023-01-24 00:05:17.469739: step: 384/77, loss: 0.000522006128448993 2023-01-24 00:05:18.740430: step: 388/77, loss: 0.0005052937776781619 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Chinese: {'template': {'p': 0.935064935064935, 'r': 0.5373134328358209, 'f1': 0.6824644549763033}, 'slot': {'p': 0.5227272727272727, 'r': 0.020664869721473494, 'f1': 0.03975799481417459}, 'combined': 0.02713341826180635, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5111111111111111, 'r': 0.020664869721473494, 'f1': 0.039723661485319514}, 'combined': 0.02723908216136195, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 
'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5454545454545454, 'r': 0.0215633423180593, 'f1': 0.04148660328435609}, 'combined': 0.028447956537844172, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:06:59.499871: step: 4/77, loss: 2.9802315282267955e-08 2023-01-24 00:07:00.805764: step: 8/77, loss: 4.729399370262399e-05 2023-01-24 00:07:02.080310: step: 12/77, loss: 2.3319753381656483e-05 2023-01-24 00:07:03.388718: step: 16/77, loss: 6.732360361638712e-06 2023-01-24 00:07:04.793369: step: 20/77, loss: 0.00017802367801778018 2023-01-24 00:07:06.145226: step: 24/77, loss: 0.0005213890108279884 2023-01-24 00:07:07.448337: step: 28/77, loss: 
1.830662222346291e-05 2023-01-24 00:07:08.733878: step: 32/77, loss: 1.3812922361466917e-06 2023-01-24 00:07:10.058502: step: 36/77, loss: 2.0610837964341044e-05 2023-01-24 00:07:11.356656: step: 40/77, loss: 6.030067379469983e-05 2023-01-24 00:07:12.713386: step: 44/77, loss: 0.0003737257211469114 2023-01-24 00:07:14.053774: step: 48/77, loss: 0.013727515004575253 2023-01-24 00:07:15.367897: step: 52/77, loss: 0.00031524596852250397 2023-01-24 00:07:16.673623: step: 56/77, loss: 4.54884275313816e-06 2023-01-24 00:07:17.979367: step: 60/77, loss: 1.8272516172146425e-05 2023-01-24 00:07:19.311918: step: 64/77, loss: 2.406430576229468e-05 2023-01-24 00:07:20.633516: step: 68/77, loss: 0.0020606357138603926 2023-01-24 00:07:21.952948: step: 72/77, loss: 0.0001100394147215411 2023-01-24 00:07:23.257816: step: 76/77, loss: 0.0023476046044379473 2023-01-24 00:07:24.512400: step: 80/77, loss: 0.012934298254549503 2023-01-24 00:07:25.783560: step: 84/77, loss: 0.0006640170468017459 2023-01-24 00:07:27.107157: step: 88/77, loss: 0.055560242384672165 2023-01-24 00:07:28.419192: step: 92/77, loss: 0.0002472563646733761 2023-01-24 00:07:29.761225: step: 96/77, loss: 0.0034907220397144556 2023-01-24 00:07:31.070679: step: 100/77, loss: 5.403320392360911e-05 2023-01-24 00:07:32.385744: step: 104/77, loss: 0.3002181947231293 2023-01-24 00:07:33.753618: step: 108/77, loss: 3.9009173633530736e-05 2023-01-24 00:07:35.109842: step: 112/77, loss: 0.00019515615713316947 2023-01-24 00:07:36.395497: step: 116/77, loss: 6.758770905435085e-05 2023-01-24 00:07:37.664343: step: 120/77, loss: 1.9229602912673727e-05 2023-01-24 00:07:39.001543: step: 124/77, loss: 3.519226083881222e-05 2023-01-24 00:07:40.306794: step: 128/77, loss: 0.03842562437057495 2023-01-24 00:07:41.612435: step: 132/77, loss: 1.1213931429665536e-05 2023-01-24 00:07:42.913245: step: 136/77, loss: 0.0033438641112297773 2023-01-24 00:07:44.218169: step: 140/77, loss: 0.0003317440568935126 2023-01-24 00:07:45.524228: step: 144/77, loss: 0.018152881413698196 2023-01-24 00:07:46.863215: step: 148/77, loss: 0.007869703695178032 2023-01-24 00:07:48.162676: step: 152/77, loss: 1.4549676052411087e-05 2023-01-24 00:07:49.486839: step: 156/77, loss: 0.0007096330518834293 2023-01-24 00:07:50.818505: step: 160/77, loss: 4.861999332206324e-05 2023-01-24 00:07:52.149690: step: 164/77, loss: 8.46480397740379e-05 2023-01-24 00:07:53.446446: step: 168/77, loss: 1.8626415965172782e-07 2023-01-24 00:07:54.765562: step: 172/77, loss: 0.004078193102031946 2023-01-24 00:07:56.074618: step: 176/77, loss: 0.003917471971362829 2023-01-24 00:07:57.362817: step: 180/77, loss: 1.5645575786038535e-06 2023-01-24 00:07:58.681009: step: 184/77, loss: 1.8433138393447734e-05 2023-01-24 00:07:59.990442: step: 188/77, loss: 0.028852352872490883 2023-01-24 00:08:01.338126: step: 192/77, loss: 2.281202569065499e-06 2023-01-24 00:08:02.627922: step: 196/77, loss: 3.7961595808155835e-05 2023-01-24 00:08:03.974103: step: 200/77, loss: 5.125956477058935e-07 2023-01-24 00:08:05.302413: step: 204/77, loss: 1.3097493365421542e-06 2023-01-24 00:08:06.614109: step: 208/77, loss: 6.903317989781499e-05 2023-01-24 00:08:07.919251: step: 212/77, loss: 0.02180004119873047 2023-01-24 00:08:09.238431: step: 216/77, loss: 0.21051611006259918 2023-01-24 00:08:10.544318: step: 220/77, loss: 4.495525354286656e-05 2023-01-24 00:08:11.823385: step: 224/77, loss: 2.0334904547780752e-05 2023-01-24 00:08:13.188852: step: 228/77, loss: 2.8787917472072877e-06 2023-01-24 00:08:14.471240: step: 232/77, loss: 
1.4238229596230667e-05 2023-01-24 00:08:15.794451: step: 236/77, loss: 0.0013085382524877787 2023-01-24 00:08:17.122679: step: 240/77, loss: 9.823291475186124e-05 2023-01-24 00:08:18.468300: step: 244/77, loss: 2.773040978354402e-05 2023-01-24 00:08:19.774411: step: 248/77, loss: 4.009727490483783e-06 2023-01-24 00:08:21.081585: step: 252/77, loss: 0.02277224510908127 2023-01-24 00:08:22.385698: step: 256/77, loss: 4.7612797061447054e-05 2023-01-24 00:08:23.675955: step: 260/77, loss: 0.0007515024044550955 2023-01-24 00:08:25.007498: step: 264/77, loss: 0.000233790124184452 2023-01-24 00:08:26.335448: step: 268/77, loss: 0.0009438807610422373 2023-01-24 00:08:27.692399: step: 272/77, loss: 0.00012066077761119232 2023-01-24 00:08:29.016746: step: 276/77, loss: 0.00019588488794397563 2023-01-24 00:08:30.305847: step: 280/77, loss: 1.4081181234359974e-06 2023-01-24 00:08:31.562999: step: 284/77, loss: 9.208631013279955e-07 2023-01-24 00:08:32.859985: step: 288/77, loss: 0.0034223550464957952 2023-01-24 00:08:34.228126: step: 292/77, loss: 0.00241264165379107 2023-01-24 00:08:35.509566: step: 296/77, loss: 0.00011776632163673639 2023-01-24 00:08:36.842826: step: 300/77, loss: 1.5139357856241986e-06 2023-01-24 00:08:38.115160: step: 304/77, loss: 1.3082610621495405e-06 2023-01-24 00:08:39.428649: step: 308/77, loss: 4.947163461110904e-07 2023-01-24 00:08:40.789602: step: 312/77, loss: 0.0002765147655736655 2023-01-24 00:08:42.089434: step: 316/77, loss: 8.433842140220804e-07 2023-01-24 00:08:43.403675: step: 320/77, loss: 0.00043568917317315936 2023-01-24 00:08:44.738641: step: 324/77, loss: 2.890814698730537e-07 2023-01-24 00:08:46.060180: step: 328/77, loss: 2.4800025130389258e-05 2023-01-24 00:08:47.373776: step: 332/77, loss: 1.0922199180640746e-06 2023-01-24 00:08:48.686903: step: 336/77, loss: 0.0032466454431414604 2023-01-24 00:08:50.005928: step: 340/77, loss: 2.011512378885527e-06 2023-01-24 00:08:51.369433: step: 344/77, loss: 0.0068920995108783245 2023-01-24 00:08:52.705989: step: 348/77, loss: 0.00033975904807448387 2023-01-24 00:08:54.049385: step: 352/77, loss: 0.0018391618505120277 2023-01-24 00:08:55.354679: step: 356/77, loss: 0.00025979187921620905 2023-01-24 00:08:56.668525: step: 360/77, loss: 1.081810523828608e-06 2023-01-24 00:08:57.977520: step: 364/77, loss: 9.730581041367259e-06 2023-01-24 00:08:59.304687: step: 368/77, loss: 8.958841499406844e-05 2023-01-24 00:09:00.626633: step: 372/77, loss: 2.4988178211060585e-06 2023-01-24 00:09:01.906855: step: 376/77, loss: 9.418761692359112e-06 2023-01-24 00:09:03.210866: step: 380/77, loss: 3.4123237924177374e-07 2023-01-24 00:09:04.517872: step: 384/77, loss: 0.0009906215127557516 2023-01-24 00:09:05.832027: step: 388/77, loss: 0.00025988329434767365 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.5454545454545454, 'r': 0.0215633423180593, 'f1': 0.04148660328435609}, 'combined': 0.02857096263922636, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Korean: {'template': {'p': 0.9358974358974359, 
'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.5454545454545454, 'r': 0.0215633423180593, 'f1': 0.04148660328435609}, 'combined': 0.02857096263922636, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.5454545454545454, 'r': 0.0215633423180593, 'f1': 0.04148660328435609}, 'combined': 0.02857096263922636, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:10:47.011902: step: 4/77, loss: 0.02698381617665291 2023-01-24 00:10:48.293948: step: 8/77, loss: 0.014960349537432194 2023-01-24 00:10:49.596362: step: 12/77, loss: 
0.00016170661547221243 2023-01-24 00:10:50.915903: step: 16/77, loss: 2.743231789281708e-06 2023-01-24 00:10:52.246053: step: 20/77, loss: 2.041266816377174e-05 2023-01-24 00:10:53.522685: step: 24/77, loss: 1.0182542609982193e-05 2023-01-24 00:10:54.841410: step: 28/77, loss: 0.013845100067555904 2023-01-24 00:10:56.117766: step: 32/77, loss: 1.3410337942332262e-06 2023-01-24 00:10:57.454094: step: 36/77, loss: 0.0002716188901104033 2023-01-24 00:10:58.760444: step: 40/77, loss: 7.084755634423345e-05 2023-01-24 00:11:00.047303: step: 44/77, loss: 7.2348607318417635e-06 2023-01-24 00:11:01.379833: step: 48/77, loss: 3.7784000141982688e-06 2023-01-24 00:11:02.670660: step: 52/77, loss: 4.1414394218008965e-05 2023-01-24 00:11:04.058994: step: 56/77, loss: 0.0008531028870493174 2023-01-24 00:11:05.337343: step: 60/77, loss: 5.092239007353783e-06 2023-01-24 00:11:06.652212: step: 64/77, loss: 1.0877824507815603e-07 2023-01-24 00:11:07.952276: step: 68/77, loss: 2.8638578442041762e-06 2023-01-24 00:11:09.265321: step: 72/77, loss: 1.7592465155757964e-05 2023-01-24 00:11:10.532121: step: 76/77, loss: 0.02608170546591282 2023-01-24 00:11:11.883561: step: 80/77, loss: 0.0017640494043007493 2023-01-24 00:11:13.133642: step: 84/77, loss: 3.150631164317019e-05 2023-01-24 00:11:14.414832: step: 88/77, loss: 0.01277581974864006 2023-01-24 00:11:15.746509: step: 92/77, loss: 0.0038522332906723022 2023-01-24 00:11:17.055990: step: 96/77, loss: 0.005919828079640865 2023-01-24 00:11:18.365219: step: 100/77, loss: 6.7724749897024594e-06 2023-01-24 00:11:19.672528: step: 104/77, loss: 4.903834997094236e-05 2023-01-24 00:11:20.981670: step: 108/77, loss: 0.0008131438517011702 2023-01-24 00:11:22.300335: step: 112/77, loss: 2.5047547751455568e-06 2023-01-24 00:11:23.630528: step: 116/77, loss: 1.7881383485018887e-08 2023-01-24 00:11:24.903762: step: 120/77, loss: 7.390854648292589e-07 2023-01-24 00:11:26.192717: step: 124/77, loss: 1.3411039390121005e-08 2023-01-24 00:11:27.496581: step: 128/77, loss: 0.001617640140466392 2023-01-24 00:11:28.834342: step: 132/77, loss: 2.515138248782023e-06 2023-01-24 00:11:30.121586: step: 136/77, loss: 2.3407696971844416e-06 2023-01-24 00:11:31.464329: step: 140/77, loss: 0.00797793548554182 2023-01-24 00:11:32.769360: step: 144/77, loss: 1.1081776392529719e-05 2023-01-24 00:11:34.064957: step: 148/77, loss: 8.45935646793805e-05 2023-01-24 00:11:35.373134: step: 152/77, loss: 8.520575647708029e-06 2023-01-24 00:11:36.658255: step: 156/77, loss: 0.0008155218092724681 2023-01-24 00:11:37.924322: step: 160/77, loss: 6.945910172362346e-06 2023-01-24 00:11:39.223375: step: 164/77, loss: 1.0877838008127583e-07 2023-01-24 00:11:40.545889: step: 168/77, loss: 0.00039749397546984255 2023-01-24 00:11:41.881632: step: 172/77, loss: 1.5221035027934704e-05 2023-01-24 00:11:43.246234: step: 176/77, loss: 0.00025134222232736647 2023-01-24 00:11:44.542393: step: 180/77, loss: 2.8312189925827624e-08 2023-01-24 00:11:45.857832: step: 184/77, loss: 0.0001161669279099442 2023-01-24 00:11:47.177783: step: 188/77, loss: 0.001666489290073514 2023-01-24 00:11:48.479443: step: 192/77, loss: 0.0012751700123772025 2023-01-24 00:11:49.774064: step: 196/77, loss: 0.007269307505339384 2023-01-24 00:11:51.093864: step: 200/77, loss: 2.5331956976515357e-08 2023-01-24 00:11:52.366009: step: 204/77, loss: 0.0032305726781487465 2023-01-24 00:11:53.687930: step: 208/77, loss: 8.356692887900863e-06 2023-01-24 00:11:54.974443: step: 212/77, loss: 1.1679512681439519e-05 2023-01-24 00:11:56.274202: step: 216/77, 
loss: 9.30438909563236e-06 2023-01-24 00:11:57.589498: step: 220/77, loss: 1.3792471690976527e-05 2023-01-24 00:11:58.867921: step: 224/77, loss: 6.201413634698838e-05 2023-01-24 00:12:00.152653: step: 228/77, loss: 0.05839015915989876 2023-01-24 00:12:01.497331: step: 232/77, loss: 0.0013058074982836843 2023-01-24 00:12:02.812927: step: 236/77, loss: 0.00023657410929445177 2023-01-24 00:12:04.156230: step: 240/77, loss: 0.00731617258861661 2023-01-24 00:12:05.469920: step: 244/77, loss: 1.2996385521546472e-05 2023-01-24 00:12:06.780143: step: 248/77, loss: 9.02002375369193e-06 2023-01-24 00:12:08.091894: step: 252/77, loss: 1.4901160305669237e-09 2023-01-24 00:12:09.371578: step: 256/77, loss: 0.028248654678463936 2023-01-24 00:12:10.675508: step: 260/77, loss: 0.0002220904134446755 2023-01-24 00:12:12.001224: step: 264/77, loss: 3.0514261197822634e-06 2023-01-24 00:12:13.341723: step: 268/77, loss: 0.0024806009605526924 2023-01-24 00:12:14.658810: step: 272/77, loss: 6.70551330017588e-08 2023-01-24 00:12:15.958047: step: 276/77, loss: 0.018847720697522163 2023-01-24 00:12:17.245526: step: 280/77, loss: 7.949468272272497e-05 2023-01-24 00:12:18.499833: step: 284/77, loss: 3.2037277719609847e-07 2023-01-24 00:12:19.838855: step: 288/77, loss: 3.218242682123673e-06 2023-01-24 00:12:21.121774: step: 292/77, loss: 7.455462309735594e-06 2023-01-24 00:12:22.385856: step: 296/77, loss: 0.002329503884539008 2023-01-24 00:12:23.675483: step: 300/77, loss: 0.002198019064962864 2023-01-24 00:12:24.986058: step: 304/77, loss: 7.818923040758818e-05 2023-01-24 00:12:26.316869: step: 308/77, loss: 7.755040314805228e-06 2023-01-24 00:12:27.618547: step: 312/77, loss: 0.05237256735563278 2023-01-24 00:12:28.882707: step: 316/77, loss: 2.682207700388517e-08 2023-01-24 00:12:30.225537: step: 320/77, loss: 0.0048786792904138565 2023-01-24 00:12:31.502289: step: 324/77, loss: 7.092779696904472e-07 2023-01-24 00:12:32.782812: step: 328/77, loss: 1.3091619621263817e-05 2023-01-24 00:12:34.101029: step: 332/77, loss: 1.5555854133708635e-06 2023-01-24 00:12:35.366400: step: 336/77, loss: 2.3005568436929025e-06 2023-01-24 00:12:36.661600: step: 340/77, loss: 2.3130747649702244e-05 2023-01-24 00:12:38.010026: step: 344/77, loss: 4.930124123347923e-05 2023-01-24 00:12:39.303865: step: 348/77, loss: 0.01467626728117466 2023-01-24 00:12:40.615183: step: 352/77, loss: 0.0 2023-01-24 00:12:41.901526: step: 356/77, loss: 0.000560337386559695 2023-01-24 00:12:43.214054: step: 360/77, loss: 9.936784408637322e-06 2023-01-24 00:12:44.496419: step: 364/77, loss: 3.043984543182887e-05 2023-01-24 00:12:45.797707: step: 368/77, loss: 4.470347647611561e-09 2023-01-24 00:12:47.138916: step: 372/77, loss: 2.0830659650528105e-06 2023-01-24 00:12:48.435140: step: 376/77, loss: 0.000701559241861105 2023-01-24 00:12:49.766829: step: 380/77, loss: 0.00021446785831358284 2023-01-24 00:12:51.082014: step: 384/77, loss: 4.815194188267924e-06 2023-01-24 00:12:52.416440: step: 388/77, loss: 3.844836464850232e-05 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Chinese: {'template': {'p': 0.9605263157894737, 'r': 0.5447761194029851, 'f1': 0.6952380952380952}, 'slot': {'p': 0.4864864864864865, 'r': 0.016172506738544475, 'f1': 0.03130434782608696}, 'combined': 0.021763975155279502, 'epoch': 28} Dev 
Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Korean: {'template': {'p': 0.96, 'r': 0.5373134328358209, 'f1': 0.6889952153110048}, 'slot': {'p': 0.5, 'r': 0.016172506738544475, 'f1': 0.03133159268929504}, 'combined': 0.021587317450997543, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Russian: {'template': {'p': 0.96, 'r': 0.5373134328358209, 'f1': 0.6889952153110048}, 'slot': {'p': 0.5, 'r': 0.016172506738544475, 'f1': 0.03133159268929504}, 'combined': 0.021587317450997543, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 
--learning_rate 2e-4 2023-01-24 00:14:32.779759: step: 4/77, loss: 7.4505797087454084e-09 2023-01-24 00:14:34.118412: step: 8/77, loss: 0.0012389952316880226 2023-01-24 00:14:35.422021: step: 12/77, loss: 0.0008553470834158361 2023-01-24 00:14:36.706286: step: 16/77, loss: 0.001485483255237341 2023-01-24 00:14:37.978745: step: 20/77, loss: 1.9099739802186377e-05 2023-01-24 00:14:39.236412: step: 24/77, loss: 0.00016891444101929665 2023-01-24 00:14:40.568640: step: 28/77, loss: 0.008944258093833923 2023-01-24 00:14:41.857784: step: 32/77, loss: 5.243182386038825e-06 2023-01-24 00:14:43.226333: step: 36/77, loss: 4.7396704758284613e-05 2023-01-24 00:14:44.519337: step: 40/77, loss: 1.192092824453539e-08 2023-01-24 00:14:45.777003: step: 44/77, loss: 5.602820237982087e-05 2023-01-24 00:14:47.104348: step: 48/77, loss: 7.584470154142764e-07 2023-01-24 00:14:48.426515: step: 52/77, loss: 1.2671500371652655e-05 2023-01-24 00:14:49.733171: step: 56/77, loss: 0.013027241453528404 2023-01-24 00:14:51.001590: step: 60/77, loss: 2.7997486995445797e-06 2023-01-24 00:14:52.252560: step: 64/77, loss: 5.140071880305186e-05 2023-01-24 00:14:53.586231: step: 68/77, loss: 2.4520515580661595e-05 2023-01-24 00:14:54.913785: step: 72/77, loss: 5.601465090876445e-05 2023-01-24 00:14:56.170390: step: 76/77, loss: 0.000764780561439693 2023-01-24 00:14:57.460730: step: 80/77, loss: 0.00017838591884355992 2023-01-24 00:14:58.762316: step: 84/77, loss: 0.002022041007876396 2023-01-24 00:15:00.097426: step: 88/77, loss: 1.7858295905170962e-05 2023-01-24 00:15:01.375346: step: 92/77, loss: 3.2931208693298686e-07 2023-01-24 00:15:02.695081: step: 96/77, loss: 0.00017654162365943193 2023-01-24 00:15:04.034445: step: 100/77, loss: 1.0698643109208206e-06 2023-01-24 00:15:05.419533: step: 104/77, loss: 0.0002982413861900568 2023-01-24 00:15:06.749662: step: 108/77, loss: 1.660316229390446e-05 2023-01-24 00:15:08.071156: step: 112/77, loss: 0.05332023650407791 2023-01-24 00:15:09.355556: step: 116/77, loss: 1.0651420780050103e-05 2023-01-24 00:15:10.606057: step: 120/77, loss: 8.344643021018783e-08 2023-01-24 00:15:11.916284: step: 124/77, loss: 0.0026168618351221085 2023-01-24 00:15:13.219451: step: 128/77, loss: 6.367082096403465e-05 2023-01-24 00:15:14.478796: step: 132/77, loss: 0.0002877833612728864 2023-01-24 00:15:15.812249: step: 136/77, loss: 1.1413845868446515e-06 2023-01-24 00:15:17.104174: step: 140/77, loss: 0.00026432372396811843 2023-01-24 00:15:18.389866: step: 144/77, loss: 1.5199152869627142e-07 2023-01-24 00:15:19.688855: step: 148/77, loss: 3.5932185710407794e-05 2023-01-24 00:15:20.978472: step: 152/77, loss: 0.01380218006670475 2023-01-24 00:15:22.244920: step: 156/77, loss: 6.514821961900452e-06 2023-01-24 00:15:23.539425: step: 160/77, loss: 0.0014075781218707561 2023-01-24 00:15:24.812467: step: 164/77, loss: 1.0381198080722243e-05 2023-01-24 00:15:26.119988: step: 168/77, loss: 3.829587740256102e-07 2023-01-24 00:15:27.394138: step: 172/77, loss: 9.25333893064817e-07 2023-01-24 00:15:28.677550: step: 176/77, loss: 8.659628656459972e-05 2023-01-24 00:15:29.973996: step: 180/77, loss: 1.9891298506991006e-05 2023-01-24 00:15:31.226906: step: 184/77, loss: 9.536709200119731e-08 2023-01-24 00:15:32.531656: step: 188/77, loss: 0.0009981810580939054 2023-01-24 00:15:33.836928: step: 192/77, loss: 0.007413438055664301 2023-01-24 00:15:35.084604: step: 196/77, loss: 0.00016956772014964372 2023-01-24 00:15:36.355112: step: 200/77, loss: 0.011380542069673538 2023-01-24 00:15:37.570382: step: 204/77, loss: 
3.677731365314685e-05 2023-01-24 00:15:38.882636: step: 208/77, loss: 3.720476797752781e-06 2023-01-24 00:15:40.210747: step: 212/77, loss: 7.52019423089223e-06 2023-01-24 00:15:41.520388: step: 216/77, loss: 4.250639449310256e-06 2023-01-24 00:15:42.820245: step: 220/77, loss: 0.002503826282918453 2023-01-24 00:15:44.159137: step: 224/77, loss: 8.589692697569262e-06 2023-01-24 00:15:45.488558: step: 228/77, loss: 0.000257056177360937 2023-01-24 00:15:46.766226: step: 232/77, loss: 0.011861293576657772 2023-01-24 00:15:48.097251: step: 236/77, loss: 8.238661393988878e-05 2023-01-24 00:15:49.413118: step: 240/77, loss: 3.692115114972694e-06 2023-01-24 00:15:50.762070: step: 244/77, loss: 0.0013775610132142901 2023-01-24 00:15:52.125991: step: 248/77, loss: 5.708014214178547e-06 2023-01-24 00:15:53.432915: step: 252/77, loss: 9.803019929677248e-05 2023-01-24 00:15:54.689701: step: 256/77, loss: 1.1860923905260279e-06 2023-01-24 00:15:56.000922: step: 260/77, loss: 3.4864085591834737e-06 2023-01-24 00:15:57.283396: step: 264/77, loss: 0.0 2023-01-24 00:15:58.572386: step: 268/77, loss: 4.917311002827773e-07 2023-01-24 00:15:59.892689: step: 272/77, loss: 2.5791382540774066e-06 2023-01-24 00:16:01.194618: step: 276/77, loss: 0.00025995992473326623 2023-01-24 00:16:02.504737: step: 280/77, loss: 0.0004886506358161569 2023-01-24 00:16:03.804041: step: 284/77, loss: 1.5720268038421636e-06 2023-01-24 00:16:05.118782: step: 288/77, loss: 0.0006751567125320435 2023-01-24 00:16:06.396943: step: 292/77, loss: 6.645806251981412e-07 2023-01-24 00:16:07.710147: step: 296/77, loss: 1.4095946880843258e-06 2023-01-24 00:16:09.003068: step: 300/77, loss: 0.00038762306212447584 2023-01-24 00:16:10.324062: step: 304/77, loss: 6.694863259326667e-05 2023-01-24 00:16:11.604943: step: 308/77, loss: 9.282422251999378e-05 2023-01-24 00:16:12.965086: step: 312/77, loss: 1.0120409569935873e-05 2023-01-24 00:16:14.298952: step: 316/77, loss: 7.882480304033379e-07 2023-01-24 00:16:15.598633: step: 320/77, loss: 0.0034625986590981483 2023-01-24 00:16:16.959672: step: 324/77, loss: 1.8059590729535557e-06 2023-01-24 00:16:18.244494: step: 328/77, loss: 0.015585715882480145 2023-01-24 00:16:19.514556: step: 332/77, loss: 0.0009471587836742401 2023-01-24 00:16:20.797510: step: 336/77, loss: 0.017104748636484146 2023-01-24 00:16:22.143040: step: 340/77, loss: 3.7698914638895076e-06 2023-01-24 00:16:23.475565: step: 344/77, loss: 1.548868021927774e-05 2023-01-24 00:16:24.745007: step: 348/77, loss: 5.443015652417671e-06 2023-01-24 00:16:26.024932: step: 352/77, loss: 9.432288265998068e-07 2023-01-24 00:16:27.351705: step: 356/77, loss: 1.0172500878979918e-05 2023-01-24 00:16:28.678279: step: 360/77, loss: 5.2717285143444315e-05 2023-01-24 00:16:29.988785: step: 364/77, loss: 3.0212870115065016e-05 2023-01-24 00:16:31.287095: step: 368/77, loss: 0.0006890245713293552 2023-01-24 00:16:32.584900: step: 372/77, loss: 0.00010110129369422793 2023-01-24 00:16:33.905945: step: 376/77, loss: 2.0903256881865673e-05 2023-01-24 00:16:35.242773: step: 380/77, loss: 3.038154318346642e-06 2023-01-24 00:16:36.532405: step: 384/77, loss: 5.572960048993991e-07 2023-01-24 00:16:37.829831: step: 388/77, loss: 1.3288265108712949e-05 ================================================== Loss: 0.002 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29} Test Chinese: {'template': 
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Korean: {'template': {'p': 0.9733333333333334, 'r': 0.5447761194029851, 'f1': 0.6985645933014354}, 'slot': {'p': 0.5348837209302325, 'r': 0.020664869721473494, 'f1': 0.03979238754325259}, 'combined': 0.027797553020645346, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Russian: {'template': {'p': 0.9733333333333334, 'r': 0.5447761194029851, 'f1': 0.6985645933014354}, 'slot': {'p': 0.5348837209302325, 'r': 0.020664869721473494, 'f1': 0.03979238754325259}, 'combined': 0.027797553020645346, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4}
Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4}
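
Note on the reported scores: the per-language dicts above appear to follow the standard precision/recall/F1 relationship, and the 'combined' value matches the product of the template F1 and slot F1 (e.g., Dev Chinese at epoch 29: 0.7368421... x 0.0702987... = 0.0517990...). A minimal sketch that reproduces those numbers; the helper names are illustrative and not taken from train.py:

```python
# Sketch: reproduce the logged 'combined' score, assuming
# combined = template_f1 * slot_f1 (consistent with the numbers above).

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall (0 when both are 0)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template_p, template_r, slot_p, slot_r):
    return f1(template_p, template_r) * f1(slot_p, slot_r)

# Dev Chinese, epoch 29 (values copied from the log above):
print(f1(1.0, 0.5833333333333334))               # ~0.7368421052631579
print(f1(0.5, 0.03780718336483932))              # ~0.07029876977152899
print(combined_score(1.0, 0.5833333333333334,
                     0.5, 0.03780718336483932))  # ~0.05179909351586346
```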
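
The "Current best result" block records, separately for each language, the epoch with the highest dev 'combined' score seen so far (epoch 2 for Chinese and Korean, epoch 4 for Russian in this run; the epoch-29 dev scores only tie the stored best, so the earlier epochs are kept). A minimal sketch of that kind of bookkeeping, under the assumption that selection is keyed on the dev 'combined' value; the names `best` and `update_best` are illustrative, not from train.py:

```python
# Sketch of per-language best-result tracking, keyed on the dev 'combined' score.
# The dict layout mirrors the log lines above.

best = {}  # language -> {'dev': ..., 'test': ..., 'sample': ...}

def update_best(language: str, dev: dict, test: dict, sample: dict) -> None:
    """Keep the epoch with the strictly highest dev 'combined' score per language."""
    if language not in best or dev['combined'] > best[language]['dev']['combined']:
        best[language] = {'dev': dev, 'test': test, 'sample': sample}

# Example with the epoch-29 Chinese dev scores from this log; since the value
# only ties the stored epoch-2 result, a strict '>' leaves the earlier epoch in place.
dev_zh = {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579},
          'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899},
          'combined': 0.05179909351586346, 'epoch': 29}
```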