Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trans_rep.weight: torch.Size([1024, 2048]) >>> trans_rep.bias: torch.Size([1024]) >>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768]) >>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Etiplate.layers.0.weight: 
torch.Size([450, 768]) >>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6]) >>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> 
completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([4]) >>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> 
overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) n_trainable_params: 582185936, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:44:38.035853: step: 4/77, loss: 1.040492296218872 2023-01-23 22:44:39.306079: step: 8/77, loss: 1.0503621101379395 2023-01-23 22:44:40.601698: step: 12/77, loss: 1.05849289894104 2023-01-23 22:44:41.948607: step: 16/77, loss: 1.0572280883789062 2023-01-23 22:44:43.277878: step: 20/77, loss: 1.0390177965164185 2023-01-23 22:44:44.585280: step: 24/77, loss: 1.0531857013702393 2023-01-23 22:44:45.876145: step: 28/77, loss: 1.057201862335205 2023-01-23 22:44:47.197365: step: 32/77, loss: 1.0431196689605713 2023-01-23 22:44:48.522609: step: 36/77, loss: 1.0291414260864258 2023-01-23 22:44:49.809807: step: 40/77, loss: 1.0287554264068604 2023-01-23 22:44:51.107023: step: 44/77, loss: 1.0200912952423096 2023-01-23 22:44:52.374187: step: 48/77, loss: 1.006379246711731 2023-01-23 22:44:53.690246: step: 52/77, loss: 1.0050897598266602 2023-01-23 22:44:55.007986: step: 56/77, loss: 0.9895217418670654 2023-01-23 22:44:56.335551: step: 60/77, loss: 0.9909875392913818 2023-01-23 22:44:57.668747: step: 64/77, loss: 0.9767402410507202 2023-01-23 22:44:58.942943: step: 68/77, loss: 0.9686396718025208 2023-01-23 22:45:00.293413: step: 72/77, loss: 0.9502049684524536 2023-01-23 22:45:01.672436: step: 76/77, loss: 0.948758065700531 2023-01-23 22:45:02.970074: step: 80/77, loss: 0.9231487512588501 2023-01-23 22:45:04.330004: step: 84/77, loss: 0.9294416904449463 2023-01-23 22:45:05.633281: step: 88/77, loss: 0.902397632598877 2023-01-23 22:45:06.969884: step: 92/77, loss: 0.8896970152854919 2023-01-23 22:45:08.275216: step: 96/77, loss: 0.8582167029380798 2023-01-23 22:45:09.572950: step: 100/77, loss: 0.8548195958137512 2023-01-23 22:45:10.872946: step: 104/77, loss: 0.8246533274650574 2023-01-23 22:45:12.215232: step: 108/77, loss: 0.8212116956710815 2023-01-23 22:45:13.537805: step: 112/77, loss: 0.8350183963775635 2023-01-23 22:45:14.825372: step: 116/77, loss: 0.7815088033676147 2023-01-23 22:45:16.144559: step: 120/77, loss: 0.7805500030517578 2023-01-23 22:45:17.496507: step: 124/77, loss: 0.7633838653564453 2023-01-23 22:45:18.756150: step: 128/77, loss: 0.7470266819000244 2023-01-23 22:45:20.118196: step: 132/77, loss: 0.7090791463851929 2023-01-23 22:45:21.445813: step: 136/77, loss: 0.6641875505447388 2023-01-23 22:45:22.717216: step: 140/77, loss: 0.7156832218170166 2023-01-23 22:45:24.023748: step: 144/77, loss: 0.6273938417434692 2023-01-23 22:45:25.347079: step: 148/77, loss: 0.6106159687042236 2023-01-23 22:45:26.698763: step: 152/77, loss: 0.6362186670303345 2023-01-23 22:45:28.047810: step: 156/77, loss: 0.5732383728027344 2023-01-23 22:45:29.346271: step: 160/77, loss: 0.5817826986312866 2023-01-23 22:45:30.718118: step: 164/77, loss: 0.5319560170173645 2023-01-23 22:45:32.024974: step: 168/77, loss: 0.5354362726211548 2023-01-23 22:45:33.338097: step: 172/77, loss: 0.462534099817276 2023-01-23 22:45:34.627595: step: 176/77, loss: 0.47266411781311035 2023-01-23 22:45:35.934612: step: 180/77, loss: 0.480622798204422 2023-01-23 22:45:37.265206: step: 184/77, loss: 0.3768722116947174 2023-01-23 22:45:38.556679: step: 188/77, loss: 0.35718005895614624 2023-01-23 22:45:39.873515: step: 192/77, loss: 0.3514782190322876 2023-01-23 22:45:41.185306: step: 196/77, loss: 0.316677451133728 2023-01-23 22:45:42.512871: step: 200/77, loss: 0.2975374460220337 2023-01-23 22:45:43.853943: step: 204/77, loss: 0.3182145059108734 2023-01-23 22:45:45.202431: step: 208/77, 
loss: 0.21221506595611572 2023-01-23 22:45:46.519250: step: 212/77, loss: 0.21120867133140564 2023-01-23 22:45:47.849228: step: 216/77, loss: 0.1704612374305725 2023-01-23 22:45:49.166898: step: 220/77, loss: 0.3029862642288208 2023-01-23 22:45:50.485474: step: 224/77, loss: 0.22104257345199585 2023-01-23 22:45:51.799549: step: 228/77, loss: 0.24535319209098816 2023-01-23 22:45:53.134908: step: 232/77, loss: 0.22974175214767456 2023-01-23 22:45:54.425418: step: 236/77, loss: 0.2581561803817749 2023-01-23 22:45:55.766119: step: 240/77, loss: 0.12163802981376648 2023-01-23 22:45:57.088687: step: 244/77, loss: 0.2190374732017517 2023-01-23 22:45:58.392958: step: 248/77, loss: 0.108123280107975 2023-01-23 22:45:59.748426: step: 252/77, loss: 0.20003128051757812 2023-01-23 22:46:01.059023: step: 256/77, loss: 0.22386214137077332 2023-01-23 22:46:02.341329: step: 260/77, loss: 0.206337109208107 2023-01-23 22:46:03.644498: step: 264/77, loss: 0.07493802160024643 2023-01-23 22:46:04.933229: step: 268/77, loss: 0.08836229145526886 2023-01-23 22:46:06.271703: step: 272/77, loss: 0.1717900037765503 2023-01-23 22:46:07.608438: step: 276/77, loss: 0.12574687600135803 2023-01-23 22:46:08.919188: step: 280/77, loss: 0.18826636672019958 2023-01-23 22:46:10.230195: step: 284/77, loss: 0.05894722789525986 2023-01-23 22:46:11.549311: step: 288/77, loss: 0.15189287066459656 2023-01-23 22:46:12.885372: step: 292/77, loss: 0.05881006643176079 2023-01-23 22:46:14.180923: step: 296/77, loss: 0.13599269092082977 2023-01-23 22:46:15.471873: step: 300/77, loss: 0.06342396885156631 2023-01-23 22:46:16.796804: step: 304/77, loss: 0.1103890985250473 2023-01-23 22:46:18.138372: step: 308/77, loss: 0.14803536236286163 2023-01-23 22:46:19.447319: step: 312/77, loss: 0.03357389196753502 2023-01-23 22:46:20.818503: step: 316/77, loss: 0.05214579403400421 2023-01-23 22:46:22.137784: step: 320/77, loss: 0.289453387260437 2023-01-23 22:46:23.471869: step: 324/77, loss: 0.07445473968982697 2023-01-23 22:46:24.759593: step: 328/77, loss: 0.06806229054927826 2023-01-23 22:46:26.046285: step: 332/77, loss: 0.09790605306625366 2023-01-23 22:46:27.330220: step: 336/77, loss: 0.30805832147598267 2023-01-23 22:46:28.632240: step: 340/77, loss: 0.17872123420238495 2023-01-23 22:46:29.952354: step: 344/77, loss: 0.06522324681282043 2023-01-23 22:46:31.296719: step: 348/77, loss: 0.031129013746976852 2023-01-23 22:46:32.660076: step: 352/77, loss: 0.07004313170909882 2023-01-23 22:46:34.042519: step: 356/77, loss: 0.05774754285812378 2023-01-23 22:46:35.365737: step: 360/77, loss: 0.14162006974220276 2023-01-23 22:46:36.640704: step: 364/77, loss: 0.13135913014411926 2023-01-23 22:46:37.955040: step: 368/77, loss: 0.1805039495229721 2023-01-23 22:46:39.380358: step: 372/77, loss: 0.04826679825782776 2023-01-23 22:46:40.701055: step: 376/77, loss: 0.12572729587554932 2023-01-23 22:46:42.017255: step: 380/77, loss: 0.10801220685243607 2023-01-23 22:46:43.328277: step: 384/77, loss: 0.07356271892786026 2023-01-23 22:46:44.644245: step: 388/77, loss: 0.18298789858818054 ================================================== Loss: 0.479 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test 
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:48:49.114060: step: 4/77, loss: 0.14832714200019836 2023-01-23 22:48:50.484546: step: 8/77, loss: 0.12168654799461365 2023-01-23 22:48:51.745228: step: 12/77, loss: 0.0857740044593811 2023-01-23 22:48:53.028277: step: 16/77, loss: 0.15103742480278015 2023-01-23 22:48:54.346016: step: 20/77, loss: 0.12808871269226074 2023-01-23 22:48:55.667189: step: 24/77, loss: 0.05073537304997444 2023-01-23 22:48:56.971129: step: 28/77, loss: 0.11223308742046356 2023-01-23 22:48:58.257348: step: 32/77, loss: 0.15117031335830688 2023-01-23 22:48:59.575079: step: 36/77, loss: 0.21810725331306458 2023-01-23 22:49:00.936559: step: 40/77, loss: 0.12143763899803162 2023-01-23 22:49:02.253655: step: 44/77, loss: 0.08966831862926483 2023-01-23 22:49:03.535385: step: 48/77, loss: 0.06273593008518219 2023-01-23 22:49:04.812620: step: 52/77, loss: 0.13776537775993347 2023-01-23 22:49:06.095176: step: 56/77, loss: 0.12719234824180603 2023-01-23 22:49:07.374979: step: 60/77, loss: 0.057064518332481384 2023-01-23 22:49:08.687025: step: 64/77, loss: 0.16016772389411926 2023-01-23 22:49:09.997248: step: 68/77, loss: 
0.11384375393390656 2023-01-23 22:49:11.359739: step: 72/77, loss: 0.08540131151676178 2023-01-23 22:49:12.656011: step: 76/77, loss: 0.12404485046863556 2023-01-23 22:49:13.959777: step: 80/77, loss: 0.3094620704650879 2023-01-23 22:49:15.314635: step: 84/77, loss: 0.1255866438150406 2023-01-23 22:49:16.665487: step: 88/77, loss: 0.1157902330160141 2023-01-23 22:49:18.000781: step: 92/77, loss: 0.22449104487895966 2023-01-23 22:49:19.290964: step: 96/77, loss: 0.036930665373802185 2023-01-23 22:49:20.589213: step: 100/77, loss: 0.0713043063879013 2023-01-23 22:49:21.915162: step: 104/77, loss: 0.12061722576618195 2023-01-23 22:49:23.228485: step: 108/77, loss: 0.051825400441884995 2023-01-23 22:49:24.522590: step: 112/77, loss: 0.037453874945640564 2023-01-23 22:49:25.811195: step: 116/77, loss: 0.16997961699962616 2023-01-23 22:49:27.154340: step: 120/77, loss: 0.055220380425453186 2023-01-23 22:49:28.484038: step: 124/77, loss: 0.1438325196504593 2023-01-23 22:49:29.819385: step: 128/77, loss: 0.10651086270809174 2023-01-23 22:49:31.139437: step: 132/77, loss: 0.09997448325157166 2023-01-23 22:49:32.439393: step: 136/77, loss: 0.12448174506425858 2023-01-23 22:49:33.789838: step: 140/77, loss: 0.1139465942978859 2023-01-23 22:49:35.133422: step: 144/77, loss: 0.10619348287582397 2023-01-23 22:49:36.435283: step: 148/77, loss: 0.11222274601459503 2023-01-23 22:49:37.754101: step: 152/77, loss: 0.10357049852609634 2023-01-23 22:49:39.041292: step: 156/77, loss: 0.11287228763103485 2023-01-23 22:49:40.387937: step: 160/77, loss: 0.15079429745674133 2023-01-23 22:49:41.690830: step: 164/77, loss: 0.11793462932109833 2023-01-23 22:49:43.047904: step: 168/77, loss: 0.05568506568670273 2023-01-23 22:49:44.427756: step: 172/77, loss: 0.1194445788860321 2023-01-23 22:49:45.752531: step: 176/77, loss: 0.03512217849493027 2023-01-23 22:49:47.077057: step: 180/77, loss: 0.14646495878696442 2023-01-23 22:49:48.442482: step: 184/77, loss: 0.07254618406295776 2023-01-23 22:49:49.761221: step: 188/77, loss: 0.038304783403873444 2023-01-23 22:49:51.045611: step: 192/77, loss: 0.0619526244699955 2023-01-23 22:49:52.403871: step: 196/77, loss: 0.16268327832221985 2023-01-23 22:49:53.701826: step: 200/77, loss: 0.1339617669582367 2023-01-23 22:49:55.033670: step: 204/77, loss: 0.06768632680177689 2023-01-23 22:49:56.301780: step: 208/77, loss: 0.06987083703279495 2023-01-23 22:49:57.623616: step: 212/77, loss: 0.061561498790979385 2023-01-23 22:49:58.995539: step: 216/77, loss: 0.04402967542409897 2023-01-23 22:50:00.335741: step: 220/77, loss: 0.09891396760940552 2023-01-23 22:50:01.668860: step: 224/77, loss: 0.17626222968101501 2023-01-23 22:50:03.023882: step: 228/77, loss: 0.028642630204558372 2023-01-23 22:50:04.332592: step: 232/77, loss: 0.10021352767944336 2023-01-23 22:50:05.627074: step: 236/77, loss: 0.0462426021695137 2023-01-23 22:50:06.973119: step: 240/77, loss: 0.09565180540084839 2023-01-23 22:50:08.321114: step: 244/77, loss: 0.10396598279476166 2023-01-23 22:50:09.679000: step: 248/77, loss: 0.11824971437454224 2023-01-23 22:50:10.978575: step: 252/77, loss: 0.11199741065502167 2023-01-23 22:50:12.286453: step: 256/77, loss: 0.08092804253101349 2023-01-23 22:50:13.577558: step: 260/77, loss: 0.061664629727602005 2023-01-23 22:50:14.898441: step: 264/77, loss: 0.07963520288467407 2023-01-23 22:50:16.210088: step: 268/77, loss: 0.1210048645734787 2023-01-23 22:50:17.556143: step: 272/77, loss: 0.12631292641162872 2023-01-23 22:50:18.836877: step: 276/77, loss: 0.07200046628713608 
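The per-step lines in this log follow a fixed "timestamp: step: N/77, loss: value" pattern, and each epoch summary prints its Dev/Test/Sample metrics as Python dict literals, so the loss curve and scores can be recovered from a saved copy of this output. Below is a minimal parsing sketch; the file name train.log and the regular expressions are illustrative assumptions (not part of train.py), and each metrics dict is assumed to sit on a single line of the saved file.

import ast
import re

# One step-loss entry looks like: 2023-01-23 22:44:38.035853: step: 4/77, loss: 1.040492296218872
STEP_RE = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+: step: (\d+)/\d+, loss: ([0-9.]+)")
# One metrics entry looks like: Dev Chinese: {'template': {...}, 'slot': {...}, 'combined': 0.0, 'epoch': 0}
# ("Dev for Chinese" lines in the "Current best result" blocks are deliberately not matched.)
EVAL_RE = re.compile(r"(Dev|Test|Sample) (Chinese|Korean|Russian): (\{.*?'epoch': \d+\})")

with open("train.log", encoding="utf-8") as f:  # assumed file name for the captured output
    text = f.read()

# Step losses in log order as (step counter, loss value) pairs.
losses = [(int(s), float(l)) for s, l in STEP_RE.findall(text)]
# Per-epoch evaluation dicts keyed by split and language.
metrics = [(split, lang, ast.literal_eval(blob)) for split, lang, blob in EVAL_RE.findall(text)]

print(f"parsed {len(losses)} step losses and {len(metrics)} metric blocks")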
2023-01-23 22:50:20.154613: step: 280/77, loss: 0.04203527420759201 2023-01-23 22:50:21.462350: step: 284/77, loss: 0.11212755739688873 2023-01-23 22:50:22.776431: step: 288/77, loss: 0.03090221807360649 2023-01-23 22:50:24.135323: step: 292/77, loss: 0.05292743444442749 2023-01-23 22:50:25.433263: step: 296/77, loss: 0.03338020667433739 2023-01-23 22:50:26.717909: step: 300/77, loss: 0.12936441600322723 2023-01-23 22:50:28.010252: step: 304/77, loss: 0.25219258666038513 2023-01-23 22:50:29.372500: step: 308/77, loss: 0.04136262834072113 2023-01-23 22:50:30.712703: step: 312/77, loss: 0.1652255654335022 2023-01-23 22:50:32.018584: step: 316/77, loss: 0.4338652193546295 2023-01-23 22:50:33.350388: step: 320/77, loss: 0.03826376423239708 2023-01-23 22:50:34.670808: step: 324/77, loss: 0.16006599366664886 2023-01-23 22:50:36.019366: step: 328/77, loss: 0.08961412310600281 2023-01-23 22:50:37.343811: step: 332/77, loss: 0.07449474930763245 2023-01-23 22:50:38.664255: step: 336/77, loss: 0.11611238121986389 2023-01-23 22:50:39.978375: step: 340/77, loss: 0.035404592752456665 2023-01-23 22:50:41.267468: step: 344/77, loss: 0.09039437025785446 2023-01-23 22:50:42.549876: step: 348/77, loss: 0.14755980670452118 2023-01-23 22:50:43.887079: step: 352/77, loss: 0.04100664332509041 2023-01-23 22:50:45.235674: step: 356/77, loss: 0.033255741000175476 2023-01-23 22:50:46.607707: step: 360/77, loss: 0.06925242394208908 2023-01-23 22:50:47.963342: step: 364/77, loss: 0.16693246364593506 2023-01-23 22:50:49.302317: step: 368/77, loss: 0.08979891240596771 2023-01-23 22:50:50.635044: step: 372/77, loss: 0.12025587260723114 2023-01-23 22:50:51.951402: step: 376/77, loss: 0.0694204643368721 2023-01-23 22:50:53.292571: step: 380/77, loss: 0.18677574396133423 2023-01-23 22:50:54.603701: step: 384/77, loss: 0.11339643597602844 2023-01-23 22:50:55.935408: step: 388/77, loss: 0.06470693647861481 ================================================== Loss: 0.107 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 
0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:52:42.442348: step: 4/77, loss: 0.08788135647773743 2023-01-23 22:52:43.785466: step: 8/77, loss: 0.08346004039049149 2023-01-23 22:52:45.037760: step: 12/77, loss: 0.17841340601444244 2023-01-23 22:52:46.346972: step: 16/77, loss: 0.054879773408174515 2023-01-23 22:52:47.649972: step: 20/77, loss: 0.058798372745513916 2023-01-23 22:52:48.982918: step: 24/77, loss: 0.08873613178730011 2023-01-23 22:52:50.268292: step: 28/77, loss: 0.04994595795869827 2023-01-23 22:52:51.604814: step: 32/77, loss: 0.17656423151493073 2023-01-23 22:52:52.927108: step: 36/77, loss: 0.18922853469848633 2023-01-23 22:52:54.272835: step: 40/77, loss: 0.17291899025440216 2023-01-23 22:52:55.615987: step: 44/77, loss: 0.0713772103190422 2023-01-23 22:52:56.903605: step: 48/77, loss: 0.03998291864991188 2023-01-23 22:52:58.215743: step: 52/77, loss: 0.05664319917559624 2023-01-23 22:52:59.532444: step: 56/77, loss: 0.23095250129699707 2023-01-23 22:53:00.887843: step: 60/77, loss: 0.10259470343589783 2023-01-23 22:53:02.210649: step: 64/77, loss: 0.07569926232099533 2023-01-23 22:53:03.492287: step: 68/77, loss: 0.044563233852386475 2023-01-23 22:53:04.820192: step: 72/77, loss: 0.06827056407928467 2023-01-23 22:53:06.110192: step: 76/77, loss: 0.17188124358654022 2023-01-23 22:53:07.401182: step: 80/77, loss: 0.25089898705482483 2023-01-23 22:53:08.710039: step: 84/77, loss: 0.09476040303707123 2023-01-23 22:53:10.055267: step: 88/77, loss: 0.0689239576458931 2023-01-23 22:53:11.365920: step: 92/77, loss: 0.11733505129814148 2023-01-23 22:53:12.724474: step: 96/77, loss: 0.033800750970840454 2023-01-23 22:53:14.012230: step: 100/77, loss: 0.059796176850795746 2023-01-23 22:53:15.312513: step: 104/77, loss: 0.11064587533473969 2023-01-23 22:53:16.621948: step: 108/77, loss: 0.0926768034696579 2023-01-23 22:53:17.927532: step: 112/77, loss: 0.09673449397087097 2023-01-23 22:53:19.225905: step: 116/77, loss: 0.1307319700717926 2023-01-23 22:53:20.588690: step: 120/77, loss: 0.10335015505552292 2023-01-23 22:53:21.968096: step: 124/77, loss: 0.100394606590271 2023-01-23 22:53:23.323672: step: 128/77, loss: 0.07573147118091583 2023-01-23 22:53:24.629843: step: 132/77, loss: 0.12420970946550369 2023-01-23 22:53:25.949874: step: 136/77, loss: 0.04513493925333023 2023-01-23 22:53:27.216170: step: 140/77, loss: 0.07229103893041611 2023-01-23 
22:53:28.532657: step: 144/77, loss: 0.055163364857435226 2023-01-23 22:53:29.849403: step: 148/77, loss: 0.06094565987586975 2023-01-23 22:53:31.131769: step: 152/77, loss: 0.018917806446552277 2023-01-23 22:53:32.427717: step: 156/77, loss: 0.07961555570363998 2023-01-23 22:53:33.737109: step: 160/77, loss: 0.03949800878763199 2023-01-23 22:53:35.033304: step: 164/77, loss: 0.0688449963927269 2023-01-23 22:53:36.346619: step: 168/77, loss: 0.009676387533545494 2023-01-23 22:53:37.678985: step: 172/77, loss: 0.07024532556533813 2023-01-23 22:53:38.981637: step: 176/77, loss: 0.017646795138716698 2023-01-23 22:53:40.316060: step: 180/77, loss: 0.01941852644085884 2023-01-23 22:53:41.610719: step: 184/77, loss: 0.011306710541248322 2023-01-23 22:53:42.956273: step: 188/77, loss: 0.008137895725667477 2023-01-23 22:53:44.272412: step: 192/77, loss: 0.08202481269836426 2023-01-23 22:53:45.601495: step: 196/77, loss: 0.09872758388519287 2023-01-23 22:53:46.894807: step: 200/77, loss: 0.022330768406391144 2023-01-23 22:53:48.208087: step: 204/77, loss: 0.07424715161323547 2023-01-23 22:53:49.581388: step: 208/77, loss: 0.04477814584970474 2023-01-23 22:53:50.900600: step: 212/77, loss: 0.06019924581050873 2023-01-23 22:53:52.192764: step: 216/77, loss: 0.01825646311044693 2023-01-23 22:53:53.533628: step: 220/77, loss: 0.04087033122777939 2023-01-23 22:53:54.851520: step: 224/77, loss: 0.01391011942178011 2023-01-23 22:53:56.155112: step: 228/77, loss: 0.07943939417600632 2023-01-23 22:53:57.519015: step: 232/77, loss: 0.010478628799319267 2023-01-23 22:53:58.853113: step: 236/77, loss: 0.1004880741238594 2023-01-23 22:54:00.173266: step: 240/77, loss: 0.10186842828989029 2023-01-23 22:54:01.541365: step: 244/77, loss: 0.07950830459594727 2023-01-23 22:54:02.832520: step: 248/77, loss: 0.0045279888436198235 2023-01-23 22:54:04.136047: step: 252/77, loss: 0.0602598637342453 2023-01-23 22:54:05.406999: step: 256/77, loss: 0.36025530099868774 2023-01-23 22:54:06.712888: step: 260/77, loss: 0.061081767082214355 2023-01-23 22:54:08.043552: step: 264/77, loss: 0.02526148408651352 2023-01-23 22:54:09.339502: step: 268/77, loss: 0.05052194744348526 2023-01-23 22:54:10.662333: step: 272/77, loss: 0.039078567177057266 2023-01-23 22:54:11.989398: step: 276/77, loss: 0.04826498031616211 2023-01-23 22:54:13.353403: step: 280/77, loss: 0.03132627531886101 2023-01-23 22:54:14.658883: step: 284/77, loss: 0.037446968257427216 2023-01-23 22:54:15.948893: step: 288/77, loss: 0.07468254119157791 2023-01-23 22:54:17.275448: step: 292/77, loss: 0.07227995991706848 2023-01-23 22:54:18.561488: step: 296/77, loss: 0.015393940731883049 2023-01-23 22:54:19.834048: step: 300/77, loss: 0.009335112757980824 2023-01-23 22:54:21.193012: step: 304/77, loss: 0.050310514867305756 2023-01-23 22:54:22.509449: step: 308/77, loss: 0.1641617864370346 2023-01-23 22:54:23.867600: step: 312/77, loss: 0.048241421580314636 2023-01-23 22:54:25.180507: step: 316/77, loss: 0.04193336144089699 2023-01-23 22:54:26.490987: step: 320/77, loss: 0.01817990653216839 2023-01-23 22:54:27.782223: step: 324/77, loss: 0.09641657024621964 2023-01-23 22:54:29.085269: step: 328/77, loss: 0.08564266562461853 2023-01-23 22:54:30.418870: step: 332/77, loss: 0.02044866979122162 2023-01-23 22:54:31.748732: step: 336/77, loss: 0.01461329497396946 2023-01-23 22:54:33.043690: step: 340/77, loss: 0.020636137574911118 2023-01-23 22:54:34.374749: step: 344/77, loss: 0.0273189265280962 2023-01-23 22:54:35.732751: step: 348/77, loss: 0.01171826757490635 2023-01-23 
22:54:37.022654: step: 352/77, loss: 0.030976679176092148 2023-01-23 22:54:38.299563: step: 356/77, loss: 0.04481905698776245 2023-01-23 22:54:39.595103: step: 360/77, loss: 0.016529062762856483 2023-01-23 22:54:40.886328: step: 364/77, loss: 0.02783080004155636 2023-01-23 22:54:42.207258: step: 368/77, loss: 0.3587649166584015 2023-01-23 22:54:43.552897: step: 372/77, loss: 0.03467598557472229 2023-01-23 22:54:44.850646: step: 376/77, loss: 0.06850849837064743 2023-01-23 22:54:46.167561: step: 380/77, loss: 0.01407004427164793 2023-01-23 22:54:47.454506: step: 384/77, loss: 0.0250600166618824 2023-01-23 22:54:48.811422: step: 388/77, loss: 0.07285062223672867 ================================================== Loss: 0.073 -------------------- Dev Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test Chinese: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Dev Korean: {'template': {'p': 0.9210526315789473, 'r': 0.5833333333333334, 'f1': 0.7142857142857143}, 'slot': {'p': 0.43478260869565216, 'r': 0.03780718336483932, 'f1': 0.06956521739130435}, 'combined': 0.04968944099378882, 'epoch': 2} Test Korean: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Dev Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test Russian: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
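Reading the score dictionaries in this log: 'template' and 'slot' each carry precision, recall, and F1 (the harmonic mean 2pr/(p+r), taken as 0 when p + r = 0), and 'combined' is the product of the template F1 and the slot F1 — for the epoch-2 Dev Chinese entry just above, 0.7216494845 * 0.0698080279 ≈ 0.0503769. The per-epoch "Loss:" line appears to be the average of the step losses printed during that epoch. The snippet below is a minimal sketch of that arithmetic only, not code taken from train.py; the function names are illustrative and nothing beyond the relationships just described is assumed.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall; defined as 0.0 when p + r == 0."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def score_entry(template_p, template_r, slot_p, slot_r, epoch):
    """Assemble one score dict in the same shape as the entries printed in this log."""
    template_f1 = f1(template_p, template_r)
    slot_f1 = f1(slot_p, slot_r)
    return {
        "template": {"p": template_p, "r": template_r, "f1": template_f1},
        "slot": {"p": slot_p, "r": slot_r, "f1": slot_f1},
        "combined": template_f1 * slot_f1,  # matches the product of the two F1 values in every logged entry
        "epoch": epoch,
    }

if __name__ == "__main__":
    # Reproduces the epoch-2 "Dev Chinese" entry printed above.
    dev_chinese = score_entry(
        template_p=0.9459459459459459, template_r=0.5833333333333334,
        slot_p=0.45454545454545453, slot_r=0.03780718336483932,
        epoch=2,
    )
    print(dev_chinese)
    assert abs(dev_chinese["combined"] - 0.05037692736726579) < 1e-9

The "New best chinese/korean/russian model..." lines and the "Current best result" block that follows track, per language, which epoch's scores are being kept (Russian, for example, stays at epoch 2 until epoch 4 overtakes it); the exact selection criterion is not recoverable from this excerpt, so the sketch above does not model it.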
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test for Chinese: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 0.9210526315789473, 'r': 0.5833333333333334, 'f1': 0.7142857142857143}, 'slot': {'p': 0.43478260869565216, 'r': 0.03780718336483932, 'f1': 0.06956521739130435}, 'combined': 0.04968944099378882, 'epoch': 2} Test for Korean: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test for Russian: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:56:49.845659: step: 4/77, loss: 0.03292298689484596 2023-01-23 22:56:51.176040: step: 8/77, loss: 0.033934395760297775 2023-01-23 22:56:52.516034: step: 12/77, loss: 0.10713164508342743 2023-01-23 22:56:53.837469: step: 16/77, loss: 0.08980081230401993 2023-01-23 22:56:55.118882: step: 20/77, loss: 0.3165131211280823 2023-01-23 22:56:56.461000: step: 24/77, loss: 0.06864053755998611 2023-01-23 22:56:57.846276: step: 28/77, loss: 0.02137758769094944 2023-01-23 22:56:59.111110: step: 32/77, loss: 0.02520819380879402 2023-01-23 22:57:00.422457: step: 36/77, loss: 0.026509536430239677 2023-01-23 22:57:01.752550: step: 40/77, loss: 0.015540415421128273 2023-01-23 22:57:03.078948: step: 44/77, loss: 0.03514610230922699 2023-01-23 22:57:04.384907: step: 48/77, loss: 0.016079070046544075 2023-01-23 22:57:05.668785: step: 52/77, loss: 0.021822217851877213 2023-01-23 22:57:06.975304: step: 56/77, loss: 0.004361532628536224 2023-01-23 22:57:08.294308: step: 60/77, loss: 0.08357305824756622 2023-01-23 22:57:09.650922: step: 64/77, loss: 0.02250819467008114 2023-01-23 22:57:11.011885: step: 68/77, loss: 0.0820513665676117 2023-01-23 22:57:12.299660: step: 72/77, loss: 0.025214381515979767 2023-01-23 22:57:13.653172: step: 76/77, loss: 
0.013879728503525257 2023-01-23 22:57:14.954971: step: 80/77, loss: 0.03811607137322426 2023-01-23 22:57:16.262711: step: 84/77, loss: 0.023846963420510292 2023-01-23 22:57:17.538546: step: 88/77, loss: 0.10572272539138794 2023-01-23 22:57:18.857135: step: 92/77, loss: 0.027351083233952522 2023-01-23 22:57:20.207041: step: 96/77, loss: 0.013311273418366909 2023-01-23 22:57:21.492477: step: 100/77, loss: 0.09269560128450394 2023-01-23 22:57:22.829416: step: 104/77, loss: 0.009473717771470547 2023-01-23 22:57:24.151205: step: 108/77, loss: 0.006472132168710232 2023-01-23 22:57:25.424844: step: 112/77, loss: 0.0017005936242640018 2023-01-23 22:57:26.771810: step: 116/77, loss: 0.05240405350923538 2023-01-23 22:57:28.054566: step: 120/77, loss: 0.05236407369375229 2023-01-23 22:57:29.362031: step: 124/77, loss: 0.010209540836513042 2023-01-23 22:57:30.728624: step: 128/77, loss: 0.139913409948349 2023-01-23 22:57:32.135865: step: 132/77, loss: 0.031847041100263596 2023-01-23 22:57:33.436633: step: 136/77, loss: 0.07764370739459991 2023-01-23 22:57:34.731379: step: 140/77, loss: 0.026239609345793724 2023-01-23 22:57:36.012552: step: 144/77, loss: 0.04461614042520523 2023-01-23 22:57:37.317370: step: 148/77, loss: 0.003413321916013956 2023-01-23 22:57:38.642484: step: 152/77, loss: 0.00506990123540163 2023-01-23 22:57:39.982272: step: 156/77, loss: 0.07757072895765305 2023-01-23 22:57:41.238099: step: 160/77, loss: 0.005857846699655056 2023-01-23 22:57:42.574372: step: 164/77, loss: 0.05243443325161934 2023-01-23 22:57:43.891503: step: 168/77, loss: 0.007364882621914148 2023-01-23 22:57:45.162260: step: 172/77, loss: 0.007191166281700134 2023-01-23 22:57:46.508406: step: 176/77, loss: 0.09894745796918869 2023-01-23 22:57:47.845387: step: 180/77, loss: 0.030295606702566147 2023-01-23 22:57:49.189402: step: 184/77, loss: 0.0666690245270729 2023-01-23 22:57:50.534537: step: 188/77, loss: 0.02367621660232544 2023-01-23 22:57:51.829916: step: 192/77, loss: 0.04850950092077255 2023-01-23 22:57:53.157251: step: 196/77, loss: 0.007937368005514145 2023-01-23 22:57:54.518115: step: 200/77, loss: 0.004608646966516972 2023-01-23 22:57:55.786355: step: 204/77, loss: 0.1033419519662857 2023-01-23 22:57:57.089416: step: 208/77, loss: 0.07606571912765503 2023-01-23 22:57:58.408261: step: 212/77, loss: 0.010289874859154224 2023-01-23 22:57:59.698226: step: 216/77, loss: 0.2712690234184265 2023-01-23 22:58:00.954138: step: 220/77, loss: 0.01715884730219841 2023-01-23 22:58:02.286144: step: 224/77, loss: 0.009004155173897743 2023-01-23 22:58:03.585602: step: 228/77, loss: 0.03872823715209961 2023-01-23 22:58:04.958534: step: 232/77, loss: 0.08715996891260147 2023-01-23 22:58:06.266865: step: 236/77, loss: 0.0037528513930737972 2023-01-23 22:58:07.585380: step: 240/77, loss: 0.028906524181365967 2023-01-23 22:58:08.953712: step: 244/77, loss: 0.013030106201767921 2023-01-23 22:58:10.296183: step: 248/77, loss: 0.03233833983540535 2023-01-23 22:58:11.609552: step: 252/77, loss: 0.05106557160615921 2023-01-23 22:58:12.941360: step: 256/77, loss: 0.013097782619297504 2023-01-23 22:58:14.230141: step: 260/77, loss: 0.01939336583018303 2023-01-23 22:58:15.534132: step: 264/77, loss: 0.03218713402748108 2023-01-23 22:58:16.832056: step: 268/77, loss: 0.03535167872905731 2023-01-23 22:58:18.155795: step: 272/77, loss: 0.017765024676918983 2023-01-23 22:58:19.470997: step: 276/77, loss: 0.062223754823207855 2023-01-23 22:58:20.782578: step: 280/77, loss: 0.014603447169065475 2023-01-23 22:58:22.069386: step: 284/77, loss: 
0.03279239684343338 2023-01-23 22:58:23.344879: step: 288/77, loss: 0.03463967889547348 2023-01-23 22:58:24.631347: step: 292/77, loss: 0.08478177338838577 2023-01-23 22:58:25.996965: step: 296/77, loss: 0.002551364479586482 2023-01-23 22:58:27.305556: step: 300/77, loss: 0.006090979091823101 2023-01-23 22:58:28.618960: step: 304/77, loss: 0.0030389754101634026 2023-01-23 22:58:29.999155: step: 308/77, loss: 0.05313324183225632 2023-01-23 22:58:31.312494: step: 312/77, loss: 0.021794088184833527 2023-01-23 22:58:32.611296: step: 316/77, loss: 0.02054758369922638 2023-01-23 22:58:33.931292: step: 320/77, loss: 0.041535671800374985 2023-01-23 22:58:35.266257: step: 324/77, loss: 0.025039827451109886 2023-01-23 22:58:36.609151: step: 328/77, loss: 0.023139623925089836 2023-01-23 22:58:37.902425: step: 332/77, loss: 0.01770506612956524 2023-01-23 22:58:39.239152: step: 336/77, loss: 0.0133819580078125 2023-01-23 22:58:40.543964: step: 340/77, loss: 0.10492324829101562 2023-01-23 22:58:41.900795: step: 344/77, loss: 0.04167582839727402 2023-01-23 22:58:43.210897: step: 348/77, loss: 0.07900797575712204 2023-01-23 22:58:44.535569: step: 352/77, loss: 0.00408164644613862 2023-01-23 22:58:45.905192: step: 356/77, loss: 0.04344362020492554 2023-01-23 22:58:47.262574: step: 360/77, loss: 0.0015304482076317072 2023-01-23 22:58:48.555376: step: 364/77, loss: 0.005748497322201729 2023-01-23 22:58:49.930067: step: 368/77, loss: 0.10104496031999588 2023-01-23 22:58:51.246566: step: 372/77, loss: 0.02256450429558754 2023-01-23 22:58:52.497710: step: 376/77, loss: 0.0722183957695961 2023-01-23 22:58:53.797010: step: 380/77, loss: 0.041157275438308716 2023-01-23 22:58:55.094223: step: 384/77, loss: 0.10450397431850433 2023-01-23 22:58:56.438039: step: 388/77, loss: 0.008872132748365402 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Chinese: {'template': {'p': 0.9655172413793104, 'r': 0.4375, 'f1': 0.6021505376344085}, 'slot': {'p': 0.7, 'r': 0.01282051282051282, 'f1': 0.025179856115107917}, 'combined': 0.015162063897269281, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Korean: {'template': {'p': 0.9642857142857143, 'r': 0.421875, 'f1': 0.5869565217391304}, 'slot': {'p': 0.7142857142857143, 'r': 0.013736263736263736, 'f1': 0.026954177897574125}, 'combined': 0.015820930505097856, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Russian: {'template': {'p': 0.9642857142857143, 'r': 0.421875, 'f1': 0.5869565217391304}, 'slot': {'p': 0.7, 'r': 0.01282051282051282, 'f1': 0.025179856115107917}, 'combined': 0.014779480763215514, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 
'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test for Chinese: {'template': {'p': 0.9655172413793104, 'r': 0.4375, 'f1': 0.6021505376344085}, 'slot': {'p': 0.7, 'r': 0.01282051282051282, 'f1': 0.025179856115107917}, 'combined': 0.015162063897269281, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test for Korean: {'template': {'p': 0.9642857142857143, 'r': 0.421875, 'f1': 0.5869565217391304}, 'slot': {'p': 0.7142857142857143, 'r': 0.013736263736263736, 'f1': 0.026954177897574125}, 'combined': 0.015820930505097856, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test for Russian: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:01:01.334896: step: 4/77, loss: 0.03197958320379257 2023-01-23 23:01:02.627561: step: 8/77, loss: 0.012238221243023872 2023-01-23 23:01:03.891324: step: 12/77, loss: 0.012658842839300632 2023-01-23 23:01:05.191623: step: 16/77, loss: 0.05696278437972069 2023-01-23 23:01:06.510220: step: 20/77, loss: 0.06434786319732666 2023-01-23 23:01:07.868070: step: 24/77, loss: 0.027839547023177147 2023-01-23 23:01:09.217080: step: 28/77, loss: 0.004860618617385626 2023-01-23 23:01:10.591883: step: 32/77, loss: 0.09483452886343002 2023-01-23 23:01:11.874441: step: 36/77, loss: 0.019608458504080772 2023-01-23 23:01:13.167103: step: 40/77, loss: 0.01649227924644947 2023-01-23 23:01:14.513443: step: 44/77, loss: 0.07741818577051163 2023-01-23 23:01:15.848362: step: 48/77, loss: 0.043329864740371704 2023-01-23 23:01:17.164341: step: 52/77, loss: 0.03802216053009033 2023-01-23 23:01:18.472599: step: 56/77, loss: 0.14712420105934143 2023-01-23 23:01:19.761246: step: 60/77, loss: 0.021073415875434875 2023-01-23 23:01:21.032006: step: 64/77, loss: 0.10768307000398636 2023-01-23 23:01:22.337993: step: 68/77, loss: 0.006619085557758808 2023-01-23 23:01:23.647012: step: 72/77, loss: 
0.025189127773046494 2023-01-23 23:01:24.948962: step: 76/77, loss: 0.01855679415166378 2023-01-23 23:01:26.282768: step: 80/77, loss: 0.008611946366727352 2023-01-23 23:01:27.564764: step: 84/77, loss: 0.004469956737011671 2023-01-23 23:01:28.855733: step: 88/77, loss: 0.018613183870911598 2023-01-23 23:01:30.189087: step: 92/77, loss: 0.16133186221122742 2023-01-23 23:01:31.474621: step: 96/77, loss: 0.008658718317747116 2023-01-23 23:01:32.814335: step: 100/77, loss: 0.020171891897916794 2023-01-23 23:01:34.129981: step: 104/77, loss: 0.009985811077058315 2023-01-23 23:01:35.470265: step: 108/77, loss: 0.03879367187619209 2023-01-23 23:01:36.769690: step: 112/77, loss: 0.05209491029381752 2023-01-23 23:01:38.037126: step: 116/77, loss: 0.007992057129740715 2023-01-23 23:01:39.321100: step: 120/77, loss: 0.022754168137907982 2023-01-23 23:01:40.673038: step: 124/77, loss: 0.017588071525096893 2023-01-23 23:01:41.972218: step: 128/77, loss: 0.025300100445747375 2023-01-23 23:01:43.278310: step: 132/77, loss: 0.008288135752081871 2023-01-23 23:01:44.566225: step: 136/77, loss: 0.01869431883096695 2023-01-23 23:01:45.826169: step: 140/77, loss: 0.07826438546180725 2023-01-23 23:01:47.121871: step: 144/77, loss: 0.018914898857474327 2023-01-23 23:01:48.432700: step: 148/77, loss: 0.14493244886398315 2023-01-23 23:01:49.705121: step: 152/77, loss: 0.0015709679573774338 2023-01-23 23:01:51.028471: step: 156/77, loss: 0.12118136137723923 2023-01-23 23:01:52.364794: step: 160/77, loss: 0.03633274883031845 2023-01-23 23:01:53.692411: step: 164/77, loss: 0.018765099346637726 2023-01-23 23:01:55.063053: step: 168/77, loss: 0.0025905310176312923 2023-01-23 23:01:56.397875: step: 172/77, loss: 0.0021830382756888866 2023-01-23 23:01:57.688428: step: 176/77, loss: 0.009825218468904495 2023-01-23 23:01:58.960211: step: 180/77, loss: 0.016480347141623497 2023-01-23 23:02:00.245006: step: 184/77, loss: 0.03498028963804245 2023-01-23 23:02:01.537316: step: 188/77, loss: 0.003769653383642435 2023-01-23 23:02:02.809649: step: 192/77, loss: 0.025297515094280243 2023-01-23 23:02:04.189755: step: 196/77, loss: 0.019557366147637367 2023-01-23 23:02:05.524819: step: 200/77, loss: 0.018995683640241623 2023-01-23 23:02:06.862873: step: 204/77, loss: 0.0028202475514262915 2023-01-23 23:02:08.117643: step: 208/77, loss: 0.016043715178966522 2023-01-23 23:02:09.463874: step: 212/77, loss: 0.06249503046274185 2023-01-23 23:02:10.819582: step: 216/77, loss: 0.015046648681163788 2023-01-23 23:02:12.136881: step: 220/77, loss: 0.013907751999795437 2023-01-23 23:02:13.465718: step: 224/77, loss: 0.045797016471624374 2023-01-23 23:02:14.787182: step: 228/77, loss: 0.07333850115537643 2023-01-23 23:02:16.164820: step: 232/77, loss: 0.010844893753528595 2023-01-23 23:02:17.484083: step: 236/77, loss: 0.03742639720439911 2023-01-23 23:02:18.804775: step: 240/77, loss: 0.05061040818691254 2023-01-23 23:02:20.140593: step: 244/77, loss: 0.02508923038840294 2023-01-23 23:02:21.494121: step: 248/77, loss: 0.060133058577775955 2023-01-23 23:02:22.846368: step: 252/77, loss: 0.04423247650265694 2023-01-23 23:02:24.120791: step: 256/77, loss: 0.04805321246385574 2023-01-23 23:02:25.439686: step: 260/77, loss: 0.05071249231696129 2023-01-23 23:02:26.756703: step: 264/77, loss: 0.01529052946716547 2023-01-23 23:02:28.082745: step: 268/77, loss: 0.001533987233415246 2023-01-23 23:02:29.389841: step: 272/77, loss: 0.059901487082242966 2023-01-23 23:02:30.753620: step: 276/77, loss: 0.004801694769412279 2023-01-23 23:02:32.064914: step: 
280/77, loss: 0.029555046930909157 2023-01-23 23:02:33.386780: step: 284/77, loss: 0.014943293295800686 2023-01-23 23:02:34.701050: step: 288/77, loss: 0.12821191549301147 2023-01-23 23:02:36.052285: step: 292/77, loss: 0.12393166869878769 2023-01-23 23:02:37.365042: step: 296/77, loss: 0.005087250843644142 2023-01-23 23:02:38.683452: step: 300/77, loss: 0.06435203552246094 2023-01-23 23:02:39.966086: step: 304/77, loss: 0.10260597616434097 2023-01-23 23:02:41.270285: step: 308/77, loss: 0.017721746116876602 2023-01-23 23:02:42.577900: step: 312/77, loss: 0.05021507292985916 2023-01-23 23:02:43.836598: step: 316/77, loss: 0.029303917661309242 2023-01-23 23:02:45.161894: step: 320/77, loss: 0.1269276887178421 2023-01-23 23:02:46.508052: step: 324/77, loss: 0.06324280053377151 2023-01-23 23:02:47.783935: step: 328/77, loss: 0.029106508940458298 2023-01-23 23:02:49.109093: step: 332/77, loss: 0.04430750384926796 2023-01-23 23:02:50.424145: step: 336/77, loss: 0.05504322797060013 2023-01-23 23:02:51.755297: step: 340/77, loss: 0.02148296684026718 2023-01-23 23:02:53.124659: step: 344/77, loss: 0.035367049276828766 2023-01-23 23:02:54.443352: step: 348/77, loss: 0.019233345985412598 2023-01-23 23:02:55.748618: step: 352/77, loss: 0.008263535797595978 2023-01-23 23:02:57.079530: step: 356/77, loss: 0.044725432991981506 2023-01-23 23:02:58.373048: step: 360/77, loss: 0.012253060936927795 2023-01-23 23:02:59.670072: step: 364/77, loss: 0.01013081893324852 2023-01-23 23:03:00.997989: step: 368/77, loss: 0.044951606541872025 2023-01-23 23:03:02.305530: step: 372/77, loss: 0.04763204604387283 2023-01-23 23:03:03.601591: step: 376/77, loss: 0.05810655653476715 2023-01-23 23:03:04.862441: step: 380/77, loss: 0.02208855375647545 2023-01-23 23:03:06.196057: step: 384/77, loss: 0.008081790059804916 2023-01-23 23:03:07.482754: step: 388/77, loss: 0.016579212620854378 ================================================== Loss: 0.038 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 
'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:05:20.679132: step: 4/77, loss: 0.019590124487876892 2023-01-23 23:05:22.009132: step: 8/77, loss: 0.023027315735816956 2023-01-23 23:05:23.293913: step: 12/77, loss: 0.029082786291837692 2023-01-23 23:05:24.612305: step: 16/77, loss: 0.026010677218437195 2023-01-23 23:05:25.956814: step: 20/77, loss: 0.012876695021986961 2023-01-23 23:05:27.209263: step: 24/77, loss: 0.0071772001683712006 2023-01-23 23:05:28.476083: step: 28/77, loss: 0.013369128108024597 2023-01-23 23:05:29.776268: step: 32/77, loss: 0.00957412552088499 2023-01-23 23:05:31.100152: step: 36/77, loss: 0.009424794465303421 2023-01-23 23:05:32.382367: step: 40/77, loss: 0.02984057366847992 2023-01-23 23:05:33.725360: step: 44/77, loss: 0.07836680859327316 2023-01-23 23:05:35.082251: step: 48/77, loss: 0.04779861867427826 2023-01-23 23:05:36.415167: step: 52/77, loss: 0.02072271518409252 2023-01-23 23:05:37.729763: step: 56/77, loss: 0.028426162898540497 2023-01-23 23:05:39.034512: step: 60/77, loss: 0.020091162994503975 2023-01-23 23:05:40.340044: step: 64/77, loss: 
0.03691019117832184 2023-01-23 23:05:41.631906: step: 68/77, loss: 0.08650784194469452 2023-01-23 23:05:42.961881: step: 72/77, loss: 0.08476677536964417 2023-01-23 23:05:44.279685: step: 76/77, loss: 0.021942488849163055 2023-01-23 23:05:45.605666: step: 80/77, loss: 0.010857520624995232 2023-01-23 23:05:46.936667: step: 84/77, loss: 0.03199823573231697 2023-01-23 23:05:48.322396: step: 88/77, loss: 0.036400698125362396 2023-01-23 23:05:49.628157: step: 92/77, loss: 0.058628231287002563 2023-01-23 23:05:50.919150: step: 96/77, loss: 0.03350096940994263 2023-01-23 23:05:52.202841: step: 100/77, loss: 0.054468218237161636 2023-01-23 23:05:53.521120: step: 104/77, loss: 0.007558372337371111 2023-01-23 23:05:54.869510: step: 108/77, loss: 0.014154444448649883 2023-01-23 23:05:56.181572: step: 112/77, loss: 0.00751089584082365 2023-01-23 23:05:57.557173: step: 116/77, loss: 0.00582252349704504 2023-01-23 23:05:58.857934: step: 120/77, loss: 0.010972294956445694 2023-01-23 23:06:00.169033: step: 124/77, loss: 0.021045461297035217 2023-01-23 23:06:01.493562: step: 128/77, loss: 0.0017005748813971877 2023-01-23 23:06:02.823348: step: 132/77, loss: 0.04809816554188728 2023-01-23 23:06:04.119833: step: 136/77, loss: 0.010070187970995903 2023-01-23 23:06:05.410207: step: 140/77, loss: 0.00459901150316 2023-01-23 23:06:06.737172: step: 144/77, loss: 0.010928267613053322 2023-01-23 23:06:08.001380: step: 148/77, loss: 0.018509894609451294 2023-01-23 23:06:09.318968: step: 152/77, loss: 0.014490798115730286 2023-01-23 23:06:10.597195: step: 156/77, loss: 0.003958669491112232 2023-01-23 23:06:11.929571: step: 160/77, loss: 0.01771484687924385 2023-01-23 23:06:13.235698: step: 164/77, loss: 0.008731606416404247 2023-01-23 23:06:14.609086: step: 168/77, loss: 0.03572859242558479 2023-01-23 23:06:15.977532: step: 172/77, loss: 0.020078785717487335 2023-01-23 23:06:17.272849: step: 176/77, loss: 0.002374204806983471 2023-01-23 23:06:18.603605: step: 180/77, loss: 0.013053692877292633 2023-01-23 23:06:19.947039: step: 184/77, loss: 0.04168858379125595 2023-01-23 23:06:21.338331: step: 188/77, loss: 0.030219757929444313 2023-01-23 23:06:22.655048: step: 192/77, loss: 0.08529096841812134 2023-01-23 23:06:23.954259: step: 196/77, loss: 0.03557312861084938 2023-01-23 23:06:25.312360: step: 200/77, loss: 0.0012289071455597878 2023-01-23 23:06:26.629544: step: 204/77, loss: 0.028531398624181747 2023-01-23 23:06:27.960593: step: 208/77, loss: 0.0033116433769464493 2023-01-23 23:06:29.302192: step: 212/77, loss: 0.0015021846629679203 2023-01-23 23:06:30.651132: step: 216/77, loss: 0.07549797743558884 2023-01-23 23:06:31.993670: step: 220/77, loss: 0.062390752136707306 2023-01-23 23:06:33.298387: step: 224/77, loss: 0.03732169046998024 2023-01-23 23:06:34.583832: step: 228/77, loss: 0.000875171332154423 2023-01-23 23:06:35.956158: step: 232/77, loss: 0.07455716282129288 2023-01-23 23:06:37.276189: step: 236/77, loss: 0.0032223116140812635 2023-01-23 23:06:38.588547: step: 240/77, loss: 0.002118849428370595 2023-01-23 23:06:39.915058: step: 244/77, loss: 0.012293724343180656 2023-01-23 23:06:41.201197: step: 248/77, loss: 0.06768789142370224 2023-01-23 23:06:42.513560: step: 252/77, loss: 0.08260074257850647 2023-01-23 23:06:43.888737: step: 256/77, loss: 0.09293807297945023 2023-01-23 23:06:45.220273: step: 260/77, loss: 0.031117822974920273 2023-01-23 23:06:46.490990: step: 264/77, loss: 0.003587220562621951 2023-01-23 23:06:47.771658: step: 268/77, loss: 0.01646401360630989 2023-01-23 23:06:49.110952: step: 272/77, 
loss: 0.0020924354903399944 2023-01-23 23:06:50.439323: step: 276/77, loss: 0.0006436010007746518 2023-01-23 23:06:51.774186: step: 280/77, loss: 0.005888496059924364 2023-01-23 23:06:53.134912: step: 284/77, loss: 0.003980573266744614 2023-01-23 23:06:54.486175: step: 288/77, loss: 0.040291935205459595 2023-01-23 23:06:55.801945: step: 292/77, loss: 0.004146946594119072 2023-01-23 23:06:57.082887: step: 296/77, loss: 0.029858361929655075 2023-01-23 23:06:58.439647: step: 300/77, loss: 0.09856970608234406 2023-01-23 23:06:59.786962: step: 304/77, loss: 0.049976229667663574 2023-01-23 23:07:01.122813: step: 308/77, loss: 0.07172711193561554 2023-01-23 23:07:02.455658: step: 312/77, loss: 0.03535992652177811 2023-01-23 23:07:03.751279: step: 316/77, loss: 0.005112402141094208 2023-01-23 23:07:05.068378: step: 320/77, loss: 0.0004511027073021978 2023-01-23 23:07:06.364442: step: 324/77, loss: 0.02633637934923172 2023-01-23 23:07:07.717497: step: 328/77, loss: 0.005359982140362263 2023-01-23 23:07:09.101413: step: 332/77, loss: 0.0010264780139550567 2023-01-23 23:07:10.406236: step: 336/77, loss: 0.051413118839263916 2023-01-23 23:07:11.706517: step: 340/77, loss: 0.005643540993332863 2023-01-23 23:07:13.031390: step: 344/77, loss: 0.09154492616653442 2023-01-23 23:07:14.386657: step: 348/77, loss: 0.31316933035850525 2023-01-23 23:07:15.707734: step: 352/77, loss: 0.0036383122205734253 2023-01-23 23:07:16.998539: step: 356/77, loss: 0.007959169335663319 2023-01-23 23:07:18.327186: step: 360/77, loss: 0.021040400490164757 2023-01-23 23:07:19.657568: step: 364/77, loss: 0.00927928276360035 2023-01-23 23:07:20.998310: step: 368/77, loss: 0.01899535395205021 2023-01-23 23:07:22.277445: step: 372/77, loss: 0.018808195367455482 2023-01-23 23:07:23.569414: step: 376/77, loss: 0.0040769632905721664 2023-01-23 23:07:24.923645: step: 380/77, loss: 0.0692996233701706 2023-01-23 23:07:26.211324: step: 384/77, loss: 0.011889062821865082 2023-01-23 23:07:27.572767: step: 388/77, loss: 0.20114099979400635 ================================================== Loss: 0.032 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Chinese: {'template': {'p': 0.9841269841269841, 'r': 0.484375, 'f1': 0.6492146596858638}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.011697561435781332, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Korean: {'template': {'p': 0.9841269841269841, 'r': 0.484375, 'f1': 0.6492146596858638}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.011697561435781332, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Russian: {'template': {'p': 0.984375, 'r': 0.4921875, 'f1': 0.65625}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.011824324324324327, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 
'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:09:14.800345: step: 4/77, loss: 0.023742811754345894 2023-01-23 23:09:16.078146: step: 8/77, loss: 0.0024210514966398478 2023-01-23 23:09:17.360997: step: 12/77, loss: 0.031761955469846725 2023-01-23 23:09:18.606073: step: 16/77, loss: 0.023868165910243988 2023-01-23 23:09:19.941842: step: 20/77, loss: 0.0014466585125774145 2023-01-23 23:09:21.227816: step: 24/77, loss: 0.02680317685008049 2023-01-23 23:09:22.533219: step: 28/77, loss: 0.0022497575264424086 2023-01-23 23:09:23.833218: step: 32/77, loss: 0.05933031439781189 2023-01-23 23:09:25.134198: step: 36/77, loss: 0.0024521953891962767 2023-01-23 23:09:26.401484: step: 40/77, loss: 0.003981100395321846 2023-01-23 23:09:27.680037: step: 44/77, loss: 0.06482931226491928 2023-01-23 23:09:29.044918: step: 48/77, loss: 0.03707173466682434 2023-01-23 23:09:30.369118: step: 52/77, loss: 0.006385110784322023 2023-01-23 23:09:31.666987: step: 56/77, loss: 0.03251034766435623 2023-01-23 23:09:32.932019: step: 60/77, 
loss: 0.01326783001422882 2023-01-23 23:09:34.256692: step: 64/77, loss: 0.03662087395787239 2023-01-23 23:09:35.527304: step: 68/77, loss: 0.022754115983843803 2023-01-23 23:09:36.849574: step: 72/77, loss: 0.007800246123224497 2023-01-23 23:09:38.196176: step: 76/77, loss: 0.037100229412317276 2023-01-23 23:09:39.469731: step: 80/77, loss: 0.002311698393896222 2023-01-23 23:09:40.812168: step: 84/77, loss: 0.0027715619653463364 2023-01-23 23:09:42.084351: step: 88/77, loss: 0.15742728114128113 2023-01-23 23:09:43.415469: step: 92/77, loss: 0.016606274992227554 2023-01-23 23:09:44.699970: step: 96/77, loss: 0.013384426012635231 2023-01-23 23:09:46.088117: step: 100/77, loss: 0.032797813415527344 2023-01-23 23:09:47.430924: step: 104/77, loss: 0.008585028350353241 2023-01-23 23:09:48.640877: step: 108/77, loss: 0.02555439993739128 2023-01-23 23:09:49.976527: step: 112/77, loss: 0.009502370841801167 2023-01-23 23:09:51.292963: step: 116/77, loss: 0.0031764251179993153 2023-01-23 23:09:52.611834: step: 120/77, loss: 0.002601939719170332 2023-01-23 23:09:53.910542: step: 124/77, loss: 0.0356743261218071 2023-01-23 23:09:55.242532: step: 128/77, loss: 0.015332273207604885 2023-01-23 23:09:56.594042: step: 132/77, loss: 0.012819968163967133 2023-01-23 23:09:57.910794: step: 136/77, loss: 0.030379636213183403 2023-01-23 23:09:59.207807: step: 140/77, loss: 0.00024762097746133804 2023-01-23 23:10:00.501623: step: 144/77, loss: 0.0017826403491199017 2023-01-23 23:10:01.800027: step: 148/77, loss: 0.025525454431772232 2023-01-23 23:10:03.124482: step: 152/77, loss: 0.009922852739691734 2023-01-23 23:10:04.428272: step: 156/77, loss: 0.030485069379210472 2023-01-23 23:10:05.762196: step: 160/77, loss: 0.015530181117355824 2023-01-23 23:10:07.097370: step: 164/77, loss: 0.05367741733789444 2023-01-23 23:10:08.384096: step: 168/77, loss: 0.0014602728188037872 2023-01-23 23:10:09.683105: step: 172/77, loss: 0.09227390587329865 2023-01-23 23:10:10.967354: step: 176/77, loss: 0.0007054744055494666 2023-01-23 23:10:12.281393: step: 180/77, loss: 0.09929867088794708 2023-01-23 23:10:13.593393: step: 184/77, loss: 0.002446091268211603 2023-01-23 23:10:14.929764: step: 188/77, loss: 0.04151029884815216 2023-01-23 23:10:16.272580: step: 192/77, loss: 0.04282946512103081 2023-01-23 23:10:17.593178: step: 196/77, loss: 0.12917132675647736 2023-01-23 23:10:18.918392: step: 200/77, loss: 0.07418958842754364 2023-01-23 23:10:20.241559: step: 204/77, loss: 0.10528351366519928 2023-01-23 23:10:21.523357: step: 208/77, loss: 0.05371832475066185 2023-01-23 23:10:22.847426: step: 212/77, loss: 0.013339102268218994 2023-01-23 23:10:24.166796: step: 216/77, loss: 0.041499387472867966 2023-01-23 23:10:25.437609: step: 220/77, loss: 0.03752455860376358 2023-01-23 23:10:26.768843: step: 224/77, loss: 0.019607428461313248 2023-01-23 23:10:28.095202: step: 228/77, loss: 0.014202874153852463 2023-01-23 23:10:29.393441: step: 232/77, loss: 0.05125613883137703 2023-01-23 23:10:30.722596: step: 236/77, loss: 0.035229314118623734 2023-01-23 23:10:32.027096: step: 240/77, loss: 0.060807421803474426 2023-01-23 23:10:33.347893: step: 244/77, loss: 0.007476100232452154 2023-01-23 23:10:34.651090: step: 248/77, loss: 0.00987208727747202 2023-01-23 23:10:35.988085: step: 252/77, loss: 0.010732964612543583 2023-01-23 23:10:37.290531: step: 256/77, loss: 0.016423512250185013 2023-01-23 23:10:38.630120: step: 260/77, loss: 0.047358062118291855 2023-01-23 23:10:39.960408: step: 264/77, loss: 0.06571957468986511 2023-01-23 23:10:41.265518: 
step: 268/77, loss: 0.013337070122361183 2023-01-23 23:10:42.601669: step: 272/77, loss: 0.029291309416294098 2023-01-23 23:10:43.901651: step: 276/77, loss: 0.0034126536920666695 2023-01-23 23:10:45.219978: step: 280/77, loss: 0.014468264766037464 2023-01-23 23:10:46.611844: step: 284/77, loss: 0.03605595603585243 2023-01-23 23:10:47.910799: step: 288/77, loss: 0.017270473763346672 2023-01-23 23:10:49.206461: step: 292/77, loss: 0.01606428064405918 2023-01-23 23:10:50.552850: step: 296/77, loss: 0.01870177686214447 2023-01-23 23:10:51.864962: step: 300/77, loss: 0.01481685135513544 2023-01-23 23:10:53.203250: step: 304/77, loss: 0.014699216932058334 2023-01-23 23:10:54.496069: step: 308/77, loss: 0.011047665029764175 2023-01-23 23:10:55.862685: step: 312/77, loss: 0.01075662486255169 2023-01-23 23:10:57.185358: step: 316/77, loss: 0.029328038915991783 2023-01-23 23:10:58.510788: step: 320/77, loss: 0.01761351153254509 2023-01-23 23:10:59.805419: step: 324/77, loss: 0.012456723488867283 2023-01-23 23:11:01.086711: step: 328/77, loss: 0.020048733800649643 2023-01-23 23:11:02.486286: step: 332/77, loss: 0.05049295350909233 2023-01-23 23:11:03.785530: step: 336/77, loss: 0.04084024205803871 2023-01-23 23:11:05.148877: step: 340/77, loss: 0.00574206979945302 2023-01-23 23:11:06.431432: step: 344/77, loss: 0.06762515753507614 2023-01-23 23:11:07.719799: step: 348/77, loss: 0.017670320346951485 2023-01-23 23:11:09.015442: step: 352/77, loss: 0.021465200930833817 2023-01-23 23:11:10.373452: step: 356/77, loss: 0.03226961940526962 2023-01-23 23:11:11.697833: step: 360/77, loss: 0.042687464505434036 2023-01-23 23:11:13.004191: step: 364/77, loss: 0.011565061286091805 2023-01-23 23:11:14.330264: step: 368/77, loss: 0.10414603352546692 2023-01-23 23:11:15.666341: step: 372/77, loss: 0.00794203020632267 2023-01-23 23:11:17.037437: step: 376/77, loss: 0.036036621779203415 2023-01-23 23:11:18.346976: step: 380/77, loss: 0.0044304742477834225 2023-01-23 23:11:19.611567: step: 384/77, loss: 0.08746032416820526 2023-01-23 23:11:20.869075: step: 388/77, loss: 0.016607046127319336 ================================================== Loss: 0.029 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.45, 'f1': 0.6206896551724138}, 'slot': {'p': 0.5416666666666666, 'r': 0.024574669187145556, 'f1': 0.04701627486437612}, 'combined': 0.029182515433061045, 'epoch': 6} Test Chinese: {'template': {'p': 0.9838709677419355, 'r': 0.4765625, 'f1': 0.6421052631578947}, 'slot': {'p': 0.5263157894736842, 'r': 0.009157509157509158, 'f1': 0.018001800180018006}, 'combined': 0.011559050641906298, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5333333333333333, 'r': 0.030245746691871456, 'f1': 0.05724508050089446}, 'combined': 0.03816338700059631, 'epoch': 6} Test Korean: {'template': {'p': 0.9836065573770492, 'r': 0.46875, 'f1': 0.6349206349206349}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.011440011440011442, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.45, 'f1': 0.6206896551724138}, 'slot': {'p': 0.5384615384615384, 'r': 0.026465028355387523, 'f1': 0.05045045045045044}, 'combined': 0.03131407269338304, 'epoch': 6} Test Russian: {'template': {'p': 0.9838709677419355, 'r': 0.4765625, 'f1': 0.6421052631578947}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.01156946420104315, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:13:07.599250: step: 4/77, loss: 0.0031209909357130527 2023-01-23 23:13:08.898472: step: 8/77, loss: 0.08783739805221558 2023-01-23 23:13:10.191313: step: 12/77, loss: 0.0652967095375061 2023-01-23 23:13:11.481293: step: 16/77, loss: 0.009294316172599792 2023-01-23 23:13:12.806845: step: 20/77, loss: 0.01650993712246418 2023-01-23 23:13:14.086643: step: 24/77, loss: 0.006153582129627466 2023-01-23 23:13:15.423769: step: 28/77, loss: 0.005866794381290674 2023-01-23 23:13:16.781163: step: 32/77, loss: 0.005742167588323355 2023-01-23 23:13:18.095587: step: 36/77, loss: 0.01967783458530903 2023-01-23 23:13:19.400131: step: 40/77, loss: 0.014993395656347275 2023-01-23 23:13:20.715761: step: 44/77, loss: 0.019814079627394676 2023-01-23 23:13:22.065033: step: 48/77, loss: 0.010505028069019318 2023-01-23 23:13:23.389257: step: 52/77, loss: 0.03083161637187004 2023-01-23 23:13:24.672971: step: 56/77, loss: 0.004766981117427349 2023-01-23 
23:13:26.017072: step: 60/77, loss: 0.002936827950179577 2023-01-23 23:13:27.349454: step: 64/77, loss: 0.0015104500344023108 2023-01-23 23:13:28.619934: step: 68/77, loss: 0.001246319618076086 2023-01-23 23:13:29.947319: step: 72/77, loss: 0.00507638044655323 2023-01-23 23:13:31.282260: step: 76/77, loss: 0.021538907662034035 2023-01-23 23:13:32.536564: step: 80/77, loss: 0.07426765561103821 2023-01-23 23:13:33.821716: step: 84/77, loss: 0.0063897836953401566 2023-01-23 23:13:35.105379: step: 88/77, loss: 0.011027699336409569 2023-01-23 23:13:36.478977: step: 92/77, loss: 0.001949557103216648 2023-01-23 23:13:37.808878: step: 96/77, loss: 0.0005657103611156344 2023-01-23 23:13:39.111652: step: 100/77, loss: 0.05404621735215187 2023-01-23 23:13:40.388824: step: 104/77, loss: 0.02471388503909111 2023-01-23 23:13:41.685545: step: 108/77, loss: 0.04665075242519379 2023-01-23 23:13:43.035754: step: 112/77, loss: 0.0053888545371592045 2023-01-23 23:13:44.312395: step: 116/77, loss: 0.07282565534114838 2023-01-23 23:13:45.626276: step: 120/77, loss: 0.0022747742477804422 2023-01-23 23:13:46.967038: step: 124/77, loss: 0.07615106552839279 2023-01-23 23:13:48.264813: step: 128/77, loss: 0.022499775514006615 2023-01-23 23:13:49.541805: step: 132/77, loss: 0.02364405244588852 2023-01-23 23:13:50.848865: step: 136/77, loss: 0.05657701939344406 2023-01-23 23:13:52.139521: step: 140/77, loss: 0.0018406548770144582 2023-01-23 23:13:53.489945: step: 144/77, loss: 0.05552219972014427 2023-01-23 23:13:54.782953: step: 148/77, loss: 0.018158987164497375 2023-01-23 23:13:56.087734: step: 152/77, loss: 0.009586824104189873 2023-01-23 23:13:57.408390: step: 156/77, loss: 0.008389465510845184 2023-01-23 23:13:58.717805: step: 160/77, loss: 0.04054231569170952 2023-01-23 23:14:00.062359: step: 164/77, loss: 0.020485740154981613 2023-01-23 23:14:01.384894: step: 168/77, loss: 0.024340663105249405 2023-01-23 23:14:02.690867: step: 172/77, loss: 0.009585918858647346 2023-01-23 23:14:04.019171: step: 176/77, loss: 0.018610753118991852 2023-01-23 23:14:05.340816: step: 180/77, loss: 0.016117611899971962 2023-01-23 23:14:06.648790: step: 184/77, loss: 0.06756845861673355 2023-01-23 23:14:07.978912: step: 188/77, loss: 0.019038718193769455 2023-01-23 23:14:09.301383: step: 192/77, loss: 0.04110538959503174 2023-01-23 23:14:10.631338: step: 196/77, loss: 0.03693665936589241 2023-01-23 23:14:12.019847: step: 200/77, loss: 0.021302184090018272 2023-01-23 23:14:13.347464: step: 204/77, loss: 0.002271834993734956 2023-01-23 23:14:14.686287: step: 208/77, loss: 0.043990932404994965 2023-01-23 23:14:16.022772: step: 212/77, loss: 0.021895771846175194 2023-01-23 23:14:17.374673: step: 216/77, loss: 0.007647061720490456 2023-01-23 23:14:18.718021: step: 220/77, loss: 0.13254640996456146 2023-01-23 23:14:20.038386: step: 224/77, loss: 0.027036642655730247 2023-01-23 23:14:21.373622: step: 228/77, loss: 0.04646734148263931 2023-01-23 23:14:22.692190: step: 232/77, loss: 0.0006018686690367758 2023-01-23 23:14:24.008098: step: 236/77, loss: 0.011303732171654701 2023-01-23 23:14:25.371629: step: 240/77, loss: 0.015093307942152023 2023-01-23 23:14:26.697577: step: 244/77, loss: 0.0010103249223902822 2023-01-23 23:14:28.020800: step: 248/77, loss: 0.00565328449010849 2023-01-23 23:14:29.322768: step: 252/77, loss: 0.02016383968293667 2023-01-23 23:14:30.632103: step: 256/77, loss: 0.02276289090514183 2023-01-23 23:14:31.945645: step: 260/77, loss: 0.11638316512107849 2023-01-23 23:14:33.262499: step: 264/77, loss: 0.008772538974881172 
2023-01-23 23:14:34.574958: step: 268/77, loss: 0.00944832805544138 2023-01-23 23:14:35.900129: step: 272/77, loss: 0.004265233408659697 2023-01-23 23:14:37.207788: step: 276/77, loss: 0.0005268824170343578 2023-01-23 23:14:38.565426: step: 280/77, loss: 0.00032209057826548815 2023-01-23 23:14:39.925956: step: 284/77, loss: 0.001351947314105928 2023-01-23 23:14:41.288722: step: 288/77, loss: 0.0057523720897734165 2023-01-23 23:14:42.618450: step: 292/77, loss: 0.010685701854526997 2023-01-23 23:14:43.969258: step: 296/77, loss: 0.0319397896528244 2023-01-23 23:14:45.315270: step: 300/77, loss: 0.06816261261701584 2023-01-23 23:14:46.652982: step: 304/77, loss: 0.005405607167631388 2023-01-23 23:14:47.917453: step: 308/77, loss: 0.01193196326494217 2023-01-23 23:14:49.186831: step: 312/77, loss: 0.006270136684179306 2023-01-23 23:14:50.511791: step: 316/77, loss: 0.003654724918305874 2023-01-23 23:14:51.869719: step: 320/77, loss: 0.006270033307373524 2023-01-23 23:14:53.161531: step: 324/77, loss: 0.011288300156593323 2023-01-23 23:14:54.489548: step: 328/77, loss: 0.041433122009038925 2023-01-23 23:14:55.832288: step: 332/77, loss: 0.010194050148129463 2023-01-23 23:14:57.139177: step: 336/77, loss: 0.013408026657998562 2023-01-23 23:14:58.474178: step: 340/77, loss: 0.017604660242795944 2023-01-23 23:14:59.760053: step: 344/77, loss: 0.030220985412597656 2023-01-23 23:15:01.076810: step: 348/77, loss: 0.009208658710122108 2023-01-23 23:15:02.417310: step: 352/77, loss: 0.0023348366376012564 2023-01-23 23:15:03.742143: step: 356/77, loss: 0.012271026149392128 2023-01-23 23:15:05.055531: step: 360/77, loss: 0.015157275833189487 2023-01-23 23:15:06.408194: step: 364/77, loss: 0.0011631695087999105 2023-01-23 23:15:07.747446: step: 368/77, loss: 0.01727611944079399 2023-01-23 23:15:09.025884: step: 372/77, loss: 0.0032124074641615152 2023-01-23 23:15:10.341194: step: 376/77, loss: 0.0028875365387648344 2023-01-23 23:15:11.670414: step: 380/77, loss: 0.0026917150244116783 2023-01-23 23:15:12.999873: step: 384/77, loss: 0.02284029684960842 2023-01-23 23:15:14.367723: step: 388/77, loss: 0.021598536521196365 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 7} Test Chinese: {'template': {'p': 0.9545454545454546, 'r': 0.4921875, 'f1': 0.6494845360824743}, 'slot': {'p': 0.6842105263157895, 'r': 0.011904761904761904, 'f1': 0.023402340234023402}, 'combined': 0.015199458090138911, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 7} Test Korean: {'template': {'p': 0.9545454545454546, 'r': 0.4921875, 'f1': 0.6494845360824743}, 'slot': {'p': 0.6842105263157895, 'r': 0.011904761904761904, 'f1': 0.023402340234023402}, 'combined': 0.015199458090138911, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 7} Test Russian: {'template': {'p': 0.9545454545454546, 'r': 0.4921875, 'f1': 0.6494845360824743}, 'slot': {'p': 0.6842105263157895, 'r': 0.011904761904761904, 'f1': 0.023402340234023402}, 'combined': 0.015199458090138911, 'epoch': 7} Sample 
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:17:01.255635: step: 4/77, loss: 0.003462360007688403 2023-01-23 23:17:02.581901: step: 8/77, loss: 0.03158210217952728 2023-01-23 23:17:03.883120: step: 12/77, loss: 0.027815116569399834 2023-01-23 23:17:05.169586: step: 16/77, loss: 0.010638121515512466 2023-01-23 23:17:06.523936: step: 20/77, loss: 0.021807294338941574 2023-01-23 23:17:07.792435: step: 24/77, loss: 0.08031920343637466 2023-01-23 23:17:09.091506: step: 28/77, loss: 0.02158435806632042 2023-01-23 23:17:10.370450: step: 32/77, loss: 0.038904719054698944 2023-01-23 23:17:11.690914: step: 36/77, loss: 0.03839685022830963 2023-01-23 23:17:12.971989: step: 40/77, loss: 0.03866554796695709 2023-01-23 23:17:14.272719: step: 44/77, loss: 6.99927331879735e-05 2023-01-23 23:17:15.581107: step: 48/77, loss: 0.03688203543424606 2023-01-23 23:17:16.934695: step: 
52/77, loss: 0.004742838907986879 2023-01-23 23:17:18.234420: step: 56/77, loss: 0.028267303481698036 2023-01-23 23:17:19.519590: step: 60/77, loss: 0.09787152707576752 2023-01-23 23:17:20.819178: step: 64/77, loss: 0.007006022147834301 2023-01-23 23:17:22.181023: step: 68/77, loss: 0.009687730111181736 2023-01-23 23:17:23.492847: step: 72/77, loss: 0.0010902268113568425 2023-01-23 23:17:24.799724: step: 76/77, loss: 0.00017482312978245318 2023-01-23 23:17:26.053230: step: 80/77, loss: 0.0049322182312607765 2023-01-23 23:17:27.403997: step: 84/77, loss: 0.015547310933470726 2023-01-23 23:17:28.677037: step: 88/77, loss: 0.01092690508812666 2023-01-23 23:17:30.011732: step: 92/77, loss: 0.000868482340592891 2023-01-23 23:17:31.312479: step: 96/77, loss: 0.021374428644776344 2023-01-23 23:17:32.627290: step: 100/77, loss: 0.03838520497083664 2023-01-23 23:17:33.972780: step: 104/77, loss: 0.02330004796385765 2023-01-23 23:17:35.243000: step: 108/77, loss: 0.0010328067000955343 2023-01-23 23:17:36.545067: step: 112/77, loss: 0.041627462953329086 2023-01-23 23:17:37.890931: step: 116/77, loss: 0.016076091676950455 2023-01-23 23:17:39.180930: step: 120/77, loss: 0.0010974672622978687 2023-01-23 23:17:40.497121: step: 124/77, loss: 0.06853155046701431 2023-01-23 23:17:41.772869: step: 128/77, loss: 0.047840192914009094 2023-01-23 23:17:43.039682: step: 132/77, loss: 0.0012306292774155736 2023-01-23 23:17:44.396173: step: 136/77, loss: 0.01438758336007595 2023-01-23 23:17:45.707300: step: 140/77, loss: 0.03334816172719002 2023-01-23 23:17:47.008038: step: 144/77, loss: 0.0003137671446893364 2023-01-23 23:17:48.281672: step: 148/77, loss: 0.0004342859610915184 2023-01-23 23:17:49.551387: step: 152/77, loss: 0.032907724380493164 2023-01-23 23:17:50.886334: step: 156/77, loss: 0.07321220636367798 2023-01-23 23:17:52.170503: step: 160/77, loss: 0.008549018763005733 2023-01-23 23:17:53.505659: step: 164/77, loss: 0.00022818568686489016 2023-01-23 23:17:54.845389: step: 168/77, loss: 0.00012675569450948387 2023-01-23 23:17:56.131671: step: 172/77, loss: 0.04355047643184662 2023-01-23 23:17:57.429461: step: 176/77, loss: 0.015296213328838348 2023-01-23 23:17:58.752568: step: 180/77, loss: 0.00031988683622330427 2023-01-23 23:18:00.011844: step: 184/77, loss: 3.0125163902994245e-05 2023-01-23 23:18:01.308741: step: 188/77, loss: 0.0007129679434001446 2023-01-23 23:18:02.598183: step: 192/77, loss: 0.007171159144490957 2023-01-23 23:18:03.885570: step: 196/77, loss: 0.04945923015475273 2023-01-23 23:18:05.133915: step: 200/77, loss: 0.0023911334574222565 2023-01-23 23:18:06.472961: step: 204/77, loss: 0.06517422944307327 2023-01-23 23:18:07.784447: step: 208/77, loss: 0.022393066436052322 2023-01-23 23:18:09.097444: step: 212/77, loss: 0.014836644753813744 2023-01-23 23:18:10.409731: step: 216/77, loss: 0.019449369981884956 2023-01-23 23:18:11.764845: step: 220/77, loss: 0.0018990390235558152 2023-01-23 23:18:13.060069: step: 224/77, loss: 0.028624853119254112 2023-01-23 23:18:14.366971: step: 228/77, loss: 0.010941157117486 2023-01-23 23:18:15.650611: step: 232/77, loss: 0.005102076567709446 2023-01-23 23:18:16.967246: step: 236/77, loss: 0.06938213109970093 2023-01-23 23:18:18.298869: step: 240/77, loss: 0.034478601068258286 2023-01-23 23:18:19.583909: step: 244/77, loss: 0.03948065638542175 2023-01-23 23:18:20.914512: step: 248/77, loss: 0.026814214885234833 2023-01-23 23:18:22.304096: step: 252/77, loss: 0.036289360374212265 2023-01-23 23:18:23.569639: step: 256/77, loss: 0.003869938664138317 
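The evaluation dictionaries printed at the end of each epoch ("Dev ...", "Test ...", "Sample ...") report precision, recall, and F1 for templates and slots plus a single "combined" score. The logged numbers are consistent with the standard F1 and with "combined" being the product of the template F1 and the slot F1; the check below reproduces the epoch-4 "Current best result" values shown above and is inferred from the logged numbers, not taken from the training code:

def f1(p: float, r: float) -> float:
    # Standard harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) else 0.0

template_f1 = f1(1.0, 0.5833333333333334)   # 0.7368421052631579
slot_f1 = f1(0.5, 0.03780718336483932)      # 0.07029876977152899
combined = template_f1 * slot_f1            # 0.05179909351586346, matching "Dev for Chinese"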
2023-01-23 23:18:24.897850: step: 260/77, loss: 0.011877670884132385 2023-01-23 23:18:26.270828: step: 264/77, loss: 0.021154792979359627 2023-01-23 23:18:27.583465: step: 268/77, loss: 0.018810829147696495 2023-01-23 23:18:28.923856: step: 272/77, loss: 0.12980136275291443 2023-01-23 23:18:30.262715: step: 276/77, loss: 0.01171032339334488 2023-01-23 23:18:31.609781: step: 280/77, loss: 0.027855847030878067 2023-01-23 23:18:32.915549: step: 284/77, loss: 0.006162848323583603 2023-01-23 23:18:34.259811: step: 288/77, loss: 0.023323755711317062 2023-01-23 23:18:35.546349: step: 292/77, loss: 0.009866025298833847 2023-01-23 23:18:36.932823: step: 296/77, loss: 0.05878325179219246 2023-01-23 23:18:38.279043: step: 300/77, loss: 0.006912640295922756 2023-01-23 23:18:39.576568: step: 304/77, loss: 0.002645879751071334 2023-01-23 23:18:40.898414: step: 308/77, loss: 0.040564458817243576 2023-01-23 23:18:42.233822: step: 312/77, loss: 0.028605114668607712 2023-01-23 23:18:43.599805: step: 316/77, loss: 0.005716841202229261 2023-01-23 23:18:44.953071: step: 320/77, loss: 0.002674217103049159 2023-01-23 23:18:46.301106: step: 324/77, loss: 0.019984597340226173 2023-01-23 23:18:47.596932: step: 328/77, loss: 0.04892556741833687 2023-01-23 23:18:48.931956: step: 332/77, loss: 0.04373926669359207 2023-01-23 23:18:50.239555: step: 336/77, loss: 0.017949160188436508 2023-01-23 23:18:51.531911: step: 340/77, loss: 0.009541014209389687 2023-01-23 23:18:52.825836: step: 344/77, loss: 0.020409800112247467 2023-01-23 23:18:54.125733: step: 348/77, loss: 0.00047758466098457575 2023-01-23 23:18:55.413220: step: 352/77, loss: 0.0053678578697144985 2023-01-23 23:18:56.699245: step: 356/77, loss: 0.005686678923666477 2023-01-23 23:18:58.025617: step: 360/77, loss: 0.011399084702134132 2023-01-23 23:18:59.354192: step: 364/77, loss: 0.002073788084089756 2023-01-23 23:19:00.622128: step: 368/77, loss: 0.0068801334127783775 2023-01-23 23:19:01.924769: step: 372/77, loss: 0.036192066967487335 2023-01-23 23:19:03.204703: step: 376/77, loss: 0.022756557911634445 2023-01-23 23:19:04.494718: step: 380/77, loss: 0.024984458461403847 2023-01-23 23:19:05.810640: step: 384/77, loss: 0.000745010154787451 2023-01-23 23:19:07.161804: step: 388/77, loss: 0.009983447380363941 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9821428571428571, 'r': 0.4296875, 'f1': 0.5978260869565216}, 'slot': {'p': 0.625, 'r': 0.009157509157509158, 'f1': 0.01805054151624549}, 'combined': 0.01079108460210328, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.9821428571428571, 'r': 0.4296875, 'f1': 0.5978260869565216}, 'slot': {'p': 0.625, 'r': 0.009157509157509158, 'f1': 0.01805054151624549}, 'combined': 0.01079108460210328, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9833333333333333, 'r': 0.4609375, 'f1': 0.6276595744680851}, 'slot': {'p': 0.625, 'r': 
0.009157509157509158, 'f1': 0.01805054151624549}, 'combined': 0.011329595207005147, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:20:53.744515: step: 4/77, loss: 0.003912047017365694 2023-01-23 23:20:55.030461: step: 8/77, loss: 0.011383119970560074 2023-01-23 23:20:56.328327: step: 12/77, loss: 0.051056768745183945 2023-01-23 23:20:57.643417: step: 16/77, loss: 0.005373623222112656 2023-01-23 23:20:58.990864: step: 20/77, loss: 0.009975888766348362 2023-01-23 23:21:00.288756: step: 24/77, loss: 0.014537742361426353 2023-01-23 23:21:01.573933: step: 28/77, loss: 0.025953613221645355 2023-01-23 23:21:02.903539: step: 32/77, loss: 0.007894574664533138 2023-01-23 23:21:04.221049: step: 36/77, loss: 0.020549921318888664 2023-01-23 23:21:05.484118: step: 40/77, loss: 0.008908596821129322 2023-01-23 23:21:06.812759: step: 44/77, loss: 
0.001432577962987125 2023-01-23 23:21:08.075859: step: 48/77, loss: 0.02323325350880623 2023-01-23 23:21:09.378701: step: 52/77, loss: 0.01578327640891075 2023-01-23 23:21:10.685237: step: 56/77, loss: 0.000378149765310809 2023-01-23 23:21:12.015302: step: 60/77, loss: 0.010163774713873863 2023-01-23 23:21:13.321336: step: 64/77, loss: 0.032410383224487305 2023-01-23 23:21:14.594734: step: 68/77, loss: 0.00013186398427933455 2023-01-23 23:21:15.913796: step: 72/77, loss: 0.015592390671372414 2023-01-23 23:21:17.205623: step: 76/77, loss: 0.005880615673959255 2023-01-23 23:21:18.567357: step: 80/77, loss: 0.0011790425051003695 2023-01-23 23:21:19.918683: step: 84/77, loss: 0.03505949676036835 2023-01-23 23:21:21.241783: step: 88/77, loss: 0.017734795808792114 2023-01-23 23:21:22.554570: step: 92/77, loss: 0.002167154336348176 2023-01-23 23:21:23.839802: step: 96/77, loss: 0.07464326918125153 2023-01-23 23:21:25.139595: step: 100/77, loss: 0.06874995678663254 2023-01-23 23:21:26.427306: step: 104/77, loss: 0.00012492010137066245 2023-01-23 23:21:27.768929: step: 108/77, loss: 0.008151310496032238 2023-01-23 23:21:29.070837: step: 112/77, loss: 0.003479942213743925 2023-01-23 23:21:30.419911: step: 116/77, loss: 0.014868896454572678 2023-01-23 23:21:31.793519: step: 120/77, loss: 0.015556391328573227 2023-01-23 23:21:33.022879: step: 124/77, loss: 0.029294028878211975 2023-01-23 23:21:34.337583: step: 128/77, loss: 0.02460947260260582 2023-01-23 23:21:35.644788: step: 132/77, loss: 0.009721241891384125 2023-01-23 23:21:36.966025: step: 136/77, loss: 0.0009847991168498993 2023-01-23 23:21:38.243384: step: 140/77, loss: 0.010228264145553112 2023-01-23 23:21:39.541694: step: 144/77, loss: 0.022964006289839745 2023-01-23 23:21:40.882510: step: 148/77, loss: 0.040076129138469696 2023-01-23 23:21:42.183522: step: 152/77, loss: 0.007666187360882759 2023-01-23 23:21:43.470382: step: 156/77, loss: 0.056761860847473145 2023-01-23 23:21:44.781671: step: 160/77, loss: 0.0006916196434758604 2023-01-23 23:21:46.075758: step: 164/77, loss: 9.866947948466986e-05 2023-01-23 23:21:47.404759: step: 168/77, loss: 0.005803945939987898 2023-01-23 23:21:48.750987: step: 172/77, loss: 0.02239947021007538 2023-01-23 23:21:50.049639: step: 176/77, loss: 0.04482104629278183 2023-01-23 23:21:51.400542: step: 180/77, loss: 0.01325925998389721 2023-01-23 23:21:52.710304: step: 184/77, loss: 0.022729776799678802 2023-01-23 23:21:54.075796: step: 188/77, loss: 0.007351192645728588 2023-01-23 23:21:55.408420: step: 192/77, loss: 0.005582916084676981 2023-01-23 23:21:56.743042: step: 196/77, loss: 0.0008952165953814983 2023-01-23 23:21:58.021037: step: 200/77, loss: 0.013445856980979443 2023-01-23 23:21:59.280637: step: 204/77, loss: 0.09656139463186264 2023-01-23 23:22:00.593059: step: 208/77, loss: 0.019810235127806664 2023-01-23 23:22:01.930432: step: 212/77, loss: 0.01166454330086708 2023-01-23 23:22:03.268479: step: 216/77, loss: 0.07258975505828857 2023-01-23 23:22:04.533382: step: 220/77, loss: 0.0005941896233707666 2023-01-23 23:22:05.866090: step: 224/77, loss: 0.06122061610221863 2023-01-23 23:22:07.162056: step: 228/77, loss: 0.0010990884620696306 2023-01-23 23:22:08.412409: step: 232/77, loss: 0.021423693746328354 2023-01-23 23:22:09.775694: step: 236/77, loss: 0.12998279929161072 2023-01-23 23:22:11.107719: step: 240/77, loss: 0.023686395958065987 2023-01-23 23:22:12.464762: step: 244/77, loss: 0.012482582591474056 2023-01-23 23:22:13.770546: step: 248/77, loss: 0.005338149145245552 2023-01-23 23:22:15.084908: 
step: 252/77, loss: 0.00028367474442347884 2023-01-23 23:22:16.387355: step: 256/77, loss: 0.013595180585980415 2023-01-23 23:22:17.689710: step: 260/77, loss: 0.0002446919970680028 2023-01-23 23:22:19.031498: step: 264/77, loss: 0.01875019259750843 2023-01-23 23:22:20.390263: step: 268/77, loss: 0.006992523558437824 2023-01-23 23:22:21.737063: step: 272/77, loss: 0.010164832696318626 2023-01-23 23:22:23.068774: step: 276/77, loss: 0.005523860454559326 2023-01-23 23:22:24.448568: step: 280/77, loss: 0.00024588676751591265 2023-01-23 23:22:25.789419: step: 284/77, loss: 0.002962901024147868 2023-01-23 23:22:27.120110: step: 288/77, loss: 0.001597255701199174 2023-01-23 23:22:28.482662: step: 292/77, loss: 0.00734774861484766 2023-01-23 23:22:29.796624: step: 296/77, loss: 0.0006774789653718472 2023-01-23 23:22:31.159816: step: 300/77, loss: 0.016190657392144203 2023-01-23 23:22:32.481674: step: 304/77, loss: 0.007922903634607792 2023-01-23 23:22:33.794432: step: 308/77, loss: 0.02948911488056183 2023-01-23 23:22:35.097278: step: 312/77, loss: 0.02065003663301468 2023-01-23 23:22:36.428320: step: 316/77, loss: 0.004155493341386318 2023-01-23 23:22:37.772694: step: 320/77, loss: 0.0011718124151229858 2023-01-23 23:22:39.080271: step: 324/77, loss: 0.0003155616286676377 2023-01-23 23:22:40.387541: step: 328/77, loss: 0.03292486071586609 2023-01-23 23:22:41.724716: step: 332/77, loss: 0.02266281470656395 2023-01-23 23:22:43.057036: step: 336/77, loss: 0.02113211527466774 2023-01-23 23:22:44.383202: step: 340/77, loss: 0.017704568803310394 2023-01-23 23:22:45.724982: step: 344/77, loss: 0.001365205505862832 2023-01-23 23:22:47.041394: step: 348/77, loss: 9.205719106830657e-05 2023-01-23 23:22:48.418111: step: 352/77, loss: 0.039498742669820786 2023-01-23 23:22:49.737998: step: 356/77, loss: 0.014530006796121597 2023-01-23 23:22:51.089694: step: 360/77, loss: 0.019658163189888 2023-01-23 23:22:52.427861: step: 364/77, loss: 0.01574498787522316 2023-01-23 23:22:53.755186: step: 368/77, loss: 0.025630172342061996 2023-01-23 23:22:55.093276: step: 372/77, loss: 0.00019986522966064513 2023-01-23 23:22:56.476017: step: 376/77, loss: 0.0015250144060701132 2023-01-23 23:22:57.867019: step: 380/77, loss: 0.04629107564687729 2023-01-23 23:22:59.179743: step: 384/77, loss: 0.00029715109849348664 2023-01-23 23:23:00.500274: step: 388/77, loss: 0.028631635010242462 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9285714285714286, 'r': 0.5078125, 'f1': 0.6565656565656566}, 'slot': {'p': 0.5555555555555556, 'r': 0.013736263736263736, 'f1': 0.026809651474530835}, 'combined': 0.017602296422671762, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.9305555555555556, 'r': 0.5234375, 'f1': 0.6699999999999999}, 'slot': {'p': 0.5555555555555556, 'r': 0.013736263736263736, 'f1': 0.026809651474530835}, 'combined': 0.017962466487935657, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 
9} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.515625, 'f1': 0.6633165829145728}, 'slot': {'p': 0.5555555555555556, 'r': 0.013736263736263736, 'f1': 0.026809651474530835}, 'combined': 0.017783286405216432, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:24:47.176991: step: 4/77, loss: 0.04615463688969612 2023-01-23 23:24:48.520879: step: 8/77, loss: 0.009117464534938335 2023-01-23 23:24:49.822378: step: 12/77, loss: 0.003044185694307089 2023-01-23 23:24:51.136841: step: 16/77, loss: 0.007869887165725231 2023-01-23 23:24:52.504987: step: 20/77, loss: 0.03679301217198372 2023-01-23 23:24:53.793034: step: 24/77, loss: 0.0019312759395688772 2023-01-23 23:24:55.098869: step: 28/77, loss: 0.010774192400276661 2023-01-23 23:24:56.407948: step: 32/77, loss: 0.029460368677973747 2023-01-23 23:24:57.747864: step: 36/77, loss: 
0.0028629382140934467 2023-01-23 23:24:59.074527: step: 40/77, loss: 0.0027221275959163904 2023-01-23 23:25:00.390214: step: 44/77, loss: 0.004545790143311024 2023-01-23 23:25:01.683340: step: 48/77, loss: 0.023755038157105446 2023-01-23 23:25:03.019769: step: 52/77, loss: 2.516713266231818e-06 2023-01-23 23:25:04.363197: step: 56/77, loss: 0.0995924100279808 2023-01-23 23:25:05.660128: step: 60/77, loss: 0.011077972128987312 2023-01-23 23:25:06.916334: step: 64/77, loss: 0.002754708519205451 2023-01-23 23:25:08.202567: step: 68/77, loss: 0.056307896971702576 2023-01-23 23:25:09.475577: step: 72/77, loss: 0.0011351814027875662 2023-01-23 23:25:10.826311: step: 76/77, loss: 0.005272203125059605 2023-01-23 23:25:12.121051: step: 80/77, loss: 0.04300892353057861 2023-01-23 23:25:13.440211: step: 84/77, loss: 0.0004559697408694774 2023-01-23 23:25:14.763642: step: 88/77, loss: 0.003254904178902507 2023-01-23 23:25:16.081208: step: 92/77, loss: 0.052556414157152176 2023-01-23 23:25:17.429767: step: 96/77, loss: 0.00029989806353114545 2023-01-23 23:25:18.717362: step: 100/77, loss: 0.020337561145424843 2023-01-23 23:25:20.006620: step: 104/77, loss: 0.0005727419047616422 2023-01-23 23:25:21.347970: step: 108/77, loss: 0.004704848863184452 2023-01-23 23:25:22.680254: step: 112/77, loss: 0.007665609009563923 2023-01-23 23:25:23.937432: step: 116/77, loss: 4.7921581426635385e-05 2023-01-23 23:25:25.272350: step: 120/77, loss: 0.031694717705249786 2023-01-23 23:25:26.561770: step: 124/77, loss: 0.00014315726002678275 2023-01-23 23:25:27.903879: step: 128/77, loss: 0.015583023428916931 2023-01-23 23:25:29.214895: step: 132/77, loss: 0.02313140407204628 2023-01-23 23:25:30.528528: step: 136/77, loss: 0.03468454256653786 2023-01-23 23:25:31.862012: step: 140/77, loss: 0.002859762404114008 2023-01-23 23:25:33.189817: step: 144/77, loss: 0.00634431466460228 2023-01-23 23:25:34.535245: step: 148/77, loss: 0.005873150657862425 2023-01-23 23:25:35.866209: step: 152/77, loss: 0.0020783240906894207 2023-01-23 23:25:37.146034: step: 156/77, loss: 0.004159911070019007 2023-01-23 23:25:38.440225: step: 160/77, loss: 0.0023721123579889536 2023-01-23 23:25:39.722029: step: 164/77, loss: 0.003274687333032489 2023-01-23 23:25:41.013384: step: 168/77, loss: 0.005437308922410011 2023-01-23 23:25:42.318806: step: 172/77, loss: 0.0010760590666905046 2023-01-23 23:25:43.675661: step: 176/77, loss: 8.248248195741326e-05 2023-01-23 23:25:44.988242: step: 180/77, loss: 0.00022509020345751196 2023-01-23 23:25:46.293390: step: 184/77, loss: 0.11335831880569458 2023-01-23 23:25:47.599032: step: 188/77, loss: 0.045995332300662994 2023-01-23 23:25:48.897317: step: 192/77, loss: 1.6271269487333484e-06 2023-01-23 23:25:50.209987: step: 196/77, loss: 0.010354535654187202 2023-01-23 23:25:51.530765: step: 200/77, loss: 0.0004379808669909835 2023-01-23 23:25:52.843971: step: 204/77, loss: 0.00743110803887248 2023-01-23 23:25:54.147709: step: 208/77, loss: 0.042238425463438034 2023-01-23 23:25:55.502906: step: 212/77, loss: 0.009544878266751766 2023-01-23 23:25:56.804459: step: 216/77, loss: 0.0026782380882650614 2023-01-23 23:25:58.118091: step: 220/77, loss: 0.006199941039085388 2023-01-23 23:25:59.439480: step: 224/77, loss: 0.016368556767702103 2023-01-23 23:26:00.763469: step: 228/77, loss: 0.001818418619222939 2023-01-23 23:26:02.115384: step: 232/77, loss: 0.0004000907065346837 2023-01-23 23:26:03.495774: step: 236/77, loss: 0.019138023257255554 2023-01-23 23:26:04.801916: step: 240/77, loss: 0.007999700494110584 2023-01-23 
23:26:06.100458: step: 244/77, loss: 0.001023939112201333 2023-01-23 23:26:07.392797: step: 248/77, loss: 0.016954539343714714 2023-01-23 23:26:08.689828: step: 252/77, loss: 0.006783746648579836 2023-01-23 23:26:10.052708: step: 256/77, loss: 0.006279125344008207 2023-01-23 23:26:11.340210: step: 260/77, loss: 0.013124539516866207 2023-01-23 23:26:12.717505: step: 264/77, loss: 0.02867840602993965 2023-01-23 23:26:14.063419: step: 268/77, loss: 0.06440456211566925 2023-01-23 23:26:15.330430: step: 272/77, loss: 0.0008166446350514889 2023-01-23 23:26:16.665988: step: 276/77, loss: 0.02754484862089157 2023-01-23 23:26:18.002544: step: 280/77, loss: 0.05714397504925728 2023-01-23 23:26:19.317456: step: 284/77, loss: 0.05230758339166641 2023-01-23 23:26:20.605317: step: 288/77, loss: 0.043070804327726364 2023-01-23 23:26:21.929161: step: 292/77, loss: 0.002076654462143779 2023-01-23 23:26:23.217969: step: 296/77, loss: 0.009285686537623405 2023-01-23 23:26:24.540702: step: 300/77, loss: 0.0024392385967075825 2023-01-23 23:26:25.895291: step: 304/77, loss: 0.057006798684597015 2023-01-23 23:26:27.191850: step: 308/77, loss: 0.006346026435494423 2023-01-23 23:26:28.570678: step: 312/77, loss: 0.015370495617389679 2023-01-23 23:26:29.864550: step: 316/77, loss: 0.015006231144070625 2023-01-23 23:26:31.180240: step: 320/77, loss: 0.0007069883868098259 2023-01-23 23:26:32.561770: step: 324/77, loss: 0.046590473502874374 2023-01-23 23:26:33.869376: step: 328/77, loss: 0.021039793267846107 2023-01-23 23:26:35.224719: step: 332/77, loss: 0.0467706099152565 2023-01-23 23:26:36.483493: step: 336/77, loss: 0.0013578898506239057 2023-01-23 23:26:37.795508: step: 340/77, loss: 0.0328650176525116 2023-01-23 23:26:39.120945: step: 344/77, loss: 0.00026520888786762953 2023-01-23 23:26:40.466652: step: 348/77, loss: 0.012248726561665535 2023-01-23 23:26:41.803499: step: 352/77, loss: 0.07332906872034073 2023-01-23 23:26:43.193959: step: 356/77, loss: 0.01326354593038559 2023-01-23 23:26:44.524950: step: 360/77, loss: 0.012125191278755665 2023-01-23 23:26:45.886962: step: 364/77, loss: 0.01191934198141098 2023-01-23 23:26:47.234291: step: 368/77, loss: 6.407341425074264e-05 2023-01-23 23:26:48.569111: step: 372/77, loss: 0.01194518432021141 2023-01-23 23:26:49.849451: step: 376/77, loss: 0.032971225678920746 2023-01-23 23:26:51.145483: step: 380/77, loss: 0.020709317177534103 2023-01-23 23:26:52.462627: step: 384/77, loss: 0.00015790096949785948 2023-01-23 23:26:53.815022: step: 388/77, loss: 0.0025844480842351913 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9836065573770492, 'r': 0.46875, 'f1': 0.6349206349206349}, 'slot': {'p': 0.6818181818181818, 'r': 0.013736263736263736, 'f1': 0.026929982046678635}, 'combined': 0.017098401299478497, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.967741935483871, 'r': 0.46875, 'f1': 0.631578947368421}, 'slot': {'p': 0.6521739130434783, 'r': 0.013736263736263736, 'f1': 0.026905829596412557}, 'combined': 0.01699315553457635, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9833333333333333, 'r': 0.4609375, 'f1': 0.6276595744680851}, 'slot': {'p': 0.6818181818181818, 'r': 0.013736263736263736, 'f1': 0.026929982046678635}, 'combined': 0.01690286107185148, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:28:42.208940: step: 4/77, loss: 0.00036224426003172994 2023-01-23 23:28:43.472784: step: 8/77, loss: 0.00040116641321219504 2023-01-23 23:28:44.734249: step: 12/77, loss: 0.006214071996510029 2023-01-23 23:28:46.083916: step: 16/77, loss: 0.024180032312870026 2023-01-23 23:28:47.423189: step: 20/77, loss: 0.023085253313183784 2023-01-23 23:28:48.771839: step: 24/77, loss: 0.020584464073181152 2023-01-23 23:28:50.059909: 
step: 28/77, loss: 0.00010433314309921116 2023-01-23 23:28:51.360779: step: 32/77, loss: 0.027797989547252655 2023-01-23 23:28:52.717878: step: 36/77, loss: 0.013356706127524376 2023-01-23 23:28:54.028561: step: 40/77, loss: 0.054990656673908234 2023-01-23 23:28:55.324955: step: 44/77, loss: 0.002558821579441428 2023-01-23 23:28:56.626646: step: 48/77, loss: 0.010729584842920303 2023-01-23 23:28:57.908907: step: 52/77, loss: 0.0019385579507797956 2023-01-23 23:28:59.210278: step: 56/77, loss: 0.021367311477661133 2023-01-23 23:29:00.542798: step: 60/77, loss: 0.00020064935961272568 2023-01-23 23:29:01.908131: step: 64/77, loss: 0.09076997637748718 2023-01-23 23:29:03.234377: step: 68/77, loss: 0.008457686752080917 2023-01-23 23:29:04.561548: step: 72/77, loss: 0.011427009478211403 2023-01-23 23:29:05.832225: step: 76/77, loss: 6.459790893131867e-05 2023-01-23 23:29:07.172119: step: 80/77, loss: 0.0007144349510781467 2023-01-23 23:29:08.466635: step: 84/77, loss: 0.006785162724554539 2023-01-23 23:29:09.797422: step: 88/77, loss: 0.004394386429339647 2023-01-23 23:29:11.112563: step: 92/77, loss: 0.004328886978328228 2023-01-23 23:29:12.445532: step: 96/77, loss: 2.6869380235439166e-05 2023-01-23 23:29:13.815639: step: 100/77, loss: 0.03273552283644676 2023-01-23 23:29:15.136612: step: 104/77, loss: 0.004109046421945095 2023-01-23 23:29:16.402320: step: 108/77, loss: 0.0020677947904914618 2023-01-23 23:29:17.714680: step: 112/77, loss: 0.04142068699002266 2023-01-23 23:29:19.043559: step: 116/77, loss: 0.012463448569178581 2023-01-23 23:29:20.383922: step: 120/77, loss: 0.032798901200294495 2023-01-23 23:29:21.720315: step: 124/77, loss: 0.00578728411346674 2023-01-23 23:29:22.990474: step: 128/77, loss: 0.0483129508793354 2023-01-23 23:29:24.314501: step: 132/77, loss: 0.010041739791631699 2023-01-23 23:29:25.606695: step: 136/77, loss: 0.003338428447023034 2023-01-23 23:29:26.945046: step: 140/77, loss: 0.001665607444010675 2023-01-23 23:29:28.325956: step: 144/77, loss: 0.00014822985394857824 2023-01-23 23:29:29.690723: step: 148/77, loss: 0.019720718264579773 2023-01-23 23:29:30.987506: step: 152/77, loss: 0.0004685567400883883 2023-01-23 23:29:32.355099: step: 156/77, loss: 0.006013353355228901 2023-01-23 23:29:33.690077: step: 160/77, loss: 0.019151905551552773 2023-01-23 23:29:34.977039: step: 164/77, loss: 0.02454826608300209 2023-01-23 23:29:36.301828: step: 168/77, loss: 0.01621721312403679 2023-01-23 23:29:37.612829: step: 172/77, loss: 0.08445467799901962 2023-01-23 23:29:38.907281: step: 176/77, loss: 0.047589078545570374 2023-01-23 23:29:40.214006: step: 180/77, loss: 0.011298105120658875 2023-01-23 23:29:41.578498: step: 184/77, loss: 0.0011730461847037077 2023-01-23 23:29:42.956006: step: 188/77, loss: 0.017566129565238953 2023-01-23 23:29:44.248469: step: 192/77, loss: 0.009225753135979176 2023-01-23 23:29:45.603126: step: 196/77, loss: 0.008841684088110924 2023-01-23 23:29:46.886344: step: 200/77, loss: 0.0313703790307045 2023-01-23 23:29:48.190482: step: 204/77, loss: 0.018745075911283493 2023-01-23 23:29:49.480748: step: 208/77, loss: 0.016669992357492447 2023-01-23 23:29:50.820196: step: 212/77, loss: 0.11202440410852432 2023-01-23 23:29:52.161682: step: 216/77, loss: 0.004480287898331881 2023-01-23 23:29:53.460795: step: 220/77, loss: 0.056660331785678864 2023-01-23 23:29:54.851568: step: 224/77, loss: 0.002400397788733244 2023-01-23 23:29:56.156066: step: 228/77, loss: 0.00011129678750876337 2023-01-23 23:29:57.513085: step: 232/77, loss: 0.00022100968635641038 
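One pattern worth noting in the "Current best result" blocks: the dev "combined" scores for epochs 8-10 above tie the epoch-4 value exactly (0.05179909351586346), yet the best result stays pinned at epoch 4. That is what a strictly-greater comparison would produce; how train.py actually selects its best checkpoint is not visible in this log, so the sketch below is a hypothetical illustration only:

def maybe_update(best: dict | None, dev_result: dict) -> dict:
    # Keep the earlier epoch on ties (strict ">" update), as the log suggests.
    if best is None or dev_result["combined"] > best["combined"]:
        return {"combined": dev_result["combined"], "epoch": dev_result["epoch"]}
    return best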
2023-01-23 23:29:58.845361: step: 236/77, loss: 0.006116272881627083 2023-01-23 23:30:00.213738: step: 240/77, loss: 0.0031299712136387825 2023-01-23 23:30:01.555167: step: 244/77, loss: 0.05327416956424713 2023-01-23 23:30:02.859152: step: 248/77, loss: 0.011465567164123058 2023-01-23 23:30:04.149405: step: 252/77, loss: 0.0021157297305762768 2023-01-23 23:30:05.511171: step: 256/77, loss: 0.0025393886025995016 2023-01-23 23:30:06.798868: step: 260/77, loss: 0.005280718207359314 2023-01-23 23:30:08.113025: step: 264/77, loss: 0.00016338759451173246 2023-01-23 23:30:09.446971: step: 268/77, loss: 0.030775373801589012 2023-01-23 23:30:10.736351: step: 272/77, loss: 8.42304652906023e-05 2023-01-23 23:30:12.035364: step: 276/77, loss: 0.020594937726855278 2023-01-23 23:30:13.331257: step: 280/77, loss: 0.0014261136529967189 2023-01-23 23:30:14.682408: step: 284/77, loss: 0.005536660086363554 2023-01-23 23:30:16.021106: step: 288/77, loss: 0.00045688700629398227 2023-01-23 23:30:17.367916: step: 292/77, loss: 0.009080913849174976 2023-01-23 23:30:18.691966: step: 296/77, loss: 0.00039632292464375496 2023-01-23 23:30:20.042787: step: 300/77, loss: 0.10056258738040924 2023-01-23 23:30:21.362095: step: 304/77, loss: 9.19805188459577e-06 2023-01-23 23:30:22.662723: step: 308/77, loss: 0.03023666888475418 2023-01-23 23:30:23.957093: step: 312/77, loss: 0.08584782481193542 2023-01-23 23:30:25.255104: step: 316/77, loss: 0.009489987976849079 2023-01-23 23:30:26.633133: step: 320/77, loss: 0.011331534944474697 2023-01-23 23:30:27.953379: step: 324/77, loss: 0.013444948941469193 2023-01-23 23:30:29.238029: step: 328/77, loss: 0.014004156924784184 2023-01-23 23:30:30.576791: step: 332/77, loss: 0.0021559440065175295 2023-01-23 23:30:31.881575: step: 336/77, loss: 0.023692995309829712 2023-01-23 23:30:33.155504: step: 340/77, loss: 0.04169423505663872 2023-01-23 23:30:34.476445: step: 344/77, loss: 0.004395222757011652 2023-01-23 23:30:35.848881: step: 348/77, loss: 0.0006789501057937741 2023-01-23 23:30:37.143502: step: 352/77, loss: 0.002909147646278143 2023-01-23 23:30:38.521823: step: 356/77, loss: 0.008771540597081184 2023-01-23 23:30:39.842165: step: 360/77, loss: 0.003884167643263936 2023-01-23 23:30:41.160376: step: 364/77, loss: 0.003442424349486828 2023-01-23 23:30:42.507046: step: 368/77, loss: 0.00011853533214889467 2023-01-23 23:30:43.811858: step: 372/77, loss: 0.00014925809227861464 2023-01-23 23:30:45.138050: step: 376/77, loss: 0.030259141698479652 2023-01-23 23:30:46.477548: step: 380/77, loss: 0.007407361175864935 2023-01-23 23:30:47.773655: step: 384/77, loss: 0.04872460663318634 2023-01-23 23:30:49.066858: step: 388/77, loss: 0.004564455710351467 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9666666666666667, 'r': 0.453125, 'f1': 0.6170212765957447}, 'slot': {'p': 0.5454545454545454, 'r': 0.01098901098901099, 'f1': 0.021543985637342913}, 'combined': 0.013293097520913712, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9661016949152542, 'r': 0.4453125, 'f1': 0.6096256684491977}, 'slot': {'p': 
0.5454545454545454, 'r': 0.01098901098901099, 'f1': 0.021543985637342913}, 'combined': 0.013133766645225088, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9830508474576272, 'r': 0.453125, 'f1': 0.6203208556149733}, 'slot': {'p': 0.6, 'r': 0.01098901098901099, 'f1': 0.02158273381294964}, 'combined': 0.013388219905359136, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:32:36.014018: step: 4/77, loss: 6.716536154272035e-05 2023-01-23 23:32:37.362430: step: 8/77, loss: 0.14508292078971863 2023-01-23 23:32:38.678143: step: 12/77, loss: 0.0064772190526127815 2023-01-23 23:32:39.950518: step: 16/77, loss: 0.00020000609220005572 2023-01-23 
23:32:41.221111: step: 20/77, loss: 0.0016990758012980223 2023-01-23 23:32:42.540027: step: 24/77, loss: 0.04430164396762848 2023-01-23 23:32:43.875766: step: 28/77, loss: 0.02811635658144951 2023-01-23 23:32:45.221307: step: 32/77, loss: 0.0021595361176878214 2023-01-23 23:32:46.525647: step: 36/77, loss: 0.0029978426173329353 2023-01-23 23:32:47.807843: step: 40/77, loss: 0.006254137028008699 2023-01-23 23:32:49.160666: step: 44/77, loss: 0.006620858795940876 2023-01-23 23:32:50.442018: step: 48/77, loss: 0.03224121034145355 2023-01-23 23:32:51.754536: step: 52/77, loss: 0.0015977731673046947 2023-01-23 23:32:53.049253: step: 56/77, loss: 0.001971067627891898 2023-01-23 23:32:54.341227: step: 60/77, loss: 0.003533907700330019 2023-01-23 23:32:55.590083: step: 64/77, loss: 0.028958367183804512 2023-01-23 23:32:56.877283: step: 68/77, loss: 0.01046693790704012 2023-01-23 23:32:58.177946: step: 72/77, loss: 0.0007335816044360399 2023-01-23 23:32:59.460682: step: 76/77, loss: 0.01121562160551548 2023-01-23 23:33:00.796368: step: 80/77, loss: 0.01169790979474783 2023-01-23 23:33:02.131202: step: 84/77, loss: 0.0018057833658531308 2023-01-23 23:33:03.441178: step: 88/77, loss: 0.005374482832849026 2023-01-23 23:33:04.745789: step: 92/77, loss: 0.024878591299057007 2023-01-23 23:33:06.084980: step: 96/77, loss: 0.0021314818877726793 2023-01-23 23:33:07.369879: step: 100/77, loss: 0.0036977045238018036 2023-01-23 23:33:08.715195: step: 104/77, loss: 0.003813137300312519 2023-01-23 23:33:10.035310: step: 108/77, loss: 0.0020447312854230404 2023-01-23 23:33:11.371007: step: 112/77, loss: 0.057621728628873825 2023-01-23 23:33:12.752376: step: 116/77, loss: 0.002673014998435974 2023-01-23 23:33:14.113409: step: 120/77, loss: 0.06338480114936829 2023-01-23 23:33:15.462879: step: 124/77, loss: 0.02054738998413086 2023-01-23 23:33:16.748992: step: 128/77, loss: 0.018381303176283836 2023-01-23 23:33:18.079502: step: 132/77, loss: 0.01054394245147705 2023-01-23 23:33:19.365641: step: 136/77, loss: 0.00022755435202270746 2023-01-23 23:33:20.658245: step: 140/77, loss: 0.0012879862915724516 2023-01-23 23:33:22.026626: step: 144/77, loss: 0.03543628752231598 2023-01-23 23:33:23.313443: step: 148/77, loss: 0.034065961837768555 2023-01-23 23:33:24.624352: step: 152/77, loss: 0.0007926194812171161 2023-01-23 23:33:25.954776: step: 156/77, loss: 0.019189316779375076 2023-01-23 23:33:27.270753: step: 160/77, loss: 0.005916177295148373 2023-01-23 23:33:28.594099: step: 164/77, loss: 0.006899761967360973 2023-01-23 23:33:29.900954: step: 168/77, loss: 0.01533865462988615 2023-01-23 23:33:31.205270: step: 172/77, loss: 0.0008392666350118816 2023-01-23 23:33:32.509412: step: 176/77, loss: 0.0041745989583432674 2023-01-23 23:33:33.842117: step: 180/77, loss: 0.024267667904496193 2023-01-23 23:33:35.144265: step: 184/77, loss: 0.012002028524875641 2023-01-23 23:33:36.441255: step: 188/77, loss: 0.000731311272829771 2023-01-23 23:33:37.775021: step: 192/77, loss: 0.0017883798573166132 2023-01-23 23:33:39.101885: step: 196/77, loss: 0.006359480787068605 2023-01-23 23:33:40.433429: step: 200/77, loss: 0.0010414841817691922 2023-01-23 23:33:41.767576: step: 204/77, loss: 0.004932164680212736 2023-01-23 23:33:43.106591: step: 208/77, loss: 0.011990321800112724 2023-01-23 23:33:44.410629: step: 212/77, loss: 0.05553290247917175 2023-01-23 23:33:45.771790: step: 216/77, loss: 0.0007119431393221021 2023-01-23 23:33:47.085593: step: 220/77, loss: 1.3023453675486962e-06 2023-01-23 23:33:48.424353: step: 224/77, loss: 
0.004885970614850521 2023-01-23 23:33:49.707579: step: 228/77, loss: 0.00034167556441389024 2023-01-23 23:33:51.078149: step: 232/77, loss: 0.03612995520234108 2023-01-23 23:33:52.376933: step: 236/77, loss: 0.00044193086796440184 2023-01-23 23:33:53.710105: step: 240/77, loss: 0.00014060882676858455 2023-01-23 23:33:54.990549: step: 244/77, loss: 0.012359784916043282 2023-01-23 23:33:56.303410: step: 248/77, loss: 8.537257235730067e-05 2023-01-23 23:33:57.606416: step: 252/77, loss: 0.0018495420226827264 2023-01-23 23:33:58.953843: step: 256/77, loss: 0.012446880340576172 2023-01-23 23:34:00.231596: step: 260/77, loss: 0.00018789272871799767 2023-01-23 23:34:01.522715: step: 264/77, loss: 0.036935579031705856 2023-01-23 23:34:02.823205: step: 268/77, loss: 0.004805781878530979 2023-01-23 23:34:04.162579: step: 272/77, loss: 0.051274560391902924 2023-01-23 23:34:05.496513: step: 276/77, loss: 0.005366981960833073 2023-01-23 23:34:06.809152: step: 280/77, loss: 0.011775230057537556 2023-01-23 23:34:08.158639: step: 284/77, loss: 0.0029582062270492315 2023-01-23 23:34:09.473322: step: 288/77, loss: 0.007654991932213306 2023-01-23 23:34:10.756716: step: 292/77, loss: 0.012157164514064789 2023-01-23 23:34:12.035745: step: 296/77, loss: 0.011476476676762104 2023-01-23 23:34:13.391101: step: 300/77, loss: 0.0006907251081429422 2023-01-23 23:34:14.709181: step: 304/77, loss: 0.042078647762537 2023-01-23 23:34:15.991221: step: 308/77, loss: 0.0014963550493121147 2023-01-23 23:34:17.354880: step: 312/77, loss: 0.006883785128593445 2023-01-23 23:34:18.609099: step: 316/77, loss: 0.01745801977813244 2023-01-23 23:34:19.856531: step: 320/77, loss: 0.008958478458225727 2023-01-23 23:34:21.174629: step: 324/77, loss: 0.01516663283109665 2023-01-23 23:34:22.465146: step: 328/77, loss: 0.0009179931366816163 2023-01-23 23:34:23.812385: step: 332/77, loss: 0.0005924751749262214 2023-01-23 23:34:25.144496: step: 336/77, loss: 0.028950592502951622 2023-01-23 23:34:26.449816: step: 340/77, loss: 0.010792708955705166 2023-01-23 23:34:27.720593: step: 344/77, loss: 0.0005969545454718173 2023-01-23 23:34:29.034018: step: 348/77, loss: 0.0005601674783974886 2023-01-23 23:34:30.311782: step: 352/77, loss: 0.0011363797821104527 2023-01-23 23:34:31.627751: step: 356/77, loss: 0.027163442224264145 2023-01-23 23:34:32.944735: step: 360/77, loss: 0.011760690249502659 2023-01-23 23:34:34.273452: step: 364/77, loss: 0.0001635812222957611 2023-01-23 23:34:35.561099: step: 368/77, loss: 0.03414410725235939 2023-01-23 23:34:36.894721: step: 372/77, loss: 0.007519087288528681 2023-01-23 23:34:38.194990: step: 376/77, loss: 2.949499867099803e-05 2023-01-23 23:34:39.502141: step: 380/77, loss: 0.03247775137424469 2023-01-23 23:34:40.810510: step: 384/77, loss: 0.005735212471336126 2023-01-23 23:34:42.111237: step: 388/77, loss: 0.007732848171144724 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Chinese: {'template': {'p': 0.96875, 'r': 0.484375, 'f1': 0.6458333333333334}, 'slot': {'p': 0.68, 'r': 0.015567765567765568, 'f1': 0.030438675022381376}, 'combined': 0.01965831095195464, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 
0.048482119404105226, 'epoch': 12} Test Korean: {'template': {'p': 0.96875, 'r': 0.484375, 'f1': 0.6458333333333334}, 'slot': {'p': 0.68, 'r': 0.015567765567765568, 'f1': 0.030438675022381376}, 'combined': 0.01965831095195464, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Russian: {'template': {'p': 0.96875, 'r': 0.484375, 'f1': 0.6458333333333334}, 'slot': {'p': 0.68, 'r': 0.015567765567765568, 'f1': 0.030438675022381376}, 'combined': 0.01965831095195464, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:36:28.875227: step: 4/77, loss: 0.009995035827159882 2023-01-23 23:36:30.158010: step: 8/77, loss: 0.00022118259221315384 2023-01-23 23:36:31.501911: step: 12/77, loss: 0.002348837675526738 2023-01-23 23:36:32.782942: step: 16/77, 
loss: 0.015405723825097084 2023-01-23 23:36:34.030339: step: 20/77, loss: 0.0051343501545488834 2023-01-23 23:36:35.331552: step: 24/77, loss: 0.006473531015217304 2023-01-23 23:36:36.643692: step: 28/77, loss: 0.054217107594013214 2023-01-23 23:36:37.986288: step: 32/77, loss: 1.1163228919031098e-05 2023-01-23 23:36:39.297907: step: 36/77, loss: 0.020336586982011795 2023-01-23 23:36:40.578490: step: 40/77, loss: 0.05220355466008186 2023-01-23 23:36:41.854863: step: 44/77, loss: 4.464634184841998e-05 2023-01-23 23:36:43.151328: step: 48/77, loss: 0.025243086740374565 2023-01-23 23:36:44.480283: step: 52/77, loss: 0.05261071026325226 2023-01-23 23:36:45.772409: step: 56/77, loss: 0.01674208790063858 2023-01-23 23:36:47.100057: step: 60/77, loss: 5.1860555686289445e-05 2023-01-23 23:36:48.391655: step: 64/77, loss: 0.00012300141679588705 2023-01-23 23:36:49.703961: step: 68/77, loss: 0.006085620261728764 2023-01-23 23:36:50.999990: step: 72/77, loss: 2.66003335127607e-05 2023-01-23 23:36:52.321506: step: 76/77, loss: 0.00011043615086236969 2023-01-23 23:36:53.583364: step: 80/77, loss: 0.0026714566629379988 2023-01-23 23:36:54.902750: step: 84/77, loss: 0.03106573037803173 2023-01-23 23:36:56.181823: step: 88/77, loss: 0.031264789402484894 2023-01-23 23:36:57.481245: step: 92/77, loss: 0.001054988824762404 2023-01-23 23:36:58.814037: step: 96/77, loss: 0.0007731158402748406 2023-01-23 23:37:00.143270: step: 100/77, loss: 0.0005604913458228111 2023-01-23 23:37:01.469373: step: 104/77, loss: 0.007275127340108156 2023-01-23 23:37:02.768560: step: 108/77, loss: 9.56045332713984e-05 2023-01-23 23:37:04.097202: step: 112/77, loss: 0.0005544309969991446 2023-01-23 23:37:05.444711: step: 116/77, loss: 0.02516276203095913 2023-01-23 23:37:06.768398: step: 120/77, loss: 0.0228984784334898 2023-01-23 23:37:08.060353: step: 124/77, loss: 0.005786824971437454 2023-01-23 23:37:09.372814: step: 128/77, loss: 0.0007653665379621089 2023-01-23 23:37:10.675452: step: 132/77, loss: 0.05804044008255005 2023-01-23 23:37:11.970967: step: 136/77, loss: 0.0004368829831946641 2023-01-23 23:37:13.308510: step: 140/77, loss: 0.01609526388347149 2023-01-23 23:37:14.607268: step: 144/77, loss: 0.009050913155078888 2023-01-23 23:37:15.917095: step: 148/77, loss: 0.004335467703640461 2023-01-23 23:37:17.227771: step: 152/77, loss: 0.00035594069049693644 2023-01-23 23:37:18.579229: step: 156/77, loss: 0.005551069974899292 2023-01-23 23:37:19.881511: step: 160/77, loss: 0.10360582917928696 2023-01-23 23:37:21.204436: step: 164/77, loss: 0.006701688282191753 2023-01-23 23:37:22.503187: step: 168/77, loss: 0.00022893882123753428 2023-01-23 23:37:23.795662: step: 172/77, loss: 0.02077857404947281 2023-01-23 23:37:25.133578: step: 176/77, loss: 0.015844902023673058 2023-01-23 23:37:26.393695: step: 180/77, loss: 0.04306226223707199 2023-01-23 23:37:27.726328: step: 184/77, loss: 0.04048726707696915 2023-01-23 23:37:28.969160: step: 188/77, loss: 0.004294191021472216 2023-01-23 23:37:30.273816: step: 192/77, loss: 0.003412810154259205 2023-01-23 23:37:31.562602: step: 196/77, loss: 0.014952167868614197 2023-01-23 23:37:32.896964: step: 200/77, loss: 0.00016821689496282488 2023-01-23 23:37:34.242939: step: 204/77, loss: 0.003225933061912656 2023-01-23 23:37:35.564173: step: 208/77, loss: 0.030794963240623474 2023-01-23 23:37:36.910518: step: 212/77, loss: 0.0027160472236573696 2023-01-23 23:37:38.223929: step: 216/77, loss: 0.0658402293920517 2023-01-23 23:37:39.513929: step: 220/77, loss: 0.0005345541285350919 2023-01-23 
23:37:40.831024: step: 224/77, loss: 0.0006551474798470736 2023-01-23 23:37:42.142158: step: 228/77, loss: 0.0009930375963449478 2023-01-23 23:37:43.488470: step: 232/77, loss: 0.00024232860596384853 2023-01-23 23:37:44.832378: step: 236/77, loss: 0.0362166166305542 2023-01-23 23:37:46.158355: step: 240/77, loss: 0.037811048328876495 2023-01-23 23:37:47.497440: step: 244/77, loss: 0.04655441641807556 2023-01-23 23:37:48.823678: step: 248/77, loss: 0.006816321052610874 2023-01-23 23:37:50.198370: step: 252/77, loss: 0.0006618615007027984 2023-01-23 23:37:51.560777: step: 256/77, loss: 0.047126851975917816 2023-01-23 23:37:52.865358: step: 260/77, loss: 0.0028675626963377 2023-01-23 23:37:54.230608: step: 264/77, loss: 0.006751200184226036 2023-01-23 23:37:55.595668: step: 268/77, loss: 0.029957851395010948 2023-01-23 23:37:56.914902: step: 272/77, loss: 0.009517940692603588 2023-01-23 23:37:58.248721: step: 276/77, loss: 0.00022375909611582756 2023-01-23 23:37:59.556142: step: 280/77, loss: 0.0030457484535872936 2023-01-23 23:38:00.851534: step: 284/77, loss: 0.006273590959608555 2023-01-23 23:38:02.216157: step: 288/77, loss: 0.0011199575383216143 2023-01-23 23:38:03.522636: step: 292/77, loss: 0.0001741000305628404 2023-01-23 23:38:04.825543: step: 296/77, loss: 0.0022353699896484613 2023-01-23 23:38:06.095029: step: 300/77, loss: 0.0030100643634796143 2023-01-23 23:38:07.417952: step: 304/77, loss: 0.08724191784858704 2023-01-23 23:38:08.694253: step: 308/77, loss: 0.004720780998468399 2023-01-23 23:38:10.012100: step: 312/77, loss: 0.00046639557695016265 2023-01-23 23:38:11.311797: step: 316/77, loss: 0.00041480723302811384 2023-01-23 23:38:12.682335: step: 320/77, loss: 0.01986285112798214 2023-01-23 23:38:14.042812: step: 324/77, loss: 0.026395024731755257 2023-01-23 23:38:15.371571: step: 328/77, loss: 0.036015816032886505 2023-01-23 23:38:16.720883: step: 332/77, loss: 0.0005104574374854565 2023-01-23 23:38:18.020896: step: 336/77, loss: 0.010636835359036922 2023-01-23 23:38:19.268268: step: 340/77, loss: 0.018415292724967003 2023-01-23 23:38:20.553893: step: 344/77, loss: 0.0005754455924034119 2023-01-23 23:38:21.906764: step: 348/77, loss: 0.0002713290450628847 2023-01-23 23:38:23.264451: step: 352/77, loss: 3.4492208214942366e-05 2023-01-23 23:38:24.607258: step: 356/77, loss: 0.04332399368286133 2023-01-23 23:38:25.903924: step: 360/77, loss: 0.021294377744197845 2023-01-23 23:38:27.280242: step: 364/77, loss: 0.0004992512986063957 2023-01-23 23:38:28.613091: step: 368/77, loss: 0.0013733096420764923 2023-01-23 23:38:29.942994: step: 372/77, loss: 0.0025527013931423426 2023-01-23 23:38:31.300203: step: 376/77, loss: 2.774977110675536e-05 2023-01-23 23:38:32.675235: step: 380/77, loss: 0.0006133266724646091 2023-01-23 23:38:34.023381: step: 384/77, loss: 0.011961428448557854 2023-01-23 23:38:35.335600: step: 388/77, loss: 0.0016036704182624817 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.546875, 'f1': 0.7}, 'slot': {'p': 0.6129032258064516, 'r': 0.0173992673992674, 'f1': 0.033837934105075684}, 'combined': 0.02368655387355298, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 
0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9710144927536232, 'r': 0.5234375, 'f1': 0.6802030456852792}, 'slot': {'p': 0.6333333333333333, 'r': 0.0173992673992674, 'f1': 0.0338680926916221}, 'combined': 0.0230371798003927, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.9714285714285714, 'r': 0.53125, 'f1': 0.6868686868686867}, 'slot': {'p': 0.6071428571428571, 'r': 0.015567765567765568, 'f1': 0.030357142857142853}, 'combined': 0.020851370851370846, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:40:22.426716: step: 4/77, loss: 0.0004200860275886953 2023-01-23 23:40:23.695507: step: 
8/77, loss: 0.0004749511426780373 2023-01-23 23:40:24.995030: step: 12/77, loss: 0.01922205276787281 2023-01-23 23:40:26.267698: step: 16/77, loss: 0.0049668727442622185 2023-01-23 23:40:27.612613: step: 20/77, loss: 0.00548710860311985 2023-01-23 23:40:28.898057: step: 24/77, loss: 0.009446317330002785 2023-01-23 23:40:30.209556: step: 28/77, loss: 0.056420616805553436 2023-01-23 23:40:31.498994: step: 32/77, loss: 0.0008900321554392576 2023-01-23 23:40:32.876421: step: 36/77, loss: 0.010750795714557171 2023-01-23 23:40:34.211471: step: 40/77, loss: 0.03467971459031105 2023-01-23 23:40:35.520643: step: 44/77, loss: 1.3400214811554179e-05 2023-01-23 23:40:36.818819: step: 48/77, loss: 0.04462679103016853 2023-01-23 23:40:38.171488: step: 52/77, loss: 0.0011556717799976468 2023-01-23 23:40:39.462472: step: 56/77, loss: 0.004821427166461945 2023-01-23 23:40:40.713751: step: 60/77, loss: 0.0010934629244729877 2023-01-23 23:40:42.002743: step: 64/77, loss: 0.002542417263612151 2023-01-23 23:40:43.286996: step: 68/77, loss: 0.054598476737737656 2023-01-23 23:40:44.590055: step: 72/77, loss: 1.2503600373747759e-05 2023-01-23 23:40:45.873153: step: 76/77, loss: 0.01632210798561573 2023-01-23 23:40:47.212699: step: 80/77, loss: 0.0004365661588963121 2023-01-23 23:40:48.518933: step: 84/77, loss: 0.009737212210893631 2023-01-23 23:40:49.799713: step: 88/77, loss: 0.005879779811948538 2023-01-23 23:40:51.178569: step: 92/77, loss: 0.002779336180537939 2023-01-23 23:40:52.486944: step: 96/77, loss: 0.0002582361048553139 2023-01-23 23:40:53.830366: step: 100/77, loss: 0.003720135660842061 2023-01-23 23:40:55.123884: step: 104/77, loss: 0.025107156485319138 2023-01-23 23:40:56.402592: step: 108/77, loss: 0.015690365806221962 2023-01-23 23:40:57.735247: step: 112/77, loss: 0.0029986624140292406 2023-01-23 23:40:59.077900: step: 116/77, loss: 0.0048570032231509686 2023-01-23 23:41:00.423019: step: 120/77, loss: 0.00633400259539485 2023-01-23 23:41:01.729725: step: 124/77, loss: 0.005726975854486227 2023-01-23 23:41:03.018191: step: 128/77, loss: 0.0002349330607103184 2023-01-23 23:41:04.354856: step: 132/77, loss: 0.06281039118766785 2023-01-23 23:41:05.648356: step: 136/77, loss: 0.013547773472964764 2023-01-23 23:41:07.004354: step: 140/77, loss: 0.005808471702039242 2023-01-23 23:41:08.331286: step: 144/77, loss: 0.027540259063243866 2023-01-23 23:41:09.650951: step: 148/77, loss: 6.303073314484209e-05 2023-01-23 23:41:10.957632: step: 152/77, loss: 0.023322701454162598 2023-01-23 23:41:12.248500: step: 156/77, loss: 0.02620372734963894 2023-01-23 23:41:13.555663: step: 160/77, loss: 0.007477977313101292 2023-01-23 23:41:14.889240: step: 164/77, loss: 0.0016014814609661698 2023-01-23 23:41:16.209433: step: 168/77, loss: 0.02817235141992569 2023-01-23 23:41:17.521732: step: 172/77, loss: 0.0029439725913107395 2023-01-23 23:41:18.829443: step: 176/77, loss: 0.012270371429622173 2023-01-23 23:41:20.139922: step: 180/77, loss: 0.01205486711114645 2023-01-23 23:41:21.464916: step: 184/77, loss: 0.00012254132889211178 2023-01-23 23:41:22.770350: step: 188/77, loss: 0.002939145313575864 2023-01-23 23:41:24.071356: step: 192/77, loss: 5.6999630032805726e-05 2023-01-23 23:41:25.344215: step: 196/77, loss: 0.07307901233434677 2023-01-23 23:41:26.648435: step: 200/77, loss: 0.0009206432150676847 2023-01-23 23:41:27.929982: step: 204/77, loss: 0.00030497522675432265 2023-01-23 23:41:29.291025: step: 208/77, loss: 0.0005431080353446305 2023-01-23 23:41:30.567815: step: 212/77, loss: 0.00023502598924096674 
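The step counter in these lines advances by 4, which matches the --accumulate_step 4 setting, and each epoch closes with a single "Loss:" summary. A minimal parsing sketch follows; it assumes, without confirmation from train.py, that the summary is simply the mean of the logged step losses rounded to three decimals, and it reproduces the "step: N/77" denominator from the log as-is.

import re
from statistics import mean

# Matches entries of the form "step: N/77, loss: X" in a chunk of log text.
STEP_RE = re.compile(r"step: \d+/\d+, loss: ([0-9.eE+-]+)")

def epoch_loss(log_text):
    """Mean of every step loss found in the given chunk of log text."""
    return mean(float(m.group(1)) for m in STEP_RE.finditer(log_text))

# Two entries copied from the lines above; the full epoch prints "Loss: 0.011".
sample = ("2023-01-23 23:41:29.291025: step: 208/77, loss: 0.0005431080353446305 "
          "2023-01-23 23:41:30.567815: step: 212/77, loss: 0.00023502598924096674")
print(round(epoch_loss(sample), 3))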
2023-01-23 23:41:31.904098: step: 216/77, loss: 0.00036915275268256664 2023-01-23 23:41:33.234135: step: 220/77, loss: 0.02349918708205223 2023-01-23 23:41:34.525987: step: 224/77, loss: 0.0042792558670043945 2023-01-23 23:41:35.821911: step: 228/77, loss: 0.0006924453191459179 2023-01-23 23:41:37.140734: step: 232/77, loss: 0.00022316054673865438 2023-01-23 23:41:38.492381: step: 236/77, loss: 0.008597703650593758 2023-01-23 23:41:39.822529: step: 240/77, loss: 0.0010497706243768334 2023-01-23 23:41:41.121253: step: 244/77, loss: 0.005961691495031118 2023-01-23 23:41:42.458483: step: 248/77, loss: 0.00014646831550635397 2023-01-23 23:41:43.807527: step: 252/77, loss: 0.06768776476383209 2023-01-23 23:41:45.150232: step: 256/77, loss: 0.0005682706250809133 2023-01-23 23:41:46.539394: step: 260/77, loss: 0.003363983705639839 2023-01-23 23:41:47.867935: step: 264/77, loss: 2.314460652996786e-05 2023-01-23 23:41:49.159132: step: 268/77, loss: 0.024550795555114746 2023-01-23 23:41:50.435571: step: 272/77, loss: 0.00848584808409214 2023-01-23 23:41:51.735784: step: 276/77, loss: 6.662925443379208e-05 2023-01-23 23:41:53.048632: step: 280/77, loss: 0.024089567363262177 2023-01-23 23:41:54.346786: step: 284/77, loss: 0.007724351715296507 2023-01-23 23:41:55.675311: step: 288/77, loss: 0.0013219267129898071 2023-01-23 23:41:56.980561: step: 292/77, loss: 0.015313958749175072 2023-01-23 23:41:58.348048: step: 296/77, loss: 0.014368615113198757 2023-01-23 23:41:59.658122: step: 300/77, loss: 0.04230882599949837 2023-01-23 23:42:01.010589: step: 304/77, loss: 0.0017491618636995554 2023-01-23 23:42:02.273107: step: 308/77, loss: 0.00044850510312244296 2023-01-23 23:42:03.569183: step: 312/77, loss: 0.001114065176807344 2023-01-23 23:42:04.908033: step: 316/77, loss: 0.00840886402875185 2023-01-23 23:42:06.263393: step: 320/77, loss: 0.006510584149509668 2023-01-23 23:42:07.555077: step: 324/77, loss: 0.0036016865633428097 2023-01-23 23:42:08.922646: step: 328/77, loss: 0.003660577815026045 2023-01-23 23:42:10.247002: step: 332/77, loss: 0.018879014998674393 2023-01-23 23:42:11.591398: step: 336/77, loss: 0.046105917543172836 2023-01-23 23:42:12.959600: step: 340/77, loss: 0.0011975467205047607 2023-01-23 23:42:14.247719: step: 344/77, loss: 0.0047290450893342495 2023-01-23 23:42:15.578160: step: 348/77, loss: 0.0072173988446593285 2023-01-23 23:42:16.878854: step: 352/77, loss: 0.021729473024606705 2023-01-23 23:42:18.169598: step: 356/77, loss: 0.001801485545001924 2023-01-23 23:42:19.497869: step: 360/77, loss: 0.004448288585990667 2023-01-23 23:42:20.812213: step: 364/77, loss: 0.018222050741314888 2023-01-23 23:42:22.103560: step: 368/77, loss: 0.0042145997285842896 2023-01-23 23:42:23.416446: step: 372/77, loss: 0.011724326759576797 2023-01-23 23:42:24.762697: step: 376/77, loss: 0.0015771070029586554 2023-01-23 23:42:26.143285: step: 380/77, loss: 0.000674558337777853 2023-01-23 23:42:27.484171: step: 384/77, loss: 0.009776659309864044 2023-01-23 23:42:28.802474: step: 388/77, loss: 0.0007911527063697577 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Chinese: {'template': {'p': 0.9538461538461539, 'r': 0.484375, 'f1': 0.6424870466321243}, 'slot': {'p': 0.53125, 'r': 0.015567765567765568, 'f1': 0.0302491103202847}, 'combined': 0.019434661552929028, 
'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Korean: {'template': {'p': 0.9682539682539683, 'r': 0.4765625, 'f1': 0.6387434554973821}, 'slot': {'p': 0.5666666666666667, 'r': 0.015567765567765568, 'f1': 0.030303030303030307}, 'combined': 0.01935586228779946, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Russian: {'template': {'p': 0.9682539682539683, 'r': 0.4765625, 'f1': 0.6387434554973821}, 'slot': {'p': 0.5666666666666667, 'r': 0.015567765567765568, 'f1': 0.030303030303030307}, 'combined': 0.01935586228779946, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 
--max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:44:15.340878: step: 4/77, loss: 0.010837767273187637 2023-01-23 23:44:16.642895: step: 8/77, loss: 0.0006001390866003931 2023-01-23 23:44:18.009286: step: 12/77, loss: 0.0002572540252003819 2023-01-23 23:44:19.335949: step: 16/77, loss: 0.0035696025006473064 2023-01-23 23:44:20.644920: step: 20/77, loss: 0.0007004659855738282 2023-01-23 23:44:21.925873: step: 24/77, loss: 0.0023328056558966637 2023-01-23 23:44:23.214607: step: 28/77, loss: 0.00011439307127147913 2023-01-23 23:44:24.566320: step: 32/77, loss: 0.005891487468034029 2023-01-23 23:44:25.820325: step: 36/77, loss: 0.00023794885782990605 2023-01-23 23:44:27.134902: step: 40/77, loss: 7.563876715721563e-05 2023-01-23 23:44:28.458260: step: 44/77, loss: 0.0006197237526066601 2023-01-23 23:44:29.841196: step: 48/77, loss: 0.00023501500254496932 2023-01-23 23:44:31.156992: step: 52/77, loss: 0.013790091499686241 2023-01-23 23:44:32.488826: step: 56/77, loss: 0.028487298637628555 2023-01-23 23:44:33.832833: step: 60/77, loss: 1.4275335161073599e-05 2023-01-23 23:44:35.095155: step: 64/77, loss: 1.5512880054302514e-05 2023-01-23 23:44:36.412458: step: 68/77, loss: 0.02364487014710903 2023-01-23 23:44:37.753059: step: 72/77, loss: 0.002446085214614868 2023-01-23 23:44:39.038724: step: 76/77, loss: 0.02038494497537613 2023-01-23 23:44:40.371456: step: 80/77, loss: 0.0563809759914875 2023-01-23 23:44:41.701564: step: 84/77, loss: 0.00593971973285079 2023-01-23 23:44:43.102053: step: 88/77, loss: 8.265776705229655e-05 2023-01-23 23:44:44.462119: step: 92/77, loss: 0.02026337757706642 2023-01-23 23:44:45.803764: step: 96/77, loss: 0.0005820993683300912 2023-01-23 23:44:47.090379: step: 100/77, loss: 1.4649514923803508e-05 2023-01-23 23:44:48.360369: step: 104/77, loss: 6.332544217002578e-06 2023-01-23 23:44:49.632620: step: 108/77, loss: 0.20827950537204742 2023-01-23 23:44:50.969765: step: 112/77, loss: 0.0017105141887441278 2023-01-23 23:44:52.269857: step: 116/77, loss: 0.0018553459085524082 2023-01-23 23:44:53.580349: step: 120/77, loss: 0.00021237392502371222 2023-01-23 23:44:54.891566: step: 124/77, loss: 2.5977966288337484e-05 2023-01-23 23:44:56.196522: step: 128/77, loss: 9.777469676919281e-05 2023-01-23 23:44:57.494593: step: 132/77, loss: 1.1249124327150639e-05 2023-01-23 23:44:58.800443: step: 136/77, loss: 5.5485616030637175e-06 2023-01-23 23:45:00.136243: step: 140/77, loss: 0.00467675132676959 2023-01-23 23:45:01.455494: step: 144/77, loss: 0.025886178016662598 2023-01-23 23:45:02.761783: step: 148/77, loss: 0.023800842463970184 2023-01-23 23:45:04.056079: step: 152/77, loss: 0.0014288400998339057 2023-01-23 23:45:05.338455: step: 156/77, loss: 0.002464765915647149 2023-01-23 23:45:06.690503: step: 160/77, loss: 0.00029669213108718395 2023-01-23 23:45:08.006849: step: 164/77, loss: 0.006217610090970993 2023-01-23 23:45:09.296064: step: 168/77, loss: 0.0005582318408414721 2023-01-23 23:45:10.584912: step: 172/77, loss: 0.004611868876963854 2023-01-23 23:45:11.898857: step: 176/77, loss: 0.004949961323291063 2023-01-23 23:45:13.185445: step: 180/77, loss: 0.021964555606245995 2023-01-23 23:45:14.476361: step: 184/77, loss: 0.0010839662281796336 2023-01-23 23:45:15.817420: step: 188/77, loss: 0.11259245872497559 2023-01-23 23:45:17.159373: step: 192/77, loss: 0.03340409696102142 2023-01-23 23:45:18.476168: step: 196/77, loss: 0.032646629959344864 2023-01-23 23:45:19.785074: step: 200/77, loss: 0.01984964869916439 2023-01-23 23:45:21.078377: step: 204/77, 
loss: 0.0004216528031975031 2023-01-23 23:45:22.381213: step: 208/77, loss: 0.0052557592280209064 2023-01-23 23:45:23.729298: step: 212/77, loss: 0.029595471918582916 2023-01-23 23:45:25.047480: step: 216/77, loss: 0.0032760680187493563 2023-01-23 23:45:26.333636: step: 220/77, loss: 0.019138285890221596 2023-01-23 23:45:27.611897: step: 224/77, loss: 0.02345498651266098 2023-01-23 23:45:28.951343: step: 228/77, loss: 0.0026856400072574615 2023-01-23 23:45:30.284541: step: 232/77, loss: 0.016967257484793663 2023-01-23 23:45:31.582480: step: 236/77, loss: 0.00011248209921177477 2023-01-23 23:45:32.902722: step: 240/77, loss: 0.0003898591094184667 2023-01-23 23:45:34.252617: step: 244/77, loss: 0.04799313098192215 2023-01-23 23:45:35.613476: step: 248/77, loss: 0.009683486074209213 2023-01-23 23:45:36.930534: step: 252/77, loss: 0.0008936700760386884 2023-01-23 23:45:38.237368: step: 256/77, loss: 0.00401071785017848 2023-01-23 23:45:39.549257: step: 260/77, loss: 0.011171936057507992 2023-01-23 23:45:40.825521: step: 264/77, loss: 5.710707409889437e-05 2023-01-23 23:45:42.158918: step: 268/77, loss: 0.02173326350748539 2023-01-23 23:45:43.509816: step: 272/77, loss: 0.00019250279001425952 2023-01-23 23:45:44.817830: step: 276/77, loss: 0.00044429523404687643 2023-01-23 23:45:46.174926: step: 280/77, loss: 0.015425225719809532 2023-01-23 23:45:47.533531: step: 284/77, loss: 1.926892036863137e-05 2023-01-23 23:45:48.773857: step: 288/77, loss: 0.006432825233787298 2023-01-23 23:45:50.092266: step: 292/77, loss: 0.00099134910851717 2023-01-23 23:45:51.377546: step: 296/77, loss: 7.043761434033513e-05 2023-01-23 23:45:52.684611: step: 300/77, loss: 0.0030199948232620955 2023-01-23 23:45:53.990403: step: 304/77, loss: 0.0016736382385715842 2023-01-23 23:45:55.313512: step: 308/77, loss: 0.014939763583242893 2023-01-23 23:45:56.641062: step: 312/77, loss: 7.201347762020305e-05 2023-01-23 23:45:57.930069: step: 316/77, loss: 0.03253569081425667 2023-01-23 23:45:59.309530: step: 320/77, loss: 0.04321937635540962 2023-01-23 23:46:00.618382: step: 324/77, loss: 0.056001633405685425 2023-01-23 23:46:01.947595: step: 328/77, loss: 0.0018117651343345642 2023-01-23 23:46:03.309341: step: 332/77, loss: 8.461129255010746e-06 2023-01-23 23:46:04.612959: step: 336/77, loss: 1.3149092410458252e-05 2023-01-23 23:46:05.877967: step: 340/77, loss: 0.0001768352958606556 2023-01-23 23:46:07.200546: step: 344/77, loss: 0.000923643063288182 2023-01-23 23:46:08.509544: step: 348/77, loss: 0.03135452792048454 2023-01-23 23:46:09.784582: step: 352/77, loss: 1.5779828572703991e-06 2023-01-23 23:46:11.113870: step: 356/77, loss: 0.0012817180249840021 2023-01-23 23:46:12.455010: step: 360/77, loss: 0.010822507552802563 2023-01-23 23:46:13.775990: step: 364/77, loss: 0.04603930190205574 2023-01-23 23:46:15.086329: step: 368/77, loss: 0.1464909315109253 2023-01-23 23:46:16.368774: step: 372/77, loss: 0.00020046159625053406 2023-01-23 23:46:17.705891: step: 376/77, loss: 0.0011036631185561419 2023-01-23 23:46:19.051060: step: 380/77, loss: 0.0055919550359249115 2023-01-23 23:46:20.367132: step: 384/77, loss: 0.020800791680812836 2023-01-23 23:46:21.652344: step: 388/77, loss: 0.029528755694627762 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Chinese: {'template': {'p': 
0.9545454545454546, 'r': 0.4921875, 'f1': 0.6494845360824743}, 'slot': {'p': 0.6071428571428571, 'r': 0.015567765567765568, 'f1': 0.030357142857142853}, 'combined': 0.019716494845360824, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Korean: {'template': {'p': 0.9264705882352942, 'r': 0.4921875, 'f1': 0.6428571428571428}, 'slot': {'p': 0.5666666666666667, 'r': 0.015567765567765568, 'f1': 0.030303030303030307}, 'combined': 0.01948051948051948, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Russian: {'template': {'p': 0.9402985074626866, 'r': 0.4921875, 'f1': 0.6461538461538462}, 'slot': {'p': 0.5666666666666667, 'r': 0.015567765567765568, 'f1': 0.030303030303030307}, 'combined': 0.019580419580419586, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} 
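Every evaluation block above has the same shape: template and slot precision/recall/F1 plus a "combined" score, and the "Current best result" section has stayed pinned at epoch 4 even though later dev scores tie it. Judging only from the logged numbers, F1 is the usual harmonic mean of precision and recall, and "combined" equals template F1 times slot F1; the sketch below recomputes both from one dict copied out of the log and shows how a strict greater-than comparison on the dev combined score would keep epoch 4 in place. The helper names and the selection rule are illustrative assumptions, not taken from train.py.

def f1(p, r):
    """Harmonic mean of precision and recall; 0.0 when both are zero."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(result):
    """Product of template F1 and slot F1 -- matches the 'combined' values logged above."""
    return result["template"]["f1"] * result["slot"]["f1"]

# Dev Chinese entry copied from the epoch-15 block above.
dev_ep15 = {"template": {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579},
            "slot": {"p": 0.5, "r": 0.03780718336483932, "f1": 0.07029876977152899},
            "combined": 0.05179909351586346, "epoch": 15}

assert abs(f1(1.0, 0.5833333333333334) - dev_ep15["template"]["f1"]) < 1e-9
assert abs(combined(dev_ep15) - dev_ep15["combined"]) < 1e-9

def update_best(best, candidate):
    """Replace the stored best only on a strict improvement of the combined score.

    With a strict '>' test, later epochs that merely tie the dev score (as epochs
    12-18 do here) never displace the epoch-4 entry, which is consistent with the
    'Current best result' blocks repeated above.
    """
    return candidate if best is None or combined(candidate) > combined(best) else best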
****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:48:08.419095: step: 4/77, loss: 1.1221683053008746e-05 2023-01-23 23:48:09.711144: step: 8/77, loss: 3.455118348938413e-05 2023-01-23 23:48:11.037615: step: 12/77, loss: 0.0018443784210830927 2023-01-23 23:48:12.353963: step: 16/77, loss: 0.007893680594861507 2023-01-23 23:48:13.601897: step: 20/77, loss: 0.00171194679569453 2023-01-23 23:48:14.951760: step: 24/77, loss: 0.004791442304849625 2023-01-23 23:48:16.229100: step: 28/77, loss: 0.005079994443804026 2023-01-23 23:48:17.551331: step: 32/77, loss: 0.0013969866558909416 2023-01-23 23:48:18.880544: step: 36/77, loss: 0.0005662287003360689 2023-01-23 23:48:20.207264: step: 40/77, loss: 0.006739386357367039 2023-01-23 23:48:21.482242: step: 44/77, loss: 0.057136066257953644 2023-01-23 23:48:22.773471: step: 48/77, loss: 0.08786522597074509 2023-01-23 23:48:24.096575: step: 52/77, loss: 0.0006959072197787464 2023-01-23 23:48:25.425017: step: 56/77, loss: 8.594767132308334e-05 2023-01-23 23:48:26.727792: step: 60/77, loss: 0.0023656277917325497 2023-01-23 23:48:28.054820: step: 64/77, loss: 9.632251021685079e-06 2023-01-23 23:48:29.402627: step: 68/77, loss: 0.010727917775511742 2023-01-23 23:48:30.733245: step: 72/77, loss: 5.180209336685948e-05 2023-01-23 23:48:32.062534: step: 76/77, loss: 1.1807405826402828e-05 2023-01-23 23:48:33.405272: step: 80/77, loss: 0.007526985835283995 2023-01-23 23:48:34.785510: step: 84/77, loss: 0.03580911085009575 2023-01-23 23:48:36.134850: step: 88/77, loss: 8.36990075185895e-05 2023-01-23 23:48:37.498033: step: 92/77, loss: 0.00021380602265708148 2023-01-23 23:48:38.823848: step: 96/77, loss: 5.718777629226679e-06 2023-01-23 23:48:40.143059: step: 100/77, loss: 0.0013107493286952376 2023-01-23 23:48:41.523909: step: 104/77, loss: 0.0012287443969398737 2023-01-23 23:48:42.885663: step: 108/77, loss: 0.0061523388139903545 2023-01-23 23:48:44.195109: step: 112/77, loss: 0.0012815805384889245 2023-01-23 23:48:45.587435: step: 116/77, loss: 0.020566733554005623 2023-01-23 23:48:46.886828: step: 120/77, loss: 0.002454740460962057 2023-01-23 23:48:48.224787: step: 124/77, loss: 0.0029755199793726206 2023-01-23 23:48:49.549528: step: 128/77, loss: 0.012791633605957031 2023-01-23 23:48:50.886039: step: 132/77, loss: 6.327753362711519e-05 2023-01-23 23:48:52.167397: step: 136/77, loss: 0.008632343262434006 2023-01-23 23:48:53.457225: step: 140/77, loss: 8.667247311677784e-05 2023-01-23 23:48:54.772894: step: 144/77, loss: 7.0642381615471095e-06 2023-01-23 23:48:56.114120: step: 148/77, loss: 0.009543057531118393 2023-01-23 23:48:57.475600: step: 152/77, loss: 6.317175575532019e-06 2023-01-23 23:48:58.819193: step: 156/77, loss: 0.01206673588603735 2023-01-23 23:49:00.129294: step: 160/77, loss: 0.0038095468189567327 2023-01-23 23:49:01.411168: step: 164/77, loss: 0.0035688632633537054 2023-01-23 23:49:02.707465: step: 168/77, loss: 0.0001779919257387519 2023-01-23 23:49:04.067642: step: 172/77, loss: 0.07000274211168289 2023-01-23 23:49:05.391937: step: 176/77, loss: 0.0008181778248399496 2023-01-23 23:49:06.639256: step: 180/77, loss: 5.645014880428789e-06 2023-01-23 23:49:07.956914: step: 184/77, loss: 0.0024146586656570435 2023-01-23 23:49:09.301737: step: 188/77, loss: 0.0016151170711964369 2023-01-23 23:49:10.608183: step: 192/77, 
loss: 0.016128059476614 2023-01-23 23:49:11.929558: step: 196/77, loss: 0.0013105386169627309 2023-01-23 23:49:13.262615: step: 200/77, loss: 0.0015561481704935431 2023-01-23 23:49:14.623400: step: 204/77, loss: 0.0778515636920929 2023-01-23 23:49:15.939494: step: 208/77, loss: 1.0541395567997824e-05 2023-01-23 23:49:17.183367: step: 212/77, loss: 0.0022206148132681847 2023-01-23 23:49:18.486364: step: 216/77, loss: 0.0011248596711084247 2023-01-23 23:49:19.795706: step: 220/77, loss: 7.704282324993983e-05 2023-01-23 23:49:21.144103: step: 224/77, loss: 0.0004228210891596973 2023-01-23 23:49:22.455626: step: 228/77, loss: 0.029599463567137718 2023-01-23 23:49:23.803009: step: 232/77, loss: 0.031230268999934196 2023-01-23 23:49:25.116655: step: 236/77, loss: 0.01613214612007141 2023-01-23 23:49:26.406919: step: 240/77, loss: 0.0005347841652110219 2023-01-23 23:49:27.738092: step: 244/77, loss: 8.026025170693174e-06 2023-01-23 23:49:29.026829: step: 248/77, loss: 0.06191571429371834 2023-01-23 23:49:30.380818: step: 252/77, loss: 1.5007139154477045e-05 2023-01-23 23:49:31.715346: step: 256/77, loss: 0.004064864944666624 2023-01-23 23:49:33.013183: step: 260/77, loss: 0.004756799899041653 2023-01-23 23:49:34.312486: step: 264/77, loss: 0.0007279182900674641 2023-01-23 23:49:35.634333: step: 268/77, loss: 0.0008052777266129851 2023-01-23 23:49:36.964014: step: 272/77, loss: 2.942733544841758e-06 2023-01-23 23:49:38.233043: step: 276/77, loss: 0.009561686776578426 2023-01-23 23:49:39.555248: step: 280/77, loss: 3.643817763077095e-05 2023-01-23 23:49:40.872298: step: 284/77, loss: 2.342433617741335e-06 2023-01-23 23:49:42.189304: step: 288/77, loss: 1.1547286703716964e-05 2023-01-23 23:49:43.510042: step: 292/77, loss: 8.301382877107244e-06 2023-01-23 23:49:44.831820: step: 296/77, loss: 0.011198680847883224 2023-01-23 23:49:46.156924: step: 300/77, loss: 0.00016585066623520106 2023-01-23 23:49:47.461147: step: 304/77, loss: 7.458726759068668e-05 2023-01-23 23:49:48.760753: step: 308/77, loss: 0.00017216156993526965 2023-01-23 23:49:50.047917: step: 312/77, loss: 0.00261271302588284 2023-01-23 23:49:51.341102: step: 316/77, loss: 0.03502047806978226 2023-01-23 23:49:52.622579: step: 320/77, loss: 0.049201589077711105 2023-01-23 23:49:53.956863: step: 324/77, loss: 1.7195624195665005e-06 2023-01-23 23:49:55.252969: step: 328/77, loss: 0.0020926424767822027 2023-01-23 23:49:56.596983: step: 332/77, loss: 0.0003969683893956244 2023-01-23 23:49:57.869418: step: 336/77, loss: 0.007147402036935091 2023-01-23 23:49:59.215478: step: 340/77, loss: 2.195253546233289e-05 2023-01-23 23:50:00.540063: step: 344/77, loss: 0.00015550327952951193 2023-01-23 23:50:01.872514: step: 348/77, loss: 0.0010230513289570808 2023-01-23 23:50:03.162153: step: 352/77, loss: 0.055711206048727036 2023-01-23 23:50:04.480464: step: 356/77, loss: 0.16461989283561707 2023-01-23 23:50:05.751610: step: 360/77, loss: 0.0035247791092842817 2023-01-23 23:50:07.105961: step: 364/77, loss: 0.0020639460999518633 2023-01-23 23:50:08.427964: step: 368/77, loss: 0.0005306448438204825 2023-01-23 23:50:09.791862: step: 372/77, loss: 0.0008472758927382529 2023-01-23 23:50:11.126184: step: 376/77, loss: 0.00011176289262948558 2023-01-23 23:50:12.417777: step: 380/77, loss: 0.01918557472527027 2023-01-23 23:50:13.734253: step: 384/77, loss: 0.0978318601846695 2023-01-23 23:50:15.015296: step: 388/77, loss: 0.0711839497089386 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 
'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.9315068493150684, 'r': 0.53125, 'f1': 0.6766169154228856}, 'slot': {'p': 0.6, 'r': 0.013736263736263736, 'f1': 0.026857654431512987}, 'combined': 0.018172343296944112, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.9178082191780822, 'r': 0.5234375, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.013736263736263736, 'f1': 0.026809651474530835}, 'combined': 0.017873100983020557, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.9444444444444444, 'r': 0.53125, 'f1': 0.6799999999999999}, 'slot': {'p': 0.6, 'r': 0.013736263736263736, 'f1': 0.026857654431512987}, 'combined': 0.01826320501342883, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 
'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:52:01.873370: step: 4/77, loss: 0.0006470663356594741 2023-01-23 23:52:03.210977: step: 8/77, loss: 0.001525210915133357 2023-01-23 23:52:04.474028: step: 12/77, loss: 3.1704777939012274e-05 2023-01-23 23:52:05.758695: step: 16/77, loss: 0.038258712738752365 2023-01-23 23:52:07.051114: step: 20/77, loss: 0.020408688113093376 2023-01-23 23:52:08.370341: step: 24/77, loss: 9.983767768062535e-08 2023-01-23 23:52:09.665700: step: 28/77, loss: 0.006725261453539133 2023-01-23 23:52:10.956888: step: 32/77, loss: 0.005994974169880152 2023-01-23 23:52:12.281125: step: 36/77, loss: 0.06461626291275024 2023-01-23 23:52:13.630072: step: 40/77, loss: 0.0004826833610422909 2023-01-23 23:52:14.939137: step: 44/77, loss: 0.0012849881313741207 2023-01-23 23:52:16.262204: step: 48/77, loss: 8.283742499770597e-05 2023-01-23 23:52:17.559017: step: 52/77, loss: 1.056480641636881e-06 2023-01-23 23:52:18.858730: step: 56/77, loss: 0.0065393103286623955 2023-01-23 23:52:20.237268: step: 60/77, loss: 0.0013470399426296353 2023-01-23 23:52:21.515588: step: 64/77, loss: 0.022369032725691795 2023-01-23 23:52:22.836213: step: 68/77, loss: 0.015005374327301979 2023-01-23 23:52:24.133192: step: 72/77, loss: 0.05512889474630356 2023-01-23 23:52:25.393244: step: 76/77, loss: 0.004738078452646732 2023-01-23 23:52:26.764377: step: 80/77, loss: 0.01645592227578163 2023-01-23 23:52:27.995316: step: 84/77, loss: 3.1562663934892043e-05 2023-01-23 23:52:29.301820: step: 88/77, loss: 0.005582800600677729 2023-01-23 23:52:30.616401: step: 92/77, loss: 7.547959830844775e-05 2023-01-23 23:52:31.922468: step: 96/77, loss: 0.006869758944958448 2023-01-23 23:52:33.161462: step: 100/77, loss: 0.0005420552333816886 2023-01-23 23:52:34.432530: step: 104/77, loss: 3.939624548365828e-06 2023-01-23 23:52:35.693454: step: 108/77, loss: 0.005915517918765545 2023-01-23 23:52:36.984935: step: 112/77, loss: 0.005799043457955122 2023-01-23 23:52:38.332164: step: 116/77, loss: 0.0006891103694215417 2023-01-23 23:52:39.661123: step: 120/77, loss: 0.01312168687582016 2023-01-23 23:52:41.008883: step: 124/77, loss: 0.0011892368784174323 2023-01-23 23:52:42.348369: step: 128/77, loss: 0.0014223118778318167 2023-01-23 23:52:43.690092: step: 132/77, loss: 0.0015876988181844354 2023-01-23 23:52:44.989896: step: 136/77, loss: 7.348333019763231e-05 2023-01-23 23:52:46.359035: step: 140/77, loss: 0.015635933727025986 2023-01-23 23:52:47.634309: step: 144/77, loss: 0.008616279810667038 2023-01-23 23:52:48.966506: step: 148/77, loss: 0.0005551224458031356 2023-01-23 23:52:50.307079: step: 152/77, loss: 0.00016584055265411735 2023-01-23 23:52:51.595545: step: 156/77, loss: 0.047848138958215714 2023-01-23 23:52:52.870114: step: 160/77, loss: 0.0003821274731308222 2023-01-23 23:52:54.145746: step: 164/77, loss: 0.003461926942691207 2023-01-23 23:52:55.501067: step: 168/77, loss: 0.020093290135264397 2023-01-23 23:52:56.812282: step: 172/77, loss: 0.011807311326265335 2023-01-23 23:52:58.111183: step: 176/77, loss: 0.0002801486407406628 2023-01-23 23:52:59.416398: step: 180/77, loss: 2.6831652576220222e-05 2023-01-23 23:53:00.775841: step: 
184/77, loss: 0.08000031113624573 2023-01-23 23:53:02.078767: step: 188/77, loss: 3.856466355500743e-05 2023-01-23 23:53:03.431390: step: 192/77, loss: 0.0019287059549242258 2023-01-23 23:53:04.759687: step: 196/77, loss: 0.014379382133483887 2023-01-23 23:53:06.092641: step: 200/77, loss: 0.0005766113172285259 2023-01-23 23:53:07.424212: step: 204/77, loss: 5.584115206147544e-05 2023-01-23 23:53:08.754677: step: 208/77, loss: 0.0003904126351699233 2023-01-23 23:53:10.068061: step: 212/77, loss: 4.62036359749618e-06 2023-01-23 23:53:11.356673: step: 216/77, loss: 0.0032101867254823446 2023-01-23 23:53:12.678082: step: 220/77, loss: 0.0033433244097977877 2023-01-23 23:53:14.088261: step: 224/77, loss: 0.020648961886763573 2023-01-23 23:53:15.397745: step: 228/77, loss: 0.0001718879648251459 2023-01-23 23:53:16.660092: step: 232/77, loss: 0.004351540934294462 2023-01-23 23:53:18.020538: step: 236/77, loss: 0.0005007802392356098 2023-01-23 23:53:19.339252: step: 240/77, loss: 0.00028103828663006425 2023-01-23 23:53:20.644261: step: 244/77, loss: 0.023118944838643074 2023-01-23 23:53:21.958222: step: 248/77, loss: 0.007660750299692154 2023-01-23 23:53:23.292594: step: 252/77, loss: 2.7252701784163946e-06 2023-01-23 23:53:24.646901: step: 256/77, loss: 0.0017567173345014453 2023-01-23 23:53:25.940008: step: 260/77, loss: 0.0019385181367397308 2023-01-23 23:53:27.200606: step: 264/77, loss: 4.470348091700771e-09 2023-01-23 23:53:28.509088: step: 268/77, loss: 0.0003453208482824266 2023-01-23 23:53:29.853852: step: 272/77, loss: 0.021467359736561775 2023-01-23 23:53:31.152946: step: 276/77, loss: 0.01447715051472187 2023-01-23 23:53:32.474514: step: 280/77, loss: 0.008323321118950844 2023-01-23 23:53:33.755363: step: 284/77, loss: 0.043272241950035095 2023-01-23 23:53:35.101942: step: 288/77, loss: 0.014325212687253952 2023-01-23 23:53:36.446499: step: 292/77, loss: 0.0008731039706617594 2023-01-23 23:53:37.799035: step: 296/77, loss: 0.11861124634742737 2023-01-23 23:53:39.076606: step: 300/77, loss: 0.03012103959918022 2023-01-23 23:53:40.390488: step: 304/77, loss: 0.04930185526609421 2023-01-23 23:53:41.692273: step: 308/77, loss: 5.2927560318494216e-05 2023-01-23 23:53:42.995489: step: 312/77, loss: 0.03037060610949993 2023-01-23 23:53:44.341449: step: 316/77, loss: 0.000295668316539377 2023-01-23 23:53:45.618435: step: 320/77, loss: 0.001755043282173574 2023-01-23 23:53:46.973171: step: 324/77, loss: 9.944363409886137e-05 2023-01-23 23:53:48.324921: step: 328/77, loss: 0.02383231185376644 2023-01-23 23:53:49.647323: step: 332/77, loss: 0.0026795738376677036 2023-01-23 23:53:50.930125: step: 336/77, loss: 0.022737769410014153 2023-01-23 23:53:52.241654: step: 340/77, loss: 0.0007600174867548048 2023-01-23 23:53:53.542083: step: 344/77, loss: 2.845424387487583e-05 2023-01-23 23:53:54.919131: step: 348/77, loss: 0.0003975990694016218 2023-01-23 23:53:56.227828: step: 352/77, loss: 0.0001918773486977443 2023-01-23 23:53:57.513372: step: 356/77, loss: 0.018601376563310623 2023-01-23 23:53:58.822754: step: 360/77, loss: 0.01445725467056036 2023-01-23 23:54:00.157077: step: 364/77, loss: 0.0005568335764110088 2023-01-23 23:54:01.507338: step: 368/77, loss: 2.918254176620394e-05 2023-01-23 23:54:02.823722: step: 372/77, loss: 0.0012686774134635925 2023-01-23 23:54:04.113974: step: 376/77, loss: 0.035819850862026215 2023-01-23 23:54:05.416587: step: 380/77, loss: 0.00040851483936421573 2023-01-23 23:54:06.746156: step: 384/77, loss: 0.0002468058664817363 2023-01-23 23:54:08.053589: step: 388/77, 
loss: 0.00334012508392334 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.4838709677419355, 'r': 0.013736263736263736, 'f1': 0.026714158504007122}, 'combined': 0.017445981063841386, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.4838709677419355, 'r': 0.013736263736263736, 'f1': 0.026714158504007122}, 'combined': 0.017445981063841386, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.46875, 'r': 0.013736263736263736, 'f1': 0.02669039145907473}, 'combined': 0.01743045972837533, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': 
{'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:55:54.794981: step: 4/77, loss: 0.006145583000034094 2023-01-23 23:55:56.091111: step: 8/77, loss: 0.06270524114370346 2023-01-23 23:55:57.426866: step: 12/77, loss: 0.012457462027668953 2023-01-23 23:55:58.735499: step: 16/77, loss: 4.878042818745598e-05 2023-01-23 23:56:00.004069: step: 20/77, loss: 0.005514089018106461 2023-01-23 23:56:01.331067: step: 24/77, loss: 0.0011011639144271612 2023-01-23 23:56:02.670840: step: 28/77, loss: 0.002201240276917815 2023-01-23 23:56:04.003833: step: 32/77, loss: 0.09032203257083893 2023-01-23 23:56:05.355599: step: 36/77, loss: 0.0004244197625666857 2023-01-23 23:56:06.701943: step: 40/77, loss: 0.0036006891168653965 2023-01-23 23:56:07.982677: step: 44/77, loss: 0.0006497090798802674 2023-01-23 23:56:09.309688: step: 48/77, loss: 0.0001267041079699993 2023-01-23 23:56:10.597032: step: 52/77, loss: 0.0015523693291470408 2023-01-23 23:56:11.860877: step: 56/77, loss: 6.539768037328031e-06 2023-01-23 23:56:13.181034: step: 60/77, loss: 0.01422657910734415 2023-01-23 23:56:14.510622: step: 64/77, loss: 0.0015929980436339974 2023-01-23 23:56:15.803213: step: 68/77, loss: 0.0010093044256791472 2023-01-23 23:56:17.109928: step: 72/77, loss: 0.1789524108171463 2023-01-23 23:56:18.439035: step: 76/77, loss: 0.007612825371325016 2023-01-23 23:56:19.758199: step: 80/77, loss: 0.006012186408042908 2023-01-23 23:56:21.102863: step: 84/77, loss: 0.0017496285727247596 2023-01-23 23:56:22.433351: step: 88/77, loss: 0.00408580806106329 2023-01-23 23:56:23.754185: step: 92/77, loss: 0.040068477392196655 2023-01-23 23:56:25.040083: step: 96/77, loss: 0.0015697141643613577 2023-01-23 23:56:26.357010: step: 100/77, loss: 0.017410293221473694 2023-01-23 23:56:27.663200: step: 104/77, loss: 2.817763061102596e-06 2023-01-23 23:56:28.977859: step: 108/77, loss: 0.019066637381911278 2023-01-23 23:56:30.263451: step: 112/77, loss: 0.011317798867821693 2023-01-23 23:56:31.574629: step: 116/77, loss: 0.026462309062480927 2023-01-23 23:56:32.912313: step: 120/77, loss: 5.888029409106821e-05 2023-01-23 23:56:34.237780: step: 124/77, loss: 0.0030098133720457554 2023-01-23 23:56:35.570701: step: 128/77, loss: 0.0036532857920974493 2023-01-23 23:56:36.868402: step: 132/77, loss: 0.04920339956879616 2023-01-23 23:56:38.269030: step: 136/77, loss: 0.01856566034257412 2023-01-23 23:56:39.618802: step: 140/77, loss: 6.372792995534837e-05 2023-01-23 23:56:40.969438: step: 144/77, loss: 8.016057108761743e-06 2023-01-23 23:56:42.312349: step: 148/77, loss: 0.0004995768540538847 2023-01-23 23:56:43.640633: step: 152/77, loss: 0.06058551371097565 2023-01-23 23:56:44.941314: step: 156/77, loss: 3.7383480957942083e-05 2023-01-23 23:56:46.184255: step: 160/77, loss: 0.003941821400076151 2023-01-23 23:56:47.519525: step: 164/77, loss: 0.0005088653997518122 2023-01-23 23:56:48.835677: step: 168/77, loss: 1.7664906408754177e-05 2023-01-23 23:56:50.174891: step: 172/77, loss: 0.0021961110178381205 2023-01-23 23:56:51.504976: 
step: 176/77, loss: 0.008128118701279163 2023-01-23 23:56:52.844167: step: 180/77, loss: 0.033387646079063416 2023-01-23 23:56:54.135783: step: 184/77, loss: 0.014834368601441383 2023-01-23 23:56:55.515569: step: 188/77, loss: 0.0006523691117763519 2023-01-23 23:56:56.810279: step: 192/77, loss: 0.01223810575902462 2023-01-23 23:56:58.181026: step: 196/77, loss: 0.0009100798051804304 2023-01-23 23:56:59.515975: step: 200/77, loss: 0.0030384385026991367 2023-01-23 23:57:00.880127: step: 204/77, loss: 0.004068047273904085 2023-01-23 23:57:02.200289: step: 208/77, loss: 0.0010591279715299606 2023-01-23 23:57:03.547245: step: 212/77, loss: 0.00010257431131321937 2023-01-23 23:57:04.865227: step: 216/77, loss: 0.00011126314348075539 2023-01-23 23:57:06.172313: step: 220/77, loss: 4.5440105168381706e-05 2023-01-23 23:57:07.479154: step: 224/77, loss: 0.017476027831435204 2023-01-23 23:57:08.850405: step: 228/77, loss: 0.028088154271245003 2023-01-23 23:57:10.164939: step: 232/77, loss: 7.048526458675042e-05 2023-01-23 23:57:11.541797: step: 236/77, loss: 0.004286248702555895 2023-01-23 23:57:12.902282: step: 240/77, loss: 8.81606865732465e-06 2023-01-23 23:57:14.262502: step: 244/77, loss: 0.0001825095241656527 2023-01-23 23:57:15.584587: step: 248/77, loss: 0.001119068474508822 2023-01-23 23:57:16.908811: step: 252/77, loss: 3.7998961488483474e-05 2023-01-23 23:57:18.290623: step: 256/77, loss: 0.0009253112366423011 2023-01-23 23:57:19.542879: step: 260/77, loss: 0.0012064689071848989 2023-01-23 23:57:20.881479: step: 264/77, loss: 5.0296126573812217e-05 2023-01-23 23:57:22.190726: step: 268/77, loss: 7.039660704322159e-05 2023-01-23 23:57:23.498261: step: 272/77, loss: 0.009951543062925339 2023-01-23 23:57:24.817654: step: 276/77, loss: 0.00021127743821125478 2023-01-23 23:57:26.149595: step: 280/77, loss: 5.046322985435836e-05 2023-01-23 23:57:27.506007: step: 284/77, loss: 0.03399345651268959 2023-01-23 23:57:28.810224: step: 288/77, loss: 0.0733359232544899 2023-01-23 23:57:30.115460: step: 292/77, loss: 0.002043990883976221 2023-01-23 23:57:31.453134: step: 296/77, loss: 0.030689438804984093 2023-01-23 23:57:32.798109: step: 300/77, loss: 0.005732610356062651 2023-01-23 23:57:34.139076: step: 304/77, loss: 1.6658834283589385e-05 2023-01-23 23:57:35.461952: step: 308/77, loss: 0.050420764833688736 2023-01-23 23:57:36.724697: step: 312/77, loss: 0.001651364378631115 2023-01-23 23:57:38.051487: step: 316/77, loss: 0.00013521264190785587 2023-01-23 23:57:39.372373: step: 320/77, loss: 5.329089981387369e-05 2023-01-23 23:57:40.678922: step: 324/77, loss: 0.00023122054699342698 2023-01-23 23:57:41.999710: step: 328/77, loss: 0.0006129042012616992 2023-01-23 23:57:43.316950: step: 332/77, loss: 0.007153256330639124 2023-01-23 23:57:44.580143: step: 336/77, loss: 0.00283637223765254 2023-01-23 23:57:45.900366: step: 340/77, loss: 0.009734341874718666 2023-01-23 23:57:47.199666: step: 344/77, loss: 0.0005088862963020802 2023-01-23 23:57:48.519503: step: 348/77, loss: 1.630113615647133e-06 2023-01-23 23:57:49.870336: step: 352/77, loss: 0.0002557302941568196 2023-01-23 23:57:51.162180: step: 356/77, loss: 4.768367034557741e-08 2023-01-23 23:57:52.459822: step: 360/77, loss: 4.550819721771404e-05 2023-01-23 23:57:53.764439: step: 364/77, loss: 3.684824150695931e-06 2023-01-23 23:57:55.078095: step: 368/77, loss: 0.001974704908207059 2023-01-23 23:57:56.379665: step: 372/77, loss: 0.00014736379671376199 2023-01-23 23:57:57.663798: step: 376/77, loss: 0.001268755062483251 2023-01-23 23:57:58.937671: 
step: 380/77, loss: 0.0002300547348568216 2023-01-23 23:58:00.269002: step: 384/77, loss: 0.005775130353868008 2023-01-23 23:58:01.535413: step: 388/77, loss: 7.426962110912427e-05 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.9661016949152542, 'r': 0.4453125, 'f1': 0.6096256684491977}, 'slot': {'p': 0.5161290322580645, 'r': 0.014652014652014652, 'f1': 0.028495102404274268}, 'combined': 0.017371345850734042, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.9661016949152542, 'r': 0.4453125, 'f1': 0.6096256684491977}, 'slot': {'p': 0.5, 'r': 0.014652014652014652, 'f1': 0.028469750889679717}, 'combined': 0.01735589091670314, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Russian: {'template': {'p': 0.9661016949152542, 'r': 0.4453125, 'f1': 0.6096256684491977}, 'slot': {'p': 0.5, 'r': 0.013736263736263736, 'f1': 0.026737967914438505}, 'combined': 0.016300151562812774, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: 
{'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:59:48.171592: step: 4/77, loss: 0.0010036565363407135 2023-01-23 23:59:49.501987: step: 8/77, loss: 0.002950843423604965 2023-01-23 23:59:50.800625: step: 12/77, loss: 0.0068945009261369705 2023-01-23 23:59:52.082091: step: 16/77, loss: 0.03682752698659897 2023-01-23 23:59:53.425850: step: 20/77, loss: 8.210972737288103e-05 2023-01-23 23:59:54.782808: step: 24/77, loss: 0.00015185496886260808 2023-01-23 23:59:56.089027: step: 28/77, loss: 0.0012898561544716358 2023-01-23 23:59:57.399874: step: 32/77, loss: 0.049722883850336075 2023-01-23 23:59:58.696344: step: 36/77, loss: 0.0025275088846683502 2023-01-24 00:00:00.032005: step: 40/77, loss: 0.0003120983310509473 2023-01-24 00:00:01.329223: step: 44/77, loss: 0.00011641572928056121 2023-01-24 00:00:02.647796: step: 48/77, loss: 0.0001375876454403624 2023-01-24 00:00:03.990360: step: 52/77, loss: 0.0001678016851656139 2023-01-24 00:00:05.360218: step: 56/77, loss: 0.025235647335648537 2023-01-24 00:00:06.605697: step: 60/77, loss: 0.01438209693878889 2023-01-24 00:00:07.930333: step: 64/77, loss: 0.0008003878756426275 2023-01-24 00:00:09.234663: step: 68/77, loss: 8.286495722131804e-05 2023-01-24 00:00:10.534195: step: 72/77, loss: 4.659318165067816e-06 2023-01-24 00:00:11.812259: step: 76/77, loss: 1.2725478882202879e-05 2023-01-24 00:00:13.124789: step: 80/77, loss: 5.763382068835199e-05 2023-01-24 00:00:14.415827: step: 84/77, loss: 0.00048236100701615214 2023-01-24 00:00:15.783429: step: 88/77, loss: 6.148970715003088e-05 2023-01-24 00:00:17.111699: step: 92/77, loss: 0.00012766192958224565 2023-01-24 00:00:18.428586: step: 96/77, loss: 0.041304394602775574 2023-01-24 00:00:19.653451: step: 100/77, loss: 0.0013864014763385057 2023-01-24 00:00:20.925477: step: 104/77, loss: 0.019462674856185913 2023-01-24 00:00:22.298214: step: 108/77, loss: 6.260276131797582e-05 2023-01-24 00:00:23.575690: step: 112/77, loss: 0.006182023324072361 2023-01-24 00:00:24.842472: step: 116/77, loss: 0.003532269038259983 2023-01-24 00:00:26.150329: step: 120/77, loss: 0.004249984864145517 2023-01-24 00:00:27.500537: step: 124/77, loss: 0.0005928549799136817 2023-01-24 00:00:28.803993: step: 128/77, loss: 0.0006586603703908622 2023-01-24 00:00:30.111563: step: 132/77, loss: 0.0019883771892637014 2023-01-24 00:00:31.402383: step: 136/77, loss: 0.0007436954183503985 2023-01-24 00:00:32.716736: step: 140/77, loss: 0.06668004393577576 2023-01-24 00:00:34.035114: step: 144/77, loss: 2.946595850517042e-05 2023-01-24 00:00:35.358732: step: 148/77, loss: 0.044872432947158813 2023-01-24 00:00:36.650802: step: 152/77, loss: 7.701337744947523e-05 2023-01-24 00:00:38.006293: step: 156/77, loss: 0.00036041653947904706 2023-01-24 00:00:39.321018: step: 160/77, loss: 0.002154907677322626 2023-01-24 00:00:40.645618: step: 164/77, loss: 5.0706959882518277e-05 2023-01-24 00:00:41.955535: step: 168/77, loss: 
0.021075882017612457 2023-01-24 00:00:43.322363: step: 172/77, loss: 7.449004624504596e-05 2023-01-24 00:00:44.614848: step: 176/77, loss: 0.0007037436589598656 2023-01-24 00:00:45.955057: step: 180/77, loss: 0.005983549170196056 2023-01-24 00:00:47.344017: step: 184/77, loss: 0.00024994040722958744 2023-01-24 00:00:48.653108: step: 188/77, loss: 0.0027954450342804193 2023-01-24 00:00:49.920441: step: 192/77, loss: 1.932227132783737e-05 2023-01-24 00:00:51.244767: step: 196/77, loss: 8.82583117345348e-05 2023-01-24 00:00:52.576090: step: 200/77, loss: 0.012760159559547901 2023-01-24 00:00:53.888572: step: 204/77, loss: 0.021436506882309914 2023-01-24 00:00:55.187326: step: 208/77, loss: 0.01409657672047615 2023-01-24 00:00:56.472597: step: 212/77, loss: 0.0010179778328165412 2023-01-24 00:00:57.781089: step: 216/77, loss: 0.04306625947356224 2023-01-24 00:00:59.103141: step: 220/77, loss: 0.04988136142492294 2023-01-24 00:01:00.395754: step: 224/77, loss: 0.0002051626070169732 2023-01-24 00:01:01.726346: step: 228/77, loss: 4.13992784160655e-05 2023-01-24 00:01:03.067322: step: 232/77, loss: 2.0824325474677607e-05 2023-01-24 00:01:04.408823: step: 236/77, loss: 0.03562576696276665 2023-01-24 00:01:05.731725: step: 240/77, loss: 2.7725507607101463e-05 2023-01-24 00:01:06.998002: step: 244/77, loss: 0.01949121057987213 2023-01-24 00:01:08.298487: step: 248/77, loss: 8.667161637276877e-06 2023-01-24 00:01:09.640253: step: 252/77, loss: 0.012959137558937073 2023-01-24 00:01:10.896684: step: 256/77, loss: 0.00032826358801685274 2023-01-24 00:01:12.223935: step: 260/77, loss: 0.11627621948719025 2023-01-24 00:01:13.537364: step: 264/77, loss: 0.09269588440656662 2023-01-24 00:01:14.834524: step: 268/77, loss: 1.3629623026645277e-05 2023-01-24 00:01:16.175261: step: 272/77, loss: 0.003814670955762267 2023-01-24 00:01:17.473479: step: 276/77, loss: 0.001455141231417656 2023-01-24 00:01:18.790310: step: 280/77, loss: 0.0021209963597357273 2023-01-24 00:01:20.107646: step: 284/77, loss: 0.00024511825176887214 2023-01-24 00:01:21.438285: step: 288/77, loss: 0.0009873814415186644 2023-01-24 00:01:22.750845: step: 292/77, loss: 2.3900884116301313e-06 2023-01-24 00:01:24.076961: step: 296/77, loss: 0.009080913849174976 2023-01-24 00:01:25.373939: step: 300/77, loss: 0.00018391606863588095 2023-01-24 00:01:26.647199: step: 304/77, loss: 0.00011140089191030711 2023-01-24 00:01:27.934947: step: 308/77, loss: 0.001048402744345367 2023-01-24 00:01:29.201406: step: 312/77, loss: 0.015014705248177052 2023-01-24 00:01:30.530811: step: 316/77, loss: 0.0017719214083626866 2023-01-24 00:01:31.846924: step: 320/77, loss: 0.00010908178228419274 2023-01-24 00:01:33.159284: step: 324/77, loss: 0.006568096112459898 2023-01-24 00:01:34.493241: step: 328/77, loss: 5.54263788217213e-05 2023-01-24 00:01:35.816654: step: 332/77, loss: 0.00237331073731184 2023-01-24 00:01:37.179000: step: 336/77, loss: 0.02834990620613098 2023-01-24 00:01:38.457182: step: 340/77, loss: 0.0236161220818758 2023-01-24 00:01:39.813682: step: 344/77, loss: 0.000626914668828249 2023-01-24 00:01:41.143781: step: 348/77, loss: 0.005827236454933882 2023-01-24 00:01:42.457035: step: 352/77, loss: 0.0011040312238037586 2023-01-24 00:01:43.834545: step: 356/77, loss: 9.292290633311495e-05 2023-01-24 00:01:45.170818: step: 360/77, loss: 2.6329313186579384e-05 2023-01-24 00:01:46.513226: step: 364/77, loss: 0.00045233272248879075 2023-01-24 00:01:47.798424: step: 368/77, loss: 0.00047104203258641064 2023-01-24 00:01:49.118120: step: 372/77, loss: 
0.022833313792943954 2023-01-24 00:01:50.399354: step: 376/77, loss: 0.0005413969047367573 2023-01-24 00:01:51.653477: step: 380/77, loss: 0.00017948381719179451 2023-01-24 00:01:52.901317: step: 384/77, loss: 0.01384049467742443 2023-01-24 00:01:54.218921: step: 388/77, loss: 0.0009254040778614581 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.9344262295081968, 'r': 0.4453125, 'f1': 0.6031746031746033}, 'slot': {'p': 0.45714285714285713, 'r': 0.014652014652014652, 'f1': 0.02839396628216504}, 'combined': 0.017126519344797964, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.9344262295081968, 'r': 0.4453125, 'f1': 0.6031746031746033}, 'slot': {'p': 0.47058823529411764, 'r': 0.014652014652014652, 'f1': 0.02841918294849023}, 'combined': 0.017141729397502043, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.9354838709677419, 'r': 0.453125, 'f1': 0.6105263157894737}, 'slot': {'p': 0.48484848484848486, 'r': 0.014652014652014652, 'f1': 0.028444444444444442}, 'combined': 0.01736608187134503, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: 
{'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:03:41.012114: step: 4/77, loss: 0.0007864898070693016 2023-01-24 00:03:42.330043: step: 8/77, loss: 0.010362272150814533 2023-01-24 00:03:43.692628: step: 12/77, loss: 0.006819052156060934 2023-01-24 00:03:44.960989: step: 16/77, loss: 2.1676016331184655e-05 2023-01-24 00:03:46.248927: step: 20/77, loss: 0.0011354070156812668 2023-01-24 00:03:47.522751: step: 24/77, loss: 2.8951749300176743e-06 2023-01-24 00:03:48.860426: step: 28/77, loss: 0.0001309202198171988 2023-01-24 00:03:50.208842: step: 32/77, loss: 0.003243007929995656 2023-01-24 00:03:51.503942: step: 36/77, loss: 0.002287351293489337 2023-01-24 00:03:52.817457: step: 40/77, loss: 7.462866778951138e-05 2023-01-24 00:03:54.060968: step: 44/77, loss: 0.0029088289011269808 2023-01-24 00:03:55.376407: step: 48/77, loss: 0.00041750201489776373 2023-01-24 00:03:56.722392: step: 52/77, loss: 0.0008609117357991636 2023-01-24 00:03:58.025259: step: 56/77, loss: 0.0004486891266424209 2023-01-24 00:03:59.389451: step: 60/77, loss: 0.0018713414901867509 2023-01-24 00:04:00.725364: step: 64/77, loss: 0.006790338084101677 2023-01-24 00:04:02.059508: step: 68/77, loss: 0.0003210757568012923 2023-01-24 00:04:03.392242: step: 72/77, loss: 0.08046431094408035 2023-01-24 00:04:04.727488: step: 76/77, loss: 3.1100178603082895e-05 2023-01-24 00:04:06.077537: step: 80/77, loss: 0.0002698895405046642 2023-01-24 00:04:07.376470: step: 84/77, loss: 0.0010851433034986258 2023-01-24 00:04:08.693812: step: 88/77, loss: 0.011157970875501633 2023-01-24 00:04:10.007628: step: 92/77, loss: 0.0004891256103292108 2023-01-24 00:04:11.337407: step: 96/77, loss: 0.03444315120577812 2023-01-24 00:04:12.670020: step: 100/77, loss: 0.0015314036281779408 2023-01-24 00:04:14.016682: step: 104/77, loss: 0.006588106043636799 2023-01-24 00:04:15.342646: step: 108/77, loss: 0.010639192536473274 2023-01-24 00:04:16.689596: step: 112/77, loss: 6.6985526245844085e-06 2023-01-24 00:04:17.989137: step: 116/77, loss: 7.102488598320633e-05 2023-01-24 00:04:19.311087: step: 120/77, loss: 0.001098927459679544 2023-01-24 00:04:20.628429: step: 124/77, loss: 2.5048670067917556e-05 2023-01-24 00:04:21.888575: step: 128/77, loss: 6.630965003751044e-07 2023-01-24 00:04:23.244160: step: 132/77, loss: 5.726329982280731e-05 2023-01-24 00:04:24.572026: step: 136/77, loss: 0.00011796267790487036 2023-01-24 00:04:25.923417: step: 140/77, loss: 0.0009620689670555294 2023-01-24 00:04:27.229135: step: 144/77, loss: 0.0058373697102069855 2023-01-24 00:04:28.549613: step: 148/77, loss: 0.0002568000345490873 2023-01-24 00:04:29.851016: step: 152/77, loss: 0.0041648694314062595 2023-01-24 00:04:31.190881: step: 156/77, loss: 
0.01177394948899746 2023-01-24 00:04:32.504877: step: 160/77, loss: 0.000455582223366946 2023-01-24 00:04:33.855716: step: 164/77, loss: 0.0003266449202783406 2023-01-24 00:04:35.169669: step: 168/77, loss: 0.00044424354564398527 2023-01-24 00:04:36.486223: step: 172/77, loss: 1.743532993714325e-05 2023-01-24 00:04:37.806520: step: 176/77, loss: 0.0009272638126276433 2023-01-24 00:04:39.181744: step: 180/77, loss: 1.468725622544298e-05 2023-01-24 00:04:40.547542: step: 184/77, loss: 0.0016138491919264197 2023-01-24 00:04:41.862660: step: 188/77, loss: 1.6808103282528464e-06 2023-01-24 00:04:43.183366: step: 192/77, loss: 0.001134151709266007 2023-01-24 00:04:44.475411: step: 196/77, loss: 0.004834283143281937 2023-01-24 00:04:45.820358: step: 200/77, loss: 0.037488024681806564 2023-01-24 00:04:47.148630: step: 204/77, loss: 1.0326210713174078e-06 2023-01-24 00:04:48.443868: step: 208/77, loss: 0.006991858594119549 2023-01-24 00:04:49.772610: step: 212/77, loss: 7.4390841291460674e-06 2023-01-24 00:04:51.087108: step: 216/77, loss: 2.5584324703231687e-06 2023-01-24 00:04:52.384368: step: 220/77, loss: 0.0009339148527942598 2023-01-24 00:04:53.670606: step: 224/77, loss: 0.027637531980872154 2023-01-24 00:04:55.028878: step: 228/77, loss: 5.2310802857391536e-05 2023-01-24 00:04:56.372899: step: 232/77, loss: 4.627784073818475e-05 2023-01-24 00:04:57.651964: step: 236/77, loss: 0.0001499750214861706 2023-01-24 00:04:58.953336: step: 240/77, loss: 0.0003451184311416 2023-01-24 00:05:00.191301: step: 244/77, loss: 0.025616183876991272 2023-01-24 00:05:01.462492: step: 248/77, loss: 0.015998056158423424 2023-01-24 00:05:02.817715: step: 252/77, loss: 0.010580199770629406 2023-01-24 00:05:04.132585: step: 256/77, loss: 0.04575950279831886 2023-01-24 00:05:05.445730: step: 260/77, loss: 7.59999529691413e-05 2023-01-24 00:05:06.720774: step: 264/77, loss: 0.020015671849250793 2023-01-24 00:05:08.013173: step: 268/77, loss: 2.7577889341046102e-05 2023-01-24 00:05:09.324590: step: 272/77, loss: 9.327914085588418e-07 2023-01-24 00:05:10.611373: step: 276/77, loss: 0.00043893905240111053 2023-01-24 00:05:11.934836: step: 280/77, loss: 3.862224912154488e-05 2023-01-24 00:05:13.280906: step: 284/77, loss: 0.006454653572291136 2023-01-24 00:05:14.658880: step: 288/77, loss: 2.1047469999757595e-05 2023-01-24 00:05:15.960421: step: 292/77, loss: 0.0005261976039037108 2023-01-24 00:05:17.285706: step: 296/77, loss: 4.018589152110508e-06 2023-01-24 00:05:18.541508: step: 300/77, loss: 0.001395822619087994 2023-01-24 00:05:19.845065: step: 304/77, loss: 0.00048785883700475097 2023-01-24 00:05:21.161907: step: 308/77, loss: 0.0035083997063338757 2023-01-24 00:05:22.527059: step: 312/77, loss: 8.830250590108335e-05 2023-01-24 00:05:23.874562: step: 316/77, loss: 0.0009523604530841112 2023-01-24 00:05:25.207679: step: 320/77, loss: 1.5574347344227135e-05 2023-01-24 00:05:26.511145: step: 324/77, loss: 0.00030986740603111684 2023-01-24 00:05:27.827133: step: 328/77, loss: 0.00013391696847975254 2023-01-24 00:05:29.183871: step: 332/77, loss: 0.0010573953622952104 2023-01-24 00:05:30.452918: step: 336/77, loss: 0.0008024564012885094 2023-01-24 00:05:31.746962: step: 340/77, loss: 0.0025499192997813225 2023-01-24 00:05:33.101941: step: 344/77, loss: 0.006977582350373268 2023-01-24 00:05:34.455129: step: 348/77, loss: 5.410162884800229e-06 2023-01-24 00:05:35.780924: step: 352/77, loss: 0.025690611451864243 2023-01-24 00:05:37.123368: step: 356/77, loss: 0.0002882384869735688 2023-01-24 00:05:38.426022: step: 360/77, 
loss: 0.02115679532289505 2023-01-24 00:05:39.672174: step: 364/77, loss: 9.5250470621977e-05 2023-01-24 00:05:41.036061: step: 368/77, loss: 0.102179154753685 2023-01-24 00:05:42.376004: step: 372/77, loss: 0.01381751149892807 2023-01-24 00:05:43.667004: step: 376/77, loss: 0.00032086059218272567 2023-01-24 00:05:44.931263: step: 380/77, loss: 0.001289880950935185 2023-01-24 00:05:46.233236: step: 384/77, loss: 0.00010622338595567271 2023-01-24 00:05:47.597034: step: 388/77, loss: 0.02440432272851467 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9436619718309859, 'r': 0.5234375, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5, 'r': 0.0173992673992674, 'f1': 0.033628318584070796}, 'combined': 0.022644194423444654, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.9436619718309859, 'r': 0.5234375, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5135135135135135, 'r': 0.0173992673992674, 'f1': 0.03365810451727192}, 'combined': 0.02266425128298712, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9436619718309859, 'r': 0.5234375, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5, 'r': 0.0173992673992674, 'f1': 0.033628318584070796}, 'combined': 0.022644194423444654, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 
'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:07:34.329542: step: 4/77, loss: 0.022796493023633957 2023-01-24 00:07:35.630686: step: 8/77, loss: 0.0644044354557991 2023-01-24 00:07:36.897975: step: 12/77, loss: 0.026123739778995514 2023-01-24 00:07:38.204591: step: 16/77, loss: 0.0022905736695975065 2023-01-24 00:07:39.550348: step: 20/77, loss: 0.02266693487763405 2023-01-24 00:07:40.877418: step: 24/77, loss: 1.8297017959412187e-05 2023-01-24 00:07:42.179325: step: 28/77, loss: 5.624979530693963e-05 2023-01-24 00:07:43.516191: step: 32/77, loss: 0.029617756605148315 2023-01-24 00:07:44.888446: step: 36/77, loss: 9.667923222878017e-06 2023-01-24 00:07:46.195160: step: 40/77, loss: 0.21155443787574768 2023-01-24 00:07:47.500312: step: 44/77, loss: 0.0017198350979015231 2023-01-24 00:07:48.824593: step: 48/77, loss: 0.004426266066730022 2023-01-24 00:07:50.190128: step: 52/77, loss: 1.6712678188923746e-05 2023-01-24 00:07:51.538366: step: 56/77, loss: 0.0006967654335312545 2023-01-24 00:07:52.875873: step: 60/77, loss: 0.001149756950326264 2023-01-24 00:07:54.210973: step: 64/77, loss: 0.002224504482001066 2023-01-24 00:07:55.519181: step: 68/77, loss: 7.791274583723862e-06 2023-01-24 00:07:56.855961: step: 72/77, loss: 0.0003872505621984601 2023-01-24 00:07:58.249204: step: 76/77, loss: 0.0235122237354517 2023-01-24 00:07:59.582350: step: 80/77, loss: 0.01756272278726101 2023-01-24 00:08:00.919639: step: 84/77, loss: 0.011864123865962029 2023-01-24 00:08:02.199472: step: 88/77, loss: 0.0001767357753124088 2023-01-24 00:08:03.481327: step: 92/77, loss: 3.5476004995871335e-06 2023-01-24 00:08:04.773245: step: 96/77, loss: 9.594253060640767e-05 2023-01-24 00:08:06.138752: step: 100/77, loss: 0.0008208492654375732 2023-01-24 00:08:07.433877: step: 104/77, loss: 6.475725058407988e-06 2023-01-24 00:08:08.733470: step: 108/77, loss: 0.005691048689186573 2023-01-24 00:08:10.023749: step: 112/77, loss: 4.101833837921731e-06 2023-01-24 00:08:11.360495: step: 116/77, loss: 0.0011443725088611245 2023-01-24 00:08:12.677862: step: 120/77, loss: 8.79143897236645e-07 2023-01-24 00:08:14.011263: step: 124/77, loss: 0.000219722292968072 2023-01-24 00:08:15.313983: step: 128/77, loss: 0.0002379810030106455 2023-01-24 00:08:16.599941: step: 132/77, loss: 8.631691889604554e-05 2023-01-24 00:08:17.932483: step: 136/77, loss: 0.00938483141362667 2023-01-24 00:08:19.183076: step: 140/77, loss: 0.0001489740243414417 2023-01-24 00:08:20.559197: step: 144/77, loss: 1.5052801245474257e-05 2023-01-24 00:08:21.837096: step: 
148/77, loss: 0.026118695735931396 2023-01-24 00:08:23.120117: step: 152/77, loss: 0.07806071639060974 2023-01-24 00:08:24.496164: step: 156/77, loss: 5.0516373448772356e-05 2023-01-24 00:08:25.816591: step: 160/77, loss: 0.0023060047533363104 2023-01-24 00:08:27.157730: step: 164/77, loss: 0.00032793389982543886 2023-01-24 00:08:28.515813: step: 168/77, loss: 0.00041060629882849753 2023-01-24 00:08:29.806608: step: 172/77, loss: 3.9587655919604003e-05 2023-01-24 00:08:31.180888: step: 176/77, loss: 0.00928050372749567 2023-01-24 00:08:32.464363: step: 180/77, loss: 0.0007714069215580821 2023-01-24 00:08:33.749614: step: 184/77, loss: 0.0021703136153519154 2023-01-24 00:08:35.113281: step: 188/77, loss: 5.304771661940322e-07 2023-01-24 00:08:36.361817: step: 192/77, loss: 0.024455228820443153 2023-01-24 00:08:37.655576: step: 196/77, loss: 0.001490587368607521 2023-01-24 00:08:38.958912: step: 200/77, loss: 0.00020829432469327003 2023-01-24 00:08:40.265933: step: 204/77, loss: 0.00034719277755357325 2023-01-24 00:08:41.620805: step: 208/77, loss: 1.3317236152943224e-05 2023-01-24 00:08:42.941360: step: 212/77, loss: 9.691724699223414e-05 2023-01-24 00:08:44.293978: step: 216/77, loss: 0.23184849321842194 2023-01-24 00:08:45.530045: step: 220/77, loss: 0.00047657586401328444 2023-01-24 00:08:46.887531: step: 224/77, loss: 0.00017672436661086977 2023-01-24 00:08:48.232971: step: 228/77, loss: 0.0009486278286203742 2023-01-24 00:08:49.558682: step: 232/77, loss: 1.5350226021837443e-05 2023-01-24 00:08:50.856683: step: 236/77, loss: 0.00015230315329972655 2023-01-24 00:08:52.134155: step: 240/77, loss: 0.12049225717782974 2023-01-24 00:08:53.435628: step: 244/77, loss: 0.0015533717814832926 2023-01-24 00:08:54.795295: step: 248/77, loss: 0.00011515563528519124 2023-01-24 00:08:56.110383: step: 252/77, loss: 0.0006750301108695567 2023-01-24 00:08:57.357059: step: 256/77, loss: 0.0003167404211126268 2023-01-24 00:08:58.671416: step: 260/77, loss: 4.348805668996647e-05 2023-01-24 00:08:59.993631: step: 264/77, loss: 0.0006341171683743596 2023-01-24 00:09:01.288994: step: 268/77, loss: 0.00039305503014475107 2023-01-24 00:09:02.531294: step: 272/77, loss: 0.0063155763782560825 2023-01-24 00:09:03.826202: step: 276/77, loss: 0.015269141644239426 2023-01-24 00:09:05.177947: step: 280/77, loss: 0.000924046034924686 2023-01-24 00:09:06.483851: step: 284/77, loss: 0.0018953699618577957 2023-01-24 00:09:07.806972: step: 288/77, loss: 0.0002120360150001943 2023-01-24 00:09:09.149054: step: 292/77, loss: 0.009600703604519367 2023-01-24 00:09:10.432764: step: 296/77, loss: 0.006479513365775347 2023-01-24 00:09:11.694332: step: 300/77, loss: 0.0001296757982345298 2023-01-24 00:09:13.027679: step: 304/77, loss: 0.01025966927409172 2023-01-24 00:09:14.360435: step: 308/77, loss: 3.9645419747103006e-05 2023-01-24 00:09:15.678254: step: 312/77, loss: 8.368302951566875e-05 2023-01-24 00:09:17.018491: step: 316/77, loss: 0.07057785987854004 2023-01-24 00:09:18.362550: step: 320/77, loss: 9.552506526233628e-05 2023-01-24 00:09:19.639201: step: 324/77, loss: 0.027393445372581482 2023-01-24 00:09:20.944055: step: 328/77, loss: 0.05229645222425461 2023-01-24 00:09:22.259741: step: 332/77, loss: 3.1888174589767004e-07 2023-01-24 00:09:23.577077: step: 336/77, loss: 4.003276990260929e-05 2023-01-24 00:09:24.935828: step: 340/77, loss: 0.005392936524003744 2023-01-24 00:09:26.281601: step: 344/77, loss: 0.00012345942377578467 2023-01-24 00:09:27.618006: step: 348/77, loss: 0.00040279352106153965 2023-01-24 
00:09:28.965247: step: 352/77, loss: 0.000411268207244575 2023-01-24 00:09:30.302560: step: 356/77, loss: 0.0016155533958226442 2023-01-24 00:09:31.660039: step: 360/77, loss: 0.00015825206355657429 2023-01-24 00:09:33.011112: step: 364/77, loss: 0.030476603657007217 2023-01-24 00:09:34.268936: step: 368/77, loss: 0.0009514524135738611 2023-01-24 00:09:35.573615: step: 372/77, loss: 0.00010489935812074691 2023-01-24 00:09:36.888605: step: 376/77, loss: 1.1905699466296937e-06 2023-01-24 00:09:38.257834: step: 380/77, loss: 0.0012081509921699762 2023-01-24 00:09:39.568991: step: 384/77, loss: 0.030084820464253426 2023-01-24 00:09:40.884578: step: 388/77, loss: 0.01937655545771122 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 0.9705882352941176, 'r': 0.515625, 'f1': 0.673469387755102}, 'slot': {'p': 0.53125, 'r': 0.015567765567765568, 'f1': 0.0302491103202847}, 'combined': 0.020371849807538676, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 0.9558823529411765, 'r': 0.5078125, 'f1': 0.6632653061224489}, 'slot': {'p': 0.5151515151515151, 'r': 0.015567765567765568, 'f1': 0.030222222222222227}, 'combined': 0.020045351473922904, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 0.9714285714285714, 'r': 0.53125, 'f1': 0.6868686868686867}, 'slot': {'p': 0.5161290322580645, 'r': 0.014652014652014652, 'f1': 0.028495102404274268}, 'combined': 0.019572393570612624, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 
'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:11:28.073757: step: 4/77, loss: 0.0034078971948474646 2023-01-24 00:11:29.375541: step: 8/77, loss: 0.017262566834688187 2023-01-24 00:11:30.742620: step: 12/77, loss: 0.00021428756008390337 2023-01-24 00:11:32.092359: step: 16/77, loss: 0.013926129788160324 2023-01-24 00:11:33.423780: step: 20/77, loss: 1.1324713113936014e-06 2023-01-24 00:11:34.750528: step: 24/77, loss: 0.00899231806397438 2023-01-24 00:11:36.090824: step: 28/77, loss: 5.312186112860218e-05 2023-01-24 00:11:37.423494: step: 32/77, loss: 0.000548401556443423 2023-01-24 00:11:38.719666: step: 36/77, loss: 2.640987622726243e-05 2023-01-24 00:11:40.016851: step: 40/77, loss: 0.0005899526877328753 2023-01-24 00:11:41.328514: step: 44/77, loss: 0.04716620221734047 2023-01-24 00:11:42.643264: step: 48/77, loss: 8.14858649391681e-05 2023-01-24 00:11:43.938043: step: 52/77, loss: 2.397433718215325e-06 2023-01-24 00:11:45.258287: step: 56/77, loss: 0.03809444233775139 2023-01-24 00:11:46.592033: step: 60/77, loss: 4.114320836379193e-05 2023-01-24 00:11:47.854942: step: 64/77, loss: 0.0005176113336347044 2023-01-24 00:11:49.127104: step: 68/77, loss: 0.00018408146570436656 2023-01-24 00:11:50.451185: step: 72/77, loss: 0.0005031367181800306 2023-01-24 00:11:51.727441: step: 76/77, loss: 0.00011992393410764635 2023-01-24 00:11:52.992088: step: 80/77, loss: 0.08048339188098907 2023-01-24 00:11:54.279186: step: 84/77, loss: 0.0002451002364978194 2023-01-24 00:11:55.582628: step: 88/77, loss: 0.008995145559310913 2023-01-24 00:11:56.853659: step: 92/77, loss: 0.0007470811833627522 2023-01-24 00:11:58.187848: step: 96/77, loss: 0.00022808456560596824 2023-01-24 00:11:59.541831: step: 100/77, loss: 2.975380266434513e-05 2023-01-24 00:12:00.898981: step: 104/77, loss: 6.816323002567515e-05 2023-01-24 00:12:02.239689: step: 108/77, loss: 5.16979162057396e-05 2023-01-24 00:12:03.532638: step: 112/77, loss: 3.284345075371675e-05 2023-01-24 00:12:04.850573: step: 116/77, loss: 0.01112334430217743 2023-01-24 00:12:06.208742: step: 120/77, loss: 0.025925541296601295 2023-01-24 00:12:07.507670: step: 124/77, loss: 6.150685658212751e-05 2023-01-24 00:12:08.849202: step: 128/77, loss: 0.0017336109885945916 2023-01-24 00:12:10.187448: step: 132/77, loss: 0.012070889584720135 2023-01-24 00:12:11.536812: step: 136/77, loss: 0.032958123832941055 2023-01-24 00:12:12.825587: step: 
140/77, loss: 0.04690272733569145 2023-01-24 00:12:14.156591: step: 144/77, loss: 6.709252375003416e-06 2023-01-24 00:12:15.501907: step: 148/77, loss: 0.0639430582523346 2023-01-24 00:12:16.832042: step: 152/77, loss: 0.0006111941183917224 2023-01-24 00:12:18.138360: step: 156/77, loss: 0.0046700369566679 2023-01-24 00:12:19.524785: step: 160/77, loss: 0.0025426128413528204 2023-01-24 00:12:20.859491: step: 164/77, loss: 0.0014713435666635633 2023-01-24 00:12:22.155335: step: 168/77, loss: 5.589408829109743e-05 2023-01-24 00:12:23.484207: step: 172/77, loss: 0.016112398356199265 2023-01-24 00:12:24.791298: step: 176/77, loss: 0.013365473598241806 2023-01-24 00:12:26.078274: step: 180/77, loss: 0.0001512926974100992 2023-01-24 00:12:27.384049: step: 184/77, loss: 0.009979977272450924 2023-01-24 00:12:28.691364: step: 188/77, loss: 0.6408872604370117 2023-01-24 00:12:30.049646: step: 192/77, loss: 8.001852620509453e-07 2023-01-24 00:12:31.382102: step: 196/77, loss: 2.3612466975464486e-05 2023-01-24 00:12:32.739959: step: 200/77, loss: 0.002872622571885586 2023-01-24 00:12:34.089890: step: 204/77, loss: 0.00032813759753480554 2023-01-24 00:12:35.395807: step: 208/77, loss: 0.017089366912841797 2023-01-24 00:12:36.712867: step: 212/77, loss: 0.00020115751249250025 2023-01-24 00:12:38.036139: step: 216/77, loss: 0.00011558117694221437 2023-01-24 00:12:39.346425: step: 220/77, loss: 0.0005581318982876837 2023-01-24 00:12:40.638107: step: 224/77, loss: 0.005376110784709454 2023-01-24 00:12:41.966202: step: 228/77, loss: 2.5518758775433525e-05 2023-01-24 00:12:43.306323: step: 232/77, loss: 0.0942663624882698 2023-01-24 00:12:44.569136: step: 236/77, loss: 6.049809826436103e-07 2023-01-24 00:12:45.833826: step: 240/77, loss: 0.0003028716309927404 2023-01-24 00:12:47.198518: step: 244/77, loss: 0.000771807215642184 2023-01-24 00:12:48.508181: step: 248/77, loss: 0.0033995232079178095 2023-01-24 00:12:49.812284: step: 252/77, loss: 0.0004318088758736849 2023-01-24 00:12:51.119785: step: 256/77, loss: 0.00012441341823432595 2023-01-24 00:12:52.481270: step: 260/77, loss: 0.0021375638898462057 2023-01-24 00:12:53.759605: step: 264/77, loss: 0.002261765766888857 2023-01-24 00:12:55.052366: step: 268/77, loss: 1.0147630746359937e-05 2023-01-24 00:12:56.389891: step: 272/77, loss: 6.760523683624342e-05 2023-01-24 00:12:57.723730: step: 276/77, loss: 0.0008380015497095883 2023-01-24 00:12:59.057672: step: 280/77, loss: 0.015756692737340927 2023-01-24 00:13:00.364183: step: 284/77, loss: 0.00037149080890230834 2023-01-24 00:13:01.651916: step: 288/77, loss: 0.0002747896360233426 2023-01-24 00:13:02.973094: step: 292/77, loss: 0.00038542161928489804 2023-01-24 00:13:04.339426: step: 296/77, loss: 0.0015570932300761342 2023-01-24 00:13:05.647638: step: 300/77, loss: 0.05393500253558159 2023-01-24 00:13:06.976746: step: 304/77, loss: 5.701546251657419e-05 2023-01-24 00:13:08.312624: step: 308/77, loss: 7.498095510527492e-05 2023-01-24 00:13:09.644793: step: 312/77, loss: 0.01547918003052473 2023-01-24 00:13:10.917740: step: 316/77, loss: 0.00011507688031997532 2023-01-24 00:13:12.236721: step: 320/77, loss: 1.5538498701062053e-05 2023-01-24 00:13:13.520217: step: 324/77, loss: 0.0026777207385748625 2023-01-24 00:13:14.815144: step: 328/77, loss: 0.009971034713089466 2023-01-24 00:13:16.130920: step: 332/77, loss: 4.887807517661713e-05 2023-01-24 00:13:17.442064: step: 336/77, loss: 0.05552070215344429 2023-01-24 00:13:18.769638: step: 340/77, loss: 0.00015315160271711648 2023-01-24 00:13:20.080132: step: 
344/77, loss: 0.0002867273869924247 2023-01-24 00:13:21.379656: step: 348/77, loss: 0.0023228887002915144 2023-01-24 00:13:22.742842: step: 352/77, loss: 6.802859388699289e-06 2023-01-24 00:13:24.030224: step: 356/77, loss: 0.007060936186462641 2023-01-24 00:13:25.369428: step: 360/77, loss: 0.02428770251572132 2023-01-24 00:13:26.758250: step: 364/77, loss: 0.0007483771769329906 2023-01-24 00:13:28.036917: step: 368/77, loss: 0.03832714259624481 2023-01-24 00:13:29.290828: step: 372/77, loss: 0.00018787103181239218 2023-01-24 00:13:30.593182: step: 376/77, loss: 0.0006022527231834829 2023-01-24 00:13:31.892796: step: 380/77, loss: 0.0023554773069918156 2023-01-24 00:13:33.207464: step: 384/77, loss: 0.005550259258598089 2023-01-24 00:13:34.484642: step: 388/77, loss: 6.707603461109102e-06 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 22} Test Chinese: {'template': {'p': 0.9836065573770492, 'r': 0.46875, 'f1': 0.6349206349206349}, 'slot': {'p': 0.6521739130434783, 'r': 0.013736263736263736, 'f1': 0.026905829596412557}, 'combined': 0.01708306641042067, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 22} Test Korean: {'template': {'p': 0.9833333333333333, 'r': 0.4609375, 'f1': 0.6276595744680851}, 'slot': {'p': 0.6818181818181818, 'r': 0.013736263736263736, 'f1': 0.026929982046678635}, 'combined': 0.01690286107185148, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 22} Test Russian: {'template': {'p': 0.9833333333333333, 'r': 0.4609375, 'f1': 0.6276595744680851}, 'slot': {'p': 0.6818181818181818, 'r': 0.013736263736263736, 'f1': 0.026929982046678635}, 'combined': 0.01690286107185148, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 
'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:15:21.224821: step: 4/77, loss: 0.02925044298171997 2023-01-24 00:15:22.547739: step: 8/77, loss: 0.0030780029483139515 2023-01-24 00:15:23.848724: step: 12/77, loss: 0.030284637585282326 2023-01-24 00:15:25.154434: step: 16/77, loss: 0.00010271882638335228 2023-01-24 00:15:26.449597: step: 20/77, loss: 0.0010229551699012518 2023-01-24 00:15:27.753687: step: 24/77, loss: 0.0057197012938559055 2023-01-24 00:15:29.008383: step: 28/77, loss: 0.0016985785914584994 2023-01-24 00:15:30.364262: step: 32/77, loss: 0.017662620171904564 2023-01-24 00:15:31.607096: step: 36/77, loss: 0.0006728796288371086 2023-01-24 00:15:32.877234: step: 40/77, loss: 2.5985159481933806e-06 2023-01-24 00:15:34.149253: step: 44/77, loss: 0.00010429038957227021 2023-01-24 00:15:35.449435: step: 48/77, loss: 0.04060396924614906 2023-01-24 00:15:36.756537: step: 52/77, loss: 0.012450166046619415 2023-01-24 00:15:37.967014: step: 56/77, loss: 0.005256613250821829 2023-01-24 00:15:39.271557: step: 60/77, loss: 1.236334992427146e-05 2023-01-24 00:15:40.597294: step: 64/77, loss: 2.8647409635595977e-05 2023-01-24 00:15:41.954956: step: 68/77, loss: 4.4482527300715446e-05 2023-01-24 00:15:43.233840: step: 72/77, loss: 0.02085484005510807 2023-01-24 00:15:44.519444: step: 76/77, loss: 2.1111116438987665e-05 2023-01-24 00:15:45.869653: step: 80/77, loss: 0.0007840099860914052 2023-01-24 00:15:47.125589: step: 84/77, loss: 0.00127530621830374 2023-01-24 00:15:48.420471: step: 88/77, loss: 3.6371507121657487e-06 2023-01-24 00:15:49.743055: step: 92/77, loss: 0.008171175606548786 2023-01-24 00:15:51.066583: step: 96/77, loss: 0.02187040075659752 2023-01-24 00:15:52.385467: step: 100/77, loss: 0.008003543131053448 2023-01-24 00:15:53.654243: step: 104/77, loss: 0.004421391524374485 2023-01-24 00:15:54.968714: step: 108/77, loss: 4.918003469356336e-05 2023-01-24 00:15:56.228310: step: 112/77, loss: 0.011352593079209328 2023-01-24 00:15:57.598637: step: 116/77, loss: 0.001593520981259644 2023-01-24 00:15:58.896656: step: 120/77, loss: 0.031238067895174026 2023-01-24 00:16:00.202260: step: 124/77, loss: 0.01812879368662834 2023-01-24 00:16:01.489489: step: 128/77, loss: 0.0009220782667398453 2023-01-24 00:16:02.818361: step: 132/77, loss: 0.023734448477625847 2023-01-24 
00:16:04.130899: step: 136/77, loss: 1.326091296505183e-05 2023-01-24 00:16:05.413187: step: 140/77, loss: 0.004073227755725384 2023-01-24 00:16:06.672788: step: 144/77, loss: 0.0015277594793587923 2023-01-24 00:16:07.980053: step: 148/77, loss: 0.00916389748454094 2023-01-24 00:16:09.289184: step: 152/77, loss: 0.0002302402281202376 2023-01-24 00:16:10.553650: step: 156/77, loss: 0.00012056231935275719 2023-01-24 00:16:11.828323: step: 160/77, loss: 0.0014544213190674782 2023-01-24 00:16:13.177248: step: 164/77, loss: 0.00019257509848102927 2023-01-24 00:16:14.483995: step: 168/77, loss: 3.402936636121012e-05 2023-01-24 00:16:15.802951: step: 172/77, loss: 1.1194384569535032e-05 2023-01-24 00:16:17.125981: step: 176/77, loss: 0.029281653463840485 2023-01-24 00:16:18.429845: step: 180/77, loss: 0.05051398277282715 2023-01-24 00:16:19.708037: step: 184/77, loss: 2.362795567023568e-05 2023-01-24 00:16:20.979562: step: 188/77, loss: 0.00014806289982516319 2023-01-24 00:16:22.253212: step: 192/77, loss: 1.3202284208091442e-06 2023-01-24 00:16:23.545047: step: 196/77, loss: 5.061740739620291e-05 2023-01-24 00:16:24.846547: step: 200/77, loss: 0.009350545704364777 2023-01-24 00:16:26.124169: step: 204/77, loss: 0.014052278362214565 2023-01-24 00:16:27.439447: step: 208/77, loss: 8.189300569938496e-05 2023-01-24 00:16:28.758291: step: 212/77, loss: 0.007922136224806309 2023-01-24 00:16:30.129076: step: 216/77, loss: 8.792057997197844e-06 2023-01-24 00:16:31.426140: step: 220/77, loss: 0.0014992207288742065 2023-01-24 00:16:32.724257: step: 224/77, loss: 0.033945001661777496 2023-01-24 00:16:34.048445: step: 228/77, loss: 9.25993881537579e-05 2023-01-24 00:16:35.386660: step: 232/77, loss: 0.00026498493389226496 2023-01-24 00:16:36.685762: step: 236/77, loss: 2.707368139454047e-06 2023-01-24 00:16:38.051693: step: 240/77, loss: 0.02109416015446186 2023-01-24 00:16:39.324016: step: 244/77, loss: 0.007108455523848534 2023-01-24 00:16:40.614296: step: 248/77, loss: 0.0002526230236981064 2023-01-24 00:16:41.889028: step: 252/77, loss: 0.00032294943230226636 2023-01-24 00:16:43.190734: step: 256/77, loss: 0.0007726695039309561 2023-01-24 00:16:44.543710: step: 260/77, loss: 9.100021270569414e-05 2023-01-24 00:16:45.820980: step: 264/77, loss: 0.07174643129110336 2023-01-24 00:16:47.096486: step: 268/77, loss: 6.0867492720717564e-05 2023-01-24 00:16:48.400647: step: 272/77, loss: 0.0005447212024591863 2023-01-24 00:16:49.722382: step: 276/77, loss: 0.0001453039440093562 2023-01-24 00:16:51.045095: step: 280/77, loss: 0.00035742539330385625 2023-01-24 00:16:52.382194: step: 284/77, loss: 0.03960578143596649 2023-01-24 00:16:53.725872: step: 288/77, loss: 0.00041772908298298717 2023-01-24 00:16:55.030064: step: 292/77, loss: 2.4745031623751856e-05 2023-01-24 00:16:56.328247: step: 296/77, loss: 0.015246432274580002 2023-01-24 00:16:57.633155: step: 300/77, loss: 0.0002701383491512388 2023-01-24 00:16:58.913518: step: 304/77, loss: 0.000890142924617976 2023-01-24 00:17:00.206468: step: 308/77, loss: 0.0024266818072646856 2023-01-24 00:17:01.508644: step: 312/77, loss: 0.0005810699076391757 2023-01-24 00:17:02.748399: step: 316/77, loss: 8.0965746747097e-06 2023-01-24 00:17:04.066992: step: 320/77, loss: 0.0001801561884349212 2023-01-24 00:17:05.368989: step: 324/77, loss: 0.0010214989306405187 2023-01-24 00:17:06.624250: step: 328/77, loss: 6.109410719545849e-07 2023-01-24 00:17:07.946647: step: 332/77, loss: 0.03397783264517784 2023-01-24 00:17:09.264967: step: 336/77, loss: 4.2231633415212855e-05 
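Note on the evaluation blocks: from the logged values, each language's 'combined' score appears to be the template F1 multiplied by the slot F1, with each F1 the usual harmonic mean of precision and recall. A minimal Python sketch of that arithmetic, inferred from the numbers (the f1 helper is illustrative, not part of train.py), using the epoch-23 Dev Chinese figures that follow below:

# Assumption inferred from the logged numbers: combined = template_f1 * slot_f1.
def f1(p: float, r: float) -> float:
    """Standard F1: harmonic mean of precision and recall."""
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

template_f1 = f1(0.9705882352941176, 0.55)       # -> 0.7021276595744681
slot_f1 = f1(0.5, 0.035916824196597356)          # -> 0.0670194003527337
combined = template_f1 * slot_f1                 # -> 0.047056174715749195
# These reproduce the 'Dev Chinese' entry logged for epoch 23 below.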
2023-01-24 00:17:10.557457: step: 340/77, loss: 0.000951576919760555 2023-01-24 00:17:11.835837: step: 344/77, loss: 7.280572754098102e-05 2023-01-24 00:17:13.175469: step: 348/77, loss: 0.001599346986040473 2023-01-24 00:17:14.480708: step: 352/77, loss: 0.02990417554974556 2023-01-24 00:17:15.792741: step: 356/77, loss: 0.0002445604186505079 2023-01-24 00:17:17.170641: step: 360/77, loss: 0.1017698273062706 2023-01-24 00:17:18.493629: step: 364/77, loss: 0.024318577721714973 2023-01-24 00:17:19.802322: step: 368/77, loss: 5.249897367320955e-05 2023-01-24 00:17:21.144141: step: 372/77, loss: 0.0047577316872775555 2023-01-24 00:17:22.428427: step: 376/77, loss: 0.0014664152404293418 2023-01-24 00:17:23.766242: step: 380/77, loss: 0.00015006517060101032 2023-01-24 00:17:25.068994: step: 384/77, loss: 4.29098290624097e-05 2023-01-24 00:17:26.408552: step: 388/77, loss: 3.932953404728323e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 0.9705882352941176, 'r': 0.55, 'f1': 0.7021276595744681}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.047056174715749195, 'epoch': 23} Test Chinese: {'template': {'p': 0.9577464788732394, 'r': 0.53125, 'f1': 0.6834170854271355}, 'slot': {'p': 0.4722222222222222, 'r': 0.015567765567765568, 'f1': 0.030141843971631208}, 'combined': 0.020599451156491675, 'epoch': 23} Dev Korean: {'template': {'p': 0.9705882352941176, 'r': 0.55, 'f1': 0.7021276595744681}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.047056174715749195, 'epoch': 23} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.53125, 'f1': 0.6834170854271355}, 'slot': {'p': 0.4722222222222222, 'r': 0.015567765567765568, 'f1': 0.030141843971631208}, 'combined': 0.020599451156491675, 'epoch': 23} Dev Russian: {'template': {'p': 0.9705882352941176, 'r': 0.55, 'f1': 0.7021276595744681}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.047056174715749195, 'epoch': 23} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5390625, 'f1': 0.69}, 'slot': {'p': 0.4722222222222222, 'r': 0.015567765567765568, 'f1': 0.030141843971631208}, 'combined': 0.02079787234042553, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 
'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:19:11.735107: step: 4/77, loss: 0.00027425645384937525 2023-01-24 00:19:13.050260: step: 8/77, loss: 0.00812295451760292 2023-01-24 00:19:14.352915: step: 12/77, loss: 0.00017981577548198402 2023-01-24 00:19:15.668186: step: 16/77, loss: 2.915915956691606e-06 2023-01-24 00:19:16.981472: step: 20/77, loss: 0.015080899000167847 2023-01-24 00:19:18.287951: step: 24/77, loss: 0.00021564430790022016 2023-01-24 00:19:19.612200: step: 28/77, loss: 0.013282394967973232 2023-01-24 00:19:20.982752: step: 32/77, loss: 0.00043583771912381053 2023-01-24 00:19:22.307600: step: 36/77, loss: 0.0005709958495572209 2023-01-24 00:19:23.576274: step: 40/77, loss: 0.012487326748669147 2023-01-24 00:19:24.881979: step: 44/77, loss: 0.0021974733099341393 2023-01-24 00:19:26.215563: step: 48/77, loss: 0.0002029470488196239 2023-01-24 00:19:27.510928: step: 52/77, loss: 0.0013817045837640762 2023-01-24 00:19:28.835194: step: 56/77, loss: 0.0020415547769516706 2023-01-24 00:19:30.141810: step: 60/77, loss: 0.0067010316997766495 2023-01-24 00:19:31.491097: step: 64/77, loss: 0.00014627687050960958 2023-01-24 00:19:32.820179: step: 68/77, loss: 3.250454756198451e-05 2023-01-24 00:19:34.143056: step: 72/77, loss: 0.011801852844655514 2023-01-24 00:19:35.445974: step: 76/77, loss: 7.098000423866324e-06 2023-01-24 00:19:36.726523: step: 80/77, loss: 0.002970988629385829 2023-01-24 00:19:37.981374: step: 84/77, loss: 0.0003883733879774809 2023-01-24 00:19:39.325539: step: 88/77, loss: 0.004043085966259241 2023-01-24 00:19:40.598361: step: 92/77, loss: 4.335409903433174e-05 2023-01-24 00:19:41.847040: step: 96/77, loss: 0.0009157421300187707 2023-01-24 00:19:43.185210: step: 100/77, loss: 0.00040233746403828263 2023-01-24 00:19:44.490043: step: 104/77, loss: 0.007378537207841873 2023-01-24 00:19:45.817862: step: 108/77, loss: 0.0011341262143105268 2023-01-24 00:19:47.182218: step: 112/77, loss: 0.005580637603998184 2023-01-24 00:19:48.530913: step: 116/77, loss: 0.0007130467565730214 2023-01-24 00:19:49.830238: step: 120/77, loss: 0.0002832773025147617 2023-01-24 00:19:51.140085: step: 124/77, loss: 0.06424663960933685 2023-01-24 
00:19:52.503615: step: 128/77, loss: 1.1567653928068466e-05 2023-01-24 00:19:53.875540: step: 132/77, loss: 0.12748531997203827 2023-01-24 00:19:55.152626: step: 136/77, loss: 0.0014793593436479568 2023-01-24 00:19:56.467258: step: 140/77, loss: 0.035108745098114014 2023-01-24 00:19:57.830914: step: 144/77, loss: 0.029069863259792328 2023-01-24 00:19:59.175991: step: 148/77, loss: 0.0004990094457753003 2023-01-24 00:20:00.548991: step: 152/77, loss: 0.0552806556224823 2023-01-24 00:20:01.921991: step: 156/77, loss: 5.599405994871631e-05 2023-01-24 00:20:03.175222: step: 160/77, loss: 0.0010671545751392841 2023-01-24 00:20:04.480378: step: 164/77, loss: 9.503433102509007e-05 2023-01-24 00:20:05.765458: step: 168/77, loss: 0.00013206909352447838 2023-01-24 00:20:07.091887: step: 172/77, loss: 0.00036414500209502876 2023-01-24 00:20:08.381876: step: 176/77, loss: 0.000315420504193753 2023-01-24 00:20:09.713493: step: 180/77, loss: 0.010863608680665493 2023-01-24 00:20:11.067770: step: 184/77, loss: 0.0009430536883883178 2023-01-24 00:20:12.389977: step: 188/77, loss: 5.8920064475387335e-05 2023-01-24 00:20:13.733388: step: 192/77, loss: 0.0001401538320351392 2023-01-24 00:20:15.037602: step: 196/77, loss: 0.0005722360219806433 2023-01-24 00:20:16.337493: step: 200/77, loss: 0.0019929315894842148 2023-01-24 00:20:17.631630: step: 204/77, loss: 9.994933861889876e-06 2023-01-24 00:20:18.960158: step: 208/77, loss: 0.0018155412981286645 2023-01-24 00:20:20.239767: step: 212/77, loss: 0.0002928571484517306 2023-01-24 00:20:21.544955: step: 216/77, loss: 0.0019531085854396224 2023-01-24 00:20:22.847520: step: 220/77, loss: 0.0004826158401556313 2023-01-24 00:20:24.144458: step: 224/77, loss: 0.0001760215818649158 2023-01-24 00:20:25.417040: step: 228/77, loss: 0.0014431884046643972 2023-01-24 00:20:26.714029: step: 232/77, loss: 1.1321411875542253e-05 2023-01-24 00:20:28.040243: step: 236/77, loss: 2.3370621420326643e-05 2023-01-24 00:20:29.382322: step: 240/77, loss: 0.026028109714388847 2023-01-24 00:20:30.760537: step: 244/77, loss: 0.028926406055688858 2023-01-24 00:20:32.077025: step: 248/77, loss: 0.00455608032643795 2023-01-24 00:20:33.345062: step: 252/77, loss: 5.085330121801235e-05 2023-01-24 00:20:34.662292: step: 256/77, loss: 0.0015485123731195927 2023-01-24 00:20:35.929230: step: 260/77, loss: 0.0001962407404789701 2023-01-24 00:20:37.240473: step: 264/77, loss: 0.00013164323172532022 2023-01-24 00:20:38.524390: step: 268/77, loss: 0.015872662886977196 2023-01-24 00:20:39.832634: step: 272/77, loss: 0.004312410019338131 2023-01-24 00:20:41.152909: step: 276/77, loss: 0.000288039242150262 2023-01-24 00:20:42.463085: step: 280/77, loss: 0.0019080275669693947 2023-01-24 00:20:43.799090: step: 284/77, loss: 0.004176371265202761 2023-01-24 00:20:45.116797: step: 288/77, loss: 0.002108911285176873 2023-01-24 00:20:46.458324: step: 292/77, loss: 0.0026260290760546923 2023-01-24 00:20:47.821389: step: 296/77, loss: 0.00010112720337929204 2023-01-24 00:20:49.136260: step: 300/77, loss: 0.014249087311327457 2023-01-24 00:20:50.461927: step: 304/77, loss: 0.014059971086680889 2023-01-24 00:20:51.740996: step: 308/77, loss: 0.001644664560444653 2023-01-24 00:20:53.039525: step: 312/77, loss: 1.3291371487866854e-06 2023-01-24 00:20:54.336833: step: 316/77, loss: 0.03550218790769577 2023-01-24 00:20:55.682434: step: 320/77, loss: 0.00010493921581655741 2023-01-24 00:20:56.973812: step: 324/77, loss: 8.642347529530525e-05 2023-01-24 00:20:58.293438: step: 328/77, loss: 6.938765181985218e-06 
2023-01-24 00:20:59.553700: step: 332/77, loss: 1.317476107942639e-05 2023-01-24 00:21:00.873061: step: 336/77, loss: 0.05840739607810974 2023-01-24 00:21:02.192494: step: 340/77, loss: 0.0179511196911335 2023-01-24 00:21:03.506279: step: 344/77, loss: 0.00040283938869833946 2023-01-24 00:21:04.811622: step: 348/77, loss: 4.836374046135461e-06 2023-01-24 00:21:06.105252: step: 352/77, loss: 8.54506652103737e-06 2023-01-24 00:21:07.409915: step: 356/77, loss: 0.0029704140033572912 2023-01-24 00:21:08.727428: step: 360/77, loss: 3.103607696175459e-06 2023-01-24 00:21:10.006870: step: 364/77, loss: 0.0021579840686172247 2023-01-24 00:21:11.295688: step: 368/77, loss: 1.236791860037556e-07 2023-01-24 00:21:12.583243: step: 372/77, loss: 2.22967064473778e-05 2023-01-24 00:21:13.866951: step: 376/77, loss: 4.1783634515013546e-05 2023-01-24 00:21:15.234379: step: 380/77, loss: 7.955438377393875e-06 2023-01-24 00:21:16.527399: step: 384/77, loss: 0.04268309473991394 2023-01-24 00:21:17.790898: step: 388/77, loss: 0.005105479154735804 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Chinese: {'template': {'p': 0.9838709677419355, 'r': 0.4765625, 'f1': 0.6421052631578947}, 'slot': {'p': 0.5714285714285714, 'r': 0.014652014652014652, 'f1': 0.02857142857142857}, 'combined': 0.018345864661654134, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Korean: {'template': {'p': 0.9682539682539683, 'r': 0.4765625, 'f1': 0.6387434554973821}, 'slot': {'p': 0.5517241379310345, 'r': 0.014652014652014652, 'f1': 0.028545941123996433}, 'combined': 0.018233533073966305, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Russian: {'template': {'p': 0.9838709677419355, 'r': 0.4765625, 'f1': 0.6421052631578947}, 'slot': {'p': 0.5714285714285714, 'r': 0.014652014652014652, 'f1': 0.02857142857142857}, 'combined': 0.018345864661654134, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:23:02.177743: step: 4/77, loss: 0.00018349630408920348 2023-01-24 00:23:03.516946: step: 8/77, loss: 0.0008007582509890199 2023-01-24 00:23:04.793908: step: 12/77, loss: 0.00010514700261410326 2023-01-24 00:23:06.047571: step: 16/77, loss: 1.3514750207832549e-05 2023-01-24 00:23:07.342643: step: 20/77, loss: 4.604392529472534e-07 2023-01-24 00:23:08.670431: step: 24/77, loss: 0.001322218682616949 2023-01-24 00:23:09.971836: step: 28/77, loss: 3.4719457175924617e-07 2023-01-24 00:23:11.261139: step: 32/77, loss: 4.3251011447864585e-06 2023-01-24 00:23:12.535364: step: 36/77, loss: 0.02783278562128544 2023-01-24 00:23:13.868390: step: 40/77, loss: 0.0031175236217677593 2023-01-24 00:23:15.170784: step: 44/77, loss: 2.2962005914450856e-06 2023-01-24 00:23:16.471595: step: 48/77, loss: 0.011918090283870697 2023-01-24 00:23:17.811992: step: 52/77, loss: 0.00037522020284086466 2023-01-24 00:23:19.148814: step: 56/77, loss: 4.917350224786787e-07 2023-01-24 00:23:20.454378: step: 60/77, loss: 6.5202999394387e-05 2023-01-24 00:23:21.788034: step: 64/77, loss: 0.0005991798243485391 2023-01-24 00:23:23.080061: step: 68/77, loss: 9.144550858763978e-06 2023-01-24 00:23:24.425869: step: 72/77, loss: 0.00012539334420580417 2023-01-24 00:23:25.734765: step: 76/77, loss: 4.0023842302616686e-05 2023-01-24 00:23:27.023588: step: 80/77, loss: 7.099514732544776e-06 2023-01-24 00:23:28.323008: step: 84/77, loss: 0.0011885878629982471 2023-01-24 00:23:29.586351: step: 88/77, loss: 5.400500958785415e-05 2023-01-24 00:23:30.866357: step: 92/77, loss: 0.00023997691459953785 2023-01-24 00:23:32.169408: step: 96/77, loss: 0.00022390282538253814 2023-01-24 00:23:33.489532: step: 100/77, loss: 2.1500749426195398e-05 2023-01-24 00:23:34.813724: step: 104/77, loss: 0.0019608705770224333 2023-01-24 00:23:36.125932: step: 108/77, loss: 9.088594197237398e-06 2023-01-24 00:23:37.442701: step: 112/77, loss: 
6.883950845804065e-05 2023-01-24 00:23:38.762088: step: 116/77, loss: 0.017398864030838013 2023-01-24 00:23:40.054251: step: 120/77, loss: 0.00023948725720401853 2023-01-24 00:23:41.301665: step: 124/77, loss: 2.577894235855638e-07 2023-01-24 00:23:42.559387: step: 128/77, loss: 0.0002802223898470402 2023-01-24 00:23:43.879351: step: 132/77, loss: 0.00019985133258160204 2023-01-24 00:23:45.184443: step: 136/77, loss: 0.00013735542597714812 2023-01-24 00:23:46.481225: step: 140/77, loss: 0.0003241307276766747 2023-01-24 00:23:47.768053: step: 144/77, loss: 0.03896763175725937 2023-01-24 00:23:49.018941: step: 148/77, loss: 3.923150870832615e-06 2023-01-24 00:23:50.312685: step: 152/77, loss: 7.137002830859274e-05 2023-01-24 00:23:51.611701: step: 156/77, loss: 2.2531385184265673e-05 2023-01-24 00:23:52.898236: step: 160/77, loss: 0.00044442887883633375 2023-01-24 00:23:54.182091: step: 164/77, loss: 0.00019603893451858312 2023-01-24 00:23:55.465239: step: 168/77, loss: 0.013213117606937885 2023-01-24 00:23:56.771932: step: 172/77, loss: 0.0005997586413286626 2023-01-24 00:23:58.080756: step: 176/77, loss: 0.010040149092674255 2023-01-24 00:23:59.352435: step: 180/77, loss: 0.002243348164483905 2023-01-24 00:24:00.682703: step: 184/77, loss: 0.0003383896255400032 2023-01-24 00:24:01.957100: step: 188/77, loss: 0.00788046233355999 2023-01-24 00:24:03.239151: step: 192/77, loss: 2.4015640519792214e-05 2023-01-24 00:24:04.563226: step: 196/77, loss: 0.00046700090751983225 2023-01-24 00:24:05.868139: step: 200/77, loss: 3.809694680967368e-05 2023-01-24 00:24:07.147120: step: 204/77, loss: 5.260309626464732e-05 2023-01-24 00:24:08.400586: step: 208/77, loss: 1.1607809256020118e-06 2023-01-24 00:24:09.683962: step: 212/77, loss: 7.758028004900552e-06 2023-01-24 00:24:10.955247: step: 216/77, loss: 4.928519774693996e-05 2023-01-24 00:24:12.266343: step: 220/77, loss: 0.004321509972214699 2023-01-24 00:24:13.595348: step: 224/77, loss: 0.0005570852081291378 2023-01-24 00:24:14.895750: step: 228/77, loss: 0.0009109312086366117 2023-01-24 00:24:16.207861: step: 232/77, loss: 9.968431550078094e-05 2023-01-24 00:24:17.526924: step: 236/77, loss: 6.817249959567562e-05 2023-01-24 00:24:18.809069: step: 240/77, loss: 5.587870077761181e-07 2023-01-24 00:24:20.145155: step: 244/77, loss: 0.004906816873699427 2023-01-24 00:24:21.421315: step: 248/77, loss: 5.3022653446532786e-05 2023-01-24 00:24:22.720018: step: 252/77, loss: 0.075285904109478 2023-01-24 00:24:24.001117: step: 256/77, loss: 0.0018372680060565472 2023-01-24 00:24:25.365982: step: 260/77, loss: 7.74833097239025e-05 2023-01-24 00:24:26.695292: step: 264/77, loss: 0.00038714701076969504 2023-01-24 00:24:28.072867: step: 268/77, loss: 0.0013665605802088976 2023-01-24 00:24:29.400861: step: 272/77, loss: 0.003437809646129608 2023-01-24 00:24:30.750309: step: 276/77, loss: 0.008186004124581814 2023-01-24 00:24:32.061496: step: 280/77, loss: 0.0013224020367488265 2023-01-24 00:24:33.352621: step: 284/77, loss: 0.0003919194859918207 2023-01-24 00:24:34.685219: step: 288/77, loss: 0.002622979925945401 2023-01-24 00:24:35.986154: step: 292/77, loss: 1.1907065527339e-05 2023-01-24 00:24:37.250138: step: 296/77, loss: 1.3178715562389698e-05 2023-01-24 00:24:38.498977: step: 300/77, loss: 6.871306595712667e-06 2023-01-24 00:24:39.778805: step: 304/77, loss: 1.4133414879324846e-05 2023-01-24 00:24:41.078249: step: 308/77, loss: 2.1382591057772515e-06 2023-01-24 00:24:42.351104: step: 312/77, loss: 0.0003209102724213153 2023-01-24 00:24:43.655981: step: 
316/77, loss: 1.0722834304033313e-05 2023-01-24 00:24:45.006819: step: 320/77, loss: 0.04908447712659836 2023-01-24 00:24:46.316010: step: 324/77, loss: 0.016604645177721977 2023-01-24 00:24:47.587461: step: 328/77, loss: 0.01431315392255783 2023-01-24 00:24:48.956949: step: 332/77, loss: 3.631072104326449e-05 2023-01-24 00:24:50.258144: step: 336/77, loss: 5.555855022976175e-05 2023-01-24 00:24:51.524474: step: 340/77, loss: 0.004288515541702509 2023-01-24 00:24:52.841164: step: 344/77, loss: 9.268520079785958e-05 2023-01-24 00:24:54.149030: step: 348/77, loss: 3.883116733049974e-06 2023-01-24 00:24:55.489283: step: 352/77, loss: 1.0829372513398994e-05 2023-01-24 00:24:56.817113: step: 356/77, loss: 2.267902118546772e-06 2023-01-24 00:24:58.147399: step: 360/77, loss: 2.5786117475945503e-05 2023-01-24 00:24:59.430710: step: 364/77, loss: 4.100824662600644e-05 2023-01-24 00:25:00.734579: step: 368/77, loss: 2.7342935936758295e-05 2023-01-24 00:25:02.033683: step: 372/77, loss: 0.014448979869484901 2023-01-24 00:25:03.414132: step: 376/77, loss: 0.0063176341354846954 2023-01-24 00:25:04.735918: step: 380/77, loss: 0.2428482174873352 2023-01-24 00:25:06.074782: step: 384/77, loss: 1.3098239833198022e-05 2023-01-24 00:25:07.379756: step: 388/77, loss: 0.0023457477800548077 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9571428571428572, 'r': 0.5234375, 'f1': 0.6767676767676768}, 'slot': {'p': 0.5151515151515151, 'r': 0.015567765567765568, 'f1': 0.030222222222222227}, 'combined': 0.02045342312008979, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9852941176470589, 'r': 0.5234375, 'f1': 0.6836734693877551}, 'slot': {'p': 0.5862068965517241, 'r': 0.015567765567765568, 'f1': 0.030330062444246204}, 'combined': 0.020735859018005058, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9705882352941176, 'r': 0.515625, 'f1': 0.673469387755102}, 'slot': {'p': 0.5862068965517241, 'r': 0.015567765567765568, 'f1': 0.030330062444246204}, 'combined': 0.020426368584900503, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': 
{'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:26:50.771777: step: 4/77, loss: 0.0005963248549960554 2023-01-24 00:26:52.104186: step: 8/77, loss: 9.357670478493674e-07 2023-01-24 00:26:53.428681: step: 12/77, loss: 0.0045157684944570065 2023-01-24 00:26:54.710388: step: 16/77, loss: 4.600652573572006e-06 2023-01-24 00:26:55.995558: step: 20/77, loss: 7.227034188872494e-07 2023-01-24 00:26:57.271713: step: 24/77, loss: 1.8262548110214993e-05 2023-01-24 00:26:58.577201: step: 28/77, loss: 3.110050965915434e-05 2023-01-24 00:26:59.892214: step: 32/77, loss: 0.00761661259457469 2023-01-24 00:27:01.222682: step: 36/77, loss: 0.0003865267208311707 2023-01-24 00:27:02.523874: step: 40/77, loss: 7.569610170321539e-05 2023-01-24 00:27:03.816038: step: 44/77, loss: 6.36163167655468e-06 2023-01-24 00:27:05.124076: step: 48/77, loss: 1.2602094102476258e-05 2023-01-24 00:27:06.430277: step: 52/77, loss: 3.6071519389224704e-06 2023-01-24 00:27:07.761703: step: 56/77, loss: 6.752765784767689e-06 2023-01-24 00:27:09.039593: step: 60/77, loss: 0.0022517337929457426 2023-01-24 00:27:10.343248: step: 64/77, loss: 1.1220433862035861e-06 2023-01-24 00:27:11.669997: step: 68/77, loss: 0.00022226989676710218 2023-01-24 00:27:12.991821: step: 72/77, loss: 2.300361666129902e-05 2023-01-24 00:27:14.292934: step: 76/77, loss: 0.0028344355523586273 2023-01-24 00:27:15.551690: step: 80/77, loss: 5.2059611334698275e-05 2023-01-24 00:27:16.858420: step: 84/77, loss: 2.0697179934359156e-06 2023-01-24 00:27:18.192982: step: 88/77, loss: 3.142713103443384e-05 2023-01-24 00:27:19.504013: step: 92/77, loss: 3.7010131563874893e-06 2023-01-24 00:27:20.835984: step: 96/77, loss: 3.6026956422574585e-06 2023-01-24 00:27:22.148186: step: 
100/77, loss: 0.007741418667137623 2023-01-24 00:27:23.455377: step: 104/77, loss: 0.00021697793272323906 2023-01-24 00:27:24.794945: step: 108/77, loss: 0.0007774491677992046 2023-01-24 00:27:26.100061: step: 112/77, loss: 0.00023736809089314193 2023-01-24 00:27:27.383038: step: 116/77, loss: 7.488654773624148e-06 2023-01-24 00:27:28.718779: step: 120/77, loss: 0.00014339134213514626 2023-01-24 00:27:30.039203: step: 124/77, loss: 6.211158051883103e-06 2023-01-24 00:27:31.322805: step: 128/77, loss: 1.733893259370234e-05 2023-01-24 00:27:32.611825: step: 132/77, loss: 0.000151737971464172 2023-01-24 00:27:33.979296: step: 136/77, loss: 4.7963334509404376e-05 2023-01-24 00:27:35.280589: step: 140/77, loss: 0.005837517324835062 2023-01-24 00:27:36.576488: step: 144/77, loss: 0.016650885343551636 2023-01-24 00:27:37.893357: step: 148/77, loss: 0.042808350175619125 2023-01-24 00:27:39.198088: step: 152/77, loss: 2.871876858989708e-05 2023-01-24 00:27:40.515527: step: 156/77, loss: 9.238529514732363e-07 2023-01-24 00:27:41.857558: step: 160/77, loss: 9.037885320140049e-06 2023-01-24 00:27:43.218235: step: 164/77, loss: 0.0006664558313786983 2023-01-24 00:27:44.518217: step: 168/77, loss: 1.110103994506062e-06 2023-01-24 00:27:45.811477: step: 172/77, loss: 6.452086154240533e-07 2023-01-24 00:27:47.157996: step: 176/77, loss: 0.018400171771645546 2023-01-24 00:27:48.493423: step: 180/77, loss: 0.0945664495229721 2023-01-24 00:27:49.813751: step: 184/77, loss: 2.980229574234272e-08 2023-01-24 00:27:51.145438: step: 188/77, loss: 0.0011638787109404802 2023-01-24 00:27:52.498911: step: 192/77, loss: 0.017570655792951584 2023-01-24 00:27:53.840940: step: 196/77, loss: 2.538481385272462e-05 2023-01-24 00:27:55.136606: step: 200/77, loss: 2.4982262402772903e-05 2023-01-24 00:27:56.427658: step: 204/77, loss: 0.0003960702451877296 2023-01-24 00:27:57.757624: step: 208/77, loss: 6.644204859185265e-06 2023-01-24 00:27:59.075317: step: 212/77, loss: 0.00917132943868637 2023-01-24 00:28:00.355499: step: 216/77, loss: 0.018628239631652832 2023-01-24 00:28:01.672908: step: 220/77, loss: 0.016373340040445328 2023-01-24 00:28:02.958858: step: 224/77, loss: 8.663587323098909e-06 2023-01-24 00:28:04.288346: step: 228/77, loss: 0.00015038135461509228 2023-01-24 00:28:05.618976: step: 232/77, loss: 1.5028490452095866e-05 2023-01-24 00:28:06.972292: step: 236/77, loss: 6.243532197913737e-07 2023-01-24 00:28:08.290200: step: 240/77, loss: 1.8378557797404937e-05 2023-01-24 00:28:09.593379: step: 244/77, loss: 0.0005751802236773074 2023-01-24 00:28:10.944097: step: 248/77, loss: 8.018794324016199e-05 2023-01-24 00:28:12.204710: step: 252/77, loss: 5.468697850119497e-07 2023-01-24 00:28:13.522179: step: 256/77, loss: 3.7664656247216044e-06 2023-01-24 00:28:14.819221: step: 260/77, loss: 5.274927752907388e-05 2023-01-24 00:28:16.151764: step: 264/77, loss: 0.01224945392459631 2023-01-24 00:28:17.423328: step: 268/77, loss: 0.0007441304042004049 2023-01-24 00:28:18.718782: step: 272/77, loss: 8.493542509313556e-07 2023-01-24 00:28:20.016211: step: 276/77, loss: 1.2965811038156971e-05 2023-01-24 00:28:21.322274: step: 280/77, loss: 0.004589627962559462 2023-01-24 00:28:22.657479: step: 284/77, loss: 0.050548847764730453 2023-01-24 00:28:23.979546: step: 288/77, loss: 0.0001131748576881364 2023-01-24 00:28:25.311856: step: 292/77, loss: 0.0035917344503104687 2023-01-24 00:28:26.608968: step: 296/77, loss: 0.0008585210307501256 2023-01-24 00:28:27.949562: step: 300/77, loss: 0.009906351566314697 2023-01-24 00:28:29.288431: 
step: 304/77, loss: 0.0002823400718625635 2023-01-24 00:28:30.590319: step: 308/77, loss: 0.01213001273572445 2023-01-24 00:28:31.864731: step: 312/77, loss: 5.42655334356823e-06 2023-01-24 00:28:33.149554: step: 316/77, loss: 0.006417948752641678 2023-01-24 00:28:34.424723: step: 320/77, loss: 0.0020375254098325968 2023-01-24 00:28:35.717405: step: 324/77, loss: 0.014030457474291325 2023-01-24 00:28:37.028278: step: 328/77, loss: 1.0271183782606386e-05 2023-01-24 00:28:38.380203: step: 332/77, loss: 6.595694139832631e-05 2023-01-24 00:28:39.721401: step: 336/77, loss: 1.1324874549245578e-07 2023-01-24 00:28:41.053508: step: 340/77, loss: 9.327890779786685e-07 2023-01-24 00:28:42.367919: step: 344/77, loss: 1.0981971172441263e-06 2023-01-24 00:28:43.690567: step: 348/77, loss: 0.0008091052295640111 2023-01-24 00:28:45.023069: step: 352/77, loss: 0.028471339493989944 2023-01-24 00:28:46.379664: step: 356/77, loss: 0.0009936308488249779 2023-01-24 00:28:47.703779: step: 360/77, loss: 0.010523582808673382 2023-01-24 00:28:49.008510: step: 364/77, loss: 1.226344465976581e-06 2023-01-24 00:28:50.296186: step: 368/77, loss: 2.78790389529604e-06 2023-01-24 00:28:51.597803: step: 372/77, loss: 5.760260137321893e-06 2023-01-24 00:28:52.907985: step: 376/77, loss: 2.6933328626910225e-05 2023-01-24 00:28:54.170968: step: 380/77, loss: 0.014928015880286694 2023-01-24 00:28:55.525101: step: 384/77, loss: 5.522427454707213e-05 2023-01-24 00:28:56.896218: step: 388/77, loss: 0.0033049150370061398 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 26} Test Chinese: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.4473684210526316, 'r': 0.015567765567765568, 'f1': 0.030088495575221242}, 'combined': 0.01964962976340979, 'epoch': 26} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 26} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.5, 'f1': 0.649746192893401}, 'slot': {'p': 0.4358974358974359, 'r': 0.015567765567765568, 'f1': 0.030061892130857647}, 'combined': 0.019532599963196846, 'epoch': 26} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 26} Test Russian: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.4473684210526316, 'r': 0.015567765567765568, 'f1': 0.030088495575221242}, 'combined': 0.01964962976340979, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26} ================================================== Current best result: -------------------- 
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:30:40.513656: step: 4/77, loss: 0.0020336946472525597 2023-01-24 00:30:41.776324: step: 8/77, loss: 0.018356280401349068 2023-01-24 00:30:43.124177: step: 12/77, loss: 0.06053902581334114 2023-01-24 00:30:44.444477: step: 16/77, loss: 6.077674242987996e-06 2023-01-24 00:30:45.768454: step: 20/77, loss: 0.031503964215517044 2023-01-24 00:30:47.099491: step: 24/77, loss: 0.0019097666954621673 2023-01-24 00:30:48.347456: step: 28/77, loss: 4.221747803967446e-05 2023-01-24 00:30:49.669377: step: 32/77, loss: 0.0009349957690574229 2023-01-24 00:30:50.960837: step: 36/77, loss: 7.898625881352928e-06 2023-01-24 00:30:52.186035: step: 40/77, loss: 7.117487257346511e-05 2023-01-24 00:30:53.459960: step: 44/77, loss: 7.294975512195379e-05 2023-01-24 00:30:54.780610: step: 48/77, loss: 0.04563151299953461 2023-01-24 00:30:56.090835: step: 52/77, loss: 2.12905270018382e-05 2023-01-24 00:30:57.359547: step: 56/77, loss: 1.4865798220853321e-05 2023-01-24 00:30:58.670423: step: 60/77, loss: 3.891951109835645e-06 2023-01-24 00:31:00.006001: step: 64/77, loss: 3.099417540397553e-07 2023-01-24 00:31:01.271306: step: 68/77, loss: 2.789420250337571e-06 2023-01-24 00:31:02.570022: step: 72/77, loss: 0.0001048161429935135 2023-01-24 00:31:03.890010: step: 76/77, loss: 3.1000083254184574e-05 2023-01-24 00:31:05.226289: step: 80/77, loss: 0.010710970498621464 2023-01-24 00:31:06.558319: step: 84/77, loss: 7.88254112649156e-07 2023-01-24 
00:31:07.905286: step: 88/77, loss: 0.0006356869707815349 2023-01-24 00:31:09.251075: step: 92/77, loss: 0.0023964433930814266 2023-01-24 00:31:10.465125: step: 96/77, loss: 0.018310893326997757 2023-01-24 00:31:11.780430: step: 100/77, loss: 6.962125917198136e-05 2023-01-24 00:31:13.105273: step: 104/77, loss: 0.0013482404174283147 2023-01-24 00:31:14.410915: step: 108/77, loss: 1.6118830899358727e-05 2023-01-24 00:31:15.708734: step: 112/77, loss: 0.004443638492375612 2023-01-24 00:31:16.989207: step: 116/77, loss: 6.946529902052134e-05 2023-01-24 00:31:18.261371: step: 120/77, loss: 5.4241332691162825e-05 2023-01-24 00:31:19.584332: step: 124/77, loss: 0.00019961423822678626 2023-01-24 00:31:20.846085: step: 128/77, loss: 5.965904620097717e-06 2023-01-24 00:31:22.103306: step: 132/77, loss: 7.897602216644373e-08 2023-01-24 00:31:23.404055: step: 136/77, loss: 1.139921550930012e-06 2023-01-24 00:31:24.690872: step: 140/77, loss: 0.0010861429618671536 2023-01-24 00:31:25.971031: step: 144/77, loss: 5.543186034628889e-07 2023-01-24 00:31:27.307467: step: 148/77, loss: 0.012882563285529613 2023-01-24 00:31:28.606886: step: 152/77, loss: 1.2950848031323403e-05 2023-01-24 00:31:29.925360: step: 156/77, loss: 1.0281780049581357e-07 2023-01-24 00:31:31.190834: step: 160/77, loss: 0.006291708908975124 2023-01-24 00:31:32.495762: step: 164/77, loss: 4.768329517901293e-07 2023-01-24 00:31:33.781974: step: 168/77, loss: 5.200457735554664e-07 2023-01-24 00:31:35.087742: step: 172/77, loss: 3.1454194413527148e-06 2023-01-24 00:31:36.399240: step: 176/77, loss: 0.08373827487230301 2023-01-24 00:31:37.698590: step: 180/77, loss: 2.7380117899156176e-05 2023-01-24 00:31:38.973660: step: 184/77, loss: 0.00020311328989919275 2023-01-24 00:31:40.237008: step: 188/77, loss: 8.150796020345297e-07 2023-01-24 00:31:41.566218: step: 192/77, loss: 1.0640987966326065e-05 2023-01-24 00:31:42.890224: step: 196/77, loss: 4.972180704498896e-06 2023-01-24 00:31:44.234542: step: 200/77, loss: 7.004695362411439e-06 2023-01-24 00:31:45.555645: step: 204/77, loss: 4.842814860239741e-07 2023-01-24 00:31:46.854825: step: 208/77, loss: 6.608444527955726e-05 2023-01-24 00:31:48.109365: step: 212/77, loss: 0.0008418294601142406 2023-01-24 00:31:49.455264: step: 216/77, loss: 0.00019111075380351394 2023-01-24 00:31:50.744266: step: 220/77, loss: 0.0033331357408314943 2023-01-24 00:31:52.016766: step: 224/77, loss: 5.2410439820960164e-05 2023-01-24 00:31:53.325058: step: 228/77, loss: 4.4703465817974575e-08 2023-01-24 00:31:54.638306: step: 232/77, loss: 2.5035567887243815e-05 2023-01-24 00:31:55.940369: step: 236/77, loss: 0.0004050828283652663 2023-01-24 00:31:57.253113: step: 240/77, loss: 5.2385298658919055e-06 2023-01-24 00:31:58.557531: step: 244/77, loss: 3.755078807898826e-07 2023-01-24 00:31:59.812137: step: 248/77, loss: 0.002330003073439002 2023-01-24 00:32:01.105343: step: 252/77, loss: 1.5153957519942196e-06 2023-01-24 00:32:02.474008: step: 256/77, loss: 0.0001904086529975757 2023-01-24 00:32:03.825039: step: 260/77, loss: 0.00027098722057417035 2023-01-24 00:32:05.176178: step: 264/77, loss: 0.00016721387510187924 2023-01-24 00:32:06.493925: step: 268/77, loss: 9.611950372345746e-05 2023-01-24 00:32:07.813642: step: 272/77, loss: 4.1280334698967636e-05 2023-01-24 00:32:09.150071: step: 276/77, loss: 0.060717128217220306 2023-01-24 00:32:10.477313: step: 280/77, loss: 3.148373934891424e-06 2023-01-24 00:32:11.812461: step: 284/77, loss: 0.02487768419086933 2023-01-24 00:32:13.154094: step: 288/77, loss: 
0.0035835355520248413 2023-01-24 00:32:14.449950: step: 292/77, loss: 6.977240991545841e-05 2023-01-24 00:32:15.710032: step: 296/77, loss: 0.01346883550286293 2023-01-24 00:32:17.037188: step: 300/77, loss: 5.304777914716396e-07 2023-01-24 00:32:18.355012: step: 304/77, loss: 0.0012750386958941817 2023-01-24 00:32:19.644322: step: 308/77, loss: 8.595256076660007e-05 2023-01-24 00:32:20.976856: step: 312/77, loss: 9.856934775598347e-05 2023-01-24 00:32:22.246724: step: 316/77, loss: 5.306802449922543e-06 2023-01-24 00:32:23.574730: step: 320/77, loss: 1.4739594007551204e-05 2023-01-24 00:32:24.832243: step: 324/77, loss: 1.0907513114943868e-06 2023-01-24 00:32:26.157009: step: 328/77, loss: 0.008079759776592255 2023-01-24 00:32:27.421783: step: 332/77, loss: 0.0067827519960701466 2023-01-24 00:32:28.737231: step: 336/77, loss: 6.75462870276533e-05 2023-01-24 00:32:30.043905: step: 340/77, loss: 4.3611848923319485e-06 2023-01-24 00:32:31.338076: step: 344/77, loss: 1.0728822985583975e-07 2023-01-24 00:32:32.634834: step: 348/77, loss: 1.3977514754515141e-05 2023-01-24 00:32:33.978489: step: 352/77, loss: 1.1816214282589499e-06 2023-01-24 00:32:35.255083: step: 356/77, loss: 0.00027072866214439273 2023-01-24 00:32:36.602074: step: 360/77, loss: 2.339343836865737e-06 2023-01-24 00:32:37.921049: step: 364/77, loss: 5.420063644123729e-06 2023-01-24 00:32:39.228581: step: 368/77, loss: 1.1458506605777075e-06 2023-01-24 00:32:40.490460: step: 372/77, loss: 0.0017846859991550446 2023-01-24 00:32:41.763978: step: 376/77, loss: 7.981000635481905e-06 2023-01-24 00:32:43.068639: step: 380/77, loss: 6.450262389989803e-06 2023-01-24 00:32:44.352149: step: 384/77, loss: 5.114632585900836e-06 2023-01-24 00:32:45.645993: step: 388/77, loss: 0.011223318055272102 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Chinese: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5625, 'r': 0.016483516483516484, 'f1': 0.032028469750889674}, 'combined': 0.02188083577040978, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Korean: {'template': {'p': 0.92, 'r': 0.5390625, 'f1': 0.6798029556650247}, 'slot': {'p': 0.5142857142857142, 'r': 0.016483516483516484, 'f1': 0.03194321206743567}, 'combined': 0.02171508997687745, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Russian: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5625, 'r': 0.016483516483516484, 'f1': 0.032028469750889674}, 'combined': 0.02188083577040978, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:34:29.241441: step: 4/77, loss: 1.6569199488003505e-06 2023-01-24 00:34:30.557348: step: 8/77, loss: 0.00440253596752882 2023-01-24 00:34:31.847953: step: 12/77, loss: 0.030564934015274048 2023-01-24 00:34:33.121881: step: 16/77, loss: 0.04476356878876686 2023-01-24 00:34:34.426428: step: 20/77, loss: 1.5125128811632749e-05 2023-01-24 00:34:35.729435: step: 24/77, loss: 0.0059463209472596645 2023-01-24 00:34:37.014679: step: 28/77, loss: 0.0001350680395262316 2023-01-24 00:34:38.316395: step: 32/77, loss: 5.26008307133452e-07 2023-01-24 00:34:39.713273: step: 36/77, loss: 1.583964490237122e-06 2023-01-24 00:34:40.988444: step: 40/77, loss: 0.001235371339134872 2023-01-24 00:34:42.286144: step: 44/77, loss: 4.629687828128226e-05 2023-01-24 00:34:43.595779: step: 48/77, loss: 9.21465743886074e-06 2023-01-24 00:34:44.878140: step: 52/77, loss: 6.062446118448861e-06 2023-01-24 00:34:46.215122: step: 56/77, loss: 0.004732063040137291 2023-01-24 00:34:47.562832: step: 60/77, loss: 1.5944220876917825e-07 2023-01-24 00:34:48.866735: step: 64/77, loss: 0.0011835889890789986 2023-01-24 00:34:50.193498: step: 68/77, loss: 0.002815812826156616 2023-01-24 00:34:51.505969: step: 72/77, loss: 9.246059198630974e-05 2023-01-24 
00:34:52.760694: step: 76/77, loss: 0.006792837753891945 2023-01-24 00:34:54.084559: step: 80/77, loss: 4.330431329435669e-05 2023-01-24 00:34:55.419014: step: 84/77, loss: 0.17696473002433777 2023-01-24 00:34:56.735725: step: 88/77, loss: 0.0020602114964276552 2023-01-24 00:34:58.044922: step: 92/77, loss: 8.787129445408937e-06 2023-01-24 00:34:59.341183: step: 96/77, loss: 0.13107673823833466 2023-01-24 00:35:00.649859: step: 100/77, loss: 0.0006606405950151384 2023-01-24 00:35:01.974838: step: 104/77, loss: 0.0017154912929981947 2023-01-24 00:35:03.321158: step: 108/77, loss: 1.216352939081844e-05 2023-01-24 00:35:04.689830: step: 112/77, loss: 0.0022697513923048973 2023-01-24 00:35:06.012754: step: 116/77, loss: 3.87430070247774e-08 2023-01-24 00:35:07.305493: step: 120/77, loss: 0.002314480487257242 2023-01-24 00:35:08.632276: step: 124/77, loss: 9.292403774452396e-06 2023-01-24 00:35:09.898937: step: 128/77, loss: 5.170510848984122e-05 2023-01-24 00:35:11.196773: step: 132/77, loss: 7.686324534006417e-05 2023-01-24 00:35:12.479059: step: 136/77, loss: 7.330068456212757e-06 2023-01-24 00:35:13.821603: step: 140/77, loss: 3.695250143209705e-06 2023-01-24 00:35:15.092389: step: 144/77, loss: 0.0001743622706271708 2023-01-24 00:35:16.454495: step: 148/77, loss: 1.0203694728261326e-05 2023-01-24 00:35:17.797277: step: 152/77, loss: 0.009616820141673088 2023-01-24 00:35:19.116888: step: 156/77, loss: 0.0005464103305712342 2023-01-24 00:35:20.427937: step: 160/77, loss: 1.5138200978981331e-05 2023-01-24 00:35:21.715986: step: 164/77, loss: 1.4187040505930781e-05 2023-01-24 00:35:23.043156: step: 168/77, loss: 0.0017845199909061193 2023-01-24 00:35:24.356621: step: 172/77, loss: 0.0008456232608295977 2023-01-24 00:35:25.650366: step: 176/77, loss: 4.213723514112644e-06 2023-01-24 00:35:26.985705: step: 180/77, loss: 2.3990813247110054e-07 2023-01-24 00:35:28.343406: step: 184/77, loss: 0.009962303563952446 2023-01-24 00:35:29.711531: step: 188/77, loss: 0.001620661118067801 2023-01-24 00:35:31.023505: step: 192/77, loss: 0.0022963618393987417 2023-01-24 00:35:32.329082: step: 196/77, loss: 0.008852764964103699 2023-01-24 00:35:33.640166: step: 200/77, loss: 1.7881298219890596e-07 2023-01-24 00:35:34.934292: step: 204/77, loss: 0.0029082591645419598 2023-01-24 00:35:36.268423: step: 208/77, loss: 0.0013253169599920511 2023-01-24 00:35:37.547909: step: 212/77, loss: 1.2865475582657382e-05 2023-01-24 00:35:38.865075: step: 216/77, loss: 7.049329269648297e-06 2023-01-24 00:35:40.169878: step: 220/77, loss: 0.004812009632587433 2023-01-24 00:35:41.484537: step: 224/77, loss: 1.2530237654573284e-05 2023-01-24 00:35:42.736220: step: 228/77, loss: 1.6643899698465248e-06 2023-01-24 00:35:44.040155: step: 232/77, loss: 0.06316334009170532 2023-01-24 00:35:45.329495: step: 236/77, loss: 0.0038331036921590567 2023-01-24 00:35:46.639707: step: 240/77, loss: 0.0005883581470698118 2023-01-24 00:35:47.995002: step: 244/77, loss: 1.542809695820324e-05 2023-01-24 00:35:49.315086: step: 248/77, loss: 1.5246181646944024e-05 2023-01-24 00:35:50.573922: step: 252/77, loss: 0.0017049933085218072 2023-01-24 00:35:51.867637: step: 256/77, loss: 0.033161669969558716 2023-01-24 00:35:53.209537: step: 260/77, loss: 0.00030278004123829305 2023-01-24 00:35:54.523463: step: 264/77, loss: 0.004430832806974649 2023-01-24 00:35:55.821491: step: 268/77, loss: 6.538610705320025e-06 2023-01-24 00:35:57.143577: step: 272/77, loss: 2.954918636532966e-05 2023-01-24 00:35:58.477204: step: 276/77, loss: 0.020510945469141006 
2023-01-24 00:35:59.829173: step: 280/77, loss: 0.00015496351988986135 2023-01-24 00:36:01.101543: step: 284/77, loss: 0.00019689204054884613 2023-01-24 00:36:02.407803: step: 288/77, loss: 0.02547045610845089 2023-01-24 00:36:03.710440: step: 292/77, loss: 1.9126133338431828e-05 2023-01-24 00:36:05.044318: step: 296/77, loss: 3.993159225501586e-06 2023-01-24 00:36:06.332756: step: 300/77, loss: 4.5150034111429704e-07 2023-01-24 00:36:07.605453: step: 304/77, loss: 0.09149489551782608 2023-01-24 00:36:08.922501: step: 308/77, loss: 7.498901140934322e-06 2023-01-24 00:36:10.261129: step: 312/77, loss: 0.05403384566307068 2023-01-24 00:36:11.574593: step: 316/77, loss: 1.805801184673328e-05 2023-01-24 00:36:12.901084: step: 320/77, loss: 0.04299530014395714 2023-01-24 00:36:14.201743: step: 324/77, loss: 1.7881388814089405e-08 2023-01-24 00:36:15.495684: step: 328/77, loss: 8.31848865345819e-06 2023-01-24 00:36:16.839251: step: 332/77, loss: 0.022570772096514702 2023-01-24 00:36:18.152084: step: 336/77, loss: 0.005088389851152897 2023-01-24 00:36:19.447116: step: 340/77, loss: 0.0036787528079003096 2023-01-24 00:36:20.785797: step: 344/77, loss: 3.063491931243334e-06 2023-01-24 00:36:22.075238: step: 348/77, loss: 0.0003421948349568993 2023-01-24 00:36:23.408746: step: 352/77, loss: 0.0001434506702935323 2023-01-24 00:36:24.759080: step: 356/77, loss: 7.787423965055496e-05 2023-01-24 00:36:26.020634: step: 360/77, loss: 2.6348821847932413e-05 2023-01-24 00:36:27.325992: step: 364/77, loss: 1.9668830191221787e-06 2023-01-24 00:36:28.606961: step: 368/77, loss: 2.5388269932591356e-05 2023-01-24 00:36:29.921080: step: 372/77, loss: 4.4703469370688254e-08 2023-01-24 00:36:31.245960: step: 376/77, loss: 0.03512633219361305 2023-01-24 00:36:32.594661: step: 380/77, loss: 0.012198736891150475 2023-01-24 00:36:33.877443: step: 384/77, loss: 0.003197713755071163 2023-01-24 00:36:35.245394: step: 388/77, loss: 0.0030318221542984247 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Chinese: {'template': {'p': 0.9850746268656716, 'r': 0.515625, 'f1': 0.676923076923077}, 'slot': {'p': 0.5161290322580645, 'r': 0.014652014652014652, 'f1': 0.028495102404274268}, 'combined': 0.019288992396739507, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Korean: {'template': {'p': 0.9846153846153847, 'r': 0.5, 'f1': 0.6632124352331606}, 'slot': {'p': 0.5161290322580645, 'r': 0.014652014652014652, 'f1': 0.028495102404274268}, 'combined': 0.018898306257757028, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Russian: {'template': {'p': 0.9705882352941176, 'r': 0.515625, 'f1': 0.673469387755102}, 'slot': {'p': 0.5, 'r': 0.013736263736263736, 'f1': 0.026737967914438505}, 'combined': 0.018007202881152463, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: 
{'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:38:19.399742: step: 4/77, loss: 9.575464900990482e-06 2023-01-24 00:38:20.737644: step: 8/77, loss: 4.4703465817974575e-08 2023-01-24 00:38:22.071539: step: 12/77, loss: 2.4912869776017033e-06 2023-01-24 00:38:23.339562: step: 16/77, loss: 0.00020589017367456108 2023-01-24 00:38:24.653561: step: 20/77, loss: 0.010029882192611694 2023-01-24 00:38:25.912627: step: 24/77, loss: 0.004283885005861521 2023-01-24 00:38:27.149838: step: 28/77, loss: 2.518277426588611e-07 2023-01-24 00:38:28.459759: step: 32/77, loss: 1.4901119982368982e-07 2023-01-24 00:38:29.763908: step: 36/77, loss: 0.00012935479753650725 2023-01-24 00:38:31.048664: step: 40/77, loss: 4.192922915535746e-06 2023-01-24 00:38:32.375817: step: 44/77, loss: 0.008619138039648533 2023-01-24 00:38:33.682538: step: 48/77, loss: 9.372622571390821e-07 2023-01-24 00:38:34.978954: step: 52/77, loss: 2.7341654913470848e-06 2023-01-24 00:38:36.304344: step: 56/77, loss: 1.3634332844958408e-06 2023-01-24 00:38:37.619076: step: 60/77, loss: 1.0352901881560683e-05 2023-01-24 00:38:38.906053: step: 64/77, loss: 5.2205054089426994e-05 
2023-01-24 00:38:40.189717: step: 68/77, loss: 7.682454452151433e-05 2023-01-24 00:38:41.522958: step: 72/77, loss: 0.0089035015553236 2023-01-24 00:38:42.797947: step: 76/77, loss: 4.730820364784449e-06 2023-01-24 00:38:44.079229: step: 80/77, loss: 0.00038829841651022434 2023-01-24 00:38:45.363023: step: 84/77, loss: 0.007953963242471218 2023-01-24 00:38:46.675738: step: 88/77, loss: 0.009263883344829082 2023-01-24 00:38:48.003449: step: 92/77, loss: 0.0070535242557525635 2023-01-24 00:38:49.321672: step: 96/77, loss: 4.276575782569125e-05 2023-01-24 00:38:50.644852: step: 100/77, loss: 9.437465632800013e-05 2023-01-24 00:38:51.946911: step: 104/77, loss: 1.0943134839180857e-05 2023-01-24 00:38:53.279690: step: 108/77, loss: 0.046666789799928665 2023-01-24 00:38:54.607379: step: 112/77, loss: 6.960002065170556e-05 2023-01-24 00:38:55.966898: step: 116/77, loss: 1.4876502973493189e-05 2023-01-24 00:38:57.278242: step: 120/77, loss: 2.0557276002364233e-05 2023-01-24 00:38:58.593526: step: 124/77, loss: 2.9650689157278975e-06 2023-01-24 00:38:59.957259: step: 128/77, loss: 0.01160411350429058 2023-01-24 00:39:01.277401: step: 132/77, loss: 1.8775409671434318e-07 2023-01-24 00:39:02.540863: step: 136/77, loss: 0.002117191907018423 2023-01-24 00:39:03.842589: step: 140/77, loss: 8.289856850751676e-06 2023-01-24 00:39:05.140766: step: 144/77, loss: 3.176602149324026e-06 2023-01-24 00:39:06.447155: step: 148/77, loss: 1.4546119928127155e-05 2023-01-24 00:39:07.770213: step: 152/77, loss: 6.943806170056632e-07 2023-01-24 00:39:09.075586: step: 156/77, loss: 1.3262007314551738e-07 2023-01-24 00:39:10.386310: step: 160/77, loss: 4.4194708607392386e-05 2023-01-24 00:39:11.691976: step: 164/77, loss: 0.0023931171745061874 2023-01-24 00:39:13.033320: step: 168/77, loss: 1.624219407858618e-07 2023-01-24 00:39:14.385606: step: 172/77, loss: 3.7634523323504254e-05 2023-01-24 00:39:15.691283: step: 176/77, loss: 6.586186600543442e-07 2023-01-24 00:39:17.015420: step: 180/77, loss: 2.575074540800415e-05 2023-01-24 00:39:18.304252: step: 184/77, loss: 2.8311990263318876e-07 2023-01-24 00:39:19.589306: step: 188/77, loss: 8.425443957094103e-05 2023-01-24 00:39:20.912464: step: 192/77, loss: 6.786584708606824e-05 2023-01-24 00:39:22.232083: step: 196/77, loss: 0.0005444654962047935 2023-01-24 00:39:23.548029: step: 200/77, loss: 1.1212527169845998e-05 2023-01-24 00:39:24.856216: step: 204/77, loss: 0.06021525710821152 2023-01-24 00:39:26.161853: step: 208/77, loss: 0.0005923082935623825 2023-01-24 00:39:27.478801: step: 212/77, loss: 3.2341471523977816e-05 2023-01-24 00:39:28.791838: step: 216/77, loss: 0.023997997865080833 2023-01-24 00:39:30.098977: step: 220/77, loss: 2.1134215785423294e-05 2023-01-24 00:39:31.408657: step: 224/77, loss: 1.3411697182164062e-05 2023-01-24 00:39:32.721451: step: 228/77, loss: 0.003213142976164818 2023-01-24 00:39:34.046758: step: 232/77, loss: 8.688562957104295e-05 2023-01-24 00:39:35.372584: step: 236/77, loss: 0.038733284920454025 2023-01-24 00:39:36.708978: step: 240/77, loss: 0.0021264494862407446 2023-01-24 00:39:38.061937: step: 244/77, loss: 0.0007159699453040957 2023-01-24 00:39:39.319510: step: 248/77, loss: 0.02838277630507946 2023-01-24 00:39:40.664884: step: 252/77, loss: 4.948479909216985e-05 2023-01-24 00:39:41.967384: step: 256/77, loss: 0.004647047724574804 2023-01-24 00:39:43.332397: step: 260/77, loss: 0.0006775567308068275 2023-01-24 00:39:44.657414: step: 264/77, loss: 5.960457727383073e-08 2023-01-24 00:39:45.960340: step: 268/77, loss: 
0.0006495536654256284 2023-01-24 00:39:47.256418: step: 272/77, loss: 0.00016570983279962093 2023-01-24 00:39:48.564266: step: 276/77, loss: 0.00035573996137827635 2023-01-24 00:39:49.827668: step: 280/77, loss: 3.2136176741914824e-05 2023-01-24 00:39:51.129913: step: 284/77, loss: 3.5567613849707413e-06 2023-01-24 00:39:52.431200: step: 288/77, loss: 3.010017337601312e-07 2023-01-24 00:39:53.745690: step: 292/77, loss: 0.008235101588070393 2023-01-24 00:39:55.060624: step: 296/77, loss: 0.0002233803243143484 2023-01-24 00:39:56.359340: step: 300/77, loss: 0.08005578815937042 2023-01-24 00:39:57.660915: step: 304/77, loss: 0.004035983234643936 2023-01-24 00:39:58.935812: step: 308/77, loss: 9.74525846686447e-07 2023-01-24 00:40:00.274747: step: 312/77, loss: 3.2695716072339565e-05 2023-01-24 00:40:01.609812: step: 316/77, loss: 7.405756718981138e-07 2023-01-24 00:40:02.910343: step: 320/77, loss: 3.3630524285399588e-06 2023-01-24 00:40:04.202251: step: 324/77, loss: 3.0411854368139757e-06 2023-01-24 00:40:05.575832: step: 328/77, loss: 2.631331653901725e-06 2023-01-24 00:40:06.893264: step: 332/77, loss: 0.0001946384581970051 2023-01-24 00:40:08.213184: step: 336/77, loss: 0.005614703521132469 2023-01-24 00:40:09.511099: step: 340/77, loss: 2.889034249164979e-06 2023-01-24 00:40:10.838190: step: 344/77, loss: 0.00015798686945345253 2023-01-24 00:40:12.115207: step: 348/77, loss: 0.004673933610320091 2023-01-24 00:40:13.467433: step: 352/77, loss: 8.429842637269758e-06 2023-01-24 00:40:14.754028: step: 356/77, loss: 0.00011223576439078897 2023-01-24 00:40:16.024966: step: 360/77, loss: 5.240143582341261e-05 2023-01-24 00:40:17.357410: step: 364/77, loss: 2.807161035889294e-05 2023-01-24 00:40:18.681222: step: 368/77, loss: 0.003233521245419979 2023-01-24 00:40:20.004137: step: 372/77, loss: 0.08775091171264648 2023-01-24 00:40:21.310939: step: 376/77, loss: 0.0008770658751018345 2023-01-24 00:40:22.586337: step: 380/77, loss: 1.5991987311281264e-05 2023-01-24 00:40:23.861533: step: 384/77, loss: 0.013519562780857086 2023-01-24 00:40:25.162535: step: 388/77, loss: 5.1822891691699624e-05 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29} Test Chinese: {'template': {'p': 0.9565217391304348, 'r': 0.515625, 'f1': 0.6700507614213198}, 'slot': {'p': 0.6206896551724138, 'r': 0.016483516483516484, 'f1': 0.032114183764495985}, 'combined': 0.021518133283824722, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29} Test Korean: {'template': {'p': 0.9428571428571428, 'r': 0.515625, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.016483516483516484, 'f1': 0.032028469750889674}, 'combined': 0.02135231316725978, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29} Test Russian: {'template': {'p': 0.9565217391304348, 'r': 0.515625, 'f1': 0.6700507614213198}, 'slot': {'p': 0.6, 'r': 0.016483516483516484, 'f1': 0.0320855614973262}, 'combined': 0.021498954911914003, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4}
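
Note on the scores reported above: the 'combined' figure for each language split is the product of the template F1 and the slot F1 printed next to it, and each F1 is the usual harmonic mean of its precision and recall; the 'Current best result' block keeps the epoch with the highest Dev combined score seen so far (epoch 4 here, which is never displaced even when later epochs tie its 0.0518 Dev score). The short Python sketch below reproduces that bookkeeping from the numbers in this log; it is an illustration only, not the repository's actual evaluation code, and the helper names (f1, combined_score) are hypothetical.

# Minimal sketch of the metric arithmetic observed in this log (assumed, not the actual eval code).
def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall; defined as 0 when both are 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    # 'combined' in the log equals template F1 multiplied by slot F1.
    return template["f1"] * slot["f1"]

# Example reproducing the epoch-24 Dev Chinese entry above:
template = {"p": 1.0, "r": 0.5833333333333334}
slot = {"p": 0.5, "r": 0.03780718336483932}
template["f1"] = f1(template["p"], template["r"])   # ~= 0.7368421052631579
slot["f1"] = f1(slot["p"], slot["r"])               # ~= 0.07029876977152899
print(combined_score(template, slot))               # ~= 0.05179909351586346

# 'Current best result' appears to update only on a strict improvement of the Dev
# combined score, which is why epoch 4 remains the best even when epoch 24 ties it.
best = {"combined": 0.0, "epoch": -1}
for epoch, dev_combined in [(4, 0.05179909351586346), (24, 0.05179909351586346)]:
    if dev_combined > best["combined"]:
        best = {"combined": dev_combined, "epoch": epoch}
print(best)  # {'combined': 0.05179909351586346, 'epoch': 4}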