Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trans_rep.weight: torch.Size([1024, 2048]) >>> trans_rep.bias: torch.Size([1024]) >>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768]) >>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Etiplate.layers.0.weight: 
torch.Size([450, 768]) >>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6]) >>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> 
completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([3]) >>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> 
overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) n_trainable_params: 582182328, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:27:53.808309: step: 4/77, loss: 1.0097957849502563 2023-01-23 23:27:55.300831: step: 8/77, loss: 0.9876657128334045 2023-01-23 23:27:56.703152: step: 12/77, loss: 1.006246566772461 2023-01-23 23:27:58.107775: step: 16/77, loss: 0.9947892427444458 2023-01-23 23:27:59.533228: step: 20/77, loss: 0.9852086305618286 2023-01-23 23:28:01.019962: step: 24/77, loss: 0.9930259585380554 2023-01-23 23:28:02.451536: step: 28/77, loss: 0.9912077784538269 2023-01-23 23:28:03.851401: step: 32/77, loss: 0.9729397296905518 2023-01-23 23:28:05.233529: step: 36/77, loss: 0.9823734164237976 2023-01-23 23:28:06.714154: step: 40/77, loss: 0.9720751643180847 2023-01-23 23:28:08.090173: step: 44/77, loss: 0.962503969669342 2023-01-23 23:28:09.538308: step: 48/77, loss: 0.9527497291564941 2023-01-23 23:28:10.936991: step: 52/77, loss: 0.9514380693435669 2023-01-23 23:28:12.389096: step: 56/77, loss: 0.9413738250732422 2023-01-23 23:28:13.833258: step: 60/77, loss: 0.925054669380188 2023-01-23 23:28:15.170461: step: 64/77, loss: 0.9168636798858643 2023-01-23 23:28:16.597575: step: 68/77, loss: 0.9203753471374512 2023-01-23 23:28:18.014992: step: 72/77, loss: 0.8916241526603699 2023-01-23 23:28:19.453753: step: 76/77, loss: 0.8798417448997498 2023-01-23 23:28:20.887334: step: 80/77, loss: 0.8681149482727051 2023-01-23 23:28:22.411811: step: 84/77, loss: 0.8756495714187622 2023-01-23 23:28:23.891847: step: 88/77, loss: 0.8527133464813232 2023-01-23 23:28:25.338683: step: 92/77, loss: 0.8297539949417114 2023-01-23 23:28:26.768512: step: 96/77, loss: 0.8228861093521118 2023-01-23 23:28:28.196124: step: 100/77, loss: 0.8132826089859009 2023-01-23 23:28:29.629096: step: 104/77, loss: 0.7874824404716492 2023-01-23 23:28:31.094021: step: 108/77, loss: 0.7657060623168945 2023-01-23 23:28:32.500366: step: 112/77, loss: 0.745045006275177 2023-01-23 23:28:33.999986: step: 116/77, loss: 0.7560614943504333 2023-01-23 23:28:35.383906: step: 120/77, loss: 0.7308070063591003 2023-01-23 23:28:36.847781: step: 124/77, loss: 0.7113009691238403 2023-01-23 23:28:38.225688: step: 128/77, loss: 0.6648023724555969 2023-01-23 23:28:39.671916: step: 132/77, loss: 0.6685682535171509 2023-01-23 23:28:41.057115: step: 136/77, loss: 0.6537583470344543 2023-01-23 23:28:42.510006: step: 140/77, loss: 0.6488573551177979 2023-01-23 23:28:43.926216: step: 144/77, loss: 0.6188699007034302 2023-01-23 23:28:45.389971: step: 148/77, loss: 0.545028567314148 2023-01-23 23:28:46.814037: step: 152/77, loss: 0.5712795257568359 2023-01-23 23:28:48.252759: step: 156/77, loss: 0.5583752393722534 2023-01-23 23:28:49.689474: step: 160/77, loss: 0.5152299404144287 2023-01-23 23:28:51.181505: step: 164/77, loss: 0.48409515619277954 2023-01-23 23:28:52.627150: step: 168/77, loss: 0.5172240138053894 2023-01-23 23:28:54.043882: step: 172/77, loss: 0.4879964590072632 2023-01-23 23:28:55.533488: step: 176/77, loss: 0.4531494379043579 2023-01-23 23:28:56.946752: step: 180/77, loss: 0.4241258203983307 2023-01-23 23:28:58.418336: step: 184/77, loss: 0.41498085856437683 2023-01-23 23:28:59.930286: step: 188/77, loss: 0.3981754183769226 2023-01-23 23:29:01.416287: step: 192/77, loss: 0.3640326261520386 2023-01-23 23:29:02.857024: step: 196/77, loss: 0.30751991271972656 2023-01-23 23:29:04.359713: step: 200/77, loss: 0.3940797448158264 2023-01-23 23:29:05.791350: step: 204/77, loss: 0.2844552993774414 2023-01-23 23:29:07.254501: step: 
208/77, loss: 0.30004727840423584 2023-01-23 23:29:08.649417: step: 212/77, loss: 0.219954252243042 2023-01-23 23:29:09.985610: step: 216/77, loss: 0.38010406494140625 2023-01-23 23:29:11.408032: step: 220/77, loss: 0.2126280814409256 2023-01-23 23:29:12.888896: step: 224/77, loss: 0.18236008286476135 2023-01-23 23:29:14.346411: step: 228/77, loss: 0.16203764081001282 2023-01-23 23:29:15.884180: step: 232/77, loss: 0.21170544624328613 2023-01-23 23:29:17.366799: step: 236/77, loss: 0.17526711523532867 2023-01-23 23:29:18.824456: step: 240/77, loss: 0.13736236095428467 2023-01-23 23:29:20.279512: step: 244/77, loss: 0.29424428939819336 2023-01-23 23:29:21.730230: step: 248/77, loss: 0.15658994019031525 2023-01-23 23:29:23.261943: step: 252/77, loss: 0.24072307348251343 2023-01-23 23:29:24.736931: step: 256/77, loss: 0.15842083096504211 2023-01-23 23:29:26.246158: step: 260/77, loss: 0.14580503106117249 2023-01-23 23:29:27.712071: step: 264/77, loss: 0.07843738049268723 2023-01-23 23:29:29.114226: step: 268/77, loss: 0.15364432334899902 2023-01-23 23:29:30.617285: step: 272/77, loss: 0.14609786868095398 2023-01-23 23:29:32.003319: step: 276/77, loss: 0.10194536298513412 2023-01-23 23:29:33.444939: step: 280/77, loss: 0.11051319539546967 2023-01-23 23:29:34.830609: step: 284/77, loss: 0.32462507486343384 2023-01-23 23:29:36.282905: step: 288/77, loss: 0.12179729342460632 2023-01-23 23:29:37.746761: step: 292/77, loss: 0.24305714666843414 2023-01-23 23:29:39.184372: step: 296/77, loss: 0.06196020543575287 2023-01-23 23:29:40.666554: step: 300/77, loss: 0.03596843406558037 2023-01-23 23:29:42.100048: step: 304/77, loss: 0.17993435263633728 2023-01-23 23:29:43.603675: step: 308/77, loss: 0.15721270442008972 2023-01-23 23:29:45.029800: step: 312/77, loss: 0.1377667635679245 2023-01-23 23:29:46.420912: step: 316/77, loss: 0.07205483317375183 2023-01-23 23:29:47.838180: step: 320/77, loss: 0.10653500258922577 2023-01-23 23:29:49.329298: step: 324/77, loss: 0.09064988791942596 2023-01-23 23:29:50.797183: step: 328/77, loss: 0.09795213490724564 2023-01-23 23:29:52.208152: step: 332/77, loss: 0.17470252513885498 2023-01-23 23:29:53.614188: step: 336/77, loss: 0.06503775715827942 2023-01-23 23:29:55.021148: step: 340/77, loss: 0.12823697924613953 2023-01-23 23:29:56.470076: step: 344/77, loss: 0.08256591856479645 2023-01-23 23:29:57.817571: step: 348/77, loss: 0.08200141787528992 2023-01-23 23:29:59.203365: step: 352/77, loss: 0.13781386613845825 2023-01-23 23:30:00.623127: step: 356/77, loss: 0.04991006851196289 2023-01-23 23:30:02.068033: step: 360/77, loss: 0.3172021806240082 2023-01-23 23:30:03.533762: step: 364/77, loss: 0.07039426267147064 2023-01-23 23:30:04.979709: step: 368/77, loss: 0.05509275197982788 2023-01-23 23:30:06.410291: step: 372/77, loss: 0.06914137303829193 2023-01-23 23:30:07.832059: step: 376/77, loss: 0.10850280523300171 2023-01-23 23:30:09.257848: step: 380/77, loss: 0.13166894018650055 2023-01-23 23:30:10.652381: step: 384/77, loss: 0.07740715146064758 2023-01-23 23:30:12.166740: step: 388/77, loss: 0.06276335567235947 ================================================== Loss: 0.460 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 
'epoch': 0} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:32:15.347124: step: 4/77, loss: 0.12122565507888794 2023-01-23 23:32:16.801587: step: 8/77, loss: 0.06975126266479492 2023-01-23 23:32:18.294478: step: 12/77, loss: 0.06883430480957031 2023-01-23 23:32:19.779646: step: 16/77, loss: 0.11424947530031204 2023-01-23 23:32:21.180035: step: 20/77, loss: 0.1402619183063507 2023-01-23 23:32:22.666038: step: 24/77, loss: 0.11052996665239334 2023-01-23 23:32:24.023778: step: 28/77, loss: 0.0690561980009079 2023-01-23 23:32:25.427821: step: 32/77, loss: 0.16084325313568115 2023-01-23 23:32:26.805983: step: 36/77, loss: 0.10384456813335419 2023-01-23 23:32:28.203103: step: 40/77, loss: 0.1343913972377777 2023-01-23 23:32:29.661878: step: 44/77, loss: 0.03423989191651344 2023-01-23 23:32:31.091434: step: 48/77, loss: 0.08117079734802246 2023-01-23 23:32:32.587803: step: 52/77, loss: 0.24305084347724915 2023-01-23 23:32:33.995359: step: 56/77, loss: 0.06955840438604355 2023-01-23 23:32:35.412994: step: 60/77, loss: 0.0856841653585434 2023-01-23 23:32:36.831064: step: 64/77, loss: 0.07438933849334717 2023-01-23 23:32:38.311161: step: 68/77, 
loss: 0.13662199676036835 2023-01-23 23:32:39.762572: step: 72/77, loss: 0.19227546453475952 2023-01-23 23:32:41.099579: step: 76/77, loss: 0.19997961819171906 2023-01-23 23:32:42.543732: step: 80/77, loss: 0.25327473878860474 2023-01-23 23:32:43.990252: step: 84/77, loss: 0.12387385964393616 2023-01-23 23:32:45.417790: step: 88/77, loss: 0.08649803698062897 2023-01-23 23:32:46.924245: step: 92/77, loss: 0.11727029830217361 2023-01-23 23:32:48.286422: step: 96/77, loss: 0.13385401666164398 2023-01-23 23:32:49.756933: step: 100/77, loss: 0.042006321251392365 2023-01-23 23:32:51.173423: step: 104/77, loss: 0.13730527460575104 2023-01-23 23:32:52.598922: step: 108/77, loss: 0.11692111194133759 2023-01-23 23:32:54.053729: step: 112/77, loss: 0.09458288550376892 2023-01-23 23:32:55.552276: step: 116/77, loss: 0.13422513008117676 2023-01-23 23:32:56.987658: step: 120/77, loss: 0.11068707704544067 2023-01-23 23:32:58.379788: step: 124/77, loss: 0.1207418143749237 2023-01-23 23:32:59.869604: step: 128/77, loss: 0.12179193645715714 2023-01-23 23:33:01.257285: step: 132/77, loss: 0.28321126103401184 2023-01-23 23:33:02.724826: step: 136/77, loss: 0.07898715138435364 2023-01-23 23:33:04.127911: step: 140/77, loss: 0.10080458223819733 2023-01-23 23:33:05.613629: step: 144/77, loss: 0.11456061899662018 2023-01-23 23:33:07.058642: step: 148/77, loss: 0.03268774598836899 2023-01-23 23:33:08.485083: step: 152/77, loss: 0.07138832658529282 2023-01-23 23:33:09.901179: step: 156/77, loss: 0.07606998085975647 2023-01-23 23:33:11.363493: step: 160/77, loss: 0.0508023202419281 2023-01-23 23:33:12.843992: step: 164/77, loss: 0.11992565542459488 2023-01-23 23:33:14.257628: step: 168/77, loss: 0.3071916997432709 2023-01-23 23:33:15.696232: step: 172/77, loss: 0.050045184791088104 2023-01-23 23:33:17.164084: step: 176/77, loss: 0.07701658457517624 2023-01-23 23:33:18.615988: step: 180/77, loss: 0.06962858885526657 2023-01-23 23:33:20.085116: step: 184/77, loss: 0.05321573466062546 2023-01-23 23:33:21.468370: step: 188/77, loss: 0.04733038321137428 2023-01-23 23:33:22.904901: step: 192/77, loss: 0.09735030680894852 2023-01-23 23:33:24.348914: step: 196/77, loss: 0.16800400614738464 2023-01-23 23:33:25.839845: step: 200/77, loss: 0.08375978469848633 2023-01-23 23:33:27.307719: step: 204/77, loss: 0.10231446474790573 2023-01-23 23:33:28.707960: step: 208/77, loss: 0.11460596323013306 2023-01-23 23:33:30.162942: step: 212/77, loss: 0.20619343221187592 2023-01-23 23:33:31.620727: step: 216/77, loss: 0.061705246567726135 2023-01-23 23:33:33.100572: step: 220/77, loss: 0.05100015923380852 2023-01-23 23:33:34.550588: step: 224/77, loss: 0.14486533403396606 2023-01-23 23:33:35.967074: step: 228/77, loss: 0.0911254808306694 2023-01-23 23:33:37.401863: step: 232/77, loss: 0.06624509394168854 2023-01-23 23:33:38.879368: step: 236/77, loss: 0.09048707783222198 2023-01-23 23:33:40.356885: step: 240/77, loss: 0.09039495885372162 2023-01-23 23:33:41.877399: step: 244/77, loss: 0.13376855850219727 2023-01-23 23:33:43.273658: step: 248/77, loss: 0.1051325872540474 2023-01-23 23:33:44.710213: step: 252/77, loss: 0.12940678000450134 2023-01-23 23:33:46.171353: step: 256/77, loss: 0.10960350930690765 2023-01-23 23:33:47.655669: step: 260/77, loss: 0.31249040365219116 2023-01-23 23:33:49.102233: step: 264/77, loss: 0.13202910125255585 2023-01-23 23:33:50.571423: step: 268/77, loss: 0.12233851850032806 2023-01-23 23:33:52.004887: step: 272/77, loss: 0.11027183383703232 2023-01-23 23:33:53.452649: step: 276/77, loss: 0.08666466176509857 
2023-01-23 23:33:54.864871: step: 280/77, loss: 0.16495409607887268 2023-01-23 23:33:56.285158: step: 284/77, loss: 0.04284519702196121 2023-01-23 23:33:57.735567: step: 288/77, loss: 0.06679358333349228 2023-01-23 23:33:59.253180: step: 292/77, loss: 0.19842630624771118 2023-01-23 23:34:00.744509: step: 296/77, loss: 0.08360859006643295 2023-01-23 23:34:02.156503: step: 300/77, loss: 0.14133182168006897 2023-01-23 23:34:03.645024: step: 304/77, loss: 0.10215363651514053 2023-01-23 23:34:05.037901: step: 308/77, loss: 0.06244270130991936 2023-01-23 23:34:06.444276: step: 312/77, loss: 0.08266845345497131 2023-01-23 23:34:07.884700: step: 316/77, loss: 0.1152535155415535 2023-01-23 23:34:09.349863: step: 320/77, loss: 0.1293995976448059 2023-01-23 23:34:10.820549: step: 324/77, loss: 0.021759741008281708 2023-01-23 23:34:12.251047: step: 328/77, loss: 0.0577072836458683 2023-01-23 23:34:13.608431: step: 332/77, loss: 0.052957139909267426 2023-01-23 23:34:15.051761: step: 336/77, loss: 0.028006015345454216 2023-01-23 23:34:16.486333: step: 340/77, loss: 0.1874495893716812 2023-01-23 23:34:17.853989: step: 344/77, loss: 0.030715854838490486 2023-01-23 23:34:19.246668: step: 348/77, loss: 0.06883832067251205 2023-01-23 23:34:20.689414: step: 352/77, loss: 0.04792249947786331 2023-01-23 23:34:22.077290: step: 356/77, loss: 0.054040245711803436 2023-01-23 23:34:23.470946: step: 360/77, loss: 0.08866354078054428 2023-01-23 23:34:24.965437: step: 364/77, loss: 0.1664765179157257 2023-01-23 23:34:26.371545: step: 368/77, loss: 0.14805133640766144 2023-01-23 23:34:27.797486: step: 372/77, loss: 0.15928471088409424 2023-01-23 23:34:29.264704: step: 376/77, loss: 0.08161871135234833 2023-01-23 23:34:30.728791: step: 380/77, loss: 0.07126414030790329 2023-01-23 23:34:32.159373: step: 384/77, loss: 0.14317861199378967 2023-01-23 23:34:33.576169: step: 388/77, loss: 0.05555611848831177 ================================================== Loss: 0.109 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 
'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:36:15.517931: step: 4/77, loss: 0.04601486772298813 2023-01-23 23:36:16.945696: step: 8/77, loss: 0.04817475378513336 2023-01-23 23:36:18.363975: step: 12/77, loss: 0.09909090399742126 2023-01-23 23:36:19.772689: step: 16/77, loss: 0.032988566905260086 2023-01-23 23:36:21.212733: step: 20/77, loss: 0.022017795592546463 2023-01-23 23:36:22.655943: step: 24/77, loss: 0.11184781789779663 2023-01-23 23:36:24.041642: step: 28/77, loss: 0.0631086677312851 2023-01-23 23:36:25.457258: step: 32/77, loss: 0.06439824402332306 2023-01-23 23:36:26.928201: step: 36/77, loss: 0.046433161944150925 2023-01-23 23:36:28.374783: step: 40/77, loss: 0.03827742859721184 2023-01-23 23:36:29.751700: step: 44/77, loss: 0.06895600259304047 2023-01-23 23:36:31.154621: step: 48/77, loss: 0.043843068182468414 2023-01-23 23:36:32.581363: step: 52/77, loss: 0.0473325178027153 2023-01-23 23:36:33.996468: step: 56/77, loss: 0.057506125420331955 2023-01-23 23:36:35.438330: step: 60/77, loss: 0.030457310378551483 2023-01-23 23:36:36.870275: step: 64/77, loss: 0.06665453314781189 2023-01-23 23:36:38.364456: step: 68/77, loss: 0.04986963048577309 2023-01-23 23:36:39.803675: step: 72/77, loss: 0.046548400074243546 2023-01-23 23:36:41.244520: step: 76/77, loss: 0.04934310540556908 2023-01-23 23:36:42.752470: step: 80/77, loss: 0.06297309696674347 2023-01-23 23:36:44.221657: step: 84/77, loss: 0.023975176736712456 2023-01-23 23:36:45.677276: step: 88/77, loss: 0.0738765150308609 2023-01-23 23:36:47.113197: step: 92/77, loss: 0.08002398908138275 2023-01-23 23:36:48.470586: step: 96/77, loss: 0.06041925027966499 2023-01-23 23:36:49.864080: step: 100/77, loss: 0.04932462424039841 2023-01-23 23:36:51.266368: step: 104/77, loss: 0.21958592534065247 2023-01-23 23:36:52.704999: step: 108/77, loss: 0.03155907243490219 2023-01-23 23:36:54.126117: step: 112/77, loss: 0.048089973628520966 2023-01-23 23:36:55.550898: step: 116/77, loss: 0.018027935177087784 2023-01-23 23:36:56.985976: step: 120/77, loss: 0.018941737711429596 2023-01-23 23:36:58.412318: step: 124/77, loss: 0.01080845482647419 2023-01-23 23:36:59.860614: step: 128/77, loss: 0.18481628596782684 2023-01-23 23:37:01.283934: step: 132/77, loss: 0.10867591202259064 2023-01-23 23:37:02.711051: step: 136/77, loss: 0.31321001052856445 2023-01-23 23:37:04.211629: step: 140/77, loss: 0.009379186667501926 2023-01-23 
23:37:05.655821: step: 144/77, loss: 0.013940312899649143 2023-01-23 23:37:07.065945: step: 148/77, loss: 0.03576299548149109 2023-01-23 23:37:08.533229: step: 152/77, loss: 0.026389723643660545 2023-01-23 23:37:09.942438: step: 156/77, loss: 0.03446223959326744 2023-01-23 23:37:11.370636: step: 160/77, loss: 0.020441459491848946 2023-01-23 23:37:12.854267: step: 164/77, loss: 0.036716707050800323 2023-01-23 23:37:14.268563: step: 168/77, loss: 0.010801638476550579 2023-01-23 23:37:15.690675: step: 172/77, loss: 0.029459888115525246 2023-01-23 23:37:17.134888: step: 176/77, loss: 0.11619746685028076 2023-01-23 23:37:18.542327: step: 180/77, loss: 0.006916141137480736 2023-01-23 23:37:19.996480: step: 184/77, loss: 0.15354542434215546 2023-01-23 23:37:21.396482: step: 188/77, loss: 0.011987213045358658 2023-01-23 23:37:22.843362: step: 192/77, loss: 0.07709883898496628 2023-01-23 23:37:24.286760: step: 196/77, loss: 0.17823737859725952 2023-01-23 23:37:25.692509: step: 200/77, loss: 0.014225020073354244 2023-01-23 23:37:27.150080: step: 204/77, loss: 0.05122522637248039 2023-01-23 23:37:28.620484: step: 208/77, loss: 0.109002485871315 2023-01-23 23:37:30.120456: step: 212/77, loss: 0.08840032666921616 2023-01-23 23:37:31.607597: step: 216/77, loss: 0.07572910189628601 2023-01-23 23:37:33.069401: step: 220/77, loss: 0.040880873799324036 2023-01-23 23:37:34.518017: step: 224/77, loss: 0.04236864671111107 2023-01-23 23:37:35.932933: step: 228/77, loss: 0.049025196582078934 2023-01-23 23:37:37.355743: step: 232/77, loss: 0.015840142965316772 2023-01-23 23:37:38.855518: step: 236/77, loss: 0.03713483363389969 2023-01-23 23:37:40.250538: step: 240/77, loss: 0.0560375340282917 2023-01-23 23:37:41.711539: step: 244/77, loss: 0.06949000805616379 2023-01-23 23:37:43.208921: step: 248/77, loss: 0.0347122959792614 2023-01-23 23:37:44.647290: step: 252/77, loss: 0.08734209835529327 2023-01-23 23:37:45.965597: step: 256/77, loss: 0.008405013009905815 2023-01-23 23:37:47.364517: step: 260/77, loss: 0.020795222371816635 2023-01-23 23:37:48.847799: step: 264/77, loss: 0.013448954559862614 2023-01-23 23:37:50.250214: step: 268/77, loss: 0.37937384843826294 2023-01-23 23:37:51.740630: step: 272/77, loss: 0.054609525948762894 2023-01-23 23:37:53.189408: step: 276/77, loss: 0.07847777754068375 2023-01-23 23:37:54.654954: step: 280/77, loss: 0.05036351457238197 2023-01-23 23:37:56.156537: step: 284/77, loss: 0.06301073729991913 2023-01-23 23:37:57.525671: step: 288/77, loss: 0.024898221716284752 2023-01-23 23:37:58.935747: step: 292/77, loss: 0.15771490335464478 2023-01-23 23:38:00.382621: step: 296/77, loss: 0.009197291918098927 2023-01-23 23:38:01.851884: step: 300/77, loss: 0.056442517787218094 2023-01-23 23:38:03.331520: step: 304/77, loss: 0.017659693956375122 2023-01-23 23:38:04.800459: step: 308/77, loss: 0.01524116937071085 2023-01-23 23:38:06.248171: step: 312/77, loss: 0.00637422502040863 2023-01-23 23:38:07.626392: step: 316/77, loss: 0.016708776354789734 2023-01-23 23:38:09.093785: step: 320/77, loss: 0.0594242662191391 2023-01-23 23:38:10.504990: step: 324/77, loss: 0.048527806997299194 2023-01-23 23:38:11.967146: step: 328/77, loss: 0.11545515805482864 2023-01-23 23:38:13.386160: step: 332/77, loss: 0.013203427195549011 2023-01-23 23:38:14.874020: step: 336/77, loss: 0.05984625220298767 2023-01-23 23:38:16.271281: step: 340/77, loss: 0.1080063208937645 2023-01-23 23:38:17.633664: step: 344/77, loss: 0.01844736561179161 2023-01-23 23:38:19.108940: step: 348/77, loss: 0.156895712018013 2023-01-23 
23:38:20.499117: step: 352/77, loss: 0.05376040190458298 2023-01-23 23:38:21.885977: step: 356/77, loss: 0.012940380722284317 2023-01-23 23:38:23.426416: step: 360/77, loss: 0.02959199622273445 2023-01-23 23:38:24.847340: step: 364/77, loss: 0.015051622875034809 2023-01-23 23:38:26.303593: step: 368/77, loss: 0.026665037497878075 2023-01-23 23:38:27.719694: step: 372/77, loss: 0.026100341230630875 2023-01-23 23:38:29.125528: step: 376/77, loss: 0.017636604607105255 2023-01-23 23:38:30.557338: step: 380/77, loss: 0.09775028377771378 2023-01-23 23:38:32.007499: step: 384/77, loss: 0.06577365100383759 2023-01-23 23:38:33.499592: step: 388/77, loss: 0.013629499822854996 ================================================== Loss: 0.060 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
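Note on the evaluation summaries above and below: the reported 'combined' figures are consistent with multiplying the template F1 by the slot F1, and each F1 is consistent with the usual 2*p*r/(p+r) harmonic mean of the logged precision and recall. The snippet below is only an illustration of that arithmetic, reproduced from the epoch-2 numbers in this log; the helper names are hypothetical and are not taken from train.py or its evaluation code.

# Illustration only: recompute the epoch-2 "Dev Chinese" summary from its p/r values,
# assuming f1 = 2*p*r/(p+r) and combined = template_f1 * slot_f1.
def f1(p, r):
    # Standard F1; returns 0.0 when both precision and recall are zero.
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

def combined(template_pr, slot_pr):
    # Hypothetical helper: product of template F1 and slot F1.
    return f1(*template_pr) * f1(*slot_pr)

template_pr = (1.0, 0.5833333333333334)        # template p, r from the epoch-2 Dev Chinese dict
slot_pr = (0.5, 0.03780718336483932)           # slot p, r from the same dict
print(f1(*template_pr))                        # ~0.7368421052631579, matching the logged template f1
print(f1(*slot_pr))                            # ~0.07029876977152899, matching the logged slot f1
print(combined(template_pr, slot_pr))          # ~0.0517990935158635, matching the logged 'combined'

The same arithmetic reproduces the epoch-2 Test Chinese 'combined' (0.6210526315789474 * 0.03809523809523809 ≈ 0.023659147869674185), and a slot F1 of 0.0 (as in the Sample Korean entries) yields a combined score of 0.0.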
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:40:32.210206: step: 4/77, loss: 0.005971512757241726 2023-01-23 23:40:33.631880: step: 8/77, loss: 0.08986285328865051 2023-01-23 23:40:35.126334: step: 12/77, loss: 0.02581588551402092 2023-01-23 23:40:36.545120: step: 16/77, loss: 0.004304011818021536 2023-01-23 23:40:37.978182: step: 20/77, loss: 0.047521576285362244 2023-01-23 23:40:39.381497: step: 24/77, loss: 0.010420912876725197 2023-01-23 23:40:40.829170: step: 28/77, loss: 0.026402922347187996 2023-01-23 23:40:42.245219: step: 32/77, loss: 0.022887449711561203 2023-01-23 23:40:43.660581: step: 36/77, loss: 0.016893871128559113 2023-01-23 23:40:45.141029: step: 40/77, loss: 0.0178556926548481 2023-01-23 23:40:46.516524: step: 44/77, loss: 0.01378310564905405 2023-01-23 23:40:47.951403: step: 48/77, loss: 0.013840382918715477 2023-01-23 23:40:49.388976: step: 52/77, loss: 0.014913130551576614 2023-01-23 23:40:50.851190: step: 56/77, loss: 0.0072315772995352745 2023-01-23 23:40:52.332003: step: 60/77, loss: 0.06961221992969513 2023-01-23 23:40:53.749345: step: 64/77, loss: 0.011401453986763954 2023-01-23 23:40:55.154141: step: 68/77, loss: 0.0013695021625608206 2023-01-23 23:40:56.677146: step: 72/77, loss: 0.06069410964846611 2023-01-23 23:40:58.164638: step: 76/77, loss: 0.0035763708874583244 2023-01-23 23:40:59.676286: 
step: 80/77, loss: 0.004236510954797268 2023-01-23 23:41:01.084000: step: 84/77, loss: 0.010543843731284142 2023-01-23 23:41:02.490845: step: 88/77, loss: 0.02292841114103794 2023-01-23 23:41:03.870474: step: 92/77, loss: 0.006904575973749161 2023-01-23 23:41:05.289028: step: 96/77, loss: 0.0828978568315506 2023-01-23 23:41:06.693124: step: 100/77, loss: 0.11366309970617294 2023-01-23 23:41:08.106605: step: 104/77, loss: 0.06396742910146713 2023-01-23 23:41:09.560173: step: 108/77, loss: 0.003982246853411198 2023-01-23 23:41:11.000659: step: 112/77, loss: 0.03245307132601738 2023-01-23 23:41:12.515607: step: 116/77, loss: 0.003969680517911911 2023-01-23 23:41:13.985599: step: 120/77, loss: 0.000624045729637146 2023-01-23 23:41:15.423775: step: 124/77, loss: 0.0040672216564416885 2023-01-23 23:41:16.916282: step: 128/77, loss: 0.04338538646697998 2023-01-23 23:41:18.343615: step: 132/77, loss: 0.02423202060163021 2023-01-23 23:41:19.804890: step: 136/77, loss: 0.04370751604437828 2023-01-23 23:41:21.169670: step: 140/77, loss: 0.06708807498216629 2023-01-23 23:41:22.582700: step: 144/77, loss: 0.04993844032287598 2023-01-23 23:41:24.042396: step: 148/77, loss: 0.03304711729288101 2023-01-23 23:41:25.550430: step: 152/77, loss: 0.03260575234889984 2023-01-23 23:41:26.996463: step: 156/77, loss: 0.0576007217168808 2023-01-23 23:41:28.421935: step: 160/77, loss: 0.009530353359878063 2023-01-23 23:41:29.875187: step: 164/77, loss: 0.00638984702527523 2023-01-23 23:41:31.281536: step: 168/77, loss: 0.00925515778362751 2023-01-23 23:41:32.639967: step: 172/77, loss: 0.01808241754770279 2023-01-23 23:41:34.069449: step: 176/77, loss: 0.014505150727927685 2023-01-23 23:41:35.507270: step: 180/77, loss: 0.07409019023180008 2023-01-23 23:41:36.951836: step: 184/77, loss: 0.015948746353387833 2023-01-23 23:41:38.423194: step: 188/77, loss: 0.12979811429977417 2023-01-23 23:41:39.871066: step: 192/77, loss: 0.005695809610188007 2023-01-23 23:41:41.280057: step: 196/77, loss: 0.003760495688766241 2023-01-23 23:41:42.762355: step: 200/77, loss: 0.1478642076253891 2023-01-23 23:41:44.209788: step: 204/77, loss: 0.004346428904682398 2023-01-23 23:41:45.636344: step: 208/77, loss: 0.01687292382121086 2023-01-23 23:41:47.063173: step: 212/77, loss: 0.039575885981321335 2023-01-23 23:41:48.479757: step: 216/77, loss: 0.030470095574855804 2023-01-23 23:41:49.988891: step: 220/77, loss: 0.17650803923606873 2023-01-23 23:41:51.443682: step: 224/77, loss: 0.028570353984832764 2023-01-23 23:41:52.875691: step: 228/77, loss: 0.002066076500341296 2023-01-23 23:41:54.342568: step: 232/77, loss: 0.01230506133288145 2023-01-23 23:41:55.769059: step: 236/77, loss: 0.021028703078627586 2023-01-23 23:41:57.191095: step: 240/77, loss: 0.041566163301467896 2023-01-23 23:41:58.680275: step: 244/77, loss: 0.03247467800974846 2023-01-23 23:42:00.128015: step: 248/77, loss: 0.03296036273241043 2023-01-23 23:42:01.539751: step: 252/77, loss: 0.06024741008877754 2023-01-23 23:42:03.000138: step: 256/77, loss: 0.009157262742519379 2023-01-23 23:42:04.441336: step: 260/77, loss: 0.06374485045671463 2023-01-23 23:42:05.929535: step: 264/77, loss: 0.032092105597257614 2023-01-23 23:42:07.270192: step: 268/77, loss: 0.07532630860805511 2023-01-23 23:42:08.669554: step: 272/77, loss: 0.011283449828624725 2023-01-23 23:42:10.138534: step: 276/77, loss: 0.08449389040470123 2023-01-23 23:42:11.542392: step: 280/77, loss: 0.03654472157359123 2023-01-23 23:42:13.015121: step: 284/77, loss: 0.11908509582281113 2023-01-23 23:42:14.435316: 
step: 288/77, loss: 0.05096805468201637 2023-01-23 23:42:15.929617: step: 292/77, loss: 0.017733121290802956 2023-01-23 23:42:17.291623: step: 296/77, loss: 0.011005227454006672 2023-01-23 23:42:18.739957: step: 300/77, loss: 0.09521736204624176 2023-01-23 23:42:20.201303: step: 304/77, loss: 0.0839010700583458 2023-01-23 23:42:21.602448: step: 308/77, loss: 0.009480705484747887 2023-01-23 23:42:23.034833: step: 312/77, loss: 0.05214642733335495 2023-01-23 23:42:24.451450: step: 316/77, loss: 0.017187777906656265 2023-01-23 23:42:25.923881: step: 320/77, loss: 0.013568413443863392 2023-01-23 23:42:27.383080: step: 324/77, loss: 0.02575734630227089 2023-01-23 23:42:28.774028: step: 328/77, loss: 0.05008266866207123 2023-01-23 23:42:30.227314: step: 332/77, loss: 0.034605540335178375 2023-01-23 23:42:31.685513: step: 336/77, loss: 0.019046058878302574 2023-01-23 23:42:33.114654: step: 340/77, loss: 0.06698395311832428 2023-01-23 23:42:34.553848: step: 344/77, loss: 0.04300406947731972 2023-01-23 23:42:35.974399: step: 348/77, loss: 0.024131231009960175 2023-01-23 23:42:37.385952: step: 352/77, loss: 0.03506898507475853 2023-01-23 23:42:38.916356: step: 356/77, loss: 0.11473017930984497 2023-01-23 23:42:40.291403: step: 360/77, loss: 0.031168891116976738 2023-01-23 23:42:41.732867: step: 364/77, loss: 0.015158976428210735 2023-01-23 23:42:43.203789: step: 368/77, loss: 0.07996613532304764 2023-01-23 23:42:44.680539: step: 372/77, loss: 0.04004429280757904 2023-01-23 23:42:46.082774: step: 376/77, loss: 0.03189512714743614 2023-01-23 23:42:47.565730: step: 380/77, loss: 0.017405323684215546 2023-01-23 23:42:48.991847: step: 384/77, loss: 0.0033155945129692554 2023-01-23 23:42:50.428772: step: 388/77, loss: 0.029796740040183067 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Chinese: {'template': {'p': 0.9803921568627451, 'r': 0.390625, 'f1': 0.5586592178770949}, 'slot': {'p': 0.6538461538461539, 'r': 0.016683022571148183, 'f1': 0.03253588516746411}, 'combined': 0.018176472160594474, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Korean: {'template': {'p': 0.9803921568627451, 'r': 0.390625, 'f1': 0.5586592178770949}, 'slot': {'p': 0.6538461538461539, 'r': 0.016683022571148183, 'f1': 0.03253588516746411}, 'combined': 0.018176472160594474, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Russian: {'template': {'p': 0.9803921568627451, 'r': 0.390625, 'f1': 0.5586592178770949}, 'slot': {'p': 0.6538461538461539, 'r': 0.016683022571148183, 'f1': 0.03253588516746411}, 'combined': 0.018176472160594474, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 
'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:44:32.755978: step: 4/77, loss: 0.06511770933866501 2023-01-23 23:44:34.189127: step: 8/77, loss: 0.02227628417313099 2023-01-23 23:44:35.618766: step: 12/77, loss: 0.02134661376476288 2023-01-23 23:44:37.116837: step: 16/77, loss: 0.09239403158426285 2023-01-23 23:44:38.552250: step: 20/77, loss: 0.013236569240689278 2023-01-23 23:44:40.012304: step: 24/77, loss: 0.04262635484337807 2023-01-23 23:44:41.444272: step: 28/77, loss: 0.04335910826921463 2023-01-23 23:44:42.885038: step: 32/77, loss: 0.11229270696640015 2023-01-23 23:44:44.360776: step: 36/77, loss: 0.020496727898716927 2023-01-23 23:44:45.820164: step: 40/77, loss: 0.008443008176982403 2023-01-23 23:44:47.238423: step: 44/77, loss: 0.05217698588967323 2023-01-23 23:44:48.667819: step: 48/77, loss: 0.03275950998067856 2023-01-23 23:44:50.108631: step: 52/77, loss: 0.04591575637459755 2023-01-23 23:44:51.563432: step: 56/77, loss: 0.009700889699161053 2023-01-23 23:44:53.041889: step: 60/77, loss: 0.026869626715779305 2023-01-23 23:44:54.435490: step: 64/77, loss: 0.012734868563711643 2023-01-23 23:44:55.826410: step: 68/77, loss: 0.004796100780367851 2023-01-23 23:44:57.236294: step: 72/77, loss: 
0.05096709728240967 2023-01-23 23:44:58.619250: step: 76/77, loss: 0.03370915725827217 2023-01-23 23:45:00.025279: step: 80/77, loss: 0.021908100694417953 2023-01-23 23:45:01.486513: step: 84/77, loss: 0.01133977621793747 2023-01-23 23:45:02.877978: step: 88/77, loss: 0.022827230393886566 2023-01-23 23:45:04.361825: step: 92/77, loss: 0.03184092789888382 2023-01-23 23:45:05.801360: step: 96/77, loss: 0.04128416255116463 2023-01-23 23:45:07.208994: step: 100/77, loss: 0.07630272209644318 2023-01-23 23:45:08.680814: step: 104/77, loss: 0.0071154991164803505 2023-01-23 23:45:10.138468: step: 108/77, loss: 0.008922640234231949 2023-01-23 23:45:11.598063: step: 112/77, loss: 0.022041644901037216 2023-01-23 23:45:13.100018: step: 116/77, loss: 0.008678006939589977 2023-01-23 23:45:14.559498: step: 120/77, loss: 0.0035237190313637257 2023-01-23 23:45:15.947694: step: 124/77, loss: 0.025985462591052055 2023-01-23 23:45:17.313952: step: 128/77, loss: 0.0029498650692403316 2023-01-23 23:45:18.838606: step: 132/77, loss: 0.05594632774591446 2023-01-23 23:45:20.218836: step: 136/77, loss: 0.013143017888069153 2023-01-23 23:45:21.679782: step: 140/77, loss: 0.014098651707172394 2023-01-23 23:45:23.079201: step: 144/77, loss: 0.014548598788678646 2023-01-23 23:45:24.482673: step: 148/77, loss: 0.022157195955514908 2023-01-23 23:45:25.905489: step: 152/77, loss: 0.023342769593000412 2023-01-23 23:45:27.389505: step: 156/77, loss: 0.07096787542104721 2023-01-23 23:45:28.851968: step: 160/77, loss: 0.041673459112644196 2023-01-23 23:45:30.265240: step: 164/77, loss: 0.0136954914778471 2023-01-23 23:45:31.699919: step: 168/77, loss: 0.01048867218196392 2023-01-23 23:45:33.097423: step: 172/77, loss: 0.04753701761364937 2023-01-23 23:45:34.533662: step: 176/77, loss: 0.015834391117095947 2023-01-23 23:45:35.997857: step: 180/77, loss: 0.009923559613525867 2023-01-23 23:45:37.477739: step: 184/77, loss: 0.1431715339422226 2023-01-23 23:45:38.891145: step: 188/77, loss: 0.07374101132154465 2023-01-23 23:45:40.256913: step: 192/77, loss: 0.04399900138378143 2023-01-23 23:45:41.655519: step: 196/77, loss: 0.03910788521170616 2023-01-23 23:45:43.140122: step: 200/77, loss: 0.03297842666506767 2023-01-23 23:45:44.523723: step: 204/77, loss: 0.06826893985271454 2023-01-23 23:45:46.025418: step: 208/77, loss: 0.006327507086098194 2023-01-23 23:45:47.518260: step: 212/77, loss: 0.004379096440970898 2023-01-23 23:45:48.873623: step: 216/77, loss: 0.021066607907414436 2023-01-23 23:45:50.355751: step: 220/77, loss: 0.03417379409074783 2023-01-23 23:45:51.835504: step: 224/77, loss: 0.05092516541481018 2023-01-23 23:45:53.239265: step: 228/77, loss: 0.013350581750273705 2023-01-23 23:45:54.714806: step: 232/77, loss: 0.012862302362918854 2023-01-23 23:45:56.199122: step: 236/77, loss: 0.0070504057221114635 2023-01-23 23:45:57.639640: step: 240/77, loss: 0.06666646897792816 2023-01-23 23:45:59.090169: step: 244/77, loss: 0.0014184003230184317 2023-01-23 23:46:00.525709: step: 248/77, loss: 0.018787948414683342 2023-01-23 23:46:01.961216: step: 252/77, loss: 0.04476185888051987 2023-01-23 23:46:03.376052: step: 256/77, loss: 0.08340482413768768 2023-01-23 23:46:04.808525: step: 260/77, loss: 0.0024039025884121656 2023-01-23 23:46:06.164373: step: 264/77, loss: 0.025625744834542274 2023-01-23 23:46:07.629538: step: 268/77, loss: 0.013658175244927406 2023-01-23 23:46:09.052222: step: 272/77, loss: 0.062216561287641525 2023-01-23 23:46:10.465840: step: 276/77, loss: 0.03463776782155037 2023-01-23 23:46:11.936142: step: 
280/77, loss: 0.003395059145987034 2023-01-23 23:46:13.368679: step: 284/77, loss: 0.10502924025058746 2023-01-23 23:46:14.772334: step: 288/77, loss: 0.06791075319051743 2023-01-23 23:46:16.206700: step: 292/77, loss: 0.04715389013290405 2023-01-23 23:46:17.697626: step: 296/77, loss: 0.024910500273108482 2023-01-23 23:46:19.127016: step: 300/77, loss: 0.02448110841214657 2023-01-23 23:46:20.576545: step: 304/77, loss: 0.015895595774054527 2023-01-23 23:46:22.016515: step: 308/77, loss: 0.05485811457037926 2023-01-23 23:46:23.433339: step: 312/77, loss: 0.05560028553009033 2023-01-23 23:46:24.925403: step: 316/77, loss: 0.057130005210638046 2023-01-23 23:46:26.313120: step: 320/77, loss: 0.020813550800085068 2023-01-23 23:46:27.713714: step: 324/77, loss: 0.05507659539580345 2023-01-23 23:46:29.208042: step: 328/77, loss: 0.042632970958948135 2023-01-23 23:46:30.682208: step: 332/77, loss: 0.017948759719729424 2023-01-23 23:46:32.088444: step: 336/77, loss: 0.025418804958462715 2023-01-23 23:46:33.579183: step: 340/77, loss: 0.01243533007800579 2023-01-23 23:46:35.005784: step: 344/77, loss: 0.008232533000409603 2023-01-23 23:46:36.500333: step: 348/77, loss: 0.005863835569471121 2023-01-23 23:46:38.008753: step: 352/77, loss: 0.0137371476739645 2023-01-23 23:46:39.507696: step: 356/77, loss: 0.04229208081960678 2023-01-23 23:46:40.935743: step: 360/77, loss: 0.018152443692088127 2023-01-23 23:46:42.406076: step: 364/77, loss: 0.016206610947847366 2023-01-23 23:46:43.856980: step: 368/77, loss: 0.05746857821941376 2023-01-23 23:46:45.336823: step: 372/77, loss: 0.049862105399370193 2023-01-23 23:46:46.756663: step: 376/77, loss: 0.00593891367316246 2023-01-23 23:46:48.176247: step: 380/77, loss: 0.02611643448472023 2023-01-23 23:46:49.622856: step: 384/77, loss: 0.0411229208111763 2023-01-23 23:46:51.099613: step: 388/77, loss: 0.017169751226902008 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.48333333333333334, 'f1': 0.6516853932584269}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04367556427481521, 'epoch': 4} Test Chinese: {'template': {'p': 0.8970588235294118, 'r': 0.4765625, 'f1': 0.6224489795918369}, 'slot': {'p': 0.6111111111111112, 'r': 0.010794896957801767, 'f1': 0.02121504339440694}, 'combined': 0.01320528211284514, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.48333333333333334, 'f1': 0.6516853932584269}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.041523317092572336, 'epoch': 4} Test Korean: {'template': {'p': 0.9104477611940298, 'r': 0.4765625, 'f1': 0.6256410256410256}, 'slot': {'p': 0.6111111111111112, 'r': 0.010794896957801767, 'f1': 0.02121504339440694}, 'combined': 0.013273001508295624, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.4666666666666667, 'f1': 0.6363636363636364}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.040547063555913115, 'epoch': 4} Test Russian: {'template': {'p': 0.9104477611940298, 'r': 0.4765625, 'f1': 0.6256410256410256}, 'slot': {'p': 0.6111111111111112, 'r': 0.010794896957801767, 'f1': 0.02121504339440694}, 'combined': 0.013273001508295624, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 
0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:48:33.412675: step: 4/77, loss: 0.08117157220840454 2023-01-23 23:48:34.895224: step: 8/77, loss: 0.03891678526997566 2023-01-23 23:48:36.325836: step: 12/77, loss: 0.018629007041454315 2023-01-23 23:48:37.730135: step: 16/77, loss: 0.024092990905046463 2023-01-23 23:48:39.218700: step: 20/77, loss: 0.003024669596925378 2023-01-23 23:48:40.613176: step: 24/77, loss: 0.02051519975066185 2023-01-23 23:48:42.077301: step: 28/77, loss: 0.0053480276837944984 2023-01-23 23:48:43.549472: step: 32/77, loss: 0.0028783066663891077 2023-01-23 23:48:45.012436: step: 36/77, loss: 0.04187828302383423 2023-01-23 23:48:46.456605: step: 40/77, loss: 0.01745690405368805 2023-01-23 23:48:47.901999: step: 44/77, loss: 0.01910899206995964 2023-01-23 23:48:49.329286: step: 48/77, loss: 0.006147223990410566 2023-01-23 23:48:50.809336: step: 52/77, loss: 0.015471047721803188 2023-01-23 23:48:52.202233: step: 56/77, loss: 0.021297479048371315 2023-01-23 23:48:53.664279: step: 60/77, loss: 0.016664618626236916 2023-01-23 23:48:55.101383: step: 64/77, loss: 0.10081305354833603 2023-01-23 23:48:56.564150: step: 68/77, loss: 
0.02595415711402893 2023-01-23 23:48:58.021277: step: 72/77, loss: 0.016515161842107773 2023-01-23 23:48:59.577692: step: 76/77, loss: 0.04394027963280678 2023-01-23 23:49:01.031213: step: 80/77, loss: 0.010788904502987862 2023-01-23 23:49:02.470309: step: 84/77, loss: 0.014180425554513931 2023-01-23 23:49:03.906200: step: 88/77, loss: 0.021911922842264175 2023-01-23 23:49:05.349100: step: 92/77, loss: 0.04131493717432022 2023-01-23 23:49:06.783220: step: 96/77, loss: 0.02815232425928116 2023-01-23 23:49:08.192513: step: 100/77, loss: 0.04688156023621559 2023-01-23 23:49:09.668883: step: 104/77, loss: 0.04534637928009033 2023-01-23 23:49:11.095373: step: 108/77, loss: 0.016092343255877495 2023-01-23 23:49:12.574019: step: 112/77, loss: 0.035738199949264526 2023-01-23 23:49:14.003704: step: 116/77, loss: 0.00904413778334856 2023-01-23 23:49:15.431523: step: 120/77, loss: 0.03285384550690651 2023-01-23 23:49:16.834561: step: 124/77, loss: 0.005159964319318533 2023-01-23 23:49:18.239922: step: 128/77, loss: 0.15879254043102264 2023-01-23 23:49:19.706326: step: 132/77, loss: 0.005956803448498249 2023-01-23 23:49:21.135300: step: 136/77, loss: 0.012946855276823044 2023-01-23 23:49:22.517025: step: 140/77, loss: 0.0126056969165802 2023-01-23 23:49:23.957328: step: 144/77, loss: 0.021381687372922897 2023-01-23 23:49:25.387177: step: 148/77, loss: 0.0045577725395560265 2023-01-23 23:49:26.868584: step: 152/77, loss: 0.22539357841014862 2023-01-23 23:49:28.258285: step: 156/77, loss: 0.010866387747228146 2023-01-23 23:49:29.758428: step: 160/77, loss: 0.0005665660719387233 2023-01-23 23:49:31.233839: step: 164/77, loss: 0.03323635086417198 2023-01-23 23:49:32.704045: step: 168/77, loss: 0.008901823312044144 2023-01-23 23:49:34.163998: step: 172/77, loss: 0.06111481040716171 2023-01-23 23:49:35.657605: step: 176/77, loss: 0.0703057274222374 2023-01-23 23:49:37.065389: step: 180/77, loss: 0.02014414593577385 2023-01-23 23:49:38.462052: step: 184/77, loss: 0.03161013126373291 2023-01-23 23:49:39.882379: step: 188/77, loss: 0.002897855592891574 2023-01-23 23:49:41.352877: step: 192/77, loss: 0.016166338697075844 2023-01-23 23:49:42.827966: step: 196/77, loss: 0.018734036013484 2023-01-23 23:49:44.243762: step: 200/77, loss: 0.06107534095644951 2023-01-23 23:49:45.676571: step: 204/77, loss: 0.002570316195487976 2023-01-23 23:49:47.157725: step: 208/77, loss: 0.10831344127655029 2023-01-23 23:49:48.627154: step: 212/77, loss: 0.003239275421947241 2023-01-23 23:49:50.085215: step: 216/77, loss: 0.03504738211631775 2023-01-23 23:49:51.518750: step: 220/77, loss: 0.05260982736945152 2023-01-23 23:49:52.939665: step: 224/77, loss: 0.002353373449295759 2023-01-23 23:49:54.386632: step: 228/77, loss: 0.05163984000682831 2023-01-23 23:49:55.881841: step: 232/77, loss: 0.02085479535162449 2023-01-23 23:49:57.362296: step: 236/77, loss: 0.009827936068177223 2023-01-23 23:49:58.812629: step: 240/77, loss: 0.025967005640268326 2023-01-23 23:50:00.226243: step: 244/77, loss: 0.004151183180510998 2023-01-23 23:50:01.642962: step: 248/77, loss: 0.0167071670293808 2023-01-23 23:50:03.080675: step: 252/77, loss: 0.048930615186691284 2023-01-23 23:50:04.451000: step: 256/77, loss: 0.06841599941253662 2023-01-23 23:50:05.897261: step: 260/77, loss: 0.027259133756160736 2023-01-23 23:50:07.324094: step: 264/77, loss: 0.07450323551893234 2023-01-23 23:50:08.828592: step: 268/77, loss: 0.04453302174806595 2023-01-23 23:50:10.229615: step: 272/77, loss: 0.006317660212516785 2023-01-23 23:50:11.587495: step: 276/77, loss: 
0.015550298616290092 2023-01-23 23:50:12.998097: step: 280/77, loss: 0.04075699299573898 2023-01-23 23:50:14.370794: step: 284/77, loss: 0.028539283201098442 2023-01-23 23:50:15.802332: step: 288/77, loss: 0.09774263203144073 2023-01-23 23:50:17.310057: step: 292/77, loss: 0.011181055568158627 2023-01-23 23:50:18.728469: step: 296/77, loss: 0.014936857856810093 2023-01-23 23:50:20.172802: step: 300/77, loss: 0.04972505569458008 2023-01-23 23:50:21.534779: step: 304/77, loss: 0.029812775552272797 2023-01-23 23:50:22.989113: step: 308/77, loss: 0.06259914487600327 2023-01-23 23:50:24.499897: step: 312/77, loss: 0.0034338203258812428 2023-01-23 23:50:25.971326: step: 316/77, loss: 0.07357652485370636 2023-01-23 23:50:27.362368: step: 320/77, loss: 0.013458916917443275 2023-01-23 23:50:28.838128: step: 324/77, loss: 0.10706418007612228 2023-01-23 23:50:30.260847: step: 328/77, loss: 0.061417315155267715 2023-01-23 23:50:31.744652: step: 332/77, loss: 0.013865873217582703 2023-01-23 23:50:33.184953: step: 336/77, loss: 0.023164033889770508 2023-01-23 23:50:34.623653: step: 340/77, loss: 0.017712213099002838 2023-01-23 23:50:36.074661: step: 344/77, loss: 0.002024821937084198 2023-01-23 23:50:37.477322: step: 348/77, loss: 0.00024450241471640766 2023-01-23 23:50:38.909890: step: 352/77, loss: 0.006608347408473492 2023-01-23 23:50:40.376435: step: 356/77, loss: 0.08975375443696976 2023-01-23 23:50:41.882993: step: 360/77, loss: 0.0048473007045686245 2023-01-23 23:50:43.271100: step: 364/77, loss: 0.03499084711074829 2023-01-23 23:50:44.691065: step: 368/77, loss: 0.026558957993984222 2023-01-23 23:50:46.127581: step: 372/77, loss: 0.00982342567294836 2023-01-23 23:50:47.576273: step: 376/77, loss: 0.003917289432138205 2023-01-23 23:50:49.014018: step: 380/77, loss: 0.03307020291686058 2023-01-23 23:50:50.507622: step: 384/77, loss: 0.06564565747976303 2023-01-23 23:50:51.910374: step: 388/77, loss: 0.06091843172907829 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.3333333333333333, 'f1': 0.5}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.035149384885764495, 'epoch': 5} Test Chinese: {'template': {'p': 0.9193548387096774, 'r': 0.4453125, 'f1': 0.5999999999999999}, 'slot': {'p': 0.6190476190476191, 'r': 0.012757605495583905, 'f1': 0.024999999999999998}, 'combined': 0.014999999999999996, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.3333333333333333, 'f1': 0.5}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.035149384885764495, 'epoch': 5} Test Korean: {'template': {'p': 0.9193548387096774, 'r': 0.4453125, 'f1': 0.5999999999999999}, 'slot': {'p': 0.6190476190476191, 'r': 0.012757605495583905, 'f1': 0.024999999999999998}, 'combined': 0.014999999999999996, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.3333333333333333, 'f1': 0.5}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.035149384885764495, 'epoch': 5} Test Russian: {'template': {'p': 0.9047619047619048, 'r': 0.4453125, 'f1': 0.5968586387434556}, 'slot': {'p': 0.6190476190476191, 'r': 0.012757605495583905, 'f1': 0.024999999999999998}, 'combined': 0.014921465968586388, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.021621621621621623, 'epoch': 5} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': 
{'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.025806451612903226, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:52:33.949433: step: 4/77, loss: 0.01459469459950924 2023-01-23 23:52:35.437234: step: 8/77, loss: 0.3601420819759369 2023-01-23 23:52:36.900665: step: 12/77, loss: 0.005000119097530842 2023-01-23 23:52:38.346545: step: 16/77, loss: 0.013874746859073639 2023-01-23 23:52:39.701990: step: 20/77, loss: 0.010326599702239037 2023-01-23 23:52:41.171500: step: 24/77, loss: 0.043253205716609955 2023-01-23 23:52:42.607502: step: 28/77, loss: 0.02528899721801281 2023-01-23 23:52:44.002084: step: 32/77, loss: 0.013174970634281635 2023-01-23 23:52:45.362129: step: 36/77, loss: 0.008078305050730705 2023-01-23 23:52:46.813406: step: 40/77, loss: 0.015973033383488655 2023-01-23 23:52:48.296021: step: 44/77, loss: 0.02415720373392105 2023-01-23 23:52:49.782580: step: 48/77, loss: 0.0253595057874918 2023-01-23 23:52:51.159344: step: 52/77, loss: 0.027250634506344795 2023-01-23 23:52:52.586238: step: 56/77, loss: 0.013301963917911053 2023-01-23 23:52:54.017835: step: 60/77, loss: 0.026193415746092796 2023-01-23 23:52:55.483862: step: 64/77, loss: 
0.004933602176606655 2023-01-23 23:52:56.955462: step: 68/77, loss: 0.0014216202544048429 2023-01-23 23:52:58.359186: step: 72/77, loss: 0.018952487036585808 2023-01-23 23:52:59.762466: step: 76/77, loss: 0.052897218614816666 2023-01-23 23:53:01.101271: step: 80/77, loss: 0.0038756094872951508 2023-01-23 23:53:02.503685: step: 84/77, loss: 0.005164342932403088 2023-01-23 23:53:03.909450: step: 88/77, loss: 0.0174753088504076 2023-01-23 23:53:05.321906: step: 92/77, loss: 0.03833255544304848 2023-01-23 23:53:06.723310: step: 96/77, loss: 0.009861546568572521 2023-01-23 23:53:08.153962: step: 100/77, loss: 0.027784064412117004 2023-01-23 23:53:09.659343: step: 104/77, loss: 0.017444532364606857 2023-01-23 23:53:11.074882: step: 108/77, loss: 0.017834315076470375 2023-01-23 23:53:12.602350: step: 112/77, loss: 0.005123637616634369 2023-01-23 23:53:13.996681: step: 116/77, loss: 0.0101096136495471 2023-01-23 23:53:15.448813: step: 120/77, loss: 0.024699628353118896 2023-01-23 23:53:16.920054: step: 124/77, loss: 0.034847695380449295 2023-01-23 23:53:18.370281: step: 128/77, loss: 0.005995428655296564 2023-01-23 23:53:19.783599: step: 132/77, loss: 0.04109932854771614 2023-01-23 23:53:21.207819: step: 136/77, loss: 0.0018515828996896744 2023-01-23 23:53:22.636766: step: 140/77, loss: 0.00424737436696887 2023-01-23 23:53:24.088665: step: 144/77, loss: 0.02598082274198532 2023-01-23 23:53:25.530101: step: 148/77, loss: 0.009949353523552418 2023-01-23 23:53:26.990150: step: 152/77, loss: 0.05457217991352081 2023-01-23 23:53:28.435765: step: 156/77, loss: 0.04540511220693588 2023-01-23 23:53:29.912925: step: 160/77, loss: 0.03349992260336876 2023-01-23 23:53:31.371533: step: 164/77, loss: 0.004050153307616711 2023-01-23 23:53:32.859479: step: 168/77, loss: 0.0016534986207261682 2023-01-23 23:53:34.258328: step: 172/77, loss: 0.006821885704994202 2023-01-23 23:53:35.683571: step: 176/77, loss: 0.012208542786538601 2023-01-23 23:53:37.056066: step: 180/77, loss: 0.03986116126179695 2023-01-23 23:53:38.503537: step: 184/77, loss: 0.03070826269686222 2023-01-23 23:53:39.876418: step: 188/77, loss: 0.010352972894906998 2023-01-23 23:53:41.276897: step: 192/77, loss: 0.005710075609385967 2023-01-23 23:53:42.760848: step: 196/77, loss: 0.007689544465392828 2023-01-23 23:53:44.174063: step: 200/77, loss: 0.022643158212304115 2023-01-23 23:53:45.687265: step: 204/77, loss: 0.002715296810492873 2023-01-23 23:53:47.117164: step: 208/77, loss: 0.0020200214348733425 2023-01-23 23:53:48.614505: step: 212/77, loss: 0.008939304389059544 2023-01-23 23:53:50.078158: step: 216/77, loss: 0.0013139714719727635 2023-01-23 23:53:51.460130: step: 220/77, loss: 0.00673355907201767 2023-01-23 23:53:52.858068: step: 224/77, loss: 0.018460728228092194 2023-01-23 23:53:54.384791: step: 228/77, loss: 0.05799287185072899 2023-01-23 23:53:55.782648: step: 232/77, loss: 0.011782975867390633 2023-01-23 23:53:57.221813: step: 236/77, loss: 0.008568809367716312 2023-01-23 23:53:58.656652: step: 240/77, loss: 0.0007917608018033206 2023-01-23 23:54:00.102072: step: 244/77, loss: 0.029287803918123245 2023-01-23 23:54:01.576524: step: 248/77, loss: 0.03426426649093628 2023-01-23 23:54:03.066782: step: 252/77, loss: 0.047590676695108414 2023-01-23 23:54:04.523354: step: 256/77, loss: 0.35922297835350037 2023-01-23 23:54:05.948034: step: 260/77, loss: 0.009143169037997723 2023-01-23 23:54:07.317337: step: 264/77, loss: 0.0009675707551650703 2023-01-23 23:54:08.766565: step: 268/77, loss: 0.011927079409360886 2023-01-23 23:54:10.243216: 
step: 272/77, loss: 0.015484156087040901 2023-01-23 23:54:11.642726: step: 276/77, loss: 0.0034936340525746346 2023-01-23 23:54:13.112549: step: 280/77, loss: 0.006303347647190094 2023-01-23 23:54:14.557268: step: 284/77, loss: 0.017508573830127716 2023-01-23 23:54:15.968667: step: 288/77, loss: 0.0041512432508170605 2023-01-23 23:54:17.389971: step: 292/77, loss: 0.011202400550246239 2023-01-23 23:54:18.820591: step: 296/77, loss: 0.04729530215263367 2023-01-23 23:54:20.247264: step: 300/77, loss: 0.054292239248752594 2023-01-23 23:54:21.701267: step: 304/77, loss: 0.03898230940103531 2023-01-23 23:54:23.159716: step: 308/77, loss: 0.016453411430120468 2023-01-23 23:54:24.630486: step: 312/77, loss: 0.0026123595889657736 2023-01-23 23:54:26.077825: step: 316/77, loss: 0.04049624130129814 2023-01-23 23:54:27.561190: step: 320/77, loss: 0.015780622139573097 2023-01-23 23:54:29.008462: step: 324/77, loss: 0.057226769626140594 2023-01-23 23:54:30.408853: step: 328/77, loss: 0.19466768205165863 2023-01-23 23:54:31.803697: step: 332/77, loss: 0.0014788589905947447 2023-01-23 23:54:33.311573: step: 336/77, loss: 0.00024396379012614489 2023-01-23 23:54:34.783020: step: 340/77, loss: 0.038548793643713 2023-01-23 23:54:36.238294: step: 344/77, loss: 0.012706398032605648 2023-01-23 23:54:37.689056: step: 348/77, loss: 0.010326993651688099 2023-01-23 23:54:39.149794: step: 352/77, loss: 0.01408340409398079 2023-01-23 23:54:40.675010: step: 356/77, loss: 0.0048990207724273205 2023-01-23 23:54:42.146808: step: 360/77, loss: 0.007280079182237387 2023-01-23 23:54:43.550489: step: 364/77, loss: 0.05593865364789963 2023-01-23 23:54:44.986759: step: 368/77, loss: 0.007230781018733978 2023-01-23 23:54:46.453860: step: 372/77, loss: 0.05424201488494873 2023-01-23 23:54:47.845532: step: 376/77, loss: 0.01386354211717844 2023-01-23 23:54:49.304131: step: 380/77, loss: 0.003996170591562986 2023-01-23 23:54:50.758674: step: 384/77, loss: 0.0027065842878073454 2023-01-23 23:54:52.177883: step: 388/77, loss: 0.00222976878285408 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Chinese: {'template': {'p': 0.9333333333333333, 'r': 0.546875, 'f1': 0.689655172413793}, 'slot': {'p': 0.5, 'r': 0.009813542688910697, 'f1': 0.0192492781520693}, 'combined': 0.013275364242806412, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Korean: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5, 'r': 0.009813542688910697, 'f1': 0.0192492781520693}, 'combined': 0.013150496955374078, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Russian: {'template': {'p': 0.9333333333333333, 'r': 0.546875, 'f1': 0.689655172413793}, 'slot': {'p': 0.5, 'r': 0.009813542688910697, 'f1': 0.0192492781520693}, 'combined': 0.013275364242806412, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 
0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:56:34.210005: step: 4/77, loss: 0.13170115649700165 2023-01-23 23:56:35.651764: step: 8/77, loss: 0.08570437878370285 2023-01-23 23:56:37.106827: step: 12/77, loss: 0.06896840035915375 2023-01-23 23:56:38.594448: step: 16/77, loss: 0.028167221695184708 2023-01-23 23:56:40.114476: step: 20/77, loss: 0.02266467735171318 2023-01-23 23:56:41.556434: step: 24/77, loss: 0.0014462533872574568 2023-01-23 23:56:42.991598: step: 28/77, loss: 0.09091892093420029 2023-01-23 23:56:44.425279: step: 32/77, loss: 0.0003704461269080639 2023-01-23 23:56:45.870478: step: 36/77, loss: 0.0035481564700603485 2023-01-23 23:56:47.328057: step: 40/77, loss: 0.03377827629446983 2023-01-23 23:56:48.751051: step: 44/77, loss: 0.011628975160419941 2023-01-23 23:56:50.231407: step: 48/77, loss: 0.0012261226074770093 2023-01-23 23:56:51.714556: step: 52/77, loss: 0.008258574642241001 2023-01-23 23:56:53.154773: step: 56/77, loss: 0.014765463769435883 
2023-01-23 23:56:54.613442: step: 60/77, loss: 0.012880018912255764 2023-01-23 23:56:56.034699: step: 64/77, loss: 0.0017860515508800745 2023-01-23 23:56:57.475171: step: 68/77, loss: 0.010468395426869392 2023-01-23 23:56:58.877245: step: 72/77, loss: 0.03757264092564583 2023-01-23 23:57:00.312954: step: 76/77, loss: 0.018443763256072998 2023-01-23 23:57:01.808695: step: 80/77, loss: 0.005401423200964928 2023-01-23 23:57:03.338209: step: 84/77, loss: 0.048929158598184586 2023-01-23 23:57:04.863159: step: 88/77, loss: 0.11744247376918793 2023-01-23 23:57:06.277170: step: 92/77, loss: 0.00185915338806808 2023-01-23 23:57:07.718529: step: 96/77, loss: 0.008274192921817303 2023-01-23 23:57:09.172401: step: 100/77, loss: 0.009979705326259136 2023-01-23 23:57:10.627772: step: 104/77, loss: 0.0325552262365818 2023-01-23 23:57:12.114543: step: 108/77, loss: 0.0005193643737584352 2023-01-23 23:57:13.576036: step: 112/77, loss: 0.026102589443325996 2023-01-23 23:57:15.057797: step: 116/77, loss: 0.004790120758116245 2023-01-23 23:57:16.563095: step: 120/77, loss: 0.020720547065138817 2023-01-23 23:57:18.002696: step: 124/77, loss: 0.013181292451918125 2023-01-23 23:57:19.397563: step: 128/77, loss: 0.15328733623027802 2023-01-23 23:57:20.883720: step: 132/77, loss: 0.006637411192059517 2023-01-23 23:57:22.338470: step: 136/77, loss: 0.026826925575733185 2023-01-23 23:57:23.726014: step: 140/77, loss: 0.002037283033132553 2023-01-23 23:57:25.170571: step: 144/77, loss: 0.002992324996739626 2023-01-23 23:57:26.591007: step: 148/77, loss: 0.015959719195961952 2023-01-23 23:57:28.101209: step: 152/77, loss: 0.012633001431822777 2023-01-23 23:57:29.510582: step: 156/77, loss: 0.002229101490229368 2023-01-23 23:57:30.868169: step: 160/77, loss: 0.005678039044141769 2023-01-23 23:57:32.265134: step: 164/77, loss: 0.07291337847709656 2023-01-23 23:57:33.677982: step: 168/77, loss: 0.06404541432857513 2023-01-23 23:57:35.089493: step: 172/77, loss: 0.04879661649465561 2023-01-23 23:57:36.571521: step: 176/77, loss: 0.004770440515130758 2023-01-23 23:57:37.937723: step: 180/77, loss: 0.07261113822460175 2023-01-23 23:57:39.334778: step: 184/77, loss: 0.09107305854558945 2023-01-23 23:57:40.765899: step: 188/77, loss: 0.03481244295835495 2023-01-23 23:57:42.176928: step: 192/77, loss: 0.026612192392349243 2023-01-23 23:57:43.684069: step: 196/77, loss: 0.013187481090426445 2023-01-23 23:57:45.058217: step: 200/77, loss: 0.007433123886585236 2023-01-23 23:57:46.461260: step: 204/77, loss: 0.014218202792108059 2023-01-23 23:57:47.852669: step: 208/77, loss: 0.023550231009721756 2023-01-23 23:57:49.346701: step: 212/77, loss: 0.06618601828813553 2023-01-23 23:57:50.785084: step: 216/77, loss: 8.782022632658482e-05 2023-01-23 23:57:52.228842: step: 220/77, loss: 0.19314192235469818 2023-01-23 23:57:53.665721: step: 224/77, loss: 0.006154694594442844 2023-01-23 23:57:55.112065: step: 228/77, loss: 0.04159718006849289 2023-01-23 23:57:56.571042: step: 232/77, loss: 0.051758646965026855 2023-01-23 23:57:58.045962: step: 236/77, loss: 0.045750297605991364 2023-01-23 23:57:59.491031: step: 240/77, loss: 0.02127264067530632 2023-01-23 23:58:00.953963: step: 244/77, loss: 0.01488424651324749 2023-01-23 23:58:02.407412: step: 248/77, loss: 0.039323072880506516 2023-01-23 23:58:03.888502: step: 252/77, loss: 0.038724955171346664 2023-01-23 23:58:05.304196: step: 256/77, loss: 0.016217941418290138 2023-01-23 23:58:06.734931: step: 260/77, loss: 0.00018854241352528334 2023-01-23 23:58:08.138154: step: 264/77, loss: 
0.029617827385663986 2023-01-23 23:58:09.608789: step: 268/77, loss: 0.014795455150306225 2023-01-23 23:58:11.021628: step: 272/77, loss: 0.027942290529608727 2023-01-23 23:58:12.458571: step: 276/77, loss: 0.03196156397461891 2023-01-23 23:58:13.889254: step: 280/77, loss: 0.018221847712993622 2023-01-23 23:58:15.380109: step: 284/77, loss: 0.007844426669180393 2023-01-23 23:58:16.839812: step: 288/77, loss: 0.025259993970394135 2023-01-23 23:58:18.239447: step: 292/77, loss: 0.0016258007381111383 2023-01-23 23:58:19.640158: step: 296/77, loss: 0.018152914941310883 2023-01-23 23:58:21.100763: step: 300/77, loss: 0.011314081028103828 2023-01-23 23:58:22.535817: step: 304/77, loss: 0.04237208515405655 2023-01-23 23:58:23.985062: step: 308/77, loss: 0.011828781105577946 2023-01-23 23:58:25.399961: step: 312/77, loss: 0.00927747879177332 2023-01-23 23:58:26.905358: step: 316/77, loss: 0.02505832351744175 2023-01-23 23:58:28.372664: step: 320/77, loss: 0.004240931943058968 2023-01-23 23:58:29.834171: step: 324/77, loss: 0.0011124876327812672 2023-01-23 23:58:31.277540: step: 328/77, loss: 0.003472943790256977 2023-01-23 23:58:32.695813: step: 332/77, loss: 0.021344980224967003 2023-01-23 23:58:34.114410: step: 336/77, loss: 0.054589562118053436 2023-01-23 23:58:35.552587: step: 340/77, loss: 0.028044283390045166 2023-01-23 23:58:36.990702: step: 344/77, loss: 0.013498909771442413 2023-01-23 23:58:38.379697: step: 348/77, loss: 0.003228034358471632 2023-01-23 23:58:39.773847: step: 352/77, loss: 0.000316204852424562 2023-01-23 23:58:41.233707: step: 356/77, loss: 0.009995604865252972 2023-01-23 23:58:42.687592: step: 360/77, loss: 0.018175769597291946 2023-01-23 23:58:44.168925: step: 364/77, loss: 0.0027893283404409885 2023-01-23 23:58:45.580354: step: 368/77, loss: 0.0030180648900568485 2023-01-23 23:58:46.946226: step: 372/77, loss: 0.015148220583796501 2023-01-23 23:58:48.377406: step: 376/77, loss: 0.00036407759762369096 2023-01-23 23:58:49.808631: step: 380/77, loss: 0.03946799784898758 2023-01-23 23:58:51.237780: step: 384/77, loss: 0.0014542130520567298 2023-01-23 23:58:52.688924: step: 388/77, loss: 0.0005557957338169217 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.948051948051948, 'r': 0.5703125, 'f1': 0.7121951219512196}, 'slot': {'p': 0.6, 'r': 0.017664376840039256, 'f1': 0.034318398474737846}, 'combined': 0.02444139598688647, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5625, 'f1': 0.7058823529411765}, 'slot': {'p': 0.6, 'r': 0.017664376840039256, 'f1': 0.034318398474737846}, 'combined': 0.024224751864520833, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.948051948051948, 'r': 0.5703125, 'f1': 0.7121951219512196}, 'slot': {'p': 0.6, 'r': 0.017664376840039256, 'f1': 0.034318398474737846}, 'combined': 0.02444139598688647, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 
0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:00:35.819732: step: 4/77, loss: 0.018476076424121857 2023-01-24 00:00:37.309010: step: 8/77, loss: 0.03965684399008751 2023-01-24 00:00:38.799498: step: 12/77, loss: 0.021786324679851532 2023-01-24 00:00:40.252712: step: 16/77, loss: 0.020817160606384277 2023-01-24 00:00:41.648695: step: 20/77, loss: 0.0024067708291113377 2023-01-24 00:00:43.159370: step: 24/77, loss: 0.004701969679445028 2023-01-24 00:00:44.660771: step: 28/77, loss: 0.0046493480913341045 2023-01-24 00:00:46.122734: step: 32/77, loss: 0.02824241667985916 2023-01-24 00:00:47.493936: step: 36/77, loss: 0.016335386782884598 2023-01-24 00:00:48.981521: step: 40/77, loss: 0.020319268107414246 2023-01-24 00:00:50.371058: step: 44/77, loss: 0.016872696578502655 2023-01-24 00:00:51.802819: step: 48/77, loss: 0.0011987756006419659 2023-01-24 
00:00:53.263041: step: 52/77, loss: 0.00891688372939825 2023-01-24 00:00:54.701902: step: 56/77, loss: 0.009940674528479576 2023-01-24 00:00:56.089797: step: 60/77, loss: 0.009649500250816345 2023-01-24 00:00:57.485757: step: 64/77, loss: 0.005740232300013304 2023-01-24 00:00:58.882557: step: 68/77, loss: 0.04176686704158783 2023-01-24 00:01:00.391321: step: 72/77, loss: 0.0452922098338604 2023-01-24 00:01:01.798665: step: 76/77, loss: 0.06259751319885254 2023-01-24 00:01:03.272115: step: 80/77, loss: 0.011904904618859291 2023-01-24 00:01:04.697107: step: 84/77, loss: 0.019501540809869766 2023-01-24 00:01:06.138346: step: 88/77, loss: 0.007173976395279169 2023-01-24 00:01:07.611791: step: 92/77, loss: 0.003331177169457078 2023-01-24 00:01:09.056216: step: 96/77, loss: 0.004057619255036116 2023-01-24 00:01:10.485971: step: 100/77, loss: 0.02370486967265606 2023-01-24 00:01:11.896958: step: 104/77, loss: 0.005920400843024254 2023-01-24 00:01:13.333062: step: 108/77, loss: 0.04308168590068817 2023-01-24 00:01:14.802636: step: 112/77, loss: 0.012136734090745449 2023-01-24 00:01:16.292307: step: 116/77, loss: 0.003824051935225725 2023-01-24 00:01:17.683959: step: 120/77, loss: 0.012575005181133747 2023-01-24 00:01:19.136665: step: 124/77, loss: 0.0017235910054296255 2023-01-24 00:01:20.523368: step: 128/77, loss: 0.021024808287620544 2023-01-24 00:01:22.007410: step: 132/77, loss: 0.018988901749253273 2023-01-24 00:01:23.446750: step: 136/77, loss: 0.014004884287714958 2023-01-24 00:01:24.877510: step: 140/77, loss: 0.02050507254898548 2023-01-24 00:01:26.304527: step: 144/77, loss: 0.0723520815372467 2023-01-24 00:01:27.706347: step: 148/77, loss: 0.0015499040018767118 2023-01-24 00:01:29.137886: step: 152/77, loss: 0.08790557086467743 2023-01-24 00:01:30.546584: step: 156/77, loss: 0.0010261915158480406 2023-01-24 00:01:31.986317: step: 160/77, loss: 0.003959516994655132 2023-01-24 00:01:33.407612: step: 164/77, loss: 0.05613886937499046 2023-01-24 00:01:34.860846: step: 168/77, loss: 0.014303348958492279 2023-01-24 00:01:36.288007: step: 172/77, loss: 0.004931567702442408 2023-01-24 00:01:37.720401: step: 176/77, loss: 0.057635094970464706 2023-01-24 00:01:39.165526: step: 180/77, loss: 0.0050613465718925 2023-01-24 00:01:40.609256: step: 184/77, loss: 0.0029486017301678658 2023-01-24 00:01:42.077730: step: 188/77, loss: 0.0020928424783051014 2023-01-24 00:01:43.520376: step: 192/77, loss: 0.006583016831427813 2023-01-24 00:01:45.021340: step: 196/77, loss: 0.016957614570856094 2023-01-24 00:01:46.500345: step: 200/77, loss: 0.0015637626638635993 2023-01-24 00:01:47.960183: step: 204/77, loss: 0.025140559300780296 2023-01-24 00:01:49.416655: step: 208/77, loss: 0.091184601187706 2023-01-24 00:01:50.819799: step: 212/77, loss: 0.009486174210906029 2023-01-24 00:01:52.214425: step: 216/77, loss: 0.004166465252637863 2023-01-24 00:01:53.683684: step: 220/77, loss: 0.011669320985674858 2023-01-24 00:01:55.149600: step: 224/77, loss: 0.001169293187558651 2023-01-24 00:01:56.519873: step: 228/77, loss: 0.012834685854613781 2023-01-24 00:01:57.948638: step: 232/77, loss: 0.010140188038349152 2023-01-24 00:01:59.362446: step: 236/77, loss: 0.015939615666866302 2023-01-24 00:02:00.826898: step: 240/77, loss: 0.02318131923675537 2023-01-24 00:02:02.230889: step: 244/77, loss: 0.013882097788155079 2023-01-24 00:02:03.640093: step: 248/77, loss: 0.01791475899517536 2023-01-24 00:02:05.165901: step: 252/77, loss: 0.012156199663877487 2023-01-24 00:02:06.635469: step: 256/77, loss: 0.015079764649271965 
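Editorial note: the logged command passes --accumulate_step 4 together with --batch_size 10, which is usually read as summing gradients over 4 consecutive batches before each optimizer update (effective batch size 40). A generic PyTorch-style sketch of that pattern, under that assumption; `model`, `loader`, and `optimizer` are placeholders and this is not the project's actual training loop:

```python
def train_one_epoch(model, loader, optimizer, accumulate_step=4):
    """Gradient accumulation: step the optimizer only every
    `accumulate_step` batches (here 4 x batch_size 10 = effective 40)."""
    model.train()
    optimizer.zero_grad()
    for i, batch in enumerate(loader, start=1):
        loss = model(**batch)              # assumes the model returns a scalar loss
        (loss / accumulate_step).backward()
        if i % accumulate_step == 0:
            optimizer.step()
            optimizer.zero_grad()
```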
2023-01-24 00:02:08.111301: step: 260/77, loss: 0.0012284107506275177 2023-01-24 00:02:09.554597: step: 264/77, loss: 0.018296556547284126 2023-01-24 00:02:11.053866: step: 268/77, loss: 0.013591526076197624 2023-01-24 00:02:12.454405: step: 272/77, loss: 0.07734925299882889 2023-01-24 00:02:13.929004: step: 276/77, loss: 0.06120207533240318 2023-01-24 00:02:15.376644: step: 280/77, loss: 0.012348676100373268 2023-01-24 00:02:16.767709: step: 284/77, loss: 0.0011046245926991105 2023-01-24 00:02:18.203711: step: 288/77, loss: 0.008927395567297935 2023-01-24 00:02:19.676315: step: 292/77, loss: 0.000859726220369339 2023-01-24 00:02:21.162136: step: 296/77, loss: 0.03434096649289131 2023-01-24 00:02:22.607047: step: 300/77, loss: 0.046841878443956375 2023-01-24 00:02:24.011441: step: 304/77, loss: 0.03707262501120567 2023-01-24 00:02:25.482574: step: 308/77, loss: 0.05187338963150978 2023-01-24 00:02:26.982193: step: 312/77, loss: 0.03167300298810005 2023-01-24 00:02:28.453204: step: 316/77, loss: 0.018084578216075897 2023-01-24 00:02:29.962726: step: 320/77, loss: 0.040085483342409134 2023-01-24 00:02:31.394194: step: 324/77, loss: 0.011672168038785458 2023-01-24 00:02:32.804639: step: 328/77, loss: 0.020293209701776505 2023-01-24 00:02:34.264918: step: 332/77, loss: 0.007086321711540222 2023-01-24 00:02:35.717248: step: 336/77, loss: 0.07502918690443039 2023-01-24 00:02:37.185625: step: 340/77, loss: 0.00554603012278676 2023-01-24 00:02:38.634411: step: 344/77, loss: 0.00422379607334733 2023-01-24 00:02:40.055339: step: 348/77, loss: 0.01804770715534687 2023-01-24 00:02:41.464119: step: 352/77, loss: 0.009498151950538158 2023-01-24 00:02:42.916187: step: 356/77, loss: 0.006640831008553505 2023-01-24 00:02:44.373522: step: 360/77, loss: 0.011323105543851852 2023-01-24 00:02:45.769687: step: 364/77, loss: 0.0036079012788832188 2023-01-24 00:02:47.212807: step: 368/77, loss: 0.018234066665172577 2023-01-24 00:02:48.641026: step: 372/77, loss: 0.0013295363169163465 2023-01-24 00:02:50.001088: step: 376/77, loss: 0.02715039998292923 2023-01-24 00:02:51.489902: step: 380/77, loss: 0.005858607590198517 2023-01-24 00:02:52.997358: step: 384/77, loss: 0.004114366136491299 2023-01-24 00:02:54.453935: step: 388/77, loss: 0.034678973257541656 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9367088607594937, 'r': 0.578125, 'f1': 0.7149758454106279}, 'slot': {'p': 0.5806451612903226, 'r': 0.017664376840039256, 'f1': 0.03428571428571429}, 'combined': 0.024513457556935812, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5703125, 'f1': 0.7087378640776699}, 'slot': {'p': 0.5333333333333333, 'r': 0.015701668302257114, 'f1': 0.030505243088655855}, 'combined': 0.021620220829824052, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5703125, 'f1': 0.7087378640776699}, 'slot': {'p': 
0.5333333333333333, 'r': 0.015701668302257114, 'f1': 0.030505243088655855}, 'combined': 0.021620220829824052, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:04:36.630897: step: 4/77, loss: 0.06111355870962143 2023-01-24 00:04:38.032054: step: 8/77, loss: 0.03258708119392395 2023-01-24 00:04:39.523295: step: 12/77, loss: 0.007520253770053387 2023-01-24 00:04:40.967328: step: 16/77, loss: 0.0033959210850298405 2023-01-24 00:04:42.406476: step: 20/77, loss: 0.018976226449012756 2023-01-24 00:04:43.827978: step: 24/77, loss: 0.022655602544546127 2023-01-24 00:04:45.301820: step: 28/77, loss: 0.0007378259324468672 2023-01-24 00:04:46.717395: step: 32/77, loss: 0.023256225511431694 2023-01-24 00:04:48.132706: step: 36/77, loss: 0.03412342816591263 2023-01-24 00:04:49.623780: step: 40/77, loss: 
0.011993163265287876 2023-01-24 00:04:51.118529: step: 44/77, loss: 0.005109499208629131 2023-01-24 00:04:52.522177: step: 48/77, loss: 0.00035834929440170527 2023-01-24 00:04:53.929088: step: 52/77, loss: 0.3323846757411957 2023-01-24 00:04:55.322264: step: 56/77, loss: 0.00893863383680582 2023-01-24 00:04:56.783025: step: 60/77, loss: 0.0043699974194169044 2023-01-24 00:04:58.198933: step: 64/77, loss: 0.022914201021194458 2023-01-24 00:04:59.620457: step: 68/77, loss: 0.0014546663733199239 2023-01-24 00:05:01.081002: step: 72/77, loss: 0.004197940230369568 2023-01-24 00:05:02.486101: step: 76/77, loss: 0.012708312831819057 2023-01-24 00:05:03.896961: step: 80/77, loss: 0.0046439156867563725 2023-01-24 00:05:05.358788: step: 84/77, loss: 0.007488596718758345 2023-01-24 00:05:06.753719: step: 88/77, loss: 0.00971380528062582 2023-01-24 00:05:08.177351: step: 92/77, loss: 0.00930837169289589 2023-01-24 00:05:09.620367: step: 96/77, loss: 0.020319797098636627 2023-01-24 00:05:11.073215: step: 100/77, loss: 0.005104791838675737 2023-01-24 00:05:12.512948: step: 104/77, loss: 0.08812321722507477 2023-01-24 00:05:14.045562: step: 108/77, loss: 0.0001489566930104047 2023-01-24 00:05:15.477580: step: 112/77, loss: 0.001770321512594819 2023-01-24 00:05:16.922085: step: 116/77, loss: 0.0025908551178872585 2023-01-24 00:05:18.351791: step: 120/77, loss: 0.000276944920187816 2023-01-24 00:05:19.851629: step: 124/77, loss: 0.0008326682145707309 2023-01-24 00:05:21.258437: step: 128/77, loss: 0.003297338727861643 2023-01-24 00:05:22.671818: step: 132/77, loss: 0.006724027916789055 2023-01-24 00:05:24.092838: step: 136/77, loss: 8.205480116885155e-05 2023-01-24 00:05:25.468554: step: 140/77, loss: 0.09621748328208923 2023-01-24 00:05:26.860354: step: 144/77, loss: 0.0013389689847826958 2023-01-24 00:05:28.316926: step: 148/77, loss: 0.0012504963669925928 2023-01-24 00:05:29.769594: step: 152/77, loss: 8.572525257477537e-05 2023-01-24 00:05:31.264013: step: 156/77, loss: 0.056684426963329315 2023-01-24 00:05:32.696261: step: 160/77, loss: 0.044627901166677475 2023-01-24 00:05:34.133829: step: 164/77, loss: 0.005165470764040947 2023-01-24 00:05:35.553984: step: 168/77, loss: 0.07160969823598862 2023-01-24 00:05:36.946626: step: 172/77, loss: 0.021577483043074608 2023-01-24 00:05:38.371743: step: 176/77, loss: 0.04253820329904556 2023-01-24 00:05:39.788238: step: 180/77, loss: 0.004028831608593464 2023-01-24 00:05:41.192900: step: 184/77, loss: 0.00423656078055501 2023-01-24 00:05:42.598237: step: 188/77, loss: 0.00010493271111045033 2023-01-24 00:05:44.027187: step: 192/77, loss: 0.06777942180633545 2023-01-24 00:05:45.517197: step: 196/77, loss: 0.06577803939580917 2023-01-24 00:05:46.981593: step: 200/77, loss: 0.019269591197371483 2023-01-24 00:05:48.421505: step: 204/77, loss: 0.0390661284327507 2023-01-24 00:05:49.821558: step: 208/77, loss: 0.013898050412535667 2023-01-24 00:05:51.293467: step: 212/77, loss: 0.028204157948493958 2023-01-24 00:05:52.782324: step: 216/77, loss: 0.005166948307305574 2023-01-24 00:05:54.359771: step: 220/77, loss: 0.014332697726786137 2023-01-24 00:05:55.810460: step: 224/77, loss: 0.05497150868177414 2023-01-24 00:05:57.203541: step: 228/77, loss: 0.012002397328615189 2023-01-24 00:05:58.625774: step: 232/77, loss: 0.011243941262364388 2023-01-24 00:06:00.066982: step: 236/77, loss: 0.001498940633609891 2023-01-24 00:06:01.572401: step: 240/77, loss: 0.02745002880692482 2023-01-24 00:06:02.952749: step: 244/77, loss: 0.023813052102923393 2023-01-24 00:06:04.443630: 
step: 248/77, loss: 0.039781540632247925 2023-01-24 00:06:05.861435: step: 252/77, loss: 0.19215527176856995 2023-01-24 00:06:07.263818: step: 256/77, loss: 0.005475187674164772 2023-01-24 00:06:08.719036: step: 260/77, loss: 0.0012005360331386328 2023-01-24 00:06:10.136057: step: 264/77, loss: 0.0016509962733834982 2023-01-24 00:06:11.628319: step: 268/77, loss: 0.0016201656544581056 2023-01-24 00:06:13.129502: step: 272/77, loss: 0.023119984194636345 2023-01-24 00:06:14.574765: step: 276/77, loss: 0.010675408877432346 2023-01-24 00:06:16.085570: step: 280/77, loss: 0.0067598833702504635 2023-01-24 00:06:17.495613: step: 284/77, loss: 0.019721917808055878 2023-01-24 00:06:18.935726: step: 288/77, loss: 0.017890213057398796 2023-01-24 00:06:20.415048: step: 292/77, loss: 0.01658380590379238 2023-01-24 00:06:21.879612: step: 296/77, loss: 0.0016131179872900248 2023-01-24 00:06:23.302211: step: 300/77, loss: 0.014928465709090233 2023-01-24 00:06:24.764255: step: 304/77, loss: 0.003837120020762086 2023-01-24 00:06:26.186804: step: 308/77, loss: 0.011906763538718224 2023-01-24 00:06:27.608431: step: 312/77, loss: 0.003964369650930166 2023-01-24 00:06:29.123773: step: 316/77, loss: 0.012319169007241726 2023-01-24 00:06:30.685784: step: 320/77, loss: 0.01478549838066101 2023-01-24 00:06:32.223420: step: 324/77, loss: 0.06367487460374832 2023-01-24 00:06:33.673306: step: 328/77, loss: 0.004723585210740566 2023-01-24 00:06:35.184893: step: 332/77, loss: 0.01060541719198227 2023-01-24 00:06:36.682540: step: 336/77, loss: 0.09059099853038788 2023-01-24 00:06:38.148812: step: 340/77, loss: 0.025290492922067642 2023-01-24 00:06:39.642298: step: 344/77, loss: 0.008649226278066635 2023-01-24 00:06:41.178505: step: 348/77, loss: 0.02813754975795746 2023-01-24 00:06:42.698050: step: 352/77, loss: 0.05314696580171585 2023-01-24 00:06:44.215589: step: 356/77, loss: 0.024964183568954468 2023-01-24 00:06:45.722199: step: 360/77, loss: 0.04871615767478943 2023-01-24 00:06:47.177856: step: 364/77, loss: 0.0009216809994541109 2023-01-24 00:06:48.608593: step: 368/77, loss: 0.03678221255540848 2023-01-24 00:06:50.059196: step: 372/77, loss: 0.00028942187782377005 2023-01-24 00:06:51.549581: step: 376/77, loss: 0.06943628937005997 2023-01-24 00:06:52.991235: step: 380/77, loss: 0.010281715542078018 2023-01-24 00:06:54.415836: step: 384/77, loss: 0.022855132818222046 2023-01-24 00:06:55.887112: step: 388/77, loss: 0.0057218074798583984 ================================================== Loss: 0.025 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9066666666666666, 'r': 0.53125, 'f1': 0.6699507389162561}, 'slot': {'p': 0.4878048780487805, 'r': 0.019627085377821395, 'f1': 0.03773584905660377}, 'combined': 0.025281159959104002, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.9066666666666666, 'r': 0.53125, 'f1': 0.6699507389162561}, 'slot': {'p': 0.4878048780487805, 'r': 0.019627085377821395, 'f1': 0.03773584905660377}, 'combined': 0.025281159959104002, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 
0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.9066666666666666, 'r': 0.53125, 'f1': 0.6699507389162561}, 'slot': {'p': 0.4878048780487805, 'r': 0.019627085377821395, 'f1': 0.03773584905660377}, 'combined': 0.025281159959104002, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:08:43.387308: step: 4/77, loss: 0.004413371905684471 2023-01-24 00:08:44.780611: step: 8/77, loss: 0.011603401042521 2023-01-24 00:08:46.158636: step: 12/77, loss: 0.0070768119767308235 2023-01-24 00:08:47.559755: step: 16/77, loss: 0.011790785938501358 2023-01-24 00:08:49.019285: step: 20/77, loss: 0.003310129977762699 2023-01-24 00:08:50.500774: step: 24/77, loss: 0.001037739566527307 2023-01-24 00:08:51.951178: step: 28/77, loss: 0.0685984268784523 2023-01-24 00:08:53.365424: 
step: 32/77, loss: 0.012599118985235691 2023-01-24 00:08:54.829147: step: 36/77, loss: 0.031620293855667114 2023-01-24 00:08:56.367071: step: 40/77, loss: 0.015515482984483242 2023-01-24 00:08:57.852582: step: 44/77, loss: 0.013255885802209377 2023-01-24 00:08:59.242872: step: 48/77, loss: 0.017563162371516228 2023-01-24 00:09:00.698183: step: 52/77, loss: 0.023024428635835648 2023-01-24 00:09:02.164446: step: 56/77, loss: 0.015319026075303555 2023-01-24 00:09:03.616261: step: 60/77, loss: 0.0038829308468848467 2023-01-24 00:09:05.106093: step: 64/77, loss: 0.010435502976179123 2023-01-24 00:09:06.604239: step: 68/77, loss: 0.004648303613066673 2023-01-24 00:09:08.102230: step: 72/77, loss: 0.0006056458223611116 2023-01-24 00:09:09.592801: step: 76/77, loss: 0.0007848583627492189 2023-01-24 00:09:11.066187: step: 80/77, loss: 0.0031027907971292734 2023-01-24 00:09:12.514661: step: 84/77, loss: 0.002980540506541729 2023-01-24 00:09:13.995195: step: 88/77, loss: 0.004327086266130209 2023-01-24 00:09:15.525808: step: 92/77, loss: 0.0061401608400046825 2023-01-24 00:09:16.959273: step: 96/77, loss: 0.03094206564128399 2023-01-24 00:09:18.414813: step: 100/77, loss: 0.010667397640645504 2023-01-24 00:09:19.906386: step: 104/77, loss: 0.002312043448910117 2023-01-24 00:09:21.345552: step: 108/77, loss: 0.02056185156106949 2023-01-24 00:09:22.756252: step: 112/77, loss: 0.013677126727998257 2023-01-24 00:09:24.188495: step: 116/77, loss: 0.03300682455301285 2023-01-24 00:09:25.638395: step: 120/77, loss: 0.002695606555789709 2023-01-24 00:09:27.065663: step: 124/77, loss: 0.0016732718795537949 2023-01-24 00:09:28.511244: step: 128/77, loss: 0.017262576147913933 2023-01-24 00:09:29.913456: step: 132/77, loss: 0.01876669004559517 2023-01-24 00:09:31.436201: step: 136/77, loss: 0.007911132648587227 2023-01-24 00:09:32.861353: step: 140/77, loss: 0.007990386337041855 2023-01-24 00:09:34.392715: step: 144/77, loss: 0.001423410139977932 2023-01-24 00:09:35.898925: step: 148/77, loss: 0.0002995043178088963 2023-01-24 00:09:37.354375: step: 152/77, loss: 0.0005054707289673388 2023-01-24 00:09:38.787211: step: 156/77, loss: 0.010499808937311172 2023-01-24 00:09:40.261542: step: 160/77, loss: 0.01981007121503353 2023-01-24 00:09:41.717148: step: 164/77, loss: 0.061028920114040375 2023-01-24 00:09:43.179391: step: 168/77, loss: 0.007777105551213026 2023-01-24 00:09:44.624177: step: 172/77, loss: 0.0684259682893753 2023-01-24 00:09:46.077226: step: 176/77, loss: 0.027019374072551727 2023-01-24 00:09:47.482667: step: 180/77, loss: 0.003009375650435686 2023-01-24 00:09:48.899067: step: 184/77, loss: 0.03607326000928879 2023-01-24 00:09:50.400379: step: 188/77, loss: 0.0006161195342428982 2023-01-24 00:09:51.876130: step: 192/77, loss: 0.03203636780381203 2023-01-24 00:09:53.313166: step: 196/77, loss: 0.009488014504313469 2023-01-24 00:09:54.774697: step: 200/77, loss: 0.0061500160954892635 2023-01-24 00:09:56.208297: step: 204/77, loss: 0.003809800138697028 2023-01-24 00:09:57.628564: step: 208/77, loss: 0.0006669530994258821 2023-01-24 00:09:59.129386: step: 212/77, loss: 0.09732852131128311 2023-01-24 00:10:00.573486: step: 216/77, loss: 0.023626577109098434 2023-01-24 00:10:02.089681: step: 220/77, loss: 0.036544617265462875 2023-01-24 00:10:03.573506: step: 224/77, loss: 0.009659359231591225 2023-01-24 00:10:05.003084: step: 228/77, loss: 0.0033210739493370056 2023-01-24 00:10:06.433588: step: 232/77, loss: 0.0032579938415437937 2023-01-24 00:10:07.906922: step: 236/77, loss: 0.03421283885836601 
2023-01-24 00:10:09.415811: step: 240/77, loss: 0.019285082817077637 2023-01-24 00:10:10.852425: step: 244/77, loss: 0.025123365223407745 2023-01-24 00:10:12.273567: step: 248/77, loss: 0.007166619878262281 2023-01-24 00:10:13.673538: step: 252/77, loss: 0.038742199540138245 2023-01-24 00:10:15.182312: step: 256/77, loss: 0.0009126511286012828 2023-01-24 00:10:16.665794: step: 260/77, loss: 0.020126396790146828 2023-01-24 00:10:18.144112: step: 264/77, loss: 0.02404876798391342 2023-01-24 00:10:19.641732: step: 268/77, loss: 0.006159630138427019 2023-01-24 00:10:21.112565: step: 272/77, loss: 0.010606272146105766 2023-01-24 00:10:22.471666: step: 276/77, loss: 0.04204922169446945 2023-01-24 00:10:23.931390: step: 280/77, loss: 0.007835128344595432 2023-01-24 00:10:25.387598: step: 284/77, loss: 0.017098350450396538 2023-01-24 00:10:26.922338: step: 288/77, loss: 0.011766848154366016 2023-01-24 00:10:28.315150: step: 292/77, loss: 0.000319849670631811 2023-01-24 00:10:29.815678: step: 296/77, loss: 0.07437402009963989 2023-01-24 00:10:31.251580: step: 300/77, loss: 0.01065000332891941 2023-01-24 00:10:32.769824: step: 304/77, loss: 0.0071182590909302235 2023-01-24 00:10:34.246994: step: 308/77, loss: 0.014701258391141891 2023-01-24 00:10:35.626288: step: 312/77, loss: 0.03189054876565933 2023-01-24 00:10:37.072839: step: 316/77, loss: 0.023664599284529686 2023-01-24 00:10:38.504163: step: 320/77, loss: 0.001820672769099474 2023-01-24 00:10:40.010879: step: 324/77, loss: 0.03477199375629425 2023-01-24 00:10:41.399917: step: 328/77, loss: 0.051144201308488846 2023-01-24 00:10:42.901794: step: 332/77, loss: 0.03953402116894722 2023-01-24 00:10:44.331738: step: 336/77, loss: 0.026595700532197952 2023-01-24 00:10:45.724404: step: 340/77, loss: 0.01947232335805893 2023-01-24 00:10:47.142198: step: 344/77, loss: 0.005510674323886633 2023-01-24 00:10:48.680029: step: 348/77, loss: 0.005318933166563511 2023-01-24 00:10:50.090805: step: 352/77, loss: 0.021326635032892227 2023-01-24 00:10:51.585437: step: 356/77, loss: 0.033761221915483475 2023-01-24 00:10:53.108761: step: 360/77, loss: 0.005709969904273748 2023-01-24 00:10:54.582411: step: 364/77, loss: 0.012541381642222404 2023-01-24 00:10:56.019962: step: 368/77, loss: 0.002873632125556469 2023-01-24 00:10:57.525036: step: 372/77, loss: 0.010858142748475075 2023-01-24 00:10:59.017240: step: 376/77, loss: 0.0023356713354587555 2023-01-24 00:11:00.463817: step: 380/77, loss: 0.04005616530776024 2023-01-24 00:11:01.910746: step: 384/77, loss: 0.0035838475450873375 2023-01-24 00:11:03.287028: step: 388/77, loss: 0.027848560363054276 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.515625, 'f1': 0.6633165829145728}, 'slot': {'p': 0.5625, 'r': 0.017664376840039256, 'f1': 0.03425309229305423}, 'combined': 0.022720644134086223, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.515625, 'f1': 0.6633165829145728}, 'slot': {'p': 0.5625, 'r': 0.017664376840039256, 'f1': 0.03425309229305423}, 'combined': 0.022720644134086223, 'epoch': 10} 
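Editorial note: each evaluation record reports precision/recall/F1 at the template and slot levels plus a 'combined' value. The logged numbers are consistent with F1 = 2pr/(p+r) at each level and combined = template_F1 × slot_F1; the small check below recomputes the epoch-10 Test Korean record just printed (values copied from the log, function name `f1` is illustrative):

```python
def f1(p, r):
    """Standard F1 from precision and recall."""
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

template_f1 = f1(0.9295774647887324, 0.515625)        # ~0.6633165829145728
slot_f1 = f1(0.5625, 0.017664376840039256)            # ~0.03425309229305423
combined = template_f1 * slot_f1                      # ~0.022720644134086223
```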
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.515625, 'f1': 0.6633165829145728}, 'slot': {'p': 0.59375, 'r': 0.018645731108930325, 'f1': 0.03615604186489058}, 'combined': 0.02398290214153546, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:12:48.732092: step: 4/77, loss: 0.0002601227897685021 2023-01-24 00:12:50.166964: step: 8/77, loss: 0.010210440494120121 2023-01-24 00:12:51.617771: step: 12/77, loss: 0.0026236511766910553 2023-01-24 00:12:53.071588: step: 16/77, loss: 0.003634335473179817 2023-01-24 00:12:54.576067: step: 20/77, loss: 0.0036590697709470987 2023-01-24 
00:12:56.057686: step: 24/77, loss: 0.016517236828804016 2023-01-24 00:12:57.491088: step: 28/77, loss: 0.02679351158440113 2023-01-24 00:12:58.961234: step: 32/77, loss: 0.00783599354326725 2023-01-24 00:13:00.362286: step: 36/77, loss: 0.005712728947401047 2023-01-24 00:13:01.797221: step: 40/77, loss: 0.02416357956826687 2023-01-24 00:13:03.283676: step: 44/77, loss: 0.004781464114785194 2023-01-24 00:13:04.737272: step: 48/77, loss: 0.0013125156983733177 2023-01-24 00:13:06.173560: step: 52/77, loss: 0.008379247039556503 2023-01-24 00:13:07.697165: step: 56/77, loss: 0.02059927023947239 2023-01-24 00:13:09.127949: step: 60/77, loss: 0.017647510394454002 2023-01-24 00:13:10.518935: step: 64/77, loss: 0.003159626154229045 2023-01-24 00:13:11.958942: step: 68/77, loss: 0.0001272601803066209 2023-01-24 00:13:13.425555: step: 72/77, loss: 0.010219249874353409 2023-01-24 00:13:14.868755: step: 76/77, loss: 0.0011884287232533097 2023-01-24 00:13:16.299473: step: 80/77, loss: 0.010820185765624046 2023-01-24 00:13:17.731606: step: 84/77, loss: 0.03870443254709244 2023-01-24 00:13:19.229789: step: 88/77, loss: 0.00692142266780138 2023-01-24 00:13:20.717982: step: 92/77, loss: 0.009901397861540318 2023-01-24 00:13:22.168488: step: 96/77, loss: 0.0022363499738276005 2023-01-24 00:13:23.678033: step: 100/77, loss: 0.03652594983577728 2023-01-24 00:13:25.134485: step: 104/77, loss: 0.05372604727745056 2023-01-24 00:13:26.646950: step: 108/77, loss: 0.0008952724747359753 2023-01-24 00:13:28.122689: step: 112/77, loss: 0.2615503668785095 2023-01-24 00:13:29.567867: step: 116/77, loss: 0.00021023042791057378 2023-01-24 00:13:31.004369: step: 120/77, loss: 0.012398790568113327 2023-01-24 00:13:32.550247: step: 124/77, loss: 0.013235787861049175 2023-01-24 00:13:34.049835: step: 128/77, loss: 0.0010583129478618503 2023-01-24 00:13:35.496723: step: 132/77, loss: 0.08256355673074722 2023-01-24 00:13:36.956688: step: 136/77, loss: 0.05031026154756546 2023-01-24 00:13:38.461271: step: 140/77, loss: 0.015843644738197327 2023-01-24 00:13:39.891766: step: 144/77, loss: 0.07007738202810287 2023-01-24 00:13:41.375770: step: 148/77, loss: 0.0006344180437736213 2023-01-24 00:13:42.833591: step: 152/77, loss: 0.002021482679992914 2023-01-24 00:13:44.287633: step: 156/77, loss: 0.04340111464262009 2023-01-24 00:13:45.738469: step: 160/77, loss: 0.05992339551448822 2023-01-24 00:13:47.241493: step: 164/77, loss: 0.0021300525404512882 2023-01-24 00:13:48.643978: step: 168/77, loss: 0.005749665666371584 2023-01-24 00:13:50.162057: step: 172/77, loss: 0.0012523261830210686 2023-01-24 00:13:51.668260: step: 176/77, loss: 0.003278427990153432 2023-01-24 00:13:53.113616: step: 180/77, loss: 0.005797204561531544 2023-01-24 00:13:54.563481: step: 184/77, loss: 0.004608628340065479 2023-01-24 00:13:56.058265: step: 188/77, loss: 0.03436815366148949 2023-01-24 00:13:57.491257: step: 192/77, loss: 0.00024082028539851308 2023-01-24 00:13:58.991922: step: 196/77, loss: 0.06575880199670792 2023-01-24 00:14:00.404944: step: 200/77, loss: 0.008478851988911629 2023-01-24 00:14:01.906969: step: 204/77, loss: 0.00896163284778595 2023-01-24 00:14:03.411697: step: 208/77, loss: 0.008419353514909744 2023-01-24 00:14:04.847387: step: 212/77, loss: 0.006319502368569374 2023-01-24 00:14:06.282327: step: 216/77, loss: 0.05734553560614586 2023-01-24 00:14:07.731896: step: 220/77, loss: 0.015883300453424454 2023-01-24 00:14:09.214442: step: 224/77, loss: 0.05923188850283623 2023-01-24 00:14:10.640775: step: 228/77, loss: 0.006545787677168846 
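Editorial note: the "Current best result" block repeated after every epoch still points at epoch 2, which is consistent with the best checkpoint being selected by the dev 'combined' score: later epochs only tie (0.0518…) or fall below it, so a strict-improvement rule keeps epoch 2. A minimal sketch of that kind of tracking, assuming dictionary keys as printed in the log; the function name is illustrative, not from train.py:

```python
def update_best(best, epoch, dev_scores, test_scores):
    """Keep, per language, the epoch with the highest dev 'combined' score.
    `best` maps language -> {'dev': ..., 'test': ..., 'epoch': ...}."""
    for lang, dev in dev_scores.items():
        if lang not in best or dev['combined'] > best[lang]['dev']['combined']:
            best[lang] = {'dev': dev, 'test': test_scores[lang], 'epoch': epoch}
    return best
```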
2023-01-24 00:14:12.099235: step: 232/77, loss: 0.004563986789435148 2023-01-24 00:14:13.568756: step: 236/77, loss: 0.0011642567114904523 2023-01-24 00:14:14.958602: step: 240/77, loss: 0.009855842217803001 2023-01-24 00:14:16.386239: step: 244/77, loss: 0.0019316822290420532 2023-01-24 00:14:17.853914: step: 248/77, loss: 0.0028050937689840794 2023-01-24 00:14:19.354668: step: 252/77, loss: 0.017084931954741478 2023-01-24 00:14:20.849297: step: 256/77, loss: 0.0007316049304790795 2023-01-24 00:14:22.360149: step: 260/77, loss: 0.030099056661128998 2023-01-24 00:14:23.874192: step: 264/77, loss: 0.035914164036512375 2023-01-24 00:14:25.279528: step: 268/77, loss: 0.07876569032669067 2023-01-24 00:14:26.696198: step: 272/77, loss: 0.006000447552651167 2023-01-24 00:14:28.133456: step: 276/77, loss: 0.007713375613093376 2023-01-24 00:14:29.594387: step: 280/77, loss: 0.0012402540305629373 2023-01-24 00:14:31.101731: step: 284/77, loss: 0.0010725243482738733 2023-01-24 00:14:32.619278: step: 288/77, loss: 0.05046703666448593 2023-01-24 00:14:34.043778: step: 292/77, loss: 0.0003727408475242555 2023-01-24 00:14:35.503922: step: 296/77, loss: 0.009448207914829254 2023-01-24 00:14:36.901239: step: 300/77, loss: 0.01585238240659237 2023-01-24 00:14:38.429657: step: 304/77, loss: 0.07688168436288834 2023-01-24 00:14:39.896271: step: 308/77, loss: 0.12805522978305817 2023-01-24 00:14:41.308065: step: 312/77, loss: 0.01632690243422985 2023-01-24 00:14:42.793476: step: 316/77, loss: 0.014750231988728046 2023-01-24 00:14:44.227716: step: 320/77, loss: 0.010419289581477642 2023-01-24 00:14:45.737922: step: 324/77, loss: 0.016786780208349228 2023-01-24 00:14:47.233020: step: 328/77, loss: 0.0032613922376185656 2023-01-24 00:14:48.697102: step: 332/77, loss: 0.00038973920163698494 2023-01-24 00:14:50.089909: step: 336/77, loss: 0.03285951167345047 2023-01-24 00:14:51.551482: step: 340/77, loss: 0.030117888003587723 2023-01-24 00:14:53.029132: step: 344/77, loss: 0.00024034206580836326 2023-01-24 00:14:54.459993: step: 348/77, loss: 0.0005765100358985364 2023-01-24 00:14:55.993790: step: 352/77, loss: 0.019205616787075996 2023-01-24 00:14:57.425094: step: 356/77, loss: 0.005558905191719532 2023-01-24 00:14:58.889592: step: 360/77, loss: 0.004256248939782381 2023-01-24 00:15:00.334237: step: 364/77, loss: 0.015143109485507011 2023-01-24 00:15:01.768030: step: 368/77, loss: 0.0031705782748758793 2023-01-24 00:15:03.229447: step: 372/77, loss: 8.148000051733106e-05 2023-01-24 00:15:04.646104: step: 376/77, loss: 3.1818810384720564e-05 2023-01-24 00:15:06.083442: step: 380/77, loss: 3.5136061342200264e-05 2023-01-24 00:15:07.561666: step: 384/77, loss: 0.0008394332253374159 2023-01-24 00:15:08.981734: step: 388/77, loss: 0.058147720992565155 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.875, 'r': 0.546875, 'f1': 0.6730769230769231}, 'slot': {'p': 0.5666666666666667, 'r': 0.016683022571148183, 'f1': 0.032411820781696854}, 'combined': 0.02181564860306519, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.8641975308641975, 'r': 
0.546875, 'f1': 0.6698564593301436}, 'slot': {'p': 0.5714285714285714, 'r': 0.015701668302257114, 'f1': 0.030563514804202475}, 'combined': 0.0204731678114275, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.8641975308641975, 'r': 0.546875, 'f1': 0.6698564593301436}, 'slot': {'p': 0.5666666666666667, 'r': 0.016683022571148183, 'f1': 0.032411820781696854}, 'combined': 0.02171126750927062, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:16:54.606983: step: 4/77, loss: 0.0016438113525509834 2023-01-24 00:16:56.068338: step: 8/77, loss: 0.021829890087246895 2023-01-24 00:16:57.531723: step: 12/77, 
loss: 0.004034928511828184 2023-01-24 00:16:59.032611: step: 16/77, loss: 4.006828476121882e-06 2023-01-24 00:17:00.421302: step: 20/77, loss: 0.013220297172665596 2023-01-24 00:17:01.886945: step: 24/77, loss: 0.004591222852468491 2023-01-24 00:17:03.321405: step: 28/77, loss: 0.04591258987784386 2023-01-24 00:17:04.731263: step: 32/77, loss: 0.09675614535808563 2023-01-24 00:17:06.180453: step: 36/77, loss: 0.0018667132826521993 2023-01-24 00:17:07.622859: step: 40/77, loss: 0.003975985571742058 2023-01-24 00:17:09.127404: step: 44/77, loss: 0.07758969068527222 2023-01-24 00:17:10.543758: step: 48/77, loss: 0.014692619442939758 2023-01-24 00:17:12.027663: step: 52/77, loss: 0.00668076379224658 2023-01-24 00:17:13.483034: step: 56/77, loss: 0.03895683214068413 2023-01-24 00:17:14.954518: step: 60/77, loss: 0.00016144484106916934 2023-01-24 00:17:16.415178: step: 64/77, loss: 0.009363364428281784 2023-01-24 00:17:17.830504: step: 68/77, loss: 0.01680179126560688 2023-01-24 00:17:19.232816: step: 72/77, loss: 0.037707582116127014 2023-01-24 00:17:20.688223: step: 76/77, loss: 0.0043097869493067265 2023-01-24 00:17:22.134648: step: 80/77, loss: 0.00547909876331687 2023-01-24 00:17:23.580837: step: 84/77, loss: 0.013198956847190857 2023-01-24 00:17:25.089450: step: 88/77, loss: 0.15006424486637115 2023-01-24 00:17:26.604324: step: 92/77, loss: 0.023897048085927963 2023-01-24 00:17:28.052383: step: 96/77, loss: 0.030548155307769775 2023-01-24 00:17:29.450121: step: 100/77, loss: 0.0020266990177333355 2023-01-24 00:17:30.913146: step: 104/77, loss: 0.0009935040725395083 2023-01-24 00:17:32.438856: step: 108/77, loss: 0.015393025241792202 2023-01-24 00:17:33.905031: step: 112/77, loss: 0.033583465963602066 2023-01-24 00:17:35.374510: step: 116/77, loss: 0.0640583410859108 2023-01-24 00:17:36.807574: step: 120/77, loss: 0.019880419597029686 2023-01-24 00:17:38.307190: step: 124/77, loss: 0.007873378694057465 2023-01-24 00:17:39.754546: step: 128/77, loss: 0.00377391604706645 2023-01-24 00:17:41.169122: step: 132/77, loss: 0.04253475368022919 2023-01-24 00:17:42.631616: step: 136/77, loss: 0.0010281100403517485 2023-01-24 00:17:44.098539: step: 140/77, loss: 5.9074875025544316e-05 2023-01-24 00:17:45.521421: step: 144/77, loss: 0.004570574499666691 2023-01-24 00:17:46.980055: step: 148/77, loss: 0.019134998321533203 2023-01-24 00:17:48.424451: step: 152/77, loss: 0.01529659703373909 2023-01-24 00:17:49.848359: step: 156/77, loss: 0.010097681544721127 2023-01-24 00:17:51.369213: step: 160/77, loss: 0.003892091568559408 2023-01-24 00:17:52.809591: step: 164/77, loss: 0.008095276542007923 2023-01-24 00:17:54.247795: step: 168/77, loss: 0.006381608545780182 2023-01-24 00:17:55.730120: step: 172/77, loss: 0.00017421241500414908 2023-01-24 00:17:57.250565: step: 176/77, loss: 0.010663943365216255 2023-01-24 00:17:58.692030: step: 180/77, loss: 0.002063462045043707 2023-01-24 00:18:00.137332: step: 184/77, loss: 0.0066839721985161304 2023-01-24 00:18:01.558046: step: 188/77, loss: 0.035569749772548676 2023-01-24 00:18:02.993184: step: 192/77, loss: 0.02284320630133152 2023-01-24 00:18:04.464881: step: 196/77, loss: 0.011159653775393963 2023-01-24 00:18:05.892660: step: 200/77, loss: 0.0004205276200082153 2023-01-24 00:18:07.376932: step: 204/77, loss: 0.0013540438376367092 2023-01-24 00:18:08.849491: step: 208/77, loss: 0.010324251838028431 2023-01-24 00:18:10.424075: step: 212/77, loss: 0.042814191430807114 2023-01-24 00:18:11.842312: step: 216/77, loss: 0.009010246023535728 2023-01-24 00:18:13.339555: 
step: 220/77, loss: 0.03980337828397751 2023-01-24 00:18:14.784680: step: 224/77, loss: 0.006109605543315411 2023-01-24 00:18:16.240242: step: 228/77, loss: 0.011437594890594482 2023-01-24 00:18:17.672860: step: 232/77, loss: 0.022036511451005936 2023-01-24 00:18:19.116339: step: 236/77, loss: 0.006096334662288427 2023-01-24 00:18:20.565969: step: 240/77, loss: 7.65419281378854e-06 2023-01-24 00:18:22.044569: step: 244/77, loss: 0.0005192124517634511 2023-01-24 00:18:23.420313: step: 248/77, loss: 0.012744538486003876 2023-01-24 00:18:24.926432: step: 252/77, loss: 7.806696521583945e-05 2023-01-24 00:18:26.416253: step: 256/77, loss: 0.016540756449103355 2023-01-24 00:18:27.893473: step: 260/77, loss: 0.0001507106062490493 2023-01-24 00:18:29.341204: step: 264/77, loss: 0.08338964730501175 2023-01-24 00:18:30.870202: step: 268/77, loss: 0.039350226521492004 2023-01-24 00:18:32.267995: step: 272/77, loss: 0.01819436624646187 2023-01-24 00:18:33.732618: step: 276/77, loss: 0.06712066382169724 2023-01-24 00:18:35.152181: step: 280/77, loss: 0.00011724078649422154 2023-01-24 00:18:36.631414: step: 284/77, loss: 0.0063972026109695435 2023-01-24 00:18:38.123145: step: 288/77, loss: 4.31464723078534e-05 2023-01-24 00:18:39.541247: step: 292/77, loss: 1.0793160981847905e-05 2023-01-24 00:18:40.958612: step: 296/77, loss: 0.0031588266137987375 2023-01-24 00:18:42.443482: step: 300/77, loss: 5.132694059284404e-05 2023-01-24 00:18:43.936199: step: 304/77, loss: 0.0010889836121350527 2023-01-24 00:18:45.404078: step: 308/77, loss: 0.0011067269369959831 2023-01-24 00:18:46.903441: step: 312/77, loss: 0.002232741564512253 2023-01-24 00:18:48.408463: step: 316/77, loss: 0.009018322452902794 2023-01-24 00:18:49.748121: step: 320/77, loss: 0.04016238823533058 2023-01-24 00:18:51.235692: step: 324/77, loss: 0.02760501578450203 2023-01-24 00:18:52.715138: step: 328/77, loss: 0.021644847467541695 2023-01-24 00:18:54.143818: step: 332/77, loss: 0.007411382161080837 2023-01-24 00:18:55.522065: step: 336/77, loss: 0.0015520785236731172 2023-01-24 00:18:57.016543: step: 340/77, loss: 0.0025608709547668695 2023-01-24 00:18:58.509489: step: 344/77, loss: 0.07523033022880554 2023-01-24 00:19:00.001836: step: 348/77, loss: 0.0020249553490430117 2023-01-24 00:19:01.466668: step: 352/77, loss: 0.0007536716875620186 2023-01-24 00:19:02.924699: step: 356/77, loss: 0.0054727462120354176 2023-01-24 00:19:04.397430: step: 360/77, loss: 0.010609529912471771 2023-01-24 00:19:05.843619: step: 364/77, loss: 0.0005191811360418797 2023-01-24 00:19:07.279408: step: 368/77, loss: 3.0023405997781083e-05 2023-01-24 00:19:08.706409: step: 372/77, loss: 1.870140476967208e-05 2023-01-24 00:19:10.145618: step: 376/77, loss: 0.0015816589584574103 2023-01-24 00:19:11.605342: step: 380/77, loss: 0.0009871306829154491 2023-01-24 00:19:13.109570: step: 384/77, loss: 0.013140087947249413 2023-01-24 00:19:14.568580: step: 388/77, loss: 0.05055278539657593 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Chinese: {'template': {'p': 0.9315068493150684, 'r': 0.53125, 'f1': 0.6766169154228856}, 'slot': {'p': 0.5405405405405406, 'r': 0.019627085377821395, 'f1': 0.03787878787878788}, 'combined': 0.025629428614503243, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 
0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Korean: {'template': {'p': 0.9315068493150684, 'r': 0.53125, 'f1': 0.6766169154228856}, 'slot': {'p': 0.5428571428571428, 'r': 0.018645731108930325, 'f1': 0.036053130929791274}, 'combined': 0.024394158241052805, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Russian: {'template': {'p': 0.9305555555555556, 'r': 0.5234375, 'f1': 0.6699999999999999}, 'slot': {'p': 0.5405405405405406, 'r': 0.019627085377821395, 'f1': 0.03787878787878788}, 'combined': 0.025378787878787876, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 
--learning_rate 2e-4 2023-01-24 00:21:00.171392: step: 4/77, loss: 0.0028693326748907566 2023-01-24 00:21:01.625943: step: 8/77, loss: 0.00885781366378069 2023-01-24 00:21:03.100594: step: 12/77, loss: 0.018565163016319275 2023-01-24 00:21:04.608508: step: 16/77, loss: 0.003598729381337762 2023-01-24 00:21:06.092064: step: 20/77, loss: 0.020400619134306908 2023-01-24 00:21:07.580102: step: 24/77, loss: 0.030392616987228394 2023-01-24 00:21:09.063317: step: 28/77, loss: 0.018394771963357925 2023-01-24 00:21:10.454844: step: 32/77, loss: 0.010670867748558521 2023-01-24 00:21:11.946850: step: 36/77, loss: 0.0007476868922822177 2023-01-24 00:21:13.407809: step: 40/77, loss: 0.0008807579288259149 2023-01-24 00:21:14.835381: step: 44/77, loss: 0.001965353498235345 2023-01-24 00:21:16.316486: step: 48/77, loss: 0.006215990521013737 2023-01-24 00:21:17.794062: step: 52/77, loss: 0.0004018288746010512 2023-01-24 00:21:19.273884: step: 56/77, loss: 0.0014737469609826803 2023-01-24 00:21:20.702383: step: 60/77, loss: 0.11779823899269104 2023-01-24 00:21:22.177979: step: 64/77, loss: 0.0219552181661129 2023-01-24 00:21:23.574739: step: 68/77, loss: 0.0028365645557641983 2023-01-24 00:21:25.015899: step: 72/77, loss: 0.030482318252325058 2023-01-24 00:21:26.523749: step: 76/77, loss: 0.3921399712562561 2023-01-24 00:21:27.993928: step: 80/77, loss: 0.062012821435928345 2023-01-24 00:21:29.423447: step: 84/77, loss: 0.01332969218492508 2023-01-24 00:21:30.919374: step: 88/77, loss: 0.01111157238483429 2023-01-24 00:21:32.355596: step: 92/77, loss: 0.0018552043475210667 2023-01-24 00:21:33.870481: step: 96/77, loss: 0.014295405708253384 2023-01-24 00:21:35.234082: step: 100/77, loss: 0.06454520672559738 2023-01-24 00:21:36.666279: step: 104/77, loss: 0.0072874510660767555 2023-01-24 00:21:38.064104: step: 108/77, loss: 0.00016964730457402766 2023-01-24 00:21:39.503082: step: 112/77, loss: 0.0008886498399078846 2023-01-24 00:21:40.922609: step: 116/77, loss: 0.03283444419503212 2023-01-24 00:21:42.348377: step: 120/77, loss: 0.0010433443821966648 2023-01-24 00:21:43.724336: step: 124/77, loss: 0.009966873563826084 2023-01-24 00:21:45.174310: step: 128/77, loss: 0.0013519097119569778 2023-01-24 00:21:46.691130: step: 132/77, loss: 0.00016006355872377753 2023-01-24 00:21:48.149965: step: 136/77, loss: 0.045845817774534225 2023-01-24 00:21:49.682855: step: 140/77, loss: 0.0009190597338601947 2023-01-24 00:21:51.162645: step: 144/77, loss: 0.010922224260866642 2023-01-24 00:21:52.651394: step: 148/77, loss: 0.022966429591178894 2023-01-24 00:21:54.118754: step: 152/77, loss: 0.003330930834636092 2023-01-24 00:21:55.632635: step: 156/77, loss: 0.0006601756322197616 2023-01-24 00:21:57.077768: step: 160/77, loss: 0.024167301133275032 2023-01-24 00:21:58.489964: step: 164/77, loss: 0.032688651233911514 2023-01-24 00:21:59.923329: step: 168/77, loss: 0.012217102572321892 2023-01-24 00:22:01.388100: step: 172/77, loss: 0.031610578298568726 2023-01-24 00:22:02.730987: step: 176/77, loss: 0.012368066236376762 2023-01-24 00:22:04.210832: step: 180/77, loss: 0.002754107117652893 2023-01-24 00:22:05.655314: step: 184/77, loss: 0.007287105079740286 2023-01-24 00:22:07.173234: step: 188/77, loss: 0.0005752938450314105 2023-01-24 00:22:08.701042: step: 192/77, loss: 0.0035592676140367985 2023-01-24 00:22:10.081937: step: 196/77, loss: 0.0022799689322710037 2023-01-24 00:22:11.543311: step: 200/77, loss: 0.04851929470896721 2023-01-24 00:22:13.040024: step: 204/77, loss: 0.002934606047347188 2023-01-24 00:22:14.529478: 
step: 208/77, loss: 0.0063961283303797245 2023-01-24 00:22:15.984767: step: 212/77, loss: 0.00015372905181720853 2023-01-24 00:22:17.508740: step: 216/77, loss: 0.0050109392032027245 2023-01-24 00:22:19.043840: step: 220/77, loss: 0.0013842899352312088 2023-01-24 00:22:20.485001: step: 224/77, loss: 0.0016586286947131157 2023-01-24 00:22:21.919199: step: 228/77, loss: 0.04991191625595093 2023-01-24 00:22:23.435477: step: 232/77, loss: 0.028952427208423615 2023-01-24 00:22:24.867340: step: 236/77, loss: 0.0029057434294372797 2023-01-24 00:22:26.363444: step: 240/77, loss: 7.611663932038937e-06 2023-01-24 00:22:27.842630: step: 244/77, loss: 0.008669132366776466 2023-01-24 00:22:29.361685: step: 248/77, loss: 0.06115046888589859 2023-01-24 00:22:30.808599: step: 252/77, loss: 0.0016680802218616009 2023-01-24 00:22:32.228088: step: 256/77, loss: 0.013491089455783367 2023-01-24 00:22:33.686273: step: 260/77, loss: 0.010971372947096825 2023-01-24 00:22:35.215700: step: 264/77, loss: 0.019208434969186783 2023-01-24 00:22:36.643625: step: 268/77, loss: 0.0001701193832559511 2023-01-24 00:22:38.125321: step: 272/77, loss: 0.01278771460056305 2023-01-24 00:22:39.581839: step: 276/77, loss: 3.134423604933545e-05 2023-01-24 00:22:41.103715: step: 280/77, loss: 0.00923230778425932 2023-01-24 00:22:42.573362: step: 284/77, loss: 0.003195566590875387 2023-01-24 00:22:44.017900: step: 288/77, loss: 0.00010174508497584611 2023-01-24 00:22:45.434469: step: 292/77, loss: 0.036582957953214645 2023-01-24 00:22:46.932923: step: 296/77, loss: 0.00014206249034032226 2023-01-24 00:22:48.363475: step: 300/77, loss: 0.03389093652367592 2023-01-24 00:22:49.827242: step: 304/77, loss: 0.06591126322746277 2023-01-24 00:22:51.302022: step: 308/77, loss: 0.0071951295249164104 2023-01-24 00:22:52.820106: step: 312/77, loss: 0.01362125389277935 2023-01-24 00:22:54.218381: step: 316/77, loss: 0.044941496104002 2023-01-24 00:22:55.727676: step: 320/77, loss: 0.000544301641639322 2023-01-24 00:22:57.203324: step: 324/77, loss: 0.012037895619869232 2023-01-24 00:22:58.709922: step: 328/77, loss: 0.0004972864990122616 2023-01-24 00:23:00.196604: step: 332/77, loss: 0.02681150659918785 2023-01-24 00:23:01.638307: step: 336/77, loss: 0.059612877666950226 2023-01-24 00:23:03.069728: step: 340/77, loss: 0.03380858525633812 2023-01-24 00:23:04.514283: step: 344/77, loss: 0.00674303388223052 2023-01-24 00:23:06.067874: step: 348/77, loss: 0.00896853394806385 2023-01-24 00:23:07.515191: step: 352/77, loss: 0.04173743352293968 2023-01-24 00:23:09.111543: step: 356/77, loss: 0.03315752372145653 2023-01-24 00:23:10.602361: step: 360/77, loss: 0.0027552645187824965 2023-01-24 00:23:12.061403: step: 364/77, loss: 0.022793779149651527 2023-01-24 00:23:13.493508: step: 368/77, loss: 0.01640704646706581 2023-01-24 00:23:14.982398: step: 372/77, loss: 0.00779336504638195 2023-01-24 00:23:16.494340: step: 376/77, loss: 0.007757310755550861 2023-01-24 00:23:17.940811: step: 380/77, loss: 0.01880963332951069 2023-01-24 00:23:19.425905: step: 384/77, loss: 0.014945488423109055 2023-01-24 00:23:20.896397: step: 388/77, loss: 0.00010710747301345691 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 0.5625, 'f1': 0.7058823529411765}, 'slot': {'p': 
0.5135135135135135, 'r': 0.018645731108930325, 'f1': 0.035984848484848495}, 'combined': 0.025401069518716585, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9342105263157895, 'r': 0.5546875, 'f1': 0.6960784313725491}, 'slot': {'p': 0.4864864864864865, 'r': 0.017664376840039256, 'f1': 0.03409090909090909}, 'combined': 0.023729946524064172, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.935064935064935, 'r': 0.5625, 'f1': 0.702439024390244}, 'slot': {'p': 0.5135135135135135, 'r': 0.018645731108930325, 'f1': 0.035984848484848495}, 'combined': 0.025277161862527726, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 14 command: python 
train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:25:07.494291: step: 4/77, loss: 0.005267225205898285 2023-01-24 00:25:08.974929: step: 8/77, loss: 0.011591697111725807 2023-01-24 00:25:10.391863: step: 12/77, loss: 0.0173267163336277 2023-01-24 00:25:11.912163: step: 16/77, loss: 0.02167614735662937 2023-01-24 00:25:13.377950: step: 20/77, loss: 0.0008013755432330072 2023-01-24 00:25:14.818108: step: 24/77, loss: 5.200561281526461e-05 2023-01-24 00:25:16.254525: step: 28/77, loss: 0.0007757511339150369 2023-01-24 00:25:17.748551: step: 32/77, loss: 0.022520437836647034 2023-01-24 00:25:19.179812: step: 36/77, loss: 0.0003649297577794641 2023-01-24 00:25:20.682718: step: 40/77, loss: 0.009602165780961514 2023-01-24 00:25:22.210391: step: 44/77, loss: 0.0013668169267475605 2023-01-24 00:25:23.648159: step: 48/77, loss: 0.05336320400238037 2023-01-24 00:25:25.087282: step: 52/77, loss: 0.009318229742348194 2023-01-24 00:25:26.536078: step: 56/77, loss: 0.0022584404796361923 2023-01-24 00:25:27.996012: step: 60/77, loss: 0.0013622129335999489 2023-01-24 00:25:29.451586: step: 64/77, loss: 0.0027268631383776665 2023-01-24 00:25:30.948676: step: 68/77, loss: 0.0006924830377101898 2023-01-24 00:25:32.460849: step: 72/77, loss: 0.001636262284591794 2023-01-24 00:25:33.916525: step: 76/77, loss: 0.023755662143230438 2023-01-24 00:25:35.318741: step: 80/77, loss: 0.0003746433067135513 2023-01-24 00:25:36.792684: step: 84/77, loss: 0.0005386627744883299 2023-01-24 00:25:38.187274: step: 88/77, loss: 0.00035149790346622467 2023-01-24 00:25:39.682894: step: 92/77, loss: 0.004147316329181194 2023-01-24 00:25:41.077622: step: 96/77, loss: 0.0008540649432688951 2023-01-24 00:25:42.585940: step: 100/77, loss: 0.021562719717621803 2023-01-24 00:25:44.083241: step: 104/77, loss: 0.011822369880974293 2023-01-24 00:25:45.571783: step: 108/77, loss: 0.0012906633783131838 2023-01-24 00:25:47.028271: step: 112/77, loss: 0.05661414936184883 2023-01-24 00:25:48.520515: step: 116/77, loss: 0.0007475563324987888 2023-01-24 00:25:49.952705: step: 120/77, loss: 0.003470787312835455 2023-01-24 00:25:51.462170: step: 124/77, loss: 0.04624112322926521 2023-01-24 00:25:52.957458: step: 128/77, loss: 7.761608139844611e-05 2023-01-24 00:25:54.417520: step: 132/77, loss: 0.009200241416692734 2023-01-24 00:25:55.906381: step: 136/77, loss: 0.00542981643229723 2023-01-24 00:25:57.352814: step: 140/77, loss: 3.0431778213824145e-05 2023-01-24 00:25:58.863884: step: 144/77, loss: 0.006928062532097101 2023-01-24 00:26:00.304279: step: 148/77, loss: 0.09509888291358948 2023-01-24 00:26:01.742523: step: 152/77, loss: 0.0197527464479208 2023-01-24 00:26:03.185225: step: 156/77, loss: 0.0004752454406116158 2023-01-24 00:26:04.636119: step: 160/77, loss: 0.0003356131783220917 2023-01-24 00:26:06.069521: step: 164/77, loss: 0.0012678257189691067 2023-01-24 00:26:07.516415: step: 168/77, loss: 0.00036199152236804366 2023-01-24 00:26:08.963240: step: 172/77, loss: 0.03243018686771393 2023-01-24 00:26:10.385336: step: 176/77, loss: 0.01618010364472866 2023-01-24 00:26:11.899525: step: 180/77, loss: 0.007925470359623432 2023-01-24 00:26:13.335772: step: 184/77, loss: 0.001082070404663682 2023-01-24 00:26:14.761955: step: 188/77, loss: 0.002177658025175333 2023-01-24 00:26:16.211338: step: 192/77, loss: 2.7793914341600612e-05 2023-01-24 00:26:17.747367: step: 196/77, 
loss: 0.006863879971206188 2023-01-24 00:26:19.139284: step: 200/77, loss: 0.00818274449557066 2023-01-24 00:26:20.551082: step: 204/77, loss: 0.0021532122045755386 2023-01-24 00:26:22.034979: step: 208/77, loss: 0.003785195993259549 2023-01-24 00:26:23.424054: step: 212/77, loss: 0.0032439667265862226 2023-01-24 00:26:24.949575: step: 216/77, loss: 0.06093345209956169 2023-01-24 00:26:26.428827: step: 220/77, loss: 0.0012553473934531212 2023-01-24 00:26:27.859867: step: 224/77, loss: 0.0009988200617954135 2023-01-24 00:26:29.304526: step: 228/77, loss: 0.007733221631497145 2023-01-24 00:26:30.767291: step: 232/77, loss: 0.040917910635471344 2023-01-24 00:26:32.218765: step: 236/77, loss: 0.0003634836757555604 2023-01-24 00:26:33.650974: step: 240/77, loss: 0.0016613565385341644 2023-01-24 00:26:35.098503: step: 244/77, loss: 0.0347539484500885 2023-01-24 00:26:36.612308: step: 248/77, loss: 0.027186525985598564 2023-01-24 00:26:38.080693: step: 252/77, loss: 0.006373404059559107 2023-01-24 00:26:39.563580: step: 256/77, loss: 2.8301956263021566e-05 2023-01-24 00:26:41.008880: step: 260/77, loss: 0.0018408658215776086 2023-01-24 00:26:42.554455: step: 264/77, loss: 0.08936366438865662 2023-01-24 00:26:43.972108: step: 268/77, loss: 0.0006639264174737036 2023-01-24 00:26:45.421329: step: 272/77, loss: 0.0018201242201030254 2023-01-24 00:26:46.862683: step: 276/77, loss: 0.055738504976034164 2023-01-24 00:26:48.323496: step: 280/77, loss: 2.838547743522213e-06 2023-01-24 00:26:49.794794: step: 284/77, loss: 0.0014087861636653543 2023-01-24 00:26:51.263051: step: 288/77, loss: 5.1704242650885135e-05 2023-01-24 00:26:52.731394: step: 292/77, loss: 0.0033315045293420553 2023-01-24 00:26:54.235746: step: 296/77, loss: 0.01905788853764534 2023-01-24 00:26:55.682905: step: 300/77, loss: 0.005263179074972868 2023-01-24 00:26:57.147452: step: 304/77, loss: 0.0037886740174144506 2023-01-24 00:26:58.597674: step: 308/77, loss: 0.07296175509691238 2023-01-24 00:27:00.039474: step: 312/77, loss: 0.0009286962449550629 2023-01-24 00:27:01.520728: step: 316/77, loss: 0.0003937944129575044 2023-01-24 00:27:02.967927: step: 320/77, loss: 0.00011244205961702392 2023-01-24 00:27:04.428716: step: 324/77, loss: 0.02179723232984543 2023-01-24 00:27:05.924852: step: 328/77, loss: 0.04251597821712494 2023-01-24 00:27:07.389167: step: 332/77, loss: 0.00040819545392878354 2023-01-24 00:27:08.855153: step: 336/77, loss: 5.535347736440599e-05 2023-01-24 00:27:10.315538: step: 340/77, loss: 0.011182352900505066 2023-01-24 00:27:11.819948: step: 344/77, loss: 0.0029422559309750795 2023-01-24 00:27:13.267137: step: 348/77, loss: 0.003457922488451004 2023-01-24 00:27:14.729412: step: 352/77, loss: 0.00013586209388449788 2023-01-24 00:27:16.141244: step: 356/77, loss: 5.510602568392642e-05 2023-01-24 00:27:17.629462: step: 360/77, loss: 9.337370283901691e-05 2023-01-24 00:27:19.041106: step: 364/77, loss: 0.011819742619991302 2023-01-24 00:27:20.544929: step: 368/77, loss: 0.00814152229577303 2023-01-24 00:27:21.976461: step: 372/77, loss: 0.029679525643587112 2023-01-24 00:27:23.352857: step: 376/77, loss: 0.033626340329647064 2023-01-24 00:27:24.810669: step: 380/77, loss: 0.015861092135310173 2023-01-24 00:27:26.285829: step: 384/77, loss: 0.00013995288463775069 2023-01-24 00:27:27.698205: step: 388/77, loss: 0.0010508573614060879 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 
'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.045661569471093295, 'epoch': 14} Test Chinese: {'template': {'p': 0.9154929577464789, 'r': 0.5078125, 'f1': 0.6532663316582914}, 'slot': {'p': 0.5483870967741935, 'r': 0.016683022571148183, 'f1': 0.03238095238095238}, 'combined': 0.021153385977506576, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.045661569471093295, 'epoch': 14} Test Korean: {'template': {'p': 0.9130434782608695, 'r': 0.4921875, 'f1': 0.6395939086294415}, 'slot': {'p': 0.5172413793103449, 'r': 0.014720314033366046, 'f1': 0.028625954198473285}, 'combined': 0.0183089859340489, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.045661569471093295, 'epoch': 14} Test Russian: {'template': {'p': 0.9142857142857143, 'r': 0.5, 'f1': 0.6464646464646465}, 'slot': {'p': 0.5172413793103449, 'r': 0.014720314033366046, 'f1': 0.028625954198473285}, 'combined': 0.018505667360629197, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:29:13.163654: step: 4/77, loss: 0.06576584279537201 2023-01-24 00:29:14.587610: step: 8/77, loss: 0.0211432334035635 2023-01-24 00:29:15.938859: step: 12/77, loss: 0.00011878870282089338 2023-01-24 00:29:17.310071: step: 16/77, loss: 0.0009247027919627726 2023-01-24 00:29:18.750444: step: 20/77, loss: 0.00018795518553815782 2023-01-24 00:29:20.152097: step: 24/77, loss: 0.002515072701498866 2023-01-24 00:29:21.594853: step: 28/77, loss: 0.0043100942857563496 2023-01-24 00:29:23.087467: step: 32/77, loss: 0.002241323236376047 2023-01-24 00:29:24.527793: step: 36/77, loss: 0.005226944573223591 2023-01-24 00:29:26.002129: step: 40/77, loss: 0.0135931596159935 2023-01-24 00:29:27.511449: step: 44/77, loss: 0.0008732576388865709 2023-01-24 00:29:28.940076: step: 48/77, loss: 0.0009864146122708917 2023-01-24 00:29:30.382677: step: 52/77, loss: 0.0003585018857847899 2023-01-24 00:29:31.862759: step: 56/77, loss: 2.714387301239185e-05 2023-01-24 00:29:33.304401: step: 60/77, loss: 0.0016551834996789694 2023-01-24 00:29:34.767888: step: 64/77, loss: 0.008181986398994923 2023-01-24 00:29:36.214222: step: 68/77, loss: 0.009752114303410053 2023-01-24 00:29:37.706463: step: 72/77, loss: 0.013987277634441853 2023-01-24 00:29:39.128461: step: 76/77, loss: 9.721517562866211e-05 2023-01-24 00:29:40.505245: step: 80/77, loss: 0.05479699745774269 2023-01-24 00:29:41.958911: step: 84/77, loss: 0.0022855522111058235 2023-01-24 00:29:43.384931: step: 88/77, loss: 0.009939520619809628 2023-01-24 00:29:44.737518: step: 92/77, loss: 4.864174479735084e-05 2023-01-24 00:29:46.173710: step: 96/77, loss: 0.0005828423891216516 2023-01-24 00:29:47.623884: step: 100/77, loss: 0.03715372458100319 2023-01-24 00:29:49.046197: step: 104/77, loss: 0.015810023993253708 2023-01-24 00:29:50.509451: step: 108/77, loss: 0.008435437455773354 2023-01-24 00:29:51.981059: step: 112/77, loss: 7.444271432177629e-06 2023-01-24 00:29:53.457413: step: 116/77, loss: 0.01651328057050705 2023-01-24 00:29:54.984353: step: 120/77, loss: 4.717539013654459e-06 2023-01-24 00:29:56.444895: step: 124/77, loss: 0.016156084835529327 2023-01-24 00:29:57.873938: step: 128/77, loss: 0.025796879082918167 2023-01-24 00:29:59.389588: step: 132/77, loss: 1.039034304994857e-05 2023-01-24 00:30:00.889523: step: 136/77, loss: 3.2849387935129926e-05 2023-01-24 00:30:02.314239: step: 140/77, loss: 0.009958821348845959 2023-01-24 00:30:03.783791: step: 144/77, loss: 0.0002767142141237855 2023-01-24 00:30:05.261082: step: 148/77, loss: 0.002541220746934414 2023-01-24 00:30:06.676982: step: 152/77, loss: 0.0062828054651618 2023-01-24 00:30:08.109041: step: 156/77, loss: 0.059553973376750946 2023-01-24 00:30:09.537395: step: 160/77, loss: 0.002758648945018649 2023-01-24 00:30:11.046400: step: 164/77, loss: 2.6639016141416505e-05 2023-01-24 00:30:12.588341: step: 168/77, loss: 0.003472244367003441 2023-01-24 00:30:14.094802: step: 172/77, loss: 2.9227057893876918e-05 2023-01-24 00:30:15.485495: step: 176/77, loss: 0.0006330236210487783 2023-01-24 00:30:16.969290: step: 180/77, loss: 0.006795175839215517 2023-01-24 00:30:18.431007: step: 184/77, loss: 0.0002546339819673449 2023-01-24 00:30:19.834118: 
step: 188/77, loss: 0.0009237767080776393 2023-01-24 00:30:21.295487: step: 192/77, loss: 0.022265290841460228 2023-01-24 00:30:22.746402: step: 196/77, loss: 0.00012686454283539206 2023-01-24 00:30:24.210716: step: 200/77, loss: 0.0052398075349628925 2023-01-24 00:30:25.686511: step: 204/77, loss: 0.023927073925733566 2023-01-24 00:30:27.109197: step: 208/77, loss: 0.0007230336777865887 2023-01-24 00:30:28.591945: step: 212/77, loss: 0.04853575676679611 2023-01-24 00:30:30.080931: step: 216/77, loss: 0.014252977445721626 2023-01-24 00:30:31.532736: step: 220/77, loss: 0.008642412722110748 2023-01-24 00:30:33.019832: step: 224/77, loss: 9.650964784668759e-05 2023-01-24 00:30:34.457688: step: 228/77, loss: 0.007102798670530319 2023-01-24 00:30:35.952173: step: 232/77, loss: 0.008989858441054821 2023-01-24 00:30:37.458746: step: 236/77, loss: 0.020074237138032913 2023-01-24 00:30:38.915482: step: 240/77, loss: 0.01964585669338703 2023-01-24 00:30:40.353309: step: 244/77, loss: 0.004278081934899092 2023-01-24 00:30:41.812577: step: 248/77, loss: 0.03222713619470596 2023-01-24 00:30:43.284803: step: 252/77, loss: 0.00010605229181237519 2023-01-24 00:30:44.791679: step: 256/77, loss: 0.07531239837408066 2023-01-24 00:30:46.257480: step: 260/77, loss: 0.012602516449987888 2023-01-24 00:30:47.782479: step: 264/77, loss: 0.008190508000552654 2023-01-24 00:30:49.279231: step: 268/77, loss: 0.06289426237344742 2023-01-24 00:30:50.721940: step: 272/77, loss: 0.0015910831280052662 2023-01-24 00:30:52.187186: step: 276/77, loss: 4.247297329129651e-05 2023-01-24 00:30:53.688161: step: 280/77, loss: 0.0022597406059503555 2023-01-24 00:30:55.146463: step: 284/77, loss: 0.0009675543988123536 2023-01-24 00:30:56.610843: step: 288/77, loss: 0.01921030879020691 2023-01-24 00:30:58.017266: step: 292/77, loss: 0.017905812710523605 2023-01-24 00:30:59.421181: step: 296/77, loss: 9.813245924306102e-06 2023-01-24 00:31:00.887705: step: 300/77, loss: 3.5145716537954286e-05 2023-01-24 00:31:02.361306: step: 304/77, loss: 0.023539673537015915 2023-01-24 00:31:03.744240: step: 308/77, loss: 0.024415859952569008 2023-01-24 00:31:05.224036: step: 312/77, loss: 0.0002481754054315388 2023-01-24 00:31:06.702896: step: 316/77, loss: 0.0004024395893793553 2023-01-24 00:31:08.170543: step: 320/77, loss: 0.0037798385601490736 2023-01-24 00:31:09.626790: step: 324/77, loss: 0.00010083715460496023 2023-01-24 00:31:11.091813: step: 328/77, loss: 0.007590695284307003 2023-01-24 00:31:12.566446: step: 332/77, loss: 0.0030735982581973076 2023-01-24 00:31:14.060400: step: 336/77, loss: 9.102160220209043e-06 2023-01-24 00:31:15.553108: step: 340/77, loss: 1.833957139751874e-05 2023-01-24 00:31:17.006181: step: 344/77, loss: 0.0004640549304895103 2023-01-24 00:31:18.481282: step: 348/77, loss: 0.00939967017620802 2023-01-24 00:31:19.955274: step: 352/77, loss: 0.01208038441836834 2023-01-24 00:31:21.431529: step: 356/77, loss: 0.022390833124518394 2023-01-24 00:31:22.907497: step: 360/77, loss: 0.08729444444179535 2023-01-24 00:31:24.351600: step: 364/77, loss: 0.006416450720280409 2023-01-24 00:31:25.880872: step: 368/77, loss: 0.033059343695640564 2023-01-24 00:31:27.446013: step: 372/77, loss: 0.009096910245716572 2023-01-24 00:31:28.916604: step: 376/77, loss: 0.0011584979947656393 2023-01-24 00:31:30.398083: step: 380/77, loss: 0.0014451435999944806 2023-01-24 00:31:31.904495: step: 384/77, loss: 1.3844704881194048e-05 2023-01-24 00:31:33.434986: step: 388/77, loss: 0.00019103710656054318 
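[Editor's note] The evaluation dicts printed throughout this log are internally consistent with the standard F1 definition and with 'combined' being the product of the template F1 and the slot F1. The snippet below is a minimal sketch of that relationship (an assumption inferred from the numbers, not taken from the project's scorer code); it reproduces, for example, the recurring "Dev for Chinese" figures in the "Current best result" blocks.

```python
# Sketch only: shows how the p/r/f1/combined values in this log relate to each other.
def f1(p, r):
    # Standard harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.5833333333333334)   # -> 0.7368421052631579, matches 'template' f1
slot_f1 = f1(0.5, 0.03780718336483932)      # -> 0.07029876977152899, matches 'slot' f1
combined = template_f1 * slot_f1            # -> 0.05179909351586346, matches 'combined'
print(template_f1, slot_f1, combined)
```

Also note that the logged steps advance in increments of 4 while --accumulate_step is 4, so gradients are presumably accumulated over 4 mini-batches of --batch_size 10 (an effective batch of roughly 40) before each optimizer step, and the two learning-rate flags (--xlmr_learning_rate 2e-5, --learning_rate 2e-4) suggest separate parameter groups for the XLM-R encoder and the task-specific layers.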
================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04756215508903682, 'epoch': 15} Test Chinese: {'template': {'p': 0.9420289855072463, 'r': 0.5078125, 'f1': 0.6598984771573604}, 'slot': {'p': 0.6129032258064516, 'r': 0.018645731108930325, 'f1': 0.03619047619047619}, 'combined': 0.023882040125694948, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04756215508903682, 'epoch': 15} Test Korean: {'template': {'p': 0.9420289855072463, 'r': 0.5078125, 'f1': 0.6598984771573604}, 'slot': {'p': 0.6129032258064516, 'r': 0.018645731108930325, 'f1': 0.03619047619047619}, 'combined': 0.023882040125694948, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04756215508903682, 'epoch': 15} Test Russian: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.6129032258064516, 'r': 0.018645731108930325, 'f1': 0.03619047619047619}, 'combined': 0.023634596695821183, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 
'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:33:19.290523: step: 4/77, loss: 0.002274035243317485 2023-01-24 00:33:20.723241: step: 8/77, loss: 0.0019370621303096414 2023-01-24 00:33:22.278654: step: 12/77, loss: 0.054813411086797714 2023-01-24 00:33:23.684511: step: 16/77, loss: 0.008220945484936237 2023-01-24 00:33:25.151975: step: 20/77, loss: 1.7062940969481133e-05 2023-01-24 00:33:26.636827: step: 24/77, loss: 0.0018931454978883266 2023-01-24 00:33:28.120705: step: 28/77, loss: 0.002858012681826949 2023-01-24 00:33:29.645734: step: 32/77, loss: 0.0064864917658269405 2023-01-24 00:33:31.122343: step: 36/77, loss: 0.0004651574417948723 2023-01-24 00:33:32.617990: step: 40/77, loss: 0.09202048927545547 2023-01-24 00:33:34.096672: step: 44/77, loss: 0.001105927163735032 2023-01-24 00:33:35.505894: step: 48/77, loss: 0.04170398414134979 2023-01-24 00:33:36.927408: step: 52/77, loss: 0.01814689300954342 2023-01-24 00:33:38.449162: step: 56/77, loss: 0.015868939459323883 2023-01-24 00:33:39.846233: step: 60/77, loss: 0.0033887759782373905 2023-01-24 00:33:41.301273: step: 64/77, loss: 0.020517315715551376 2023-01-24 00:33:42.814389: step: 68/77, loss: 0.0010318057611584663 2023-01-24 00:33:44.296806: step: 72/77, loss: 0.01650240831077099 2023-01-24 00:33:45.763438: step: 76/77, loss: 0.0040339017286896706 2023-01-24 00:33:47.158925: step: 80/77, loss: 0.053633980453014374 2023-01-24 00:33:48.680873: step: 84/77, loss: 0.024445664137601852 2023-01-24 00:33:50.228221: step: 88/77, loss: 0.022687377408146858 2023-01-24 00:33:51.757685: step: 92/77, loss: 0.00024189718533307314 2023-01-24 00:33:53.215170: step: 96/77, loss: 0.001149466261267662 2023-01-24 00:33:54.643673: step: 100/77, loss: 0.022049574181437492 2023-01-24 00:33:56.125162: step: 104/77, loss: 0.0008618884021416306 2023-01-24 00:33:57.587184: step: 108/77, loss: 0.0030203552450984716 2023-01-24 00:33:59.043235: step: 112/77, loss: 0.001800397178158164 2023-01-24 00:34:00.460726: step: 116/77, loss: 0.0006228326237760484 2023-01-24 00:34:01.947069: step: 120/77, loss: 0.032183367758989334 2023-01-24 00:34:03.391824: step: 124/77, loss: 0.04554427042603493 2023-01-24 00:34:04.854677: step: 128/77, loss: 0.00010357674182159826 2023-01-24 00:34:06.269390: step: 132/77, loss: 0.0013730874052271247 2023-01-24 00:34:07.716137: step: 136/77, loss: 0.029175298288464546 2023-01-24 00:34:09.206851: step: 140/77, loss: 0.0006298382068052888 2023-01-24 00:34:10.672200: step: 144/77, loss: 6.223077798495069e-05 2023-01-24 00:34:12.174991: step: 148/77, loss: 0.003994886297732592 2023-01-24 00:34:13.633517: step: 152/77, loss: 0.013091296888887882 2023-01-24 00:34:15.104114: step: 156/77, loss: 0.0001558217772981152 2023-01-24 00:34:16.567935: step: 160/77, loss: 0.0014729941030964255 2023-01-24 00:34:18.097895: step: 164/77, loss: 0.0010588800068944693 2023-01-24 00:34:19.568790: step: 168/77, loss: 0.00016571594460401684 2023-01-24 00:34:21.052814: step: 172/77, loss: 0.009606136940419674 2023-01-24 00:34:22.532955: step: 176/77, loss: 0.00643984554335475 2023-01-24 
00:34:23.986995: step: 180/77, loss: 0.0014251623069867492 2023-01-24 00:34:25.388922: step: 184/77, loss: 0.0024857092648744583 2023-01-24 00:34:26.816019: step: 188/77, loss: 0.08830219507217407 2023-01-24 00:34:28.224917: step: 192/77, loss: 0.0001782501203706488 2023-01-24 00:34:29.648868: step: 196/77, loss: 0.0014881398528814316 2023-01-24 00:34:31.129108: step: 200/77, loss: 7.254660886246711e-05 2023-01-24 00:34:32.637353: step: 204/77, loss: 0.0015049743233248591 2023-01-24 00:34:34.054157: step: 208/77, loss: 0.038156285881996155 2023-01-24 00:34:35.507392: step: 212/77, loss: 0.0020367216784507036 2023-01-24 00:34:36.937722: step: 216/77, loss: 0.003434864804148674 2023-01-24 00:34:38.327586: step: 220/77, loss: 0.0012388827744871378 2023-01-24 00:34:39.736208: step: 224/77, loss: 0.11639466136693954 2023-01-24 00:34:41.222675: step: 228/77, loss: 0.0008922365377657115 2023-01-24 00:34:42.614013: step: 232/77, loss: 0.045566074550151825 2023-01-24 00:34:44.083738: step: 236/77, loss: 0.0022351492661982775 2023-01-24 00:34:45.517918: step: 240/77, loss: 0.07056190073490143 2023-01-24 00:34:46.999868: step: 244/77, loss: 0.0027572649996727705 2023-01-24 00:34:48.434596: step: 248/77, loss: 0.00025906716473400593 2023-01-24 00:34:49.811848: step: 252/77, loss: 0.004387011285871267 2023-01-24 00:34:51.276015: step: 256/77, loss: 0.0001116612329497002 2023-01-24 00:34:52.668546: step: 260/77, loss: 0.005593471694737673 2023-01-24 00:34:54.086423: step: 264/77, loss: 0.03438470885157585 2023-01-24 00:34:55.552270: step: 268/77, loss: 0.0016064458759501576 2023-01-24 00:34:57.003263: step: 272/77, loss: 0.004222017712891102 2023-01-24 00:34:58.453577: step: 276/77, loss: 0.00023497124493587762 2023-01-24 00:34:59.915975: step: 280/77, loss: 0.007615208625793457 2023-01-24 00:35:01.403455: step: 284/77, loss: 0.030379055067896843 2023-01-24 00:35:02.895978: step: 288/77, loss: 7.958993955980986e-05 2023-01-24 00:35:04.355698: step: 292/77, loss: 7.129358436941402e-06 2023-01-24 00:35:05.786302: step: 296/77, loss: 0.007867962121963501 2023-01-24 00:35:07.196059: step: 300/77, loss: 0.003189869923517108 2023-01-24 00:35:08.600432: step: 304/77, loss: 0.025252973660826683 2023-01-24 00:35:10.122755: step: 308/77, loss: 0.0027913593221455812 2023-01-24 00:35:11.706224: step: 312/77, loss: 0.008064726367592812 2023-01-24 00:35:13.189316: step: 316/77, loss: 0.01227780431509018 2023-01-24 00:35:14.633096: step: 320/77, loss: 0.00796580035239458 2023-01-24 00:35:16.104993: step: 324/77, loss: 0.004041627515107393 2023-01-24 00:35:17.526158: step: 328/77, loss: 8.001786682143575e-07 2023-01-24 00:35:19.059134: step: 332/77, loss: 0.2514524459838867 2023-01-24 00:35:20.481752: step: 336/77, loss: 0.06413402408361435 2023-01-24 00:35:21.938851: step: 340/77, loss: 0.031314950436353683 2023-01-24 00:35:23.414411: step: 344/77, loss: 0.00578667875379324 2023-01-24 00:35:24.873703: step: 348/77, loss: 0.020943451672792435 2023-01-24 00:35:26.335769: step: 352/77, loss: 0.00128412083722651 2023-01-24 00:35:27.735069: step: 356/77, loss: 0.0031907805241644382 2023-01-24 00:35:29.193152: step: 360/77, loss: 0.009251225739717484 2023-01-24 00:35:30.654312: step: 364/77, loss: 0.00013018125900998712 2023-01-24 00:35:32.093169: step: 368/77, loss: 0.0030776180792599916 2023-01-24 00:35:33.563359: step: 372/77, loss: 0.0011256110155954957 2023-01-24 00:35:35.063269: step: 376/77, loss: 0.0024954641703516245 2023-01-24 00:35:36.501068: step: 380/77, loss: 0.0042509096674621105 2023-01-24 00:35:37.992238: 
step: 384/77, loss: 0.01576077565550804 2023-01-24 00:35:39.451346: step: 388/77, loss: 0.006246781442314386 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16} Test Chinese: {'template': {'p': 0.8571428571428571, 'r': 0.5625, 'f1': 0.6792452830188678}, 'slot': {'p': 0.4878048780487805, 'r': 0.019627085377821395, 'f1': 0.03773584905660377}, 'combined': 0.025631897472410105, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16} Test Korean: {'template': {'p': 0.8674698795180723, 'r': 0.5625, 'f1': 0.6824644549763033}, 'slot': {'p': 0.5128205128205128, 'r': 0.019627085377821395, 'f1': 0.03780718336483932}, 'combined': 0.025802058789274225, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16} Test Russian: {'template': {'p': 0.8470588235294118, 'r': 0.5625, 'f1': 0.6760563380281689}, 'slot': {'p': 0.47619047619047616, 'r': 0.019627085377821395, 'f1': 0.037700282752120645}, 'combined': 0.02548751510002522, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:37:24.987262: step: 4/77, loss: 0.014770681969821453 2023-01-24 00:37:26.452659: step: 8/77, loss: 0.0876326635479927 2023-01-24 00:37:27.942761: step: 12/77, loss: 0.04138243570923805 2023-01-24 00:37:29.393957: step: 16/77, loss: 0.003260186407715082 2023-01-24 00:37:30.795797: step: 20/77, loss: 0.0027825208380818367 2023-01-24 00:37:32.190643: step: 24/77, loss: 0.00017264731286559254 2023-01-24 00:37:33.622775: step: 28/77, loss: 0.00026574067305773497 2023-01-24 00:37:35.102713: step: 32/77, loss: 0.0002445220889057964 2023-01-24 00:37:36.549340: step: 36/77, loss: 0.0008409050642512739 2023-01-24 00:37:38.011007: step: 40/77, loss: 0.0014837709022685885 2023-01-24 00:37:39.403146: step: 44/77, loss: 0.0001334149419562891 2023-01-24 00:37:40.915940: step: 48/77, loss: 0.0022824243642389774 2023-01-24 00:37:42.421437: step: 52/77, loss: 0.007556657772511244 2023-01-24 00:37:43.878285: step: 56/77, loss: 0.018492089584469795 2023-01-24 00:37:45.306301: step: 60/77, loss: 0.018770309165120125 2023-01-24 00:37:46.822394: step: 64/77, loss: 0.004591003060340881 2023-01-24 00:37:48.302911: step: 68/77, loss: 3.793321593548171e-05 2023-01-24 00:37:49.814569: step: 72/77, loss: 0.0016521599609404802 2023-01-24 00:37:51.226956: step: 76/77, loss: 8.729464752832428e-05 2023-01-24 00:37:52.714604: step: 80/77, loss: 2.5107715373451356e-06 2023-01-24 00:37:54.158881: step: 84/77, loss: 0.0007668976322747767 2023-01-24 00:37:55.610996: step: 88/77, loss: 0.00569231016561389 2023-01-24 00:37:57.110979: step: 92/77, loss: 0.04539667069911957 2023-01-24 00:37:58.602653: step: 96/77, loss: 0.004317179322242737 2023-01-24 00:38:00.075361: step: 100/77, loss: 0.00014809536514803767 2023-01-24 00:38:01.560824: step: 104/77, loss: 4.2761865188367665e-05 2023-01-24 00:38:03.018986: step: 108/77, loss: 0.0007335086120292544 2023-01-24 00:38:04.500638: step: 112/77, loss: 0.007040671072900295 2023-01-24 00:38:05.922354: step: 116/77, loss: 0.020112091675400734 2023-01-24 00:38:07.340938: step: 120/77, loss: 0.005425078794360161 2023-01-24 00:38:08.889681: step: 124/77, loss: 0.00016795247211121023 2023-01-24 00:38:10.370873: step: 128/77, loss: 0.019081249833106995 2023-01-24 00:38:11.916488: step: 132/77, loss: 0.019822169095277786 2023-01-24 00:38:13.388252: step: 136/77, loss: 0.0001944322866620496 2023-01-24 00:38:14.779742: step: 140/77, loss: 0.017132896929979324 2023-01-24 00:38:16.161079: step: 144/77, loss: 0.0003910954692400992 2023-01-24 00:38:17.580257: step: 148/77, loss: 0.0030151931568980217 2023-01-24 00:38:19.038574: step: 152/77, loss: 0.012330949306488037 2023-01-24 00:38:20.417019: step: 156/77, loss: 0.008543347008526325 2023-01-24 00:38:21.873949: step: 160/77, loss: 3.2266092603094876e-05 2023-01-24 00:38:23.272591: step: 164/77, loss: 0.01012550387531519 
2023-01-24 00:38:24.650537: step: 168/77, loss: 0.006201722659170628 2023-01-24 00:38:26.056366: step: 172/77, loss: 0.005624229088425636 2023-01-24 00:38:27.539963: step: 176/77, loss: 4.4780652387999e-05 2023-01-24 00:38:29.023732: step: 180/77, loss: 0.024179434403777122 2023-01-24 00:38:30.483672: step: 184/77, loss: 9.032005618792027e-05 2023-01-24 00:38:31.923948: step: 188/77, loss: 0.0244180615991354 2023-01-24 00:38:33.361801: step: 192/77, loss: 0.03398584946990013 2023-01-24 00:38:34.768380: step: 196/77, loss: 0.0010689867194741964 2023-01-24 00:38:36.239983: step: 200/77, loss: 0.0035388905089348555 2023-01-24 00:38:37.704369: step: 204/77, loss: 0.010380645282566547 2023-01-24 00:38:39.216696: step: 208/77, loss: 0.03376935422420502 2023-01-24 00:38:40.603308: step: 212/77, loss: 0.018724530935287476 2023-01-24 00:38:42.087666: step: 216/77, loss: 0.002145115751773119 2023-01-24 00:38:43.569428: step: 220/77, loss: 0.001531552872620523 2023-01-24 00:38:45.065626: step: 224/77, loss: 0.022273162379860878 2023-01-24 00:38:46.483770: step: 228/77, loss: 0.00013202633999753743 2023-01-24 00:38:47.977027: step: 232/77, loss: 0.010384333319962025 2023-01-24 00:38:49.398215: step: 236/77, loss: 0.005090624559670687 2023-01-24 00:38:50.889603: step: 240/77, loss: 0.0037023834884166718 2023-01-24 00:38:52.357934: step: 244/77, loss: 0.0026756480801850557 2023-01-24 00:38:53.814824: step: 248/77, loss: 0.0014291125116869807 2023-01-24 00:38:55.262827: step: 252/77, loss: 0.00014384086534846574 2023-01-24 00:38:56.702499: step: 256/77, loss: 5.036376478528837e-06 2023-01-24 00:38:58.176326: step: 260/77, loss: 0.00022549970890395343 2023-01-24 00:38:59.635224: step: 264/77, loss: 0.0037392026279121637 2023-01-24 00:39:01.098861: step: 268/77, loss: 0.06179117038846016 2023-01-24 00:39:02.626719: step: 272/77, loss: 0.0005593567620962858 2023-01-24 00:39:04.070735: step: 276/77, loss: 0.00114069867413491 2023-01-24 00:39:05.517262: step: 280/77, loss: 0.002634587697684765 2023-01-24 00:39:06.980777: step: 284/77, loss: 0.0018559263553470373 2023-01-24 00:39:08.430990: step: 288/77, loss: 0.031140733510255814 2023-01-24 00:39:09.900340: step: 292/77, loss: 0.006226778961718082 2023-01-24 00:39:11.429688: step: 296/77, loss: 0.0006024112808518112 2023-01-24 00:39:12.965901: step: 300/77, loss: 0.0003451938973739743 2023-01-24 00:39:14.464891: step: 304/77, loss: 0.0025052230339497328 2023-01-24 00:39:15.919806: step: 308/77, loss: 0.05138927698135376 2023-01-24 00:39:17.390628: step: 312/77, loss: 0.0004222650022711605 2023-01-24 00:39:18.819303: step: 316/77, loss: 0.010836608707904816 2023-01-24 00:39:20.296644: step: 320/77, loss: 2.141235654562479e-06 2023-01-24 00:39:21.822116: step: 324/77, loss: 0.0036584362387657166 2023-01-24 00:39:23.335544: step: 328/77, loss: 0.011637730523943901 2023-01-24 00:39:24.808939: step: 332/77, loss: 0.0017785464879125357 2023-01-24 00:39:26.280504: step: 336/77, loss: 0.0008704649517312646 2023-01-24 00:39:27.759992: step: 340/77, loss: 0.0016544598620384932 2023-01-24 00:39:29.278014: step: 344/77, loss: 0.0001686064206296578 2023-01-24 00:39:30.742002: step: 348/77, loss: 3.0189055451046443e-06 2023-01-24 00:39:32.232814: step: 352/77, loss: 1.2028551282128319e-05 2023-01-24 00:39:33.683145: step: 356/77, loss: 0.005813794210553169 2023-01-24 00:39:35.123524: step: 360/77, loss: 0.018742024898529053 2023-01-24 00:39:36.561161: step: 364/77, loss: 0.0039033587090671062 2023-01-24 00:39:38.041977: step: 368/77, loss: 7.665553857805207e-05 2023-01-24 
00:39:39.484531: step: 372/77, loss: 0.0037815242540091276 2023-01-24 00:39:40.972371: step: 376/77, loss: 0.0016702398424968123 2023-01-24 00:39:42.515122: step: 380/77, loss: 0.0018711560405790806 2023-01-24 00:39:44.045235: step: 384/77, loss: 0.004602421075105667 2023-01-24 00:39:45.622697: step: 388/77, loss: 0.005135645158588886 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.9264705882352942, 'r': 0.4921875, 'f1': 0.6428571428571428}, 'slot': {'p': 0.6333333333333333, 'r': 0.018645731108930325, 'f1': 0.036224976167778845}, 'combined': 0.0232874846792864, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.9402985074626866, 'r': 0.4921875, 'f1': 0.6461538461538462}, 'slot': {'p': 0.6333333333333333, 'r': 0.018645731108930325, 'f1': 0.036224976167778845}, 'combined': 0.023406907677641718, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.9402985074626866, 'r': 0.4921875, 'f1': 0.6461538461538462}, 'slot': {'p': 0.6333333333333333, 'r': 0.018645731108930325, 'f1': 0.036224976167778845}, 'combined': 0.023406907677641718, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for 
Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:41:31.226358: step: 4/77, loss: 0.00629640556871891 2023-01-24 00:41:32.649961: step: 8/77, loss: 1.3032102287979797e-05 2023-01-24 00:41:34.189389: step: 12/77, loss: 0.00390604161657393 2023-01-24 00:41:35.609064: step: 16/77, loss: 0.01602979004383087 2023-01-24 00:41:37.076130: step: 20/77, loss: 5.475866146298358e-06 2023-01-24 00:41:38.528434: step: 24/77, loss: 0.0031633723992854357 2023-01-24 00:41:40.041201: step: 28/77, loss: 0.014412499964237213 2023-01-24 00:41:41.480038: step: 32/77, loss: 2.1889086383453105e-06 2023-01-24 00:41:42.992021: step: 36/77, loss: 3.456926788203418e-05 2023-01-24 00:41:44.445380: step: 40/77, loss: 2.2342781448969617e-05 2023-01-24 00:41:45.922808: step: 44/77, loss: 0.005496377125382423 2023-01-24 00:41:47.376515: step: 48/77, loss: 0.0002522608556319028 2023-01-24 00:41:48.867848: step: 52/77, loss: 0.00036922251456417143 2023-01-24 00:41:50.317616: step: 56/77, loss: 0.00252554495818913 2023-01-24 00:41:51.746407: step: 60/77, loss: 0.021473003551363945 2023-01-24 00:41:53.177172: step: 64/77, loss: 0.003913488704711199 2023-01-24 00:41:54.681347: step: 68/77, loss: 0.003509226720780134 2023-01-24 00:41:56.129875: step: 72/77, loss: 0.00018533196998760104 2023-01-24 00:41:57.584547: step: 76/77, loss: 0.001384116942062974 2023-01-24 00:41:59.043724: step: 80/77, loss: 0.10798466950654984 2023-01-24 00:42:00.522131: step: 84/77, loss: 4.4980937673244625e-05 2023-01-24 00:42:01.973609: step: 88/77, loss: 0.00011074270878452808 2023-01-24 00:42:03.481639: step: 92/77, loss: 2.721722012211103e-05 2023-01-24 00:42:04.900462: step: 96/77, loss: 2.3289867385756224e-06 2023-01-24 00:42:06.310961: step: 100/77, loss: 6.403658062481554e-06 2023-01-24 00:42:07.846109: step: 104/77, loss: 0.0002635190321598202 2023-01-24 00:42:09.326382: step: 108/77, loss: 8.024475391721353e-05 2023-01-24 00:42:10.839821: step: 112/77, loss: 0.011582602746784687 2023-01-24 00:42:12.273387: step: 116/77, loss: 0.004289014730602503 2023-01-24 00:42:13.797674: step: 120/77, loss: 0.038263604044914246 2023-01-24 00:42:15.297022: step: 124/77, loss: 0.004939154721796513 2023-01-24 00:42:16.697025: step: 128/77, loss: 0.0020356210879981518 2023-01-24 00:42:18.079987: step: 132/77, loss: 3.482759348116815e-05 2023-01-24 00:42:19.597221: step: 136/77, loss: 0.001752390991896391 2023-01-24 00:42:21.017793: step: 140/77, loss: 0.015921536833047867 2023-01-24 00:42:22.467131: step: 144/77, loss: 2.1755639068032906e-07 2023-01-24 00:42:23.920576: step: 148/77, loss: 6.622581167903263e-06 2023-01-24 00:42:25.423161: step: 152/77, loss: 0.0007548375870101154 2023-01-24 00:42:26.892812: step: 
156/77, loss: 0.025477493181824684 2023-01-24 00:42:28.327299: step: 160/77, loss: 0.00039732432924211025 2023-01-24 00:42:29.761205: step: 164/77, loss: 0.014110036194324493 2023-01-24 00:42:31.207142: step: 168/77, loss: 0.00010267073230352253 2023-01-24 00:42:32.613407: step: 172/77, loss: 0.0075200931169092655 2023-01-24 00:42:34.081133: step: 176/77, loss: 0.010753368958830833 2023-01-24 00:42:35.559898: step: 180/77, loss: 0.0007860129699110985 2023-01-24 00:42:37.048161: step: 184/77, loss: 0.021613411605358124 2023-01-24 00:42:38.554318: step: 188/77, loss: 0.004621770698577166 2023-01-24 00:42:39.969021: step: 192/77, loss: 2.2125350369606167e-05 2023-01-24 00:42:41.413030: step: 196/77, loss: 0.00014070799807086587 2023-01-24 00:42:42.951731: step: 200/77, loss: 0.022858303040266037 2023-01-24 00:42:44.433650: step: 204/77, loss: 9.427463737665676e-06 2023-01-24 00:42:45.861604: step: 208/77, loss: 0.0003459584841039032 2023-01-24 00:42:47.349470: step: 212/77, loss: 0.0033417996019124985 2023-01-24 00:42:48.798229: step: 216/77, loss: 0.01567632518708706 2023-01-24 00:42:50.231430: step: 220/77, loss: 0.014902174472808838 2023-01-24 00:42:51.631494: step: 224/77, loss: 0.00310932332649827 2023-01-24 00:42:53.110816: step: 228/77, loss: 0.056226640939712524 2023-01-24 00:42:54.589730: step: 232/77, loss: 4.0978045490192017e-07 2023-01-24 00:42:56.084256: step: 236/77, loss: 0.001314049819484353 2023-01-24 00:42:57.515087: step: 240/77, loss: 0.0017302314518019557 2023-01-24 00:42:59.031081: step: 244/77, loss: 0.007447042502462864 2023-01-24 00:43:00.507835: step: 248/77, loss: 0.06286231428384781 2023-01-24 00:43:01.990061: step: 252/77, loss: 0.00034409796353429556 2023-01-24 00:43:03.393833: step: 256/77, loss: 0.0007446880335919559 2023-01-24 00:43:04.809535: step: 260/77, loss: 0.02761266753077507 2023-01-24 00:43:06.278901: step: 264/77, loss: 0.032420236617326736 2023-01-24 00:43:07.729364: step: 268/77, loss: 0.0014822514494881034 2023-01-24 00:43:09.195070: step: 272/77, loss: 2.571425648056902e-05 2023-01-24 00:43:10.680625: step: 276/77, loss: 0.032803013920784 2023-01-24 00:43:12.140308: step: 280/77, loss: 0.0003158951294608414 2023-01-24 00:43:13.635235: step: 284/77, loss: 0.0009535959688946605 2023-01-24 00:43:15.176827: step: 288/77, loss: 0.007475333753973246 2023-01-24 00:43:16.605702: step: 292/77, loss: 0.002793649211525917 2023-01-24 00:43:18.059730: step: 296/77, loss: 0.002629114082083106 2023-01-24 00:43:19.476700: step: 300/77, loss: 0.0005797538906335831 2023-01-24 00:43:20.953867: step: 304/77, loss: 0.001783881220035255 2023-01-24 00:43:22.443521: step: 308/77, loss: 0.0007740409346297383 2023-01-24 00:43:23.921048: step: 312/77, loss: 0.06472983211278915 2023-01-24 00:43:25.344625: step: 316/77, loss: 0.00124368688557297 2023-01-24 00:43:26.833214: step: 320/77, loss: 0.0465131551027298 2023-01-24 00:43:28.306713: step: 324/77, loss: 0.07662312686443329 2023-01-24 00:43:29.755819: step: 328/77, loss: 0.0008284562500193715 2023-01-24 00:43:31.145331: step: 332/77, loss: 0.013321863487362862 2023-01-24 00:43:32.617544: step: 336/77, loss: 5.0740571168717e-05 2023-01-24 00:43:34.105856: step: 340/77, loss: 0.00013221157132647932 2023-01-24 00:43:35.481172: step: 344/77, loss: 0.002166406251490116 2023-01-24 00:43:36.997047: step: 348/77, loss: 0.006550523452460766 2023-01-24 00:43:38.475622: step: 352/77, loss: 0.022207271307706833 2023-01-24 00:43:39.988249: step: 356/77, loss: 7.443765935022384e-05 2023-01-24 00:43:41.457299: step: 360/77, loss: 
0.0145955141633749 2023-01-24 00:43:42.887530: step: 364/77, loss: 0.15446336567401886 2023-01-24 00:43:44.325395: step: 368/77, loss: 0.008689656853675842 2023-01-24 00:43:45.828027: step: 372/77, loss: 0.001763333915732801 2023-01-24 00:43:47.315197: step: 376/77, loss: 7.676680252188817e-05 2023-01-24 00:43:48.838949: step: 380/77, loss: 0.017358239740133286 2023-01-24 00:43:50.186064: step: 384/77, loss: 0.0013745925389230251 2023-01-24 00:43:51.649452: step: 388/77, loss: 0.00026828423142433167 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5390625, 'f1': 0.69}, 'slot': {'p': 0.5641025641025641, 'r': 0.021589793915603533, 'f1': 0.04158790170132325}, 'combined': 0.028695652173913042, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.958904109589041, 'r': 0.546875, 'f1': 0.6965174129353234}, 'slot': {'p': 0.5526315789473685, 'r': 0.020608439646712464, 'f1': 0.03973509933774835}, 'combined': 0.027676188593456566, 'epoch': 18} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 18} Test Russian: {'template': {'p': 0.958904109589041, 'r': 0.546875, 'f1': 0.6965174129353234}, 'slot': {'p': 0.5641025641025641, 'r': 0.021589793915603533, 'f1': 0.04158790170132325}, 'combined': 0.02896669770241421, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 
0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:45:37.091617: step: 4/77, loss: 0.00186650559771806 2023-01-24 00:45:38.538468: step: 8/77, loss: 0.011341345496475697 2023-01-24 00:45:40.006448: step: 12/77, loss: 0.00042457960080355406 2023-01-24 00:45:41.467713: step: 16/77, loss: 0.004452820401638746 2023-01-24 00:45:42.910195: step: 20/77, loss: 9.29928501136601e-06 2023-01-24 00:45:44.391960: step: 24/77, loss: 1.871081258286722e-05 2023-01-24 00:45:45.904619: step: 28/77, loss: 2.7184534701518714e-05 2023-01-24 00:45:47.376821: step: 32/77, loss: 0.0002637415600474924 2023-01-24 00:45:48.853286: step: 36/77, loss: 0.005314426030963659 2023-01-24 00:45:50.321387: step: 40/77, loss: 0.003934026230126619 2023-01-24 00:45:51.755108: step: 44/77, loss: 0.01600668765604496 2023-01-24 00:45:53.185898: step: 48/77, loss: 0.007245240733027458 2023-01-24 00:45:54.650943: step: 52/77, loss: 0.013580265454947948 2023-01-24 00:45:56.094225: step: 56/77, loss: 0.0537562258541584 2023-01-24 00:45:57.565661: step: 60/77, loss: 0.005792696960270405 2023-01-24 00:45:59.073421: step: 64/77, loss: 0.000631436298135668 2023-01-24 00:46:00.578803: step: 68/77, loss: 0.001448130700737238 2023-01-24 00:46:02.017691: step: 72/77, loss: 0.0027798700612038374 2023-01-24 00:46:03.451761: step: 76/77, loss: 0.08459872752428055 2023-01-24 00:46:04.934874: step: 80/77, loss: 0.0358988381922245 2023-01-24 00:46:06.435949: step: 84/77, loss: 0.04680085927248001 2023-01-24 00:46:07.889233: step: 88/77, loss: 2.6717007131082937e-06 2023-01-24 00:46:09.368898: step: 92/77, loss: 0.0044856686145067215 2023-01-24 00:46:10.822692: step: 96/77, loss: 7.837633893359452e-05 2023-01-24 00:46:12.334240: step: 100/77, loss: 4.082479063072242e-05 2023-01-24 00:46:13.745223: step: 104/77, loss: 0.009041888639330864 2023-01-24 00:46:15.214685: step: 108/77, loss: 0.0013675595400854945 2023-01-24 00:46:16.594760: step: 112/77, loss: 2.3649601644137874e-05 2023-01-24 00:46:18.057734: step: 116/77, loss: 0.0014585917815566063 2023-01-24 00:46:19.531215: step: 120/77, loss: 0.10662340372800827 2023-01-24 00:46:21.022026: step: 124/77, loss: 0.0008903276757337153 2023-01-24 00:46:22.543911: step: 128/77, loss: 0.012171868234872818 2023-01-24 00:46:23.989083: step: 132/77, loss: 1.0170209861826152e-05 2023-01-24 00:46:25.459766: step: 136/77, loss: 0.002750490326434374 2023-01-24 00:46:27.000925: step: 140/77, loss: 0.0001082413800759241 2023-01-24 00:46:28.466915: step: 
144/77, loss: 0.017326457425951958 2023-01-24 00:46:29.902852: step: 148/77, loss: 0.00018802982231136411 2023-01-24 00:46:31.418565: step: 152/77, loss: 0.0004940549260936677 2023-01-24 00:46:32.918955: step: 156/77, loss: 0.021055961027741432 2023-01-24 00:46:34.339900: step: 160/77, loss: 0.007170780561864376 2023-01-24 00:46:35.762736: step: 164/77, loss: 0.04258148372173309 2023-01-24 00:46:37.210342: step: 168/77, loss: 4.662974242819473e-05 2023-01-24 00:46:38.678631: step: 172/77, loss: 0.006000472232699394 2023-01-24 00:46:40.160683: step: 176/77, loss: 0.022700928151607513 2023-01-24 00:46:41.583653: step: 180/77, loss: 0.041062355041503906 2023-01-24 00:46:43.126892: step: 184/77, loss: 0.00031986297108232975 2023-01-24 00:46:44.585820: step: 188/77, loss: 0.011816378682851791 2023-01-24 00:46:46.066750: step: 192/77, loss: 0.0010583401890471578 2023-01-24 00:46:47.554382: step: 196/77, loss: 0.004462048877030611 2023-01-24 00:46:48.986021: step: 200/77, loss: 0.045728087425231934 2023-01-24 00:46:50.447767: step: 204/77, loss: 0.0007927333936095238 2023-01-24 00:46:51.919940: step: 208/77, loss: 0.0013061045901849866 2023-01-24 00:46:53.348940: step: 212/77, loss: 0.002018637489527464 2023-01-24 00:46:54.821003: step: 216/77, loss: 6.201502401381731e-05 2023-01-24 00:46:56.260701: step: 220/77, loss: 0.0016469608526676893 2023-01-24 00:46:57.728314: step: 224/77, loss: 0.000588214083109051 2023-01-24 00:46:59.231512: step: 228/77, loss: 0.01477520540356636 2023-01-24 00:47:00.637551: step: 232/77, loss: 0.012828252278268337 2023-01-24 00:47:02.066359: step: 236/77, loss: 0.0024332371540367603 2023-01-24 00:47:03.514938: step: 240/77, loss: 0.00028775102691724896 2023-01-24 00:47:05.007830: step: 244/77, loss: 0.0565745048224926 2023-01-24 00:47:06.461829: step: 248/77, loss: 0.001304167089983821 2023-01-24 00:47:07.909915: step: 252/77, loss: 0.001137460581958294 2023-01-24 00:47:09.498881: step: 256/77, loss: 0.07675373554229736 2023-01-24 00:47:10.891393: step: 260/77, loss: 2.674641336852801e-06 2023-01-24 00:47:12.383438: step: 264/77, loss: 0.0002038546372205019 2023-01-24 00:47:13.892239: step: 268/77, loss: 0.0010206920560449362 2023-01-24 00:47:15.360578: step: 272/77, loss: 0.017133938148617744 2023-01-24 00:47:16.816977: step: 276/77, loss: 0.016053365543484688 2023-01-24 00:47:18.266154: step: 280/77, loss: 0.0016087039839476347 2023-01-24 00:47:19.664001: step: 284/77, loss: 0.01253533735871315 2023-01-24 00:47:21.208882: step: 288/77, loss: 0.0019009861862286925 2023-01-24 00:47:22.632344: step: 292/77, loss: 0.00038376866723410785 2023-01-24 00:47:24.115051: step: 296/77, loss: 0.002783339936286211 2023-01-24 00:47:25.617644: step: 300/77, loss: 0.0008510855259373784 2023-01-24 00:47:27.042167: step: 304/77, loss: 0.005121702328324318 2023-01-24 00:47:28.504060: step: 308/77, loss: 0.0007576277712360024 2023-01-24 00:47:29.976551: step: 312/77, loss: 9.761757974047214e-05 2023-01-24 00:47:31.460666: step: 316/77, loss: 0.08049427717924118 2023-01-24 00:47:32.869855: step: 320/77, loss: 5.37521846126765e-05 2023-01-24 00:47:34.337776: step: 324/77, loss: 0.00024958080030046403 2023-01-24 00:47:35.839338: step: 328/77, loss: 3.047224936381099e-06 2023-01-24 00:47:37.259928: step: 332/77, loss: 0.00022927092504687607 2023-01-24 00:47:38.724906: step: 336/77, loss: 3.923237818526104e-05 2023-01-24 00:47:40.190733: step: 340/77, loss: 4.395913128973916e-05 2023-01-24 00:47:41.631728: step: 344/77, loss: 1.4373413250723388e-05 2023-01-24 00:47:43.191964: step: 348/77, 
loss: 0.004309260752052069 2023-01-24 00:47:44.621308: step: 352/77, loss: 0.0005051622283644974 2023-01-24 00:47:46.036815: step: 356/77, loss: 0.003383730771020055 2023-01-24 00:47:47.471673: step: 360/77, loss: 0.016977690160274506 2023-01-24 00:47:48.904935: step: 364/77, loss: 0.010579852387309074 2023-01-24 00:47:50.346737: step: 368/77, loss: 0.0010027396492660046 2023-01-24 00:47:51.786774: step: 372/77, loss: 0.00314369797706604 2023-01-24 00:47:53.275568: step: 376/77, loss: 0.028379876166582108 2023-01-24 00:47:54.791546: step: 380/77, loss: 8.340930435224436e-06 2023-01-24 00:47:56.208724: step: 384/77, loss: 0.012038210406899452 2023-01-24 00:47:57.655016: step: 388/77, loss: 0.04218676686286926 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.9230769230769231, 'r': 0.5625, 'f1': 0.6990291262135923}, 'slot': {'p': 0.5789473684210527, 'r': 0.021589793915603533, 'f1': 0.04162724692526017}, 'combined': 0.02909865804484206, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.9240506329113924, 'r': 0.5703125, 'f1': 0.7053140096618359}, 'slot': {'p': 0.5675675675675675, 'r': 0.020608439646712464, 'f1': 0.03977272727272728}, 'combined': 0.02805226174791393, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.9240506329113924, 'r': 0.5703125, 'f1': 0.7053140096618359}, 'slot': {'p': 0.5675675675675675, 'r': 0.020608439646712464, 'f1': 0.03977272727272728}, 'combined': 0.02805226174791393, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 
'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:49:43.521652: step: 4/77, loss: 1.0884960829571355e-05 2023-01-24 00:49:44.964264: step: 8/77, loss: 0.007430312689393759 2023-01-24 00:49:46.415860: step: 12/77, loss: 0.009909817017614841 2023-01-24 00:49:47.848363: step: 16/77, loss: 0.0011846608249470592 2023-01-24 00:49:49.315709: step: 20/77, loss: 1.698593951005023e-05 2023-01-24 00:49:50.802463: step: 24/77, loss: 0.0011173434322699904 2023-01-24 00:49:52.210483: step: 28/77, loss: 0.006730676628649235 2023-01-24 00:49:53.711854: step: 32/77, loss: 1.2242147931829095e-05 2023-01-24 00:49:55.132046: step: 36/77, loss: 0.005255047231912613 2023-01-24 00:49:56.585886: step: 40/77, loss: 1.379373497911729e-05 2023-01-24 00:49:58.081319: step: 44/77, loss: 0.004701434168964624 2023-01-24 00:49:59.530194: step: 48/77, loss: 0.0014443210093304515 2023-01-24 00:50:01.050936: step: 52/77, loss: 0.0008496107766404748 2023-01-24 00:50:02.491062: step: 56/77, loss: 6.5918388827412855e-06 2023-01-24 00:50:04.040820: step: 60/77, loss: 0.019632356241345406 2023-01-24 00:50:05.467686: step: 64/77, loss: 0.0003296667418908328 2023-01-24 00:50:06.913629: step: 68/77, loss: 0.0006062939064577222 2023-01-24 00:50:08.401563: step: 72/77, loss: 0.0002514682710170746 2023-01-24 00:50:09.891083: step: 76/77, loss: 0.023351075127720833 2023-01-24 00:50:11.285753: step: 80/77, loss: 5.627698101307033e-06 2023-01-24 00:50:12.743497: step: 84/77, loss: 2.480560215190053e-05 2023-01-24 00:50:14.231878: step: 88/77, loss: 4.735262336907908e-06 2023-01-24 00:50:15.698898: step: 92/77, loss: 6.775063957320526e-05 2023-01-24 00:50:17.191341: step: 96/77, loss: 0.0003638921189121902 2023-01-24 00:50:18.708750: step: 100/77, loss: 0.02972320280969143 2023-01-24 00:50:20.134822: step: 104/77, loss: 0.0001608357997611165 2023-01-24 00:50:21.677445: step: 108/77, loss: 0.0215868279337883 2023-01-24 00:50:23.192409: step: 112/77, loss: 0.05368008837103844 2023-01-24 00:50:24.663323: step: 116/77, loss: 0.005768325179815292 2023-01-24 00:50:26.125188: step: 120/77, loss: 0.005247610621154308 2023-01-24 00:50:27.571928: step: 124/77, loss: 0.002271172357723117 2023-01-24 00:50:29.108590: step: 128/77, loss: 9.185097587760538e-05 2023-01-24 00:50:30.516639: 
step: 132/77, loss: 0.0012038334971293807 2023-01-24 00:50:31.892612: step: 136/77, loss: 0.0008183949394151568 2023-01-24 00:50:33.398597: step: 140/77, loss: 0.03969525545835495 2023-01-24 00:50:34.861033: step: 144/77, loss: 0.0012220474891364574 2023-01-24 00:50:36.300363: step: 148/77, loss: 0.0033739679493010044 2023-01-24 00:50:37.743873: step: 152/77, loss: 1.5616130895068636e-06 2023-01-24 00:50:39.230444: step: 156/77, loss: 0.060396626591682434 2023-01-24 00:50:40.766460: step: 160/77, loss: 0.009896337985992432 2023-01-24 00:50:42.268245: step: 164/77, loss: 0.000577462837100029 2023-01-24 00:50:43.734344: step: 168/77, loss: 0.031885746866464615 2023-01-24 00:50:45.227978: step: 172/77, loss: 4.327083661337383e-05 2023-01-24 00:50:46.733193: step: 176/77, loss: 0.010665344074368477 2023-01-24 00:50:48.171378: step: 180/77, loss: 3.1690917239757255e-05 2023-01-24 00:50:49.640362: step: 184/77, loss: 0.016238974407315254 2023-01-24 00:50:51.076049: step: 188/77, loss: 3.9243343053385615e-05 2023-01-24 00:50:52.572421: step: 192/77, loss: 7.24821729818359e-05 2023-01-24 00:50:54.058781: step: 196/77, loss: 2.0607499209290836e-06 2023-01-24 00:50:55.549464: step: 200/77, loss: 0.0031924722716212273 2023-01-24 00:50:56.944991: step: 204/77, loss: 8.680317841935903e-05 2023-01-24 00:50:58.365925: step: 208/77, loss: 0.004762844182550907 2023-01-24 00:50:59.814629: step: 212/77, loss: 0.004229036625474691 2023-01-24 00:51:01.248766: step: 216/77, loss: 0.0013016742886975408 2023-01-24 00:51:02.723468: step: 220/77, loss: 0.0001282370212720707 2023-01-24 00:51:04.173588: step: 224/77, loss: 3.843209924525581e-05 2023-01-24 00:51:05.571569: step: 228/77, loss: 0.02050735242664814 2023-01-24 00:51:07.031655: step: 232/77, loss: 0.0035930124577134848 2023-01-24 00:51:08.443513: step: 236/77, loss: 0.00012073626567143947 2023-01-24 00:51:09.876170: step: 240/77, loss: 0.040597103536129 2023-01-24 00:51:11.318399: step: 244/77, loss: 9.38911980483681e-05 2023-01-24 00:51:12.850161: step: 248/77, loss: 1.2011138096568175e-05 2023-01-24 00:51:14.346986: step: 252/77, loss: 0.03938305377960205 2023-01-24 00:51:15.842358: step: 256/77, loss: 0.00027365548885427415 2023-01-24 00:51:17.290564: step: 260/77, loss: 0.0023456427734345198 2023-01-24 00:51:18.769475: step: 264/77, loss: 0.007549828849732876 2023-01-24 00:51:20.234633: step: 268/77, loss: 0.0025673359632492065 2023-01-24 00:51:21.623575: step: 272/77, loss: 0.0044209775514900684 2023-01-24 00:51:23.026522: step: 276/77, loss: 1.4037575965630822e-05 2023-01-24 00:51:24.489681: step: 280/77, loss: 0.0001280048891203478 2023-01-24 00:51:26.055513: step: 284/77, loss: 0.03903944045305252 2023-01-24 00:51:27.533541: step: 288/77, loss: 1.4573142834706232e-06 2023-01-24 00:51:29.054423: step: 292/77, loss: 0.09980116039514542 2023-01-24 00:51:30.587912: step: 296/77, loss: 0.00011804010136984289 2023-01-24 00:51:32.054201: step: 300/77, loss: 0.011888409033417702 2023-01-24 00:51:33.504725: step: 304/77, loss: 0.0007111412705853581 2023-01-24 00:51:34.917599: step: 308/77, loss: 4.1683739254949614e-05 2023-01-24 00:51:36.381510: step: 312/77, loss: 0.012236321344971657 2023-01-24 00:51:37.913861: step: 316/77, loss: 0.0014427776914089918 2023-01-24 00:51:39.382513: step: 320/77, loss: 6.675928307231516e-05 2023-01-24 00:51:40.852547: step: 324/77, loss: 0.004596993327140808 2023-01-24 00:51:42.311208: step: 328/77, loss: 0.014233395457267761 2023-01-24 00:51:43.758082: step: 332/77, loss: 0.06907449662685394 2023-01-24 00:51:45.231169: 
step: 336/77, loss: 5.8134014579991344e-06 2023-01-24 00:51:46.709481: step: 340/77, loss: 3.035301688214531e-06 2023-01-24 00:51:48.168349: step: 344/77, loss: 0.000876867794431746 2023-01-24 00:51:49.644066: step: 348/77, loss: 0.0074017345905303955 2023-01-24 00:51:51.082297: step: 352/77, loss: 0.0007744339527562261 2023-01-24 00:51:52.556868: step: 356/77, loss: 0.008747021667659283 2023-01-24 00:51:53.992669: step: 360/77, loss: 0.017727002501487732 2023-01-24 00:51:55.415498: step: 364/77, loss: 0.00010129573638550937 2023-01-24 00:51:56.866843: step: 368/77, loss: 0.01120673306286335 2023-01-24 00:51:58.306842: step: 372/77, loss: 0.004367190413177013 2023-01-24 00:51:59.727667: step: 376/77, loss: 6.192243745317683e-05 2023-01-24 00:52:01.128556: step: 380/77, loss: 0.0005244517815299332 2023-01-24 00:52:02.570304: step: 384/77, loss: 0.0010551504092290998 2023-01-24 00:52:03.990276: step: 388/77, loss: 2.8043286874890327e-06 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 20} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5390625, 'f1': 0.69}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.02628571428571428, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 20} Test Korean: {'template': {'p': 0.958904109589041, 'r': 0.546875, 'f1': 0.6965174129353234}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.026533996683250415, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 20} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5390625, 'f1': 0.69}, 'slot': {'p': 0.6666666666666666, 'r': 0.019627085377821395, 'f1': 0.03813155386081983}, 'combined': 0.02631077216396568, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} 
-------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:53:49.598259: step: 4/77, loss: 0.005530391354113817 2023-01-24 00:53:50.998156: step: 8/77, loss: 0.0003530173853505403 2023-01-24 00:53:52.447219: step: 12/77, loss: 0.0536833330988884 2023-01-24 00:53:53.928086: step: 16/77, loss: 0.005902047269046307 2023-01-24 00:53:55.376324: step: 20/77, loss: 4.738548682325927e-07 2023-01-24 00:53:56.865881: step: 24/77, loss: 0.0003305294376332313 2023-01-24 00:53:58.286296: step: 28/77, loss: 0.00017434243636671454 2023-01-24 00:53:59.750530: step: 32/77, loss: 0.00037804216844961047 2023-01-24 00:54:01.211080: step: 36/77, loss: 0.003190763061866164 2023-01-24 00:54:02.685429: step: 40/77, loss: 0.0003863647871185094 2023-01-24 00:54:04.153309: step: 44/77, loss: 2.1436835595523007e-05 2023-01-24 00:54:05.610871: step: 48/77, loss: 0.0010237182723358274 2023-01-24 00:54:07.075099: step: 52/77, loss: 0.00014183111488819122 2023-01-24 00:54:08.554802: step: 56/77, loss: 0.0003928892838303 2023-01-24 00:54:09.998758: step: 60/77, loss: 0.01873972825706005 2023-01-24 00:54:11.507139: step: 64/77, loss: 0.00020161019347142428 2023-01-24 00:54:12.989281: step: 68/77, loss: 0.006954270415008068 2023-01-24 00:54:14.512235: step: 72/77, loss: 0.0023213254753500223 2023-01-24 00:54:15.988706: step: 76/77, loss: 0.013047211803495884 2023-01-24 00:54:17.454385: step: 80/77, loss: 0.006372448056936264 2023-01-24 00:54:18.909632: step: 84/77, loss: 1.8654458472155966e-05 2023-01-24 00:54:20.330124: step: 88/77, loss: 0.019734129309654236 2023-01-24 00:54:21.899830: step: 92/77, loss: 0.0007313595851883292 2023-01-24 00:54:23.327271: step: 96/77, loss: 2.9799655749229714e-05 2023-01-24 00:54:24.699278: step: 100/77, loss: 0.0002187217032769695 2023-01-24 00:54:26.195351: step: 104/77, loss: 1.2623586371773854e-05 2023-01-24 00:54:27.685571: step: 108/77, loss: 0.011636082082986832 2023-01-24 00:54:29.123509: step: 112/77, loss: 0.012348459102213383 2023-01-24 00:54:30.642383: step: 116/77, loss: 0.01756596937775612 2023-01-24 00:54:32.128246: step: 120/77, loss: 
5.153836809768109e-06 2023-01-24 00:54:33.503996: step: 124/77, loss: 0.0007325861952267587 2023-01-24 00:54:35.006182: step: 128/77, loss: 0.00038951492751948535 2023-01-24 00:54:36.452725: step: 132/77, loss: 0.0022269547916948795 2023-01-24 00:54:37.957060: step: 136/77, loss: 1.160401643574005e-05 2023-01-24 00:54:39.401672: step: 140/77, loss: 0.04449259117245674 2023-01-24 00:54:40.807843: step: 144/77, loss: 1.0371063581260387e-06 2023-01-24 00:54:42.246928: step: 148/77, loss: 0.000331414194079116 2023-01-24 00:54:43.685930: step: 152/77, loss: 6.859993300167844e-05 2023-01-24 00:54:45.167188: step: 156/77, loss: 2.431750772302621e-06 2023-01-24 00:54:46.665321: step: 160/77, loss: 0.006880715489387512 2023-01-24 00:54:48.134852: step: 164/77, loss: 0.0014426393900066614 2023-01-24 00:54:49.632340: step: 168/77, loss: 0.00031652135658077896 2023-01-24 00:54:51.154580: step: 172/77, loss: 1.2606246855284553e-06 2023-01-24 00:54:52.561548: step: 176/77, loss: 2.0184765162412077e-05 2023-01-24 00:54:54.109678: step: 180/77, loss: 0.00422252481803298 2023-01-24 00:54:55.623323: step: 184/77, loss: 5.170652457309188e-07 2023-01-24 00:54:57.119010: step: 188/77, loss: 0.03473076969385147 2023-01-24 00:54:58.605743: step: 192/77, loss: 0.0005205783527344465 2023-01-24 00:55:00.060843: step: 196/77, loss: 0.0054445634596049786 2023-01-24 00:55:01.486852: step: 200/77, loss: 2.1511028535314836e-05 2023-01-24 00:55:02.901325: step: 204/77, loss: 4.1872135625453666e-07 2023-01-24 00:55:04.290559: step: 208/77, loss: 8.406052984355483e-06 2023-01-24 00:55:05.753770: step: 212/77, loss: 0.00669349217787385 2023-01-24 00:55:07.254480: step: 216/77, loss: 0.00034069089451804757 2023-01-24 00:55:08.703410: step: 220/77, loss: 0.011898697353899479 2023-01-24 00:55:10.125611: step: 224/77, loss: 0.000776562956161797 2023-01-24 00:55:11.609231: step: 228/77, loss: 0.001380259171128273 2023-01-24 00:55:13.096983: step: 232/77, loss: 1.1802635526692029e-05 2023-01-24 00:55:14.557598: step: 236/77, loss: 2.6416120817884803e-05 2023-01-24 00:55:16.110392: step: 240/77, loss: 0.003274685936048627 2023-01-24 00:55:17.562956: step: 244/77, loss: 0.002510338556021452 2023-01-24 00:55:19.016176: step: 248/77, loss: 3.347124220454134e-05 2023-01-24 00:55:20.507436: step: 252/77, loss: 0.08811137080192566 2023-01-24 00:55:21.980718: step: 256/77, loss: 1.0266564913763432e-06 2023-01-24 00:55:23.357872: step: 260/77, loss: 0.012822951190173626 2023-01-24 00:55:24.816187: step: 264/77, loss: 1.4880620256008115e-05 2023-01-24 00:55:26.272997: step: 268/77, loss: 5.9488898841664195e-05 2023-01-24 00:55:27.796614: step: 272/77, loss: 0.0005867235595360398 2023-01-24 00:55:29.253575: step: 276/77, loss: 0.010660209693014622 2023-01-24 00:55:30.715362: step: 280/77, loss: 0.01802891679108143 2023-01-24 00:55:32.179339: step: 284/77, loss: 0.0002461299882270396 2023-01-24 00:55:33.646436: step: 288/77, loss: 0.00024737458443269134 2023-01-24 00:55:35.170789: step: 292/77, loss: 0.0002895930374506861 2023-01-24 00:55:36.572672: step: 296/77, loss: 7.560687663499266e-05 2023-01-24 00:55:38.039596: step: 300/77, loss: 1.0199121788900811e-05 2023-01-24 00:55:39.480291: step: 304/77, loss: 0.008103596977889538 2023-01-24 00:55:40.884503: step: 308/77, loss: 0.00024358216614928097 2023-01-24 00:55:42.446695: step: 312/77, loss: 3.187201218679547e-05 2023-01-24 00:55:43.909574: step: 316/77, loss: 2.9708575311815366e-05 2023-01-24 00:55:45.324801: step: 320/77, loss: 0.0017332624411210418 2023-01-24 00:55:46.849763: step: 
324/77, loss: 0.0007244001026265323 2023-01-24 00:55:48.306466: step: 328/77, loss: 0.005347931291908026 2023-01-24 00:55:49.779142: step: 332/77, loss: 0.0006719163502566516 2023-01-24 00:55:51.303451: step: 336/77, loss: 0.03064035065472126 2023-01-24 00:55:52.785803: step: 340/77, loss: 0.0007121642120182514 2023-01-24 00:55:54.235779: step: 344/77, loss: 8.618739229859784e-06 2023-01-24 00:55:55.689061: step: 348/77, loss: 0.000486185890622437 2023-01-24 00:55:57.148618: step: 352/77, loss: 2.7378995582694188e-05 2023-01-24 00:55:58.636529: step: 356/77, loss: 3.448443385423161e-05 2023-01-24 00:56:00.099452: step: 360/77, loss: 2.4184762878576294e-05 2023-01-24 00:56:01.567417: step: 364/77, loss: 0.0701582059264183 2023-01-24 00:56:02.979728: step: 368/77, loss: 9.116478031501174e-05 2023-01-24 00:56:04.496247: step: 372/77, loss: 0.0022903112694621086 2023-01-24 00:56:05.958558: step: 376/77, loss: 1.1425704542489257e-05 2023-01-24 00:56:07.439281: step: 380/77, loss: 0.0025022730696946383 2023-01-24 00:56:08.928959: step: 384/77, loss: 9.283351118938299e-07 2023-01-24 00:56:10.342496: step: 388/77, loss: 0.00021973026741761714 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04747841890049978, 'epoch': 21} Test Chinese: {'template': {'p': 0.9571428571428572, 'r': 0.5234375, 'f1': 0.6767676767676768}, 'slot': {'p': 0.65625, 'r': 0.020608439646712464, 'f1': 0.039961941008563276}, 'combined': 0.027044949975492318, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04747841890049978, 'epoch': 21} Test Korean: {'template': {'p': 0.9571428571428572, 'r': 0.5234375, 'f1': 0.6767676767676768}, 'slot': {'p': 0.65625, 'r': 0.020608439646712464, 'f1': 0.039961941008563276}, 'combined': 0.027044949975492318, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04747841890049978, 'epoch': 21} Test Russian: {'template': {'p': 0.9577464788732394, 'r': 0.53125, 'f1': 0.6834170854271355}, 'slot': {'p': 0.65625, 'r': 0.020608439646712464, 'f1': 0.039961941008563276}, 'combined': 0.027310673252083438, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 
'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:57:55.918607: step: 4/77, loss: 0.0007130326121114194 2023-01-24 00:57:57.316963: step: 8/77, loss: 0.004753975197672844 2023-01-24 00:57:58.771762: step: 12/77, loss: 0.001979820430278778 2023-01-24 00:58:00.230532: step: 16/77, loss: 0.0001369004021398723 2023-01-24 00:58:01.633880: step: 20/77, loss: 0.002938532270491123 2023-01-24 00:58:03.096214: step: 24/77, loss: 0.018064025789499283 2023-01-24 00:58:04.487817: step: 28/77, loss: 0.010816946625709534 2023-01-24 00:58:05.888817: step: 32/77, loss: 0.000314035132760182 2023-01-24 00:58:07.324884: step: 36/77, loss: 0.001084410585463047 2023-01-24 00:58:08.764764: step: 40/77, loss: 0.0005837613716721535 2023-01-24 00:58:10.260219: step: 44/77, loss: 0.00906037725508213 2023-01-24 00:58:11.716448: step: 48/77, loss: 0.00029241805896162987 2023-01-24 00:58:13.181555: step: 52/77, loss: 2.6940817406284623e-05 2023-01-24 00:58:14.617555: step: 56/77, loss: 4.1381859773537144e-05 2023-01-24 00:58:16.113069: step: 60/77, loss: 0.00024003698490560055 2023-01-24 00:58:17.581654: step: 64/77, loss: 0.01580897718667984 2023-01-24 00:58:19.062279: step: 68/77, loss: 2.7904839953407645e-05 2023-01-24 00:58:20.504708: step: 72/77, loss: 5.069902545074001e-05 2023-01-24 00:58:22.027680: step: 76/77, loss: 0.025134893134236336 2023-01-24 00:58:23.553018: step: 80/77, loss: 0.004092110786587 2023-01-24 00:58:24.964736: step: 84/77, loss: 0.0008029898745007813 2023-01-24 00:58:26.422012: step: 88/77, loss: 0.00046595188905484974 2023-01-24 00:58:27.860730: step: 92/77, loss: 0.0006075751734897494 2023-01-24 00:58:29.326059: step: 96/77, loss: 7.684613228775561e-05 2023-01-24 00:58:30.765990: step: 100/77, loss: 4.504282696871087e-05 2023-01-24 00:58:32.211202: step: 104/77, loss: 0.0023876773193478584 2023-01-24 
00:58:33.593680: step: 108/77, loss: 0.0033316421322524548 2023-01-24 00:58:35.068394: step: 112/77, loss: 0.002512221457436681 2023-01-24 00:58:36.592504: step: 116/77, loss: 0.0002792548621073365 2023-01-24 00:58:38.002819: step: 120/77, loss: 7.562458631582558e-05 2023-01-24 00:58:39.508390: step: 124/77, loss: 0.016864262521266937 2023-01-24 00:58:40.938765: step: 128/77, loss: 5.999497807351872e-05 2023-01-24 00:58:42.549365: step: 132/77, loss: 0.0022027171216905117 2023-01-24 00:58:43.996851: step: 136/77, loss: 0.04083377867937088 2023-01-24 00:58:45.458268: step: 140/77, loss: 0.0006076354184187949 2023-01-24 00:58:46.905355: step: 144/77, loss: 0.0026501461397856474 2023-01-24 00:58:48.374812: step: 148/77, loss: 4.051176802022383e-06 2023-01-24 00:58:49.816551: step: 152/77, loss: 2.730186315602623e-05 2023-01-24 00:58:51.248569: step: 156/77, loss: 0.0032909002620726824 2023-01-24 00:58:52.700956: step: 160/77, loss: 0.00011777214240282774 2023-01-24 00:58:54.159645: step: 164/77, loss: 0.0002647798683028668 2023-01-24 00:58:55.655132: step: 168/77, loss: 1.1578140401979908e-06 2023-01-24 00:58:57.071235: step: 172/77, loss: 0.00021434163500089198 2023-01-24 00:58:58.529246: step: 176/77, loss: 0.0002885766443796456 2023-01-24 00:58:59.939517: step: 180/77, loss: 0.0005006411811336875 2023-01-24 00:59:01.333949: step: 184/77, loss: 4.458000330487266e-05 2023-01-24 00:59:02.824537: step: 188/77, loss: 0.00045059213880449533 2023-01-24 00:59:04.283832: step: 192/77, loss: 0.02458992972970009 2023-01-24 00:59:05.743002: step: 196/77, loss: 2.822162969096098e-05 2023-01-24 00:59:07.186954: step: 200/77, loss: 3.4389731808914803e-06 2023-01-24 00:59:08.641566: step: 204/77, loss: 6.366405159496935e-06 2023-01-24 00:59:10.073256: step: 208/77, loss: 0.0012770395260304213 2023-01-24 00:59:11.507871: step: 212/77, loss: 0.00019756241817958653 2023-01-24 00:59:12.986154: step: 216/77, loss: 4.08826963393949e-06 2023-01-24 00:59:14.491002: step: 220/77, loss: 0.028113486245274544 2023-01-24 00:59:15.960387: step: 224/77, loss: 0.00014170001668389887 2023-01-24 00:59:17.399127: step: 228/77, loss: 9.80123604676919e-06 2023-01-24 00:59:18.871093: step: 232/77, loss: 0.0005905760335735977 2023-01-24 00:59:20.290105: step: 236/77, loss: 0.029797719791531563 2023-01-24 00:59:21.682334: step: 240/77, loss: 0.0002706180966924876 2023-01-24 00:59:23.124369: step: 244/77, loss: 0.0027564410120248795 2023-01-24 00:59:24.620365: step: 248/77, loss: 0.004286859650164843 2023-01-24 00:59:26.140530: step: 252/77, loss: 0.00023515461361967027 2023-01-24 00:59:27.606927: step: 256/77, loss: 0.009189371019601822 2023-01-24 00:59:29.060214: step: 260/77, loss: 4.962151433574036e-05 2023-01-24 00:59:30.536013: step: 264/77, loss: 3.2074090995592996e-05 2023-01-24 00:59:31.990813: step: 268/77, loss: 0.016157323494553566 2023-01-24 00:59:33.464508: step: 272/77, loss: 0.009273549541831017 2023-01-24 00:59:34.995479: step: 276/77, loss: 7.113243555068038e-06 2023-01-24 00:59:36.442461: step: 280/77, loss: 0.0007872179849073291 2023-01-24 00:59:37.892146: step: 284/77, loss: 1.5747938959975727e-05 2023-01-24 00:59:39.348500: step: 288/77, loss: 7.15578644303605e-05 2023-01-24 00:59:40.796908: step: 292/77, loss: 1.0132703209819738e-06 2023-01-24 00:59:42.226103: step: 296/77, loss: 6.1032983467157464e-06 2023-01-24 00:59:43.712542: step: 300/77, loss: 2.1115256458870135e-05 2023-01-24 00:59:45.159025: step: 304/77, loss: 0.0014195248950272799 2023-01-24 00:59:46.578646: step: 308/77, loss: 0.00951320305466652 
2023-01-24 00:59:48.005443: step: 312/77, loss: 0.0012874712701886892 2023-01-24 00:59:49.471990: step: 316/77, loss: 6.160401972010732e-05 2023-01-24 00:59:50.862794: step: 320/77, loss: 0.1084732934832573 2023-01-24 00:59:52.323621: step: 324/77, loss: 0.00011493593046907336 2023-01-24 00:59:53.778533: step: 328/77, loss: 0.007658337242901325 2023-01-24 00:59:55.197357: step: 332/77, loss: 0.0026278269942849874 2023-01-24 00:59:56.679054: step: 336/77, loss: 6.384636071743444e-05 2023-01-24 00:59:58.188587: step: 340/77, loss: 1.5949266526149586e-05 2023-01-24 00:59:59.700592: step: 344/77, loss: 0.07788742333650589 2023-01-24 01:00:01.127747: step: 348/77, loss: 8.078066457528621e-05 2023-01-24 01:00:02.583556: step: 352/77, loss: 0.00018889573402702808 2023-01-24 01:00:04.071158: step: 356/77, loss: 0.0018875160021707416 2023-01-24 01:00:05.582480: step: 360/77, loss: 1.9665896616061218e-05 2023-01-24 01:00:06.990696: step: 364/77, loss: 0.0001643340801820159 2023-01-24 01:00:08.504310: step: 368/77, loss: 0.005625385791063309 2023-01-24 01:00:09.971950: step: 372/77, loss: 0.00016497427714057267 2023-01-24 01:00:11.470450: step: 376/77, loss: 0.021070636808872223 2023-01-24 01:00:12.911182: step: 380/77, loss: 0.00030101914308033884 2023-01-24 01:00:14.374920: step: 384/77, loss: 5.654891356243752e-05 2023-01-24 01:00:15.891875: step: 388/77, loss: 8.754900591156911e-06 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4418604651162791, 'r': 0.035916824196597356, 'f1': 0.06643356643356645}, 'combined': 0.04714640198511168, 'epoch': 22} Test Chinese: {'template': {'p': 0.9090909090909091, 'r': 0.546875, 'f1': 0.6829268292682926}, 'slot': {'p': 0.6111111111111112, 'r': 0.021589793915603533, 'f1': 0.04170616113744076}, 'combined': 0.028482256386544905, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4418604651162791, 'r': 0.035916824196597356, 'f1': 0.06643356643356645}, 'combined': 0.04714640198511168, 'epoch': 22} Test Korean: {'template': {'p': 0.9090909090909091, 'r': 0.546875, 'f1': 0.6829268292682926}, 'slot': {'p': 0.6111111111111112, 'r': 0.021589793915603533, 'f1': 0.04170616113744076}, 'combined': 0.028482256386544905, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4523809523809524, 'r': 0.035916824196597356, 'f1': 0.06654991243432576}, 'combined': 0.0472289701146828, 'epoch': 22} Test Russian: {'template': {'p': 0.9090909090909091, 'r': 0.546875, 'f1': 0.6829268292682926}, 'slot': {'p': 0.6, 'r': 0.020608439646712464, 'f1': 0.03984819734345351}, 'combined': 0.027213403063821907, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 
0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 01:02:01.411179: step: 4/77, loss: 9.152458369499072e-05 2023-01-24 01:02:02.828170: step: 8/77, loss: 0.0014891615137457848 2023-01-24 01:02:04.299371: step: 12/77, loss: 0.004348850343376398 2023-01-24 01:02:05.811489: step: 16/77, loss: 2.2956748580327258e-05 2023-01-24 01:02:07.252049: step: 20/77, loss: 0.00022393259860109538 2023-01-24 01:02:08.759085: step: 24/77, loss: 6.849241344752954e-06 2023-01-24 01:02:10.234991: step: 28/77, loss: 0.0009687441634014249 2023-01-24 01:02:11.659145: step: 32/77, loss: 4.574593788220227e-07 2023-01-24 01:02:13.141400: step: 36/77, loss: 0.0002080762351397425 2023-01-24 01:02:14.580081: step: 40/77, loss: 0.020183607935905457 2023-01-24 01:02:16.016454: step: 44/77, loss: 6.683401443297043e-05 2023-01-24 01:02:17.456214: step: 48/77, loss: 4.300964064896107e-05 2023-01-24 01:02:18.977067: step: 52/77, loss: 0.00010689069313229993 2023-01-24 01:02:20.407169: step: 56/77, loss: 7.621638815180631e-06 2023-01-24 01:02:21.847382: step: 60/77, loss: 0.005777846090495586 2023-01-24 01:02:23.261580: step: 64/77, loss: 1.0221141565125436e-05 2023-01-24 01:02:24.772500: step: 68/77, loss: 0.00042474266956560314 2023-01-24 01:02:26.261369: step: 72/77, loss: 0.0002089111803798005 2023-01-24 01:02:27.693281: step: 76/77, loss: 0.00219891220331192 2023-01-24 01:02:29.163107: step: 80/77, loss: 0.0003327025333419442 2023-01-24 01:02:30.732726: step: 84/77, loss: 1.5328430890804157e-05 2023-01-24 01:02:32.190686: step: 88/77, loss: 1.4556253518094309e-05 2023-01-24 01:02:33.704007: step: 
92/77, loss: 0.0012400572886690497 2023-01-24 01:02:35.120117: step: 96/77, loss: 1.1806205293396488e-05 2023-01-24 01:02:36.561450: step: 100/77, loss: 0.013861570507287979 2023-01-24 01:02:37.953659: step: 104/77, loss: 0.004015210084617138 2023-01-24 01:02:39.425633: step: 108/77, loss: 7.869948603911325e-05 2023-01-24 01:02:40.858521: step: 112/77, loss: 0.0006869456265121698 2023-01-24 01:02:42.324127: step: 116/77, loss: 8.663826520205475e-06 2023-01-24 01:02:43.725296: step: 120/77, loss: 0.0004616382939275354 2023-01-24 01:02:45.258196: step: 124/77, loss: 0.00424158526584506 2023-01-24 01:02:46.779378: step: 128/77, loss: 0.0019587939605116844 2023-01-24 01:02:48.214652: step: 132/77, loss: 7.3105504270643e-05 2023-01-24 01:02:49.658323: step: 136/77, loss: 1.5767673176014796e-05 2023-01-24 01:02:51.169371: step: 140/77, loss: 0.011566844768822193 2023-01-24 01:02:52.637500: step: 144/77, loss: 1.3639378266816493e-05 2023-01-24 01:02:54.162748: step: 148/77, loss: 2.9078797524562106e-05 2023-01-24 01:02:55.680078: step: 152/77, loss: 0.00010868874232983217 2023-01-24 01:02:57.191463: step: 156/77, loss: 0.012522750534117222 2023-01-24 01:02:58.659409: step: 160/77, loss: 4.022658686153591e-05 2023-01-24 01:03:00.132029: step: 164/77, loss: 9.945671627065167e-05 2023-01-24 01:03:01.587340: step: 168/77, loss: 0.00214787176810205 2023-01-24 01:03:03.022023: step: 172/77, loss: 3.362211646162905e-05 2023-01-24 01:03:04.495447: step: 176/77, loss: 1.410477125318721e-05 2023-01-24 01:03:06.013188: step: 180/77, loss: 1.957959284482058e-06 2023-01-24 01:03:07.461795: step: 184/77, loss: 3.4792228689184412e-06 2023-01-24 01:03:08.992493: step: 188/77, loss: 7.544868276454508e-05 2023-01-24 01:03:10.455623: step: 192/77, loss: 6.105268403189257e-05 2023-01-24 01:03:11.900266: step: 196/77, loss: 1.0859316716960166e-05 2023-01-24 01:03:13.327372: step: 200/77, loss: 0.006887755356729031 2023-01-24 01:03:14.787213: step: 204/77, loss: 0.0004280690918676555 2023-01-24 01:03:16.322807: step: 208/77, loss: 5.831807357026264e-06 2023-01-24 01:03:17.767606: step: 212/77, loss: 8.523413157490722e-07 2023-01-24 01:03:19.206828: step: 216/77, loss: 6.899658183101565e-05 2023-01-24 01:03:20.655078: step: 220/77, loss: 0.0049618808552622795 2023-01-24 01:03:22.142036: step: 224/77, loss: 0.00014697492588311434 2023-01-24 01:03:23.687275: step: 228/77, loss: 0.0004961665254086256 2023-01-24 01:03:25.160432: step: 232/77, loss: 5.026019789511338e-05 2023-01-24 01:03:26.671640: step: 236/77, loss: 0.0016431210096925497 2023-01-24 01:03:28.174841: step: 240/77, loss: 0.0011630343506112695 2023-01-24 01:03:29.627844: step: 244/77, loss: 0.0003789504407905042 2023-01-24 01:03:31.115334: step: 248/77, loss: 0.006341112311929464 2023-01-24 01:03:32.593683: step: 252/77, loss: 6.882879915792728e-06 2023-01-24 01:03:34.044482: step: 256/77, loss: 0.0005342121585272253 2023-01-24 01:03:35.490852: step: 260/77, loss: 0.0001719615829642862 2023-01-24 01:03:36.986901: step: 264/77, loss: 9.308056178269908e-05 2023-01-24 01:03:38.394890: step: 268/77, loss: 6.610100535908714e-05 2023-01-24 01:03:39.865827: step: 272/77, loss: 8.758709009271115e-05 2023-01-24 01:03:41.253702: step: 276/77, loss: 0.010416517965495586 2023-01-24 01:03:42.687777: step: 280/77, loss: 0.01606697589159012 2023-01-24 01:03:44.154427: step: 284/77, loss: 1.2740185866277898e-06 2023-01-24 01:03:45.623886: step: 288/77, loss: 1.1670215826597996e-05 2023-01-24 01:03:47.045856: step: 292/77, loss: 0.06407275795936584 2023-01-24 01:03:48.482286: 
step: 296/77, loss: 8.716026968613733e-06 2023-01-24 01:03:49.941903: step: 300/77, loss: 0.0001433848956367001 2023-01-24 01:03:51.377227: step: 304/77, loss: 4.845308285439387e-05 2023-01-24 01:03:52.895158: step: 308/77, loss: 8.231492392951623e-05 2023-01-24 01:03:54.401300: step: 312/77, loss: 0.0001697995758149773 2023-01-24 01:03:55.815174: step: 316/77, loss: 0.00019652053015306592 2023-01-24 01:03:57.325293: step: 320/77, loss: 0.0017496251966804266 2023-01-24 01:03:58.771698: step: 324/77, loss: 0.010774437338113785 2023-01-24 01:04:00.262737: step: 328/77, loss: 4.13789121012087e-06 2023-01-24 01:04:01.663176: step: 332/77, loss: 0.00026316073490306735 2023-01-24 01:04:03.115170: step: 336/77, loss: 0.0004801765608135611 2023-01-24 01:04:04.570867: step: 340/77, loss: 0.00015464633179362863 2023-01-24 01:04:06.005367: step: 344/77, loss: 1.1597515367611777e-05 2023-01-24 01:04:07.507084: step: 348/77, loss: 0.014501575380563736 2023-01-24 01:04:08.989310: step: 352/77, loss: 0.00031463519553653896 2023-01-24 01:04:10.439370: step: 356/77, loss: 2.0194192984490655e-05 2023-01-24 01:04:11.848050: step: 360/77, loss: 0.0047062234953045845 2023-01-24 01:04:13.290137: step: 364/77, loss: 0.003076509339734912 2023-01-24 01:04:14.763758: step: 368/77, loss: 7.674502558074892e-05 2023-01-24 01:04:16.229132: step: 372/77, loss: 0.0009569913381710649 2023-01-24 01:04:17.721096: step: 376/77, loss: 0.000321688101394102 2023-01-24 01:04:19.098766: step: 380/77, loss: 0.00011519218242028728 2023-01-24 01:04:20.584108: step: 384/77, loss: 2.3515976863563992e-05 2023-01-24 01:04:22.076612: step: 388/77, loss: 0.0040862769819796085 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4634146341463415, 'r': 0.035916824196597356, 'f1': 0.06666666666666668}, 'combined': 0.04912280701754387, 'epoch': 23} Test Chinese: {'template': {'p': 0.9452054794520548, 'r': 0.5390625, 'f1': 0.6865671641791045}, 'slot': {'p': 0.58, 'r': 0.02845927379784102, 'f1': 0.05425631431244154}, 'combined': 0.03725060385630315, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04839676356008391, 'epoch': 23} Test Korean: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.56, 'r': 0.02747791952894995, 'f1': 0.05238540692235734}, 'combined': 0.03578805027368967, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4634146341463415, 'r': 0.035916824196597356, 'f1': 0.06666666666666668}, 'combined': 0.04912280701754387, 'epoch': 23} Test Russian: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5686274509803921, 'r': 0.02845927379784102, 'f1': 0.05420560747663552}, 'combined': 0.037031553622651994, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 
'combined': 0.04301075268817204, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 01:06:07.634657: step: 4/77, loss: 0.00016842114564497024 2023-01-24 01:06:09.025951: step: 8/77, loss: 0.00707374420017004 2023-01-24 01:06:10.559191: step: 12/77, loss: 9.162640708382241e-06 2023-01-24 01:06:11.942211: step: 16/77, loss: 6.259590008994564e-05 2023-01-24 01:06:13.400482: step: 20/77, loss: 5.2402078836166766e-06 2023-01-24 01:06:14.857919: step: 24/77, loss: 8.415575393883046e-06 2023-01-24 01:06:16.321424: step: 28/77, loss: 0.006951048504561186 2023-01-24 01:06:17.743687: step: 32/77, loss: 3.4868426723733137e-07 2023-01-24 01:06:19.126774: step: 36/77, loss: 7.024884325801395e-06 2023-01-24 01:06:20.568790: step: 40/77, loss: 0.004366460256278515 2023-01-24 01:06:22.058140: step: 44/77, loss: 0.0004859516047872603 2023-01-24 01:06:23.497876: step: 48/77, loss: 0.00039076071698218584 2023-01-24 01:06:24.929680: step: 52/77, loss: 4.046229878440499e-05 2023-01-24 01:06:26.319839: step: 56/77, loss: 0.0024879940319806337 2023-01-24 01:06:27.788357: step: 60/77, loss: 0.0001154499186668545 2023-01-24 01:06:29.231900: step: 64/77, loss: 0.0010849382961168885 2023-01-24 01:06:30.711029: step: 68/77, loss: 0.05313294753432274 2023-01-24 01:06:32.201479: step: 72/77, loss: 0.00030986842466518283 2023-01-24 01:06:33.694170: 
step: 76/77, loss: 0.006430521607398987 2023-01-24 01:06:35.170184: step: 80/77, loss: 0.0293259434401989 2023-01-24 01:06:36.603660: step: 84/77, loss: 0.018152793869376183 2023-01-24 01:06:38.077657: step: 88/77, loss: 0.0007140887901186943 2023-01-24 01:06:39.529211: step: 92/77, loss: 0.00023382938525173813 2023-01-24 01:06:40.949821: step: 96/77, loss: 8.996458200272173e-05 2023-01-24 01:06:42.433001: step: 100/77, loss: 8.120103302644566e-06 2023-01-24 01:06:43.937345: step: 104/77, loss: 0.0007067503174766898 2023-01-24 01:06:45.375788: step: 108/77, loss: 0.000819536333438009 2023-01-24 01:06:46.826685: step: 112/77, loss: 5.364386197470594e-07 2023-01-24 01:06:48.355540: step: 116/77, loss: 5.002348552807234e-05 2023-01-24 01:06:49.797907: step: 120/77, loss: 0.007737953215837479 2023-01-24 01:06:51.291100: step: 124/77, loss: 0.0038814896252006292 2023-01-24 01:06:52.670214: step: 128/77, loss: 0.0030426017474383116 2023-01-24 01:06:54.188743: step: 132/77, loss: 0.00019459401664789766 2023-01-24 01:06:55.608338: step: 136/77, loss: 4.80973903904669e-05 2023-01-24 01:06:57.090318: step: 140/77, loss: 0.00020540988771244884 2023-01-24 01:06:58.579087: step: 144/77, loss: 9.273541218135506e-05 2023-01-24 01:07:00.019147: step: 148/77, loss: 7.823100531823002e-06 2023-01-24 01:07:01.469341: step: 152/77, loss: 7.632689630554523e-06 2023-01-24 01:07:02.917904: step: 156/77, loss: 7.687011020607315e-06 2023-01-24 01:07:04.375860: step: 160/77, loss: 0.002562094945460558 2023-01-24 01:07:05.841215: step: 164/77, loss: 0.002456388669088483 2023-01-24 01:07:07.240262: step: 168/77, loss: 4.518212517723441e-05 2023-01-24 01:07:08.636900: step: 172/77, loss: 2.0654457330238074e-05 2023-01-24 01:07:10.128503: step: 176/77, loss: 0.001107532880268991 2023-01-24 01:07:11.620404: step: 180/77, loss: 4.694052768172696e-05 2023-01-24 01:07:13.150037: step: 184/77, loss: 4.738108600577107e-06 2023-01-24 01:07:14.589837: step: 188/77, loss: 4.7500452637905255e-06 2023-01-24 01:07:16.095337: step: 192/77, loss: 0.00016000482719391584 2023-01-24 01:07:17.528704: step: 196/77, loss: 9.928902727551758e-05 2023-01-24 01:07:19.018879: step: 200/77, loss: 1.9417080693528987e-05 2023-01-24 01:07:20.537700: step: 204/77, loss: 0.015278271399438381 2023-01-24 01:07:22.027947: step: 208/77, loss: 3.050275699933991e-05 2023-01-24 01:07:23.503653: step: 212/77, loss: 5.545574822463095e-06 2023-01-24 01:07:25.001003: step: 216/77, loss: 0.00744546577334404 2023-01-24 01:07:26.432942: step: 220/77, loss: 0.0001620856928639114 2023-01-24 01:07:27.835648: step: 224/77, loss: 4.286890998628223e-06 2023-01-24 01:07:29.340191: step: 228/77, loss: 0.0006224351236596704 2023-01-24 01:07:30.791480: step: 232/77, loss: 1.8760068769552163e-06 2023-01-24 01:07:32.202355: step: 236/77, loss: 0.04302788898348808 2023-01-24 01:07:33.632168: step: 240/77, loss: 0.009987486526370049 2023-01-24 01:07:35.132071: step: 244/77, loss: 2.8072849090676755e-05 2023-01-24 01:07:36.652597: step: 248/77, loss: 4.380669906822732e-06 2023-01-24 01:07:38.130321: step: 252/77, loss: 0.00025404879124835134 2023-01-24 01:07:39.575665: step: 256/77, loss: 0.0018334381747990847 2023-01-24 01:07:41.045622: step: 260/77, loss: 1.198196696350351e-05 2023-01-24 01:07:42.528730: step: 264/77, loss: 1.776934186636936e-05 2023-01-24 01:07:44.060015: step: 268/77, loss: 3.260213816247415e-06 2023-01-24 01:07:45.533792: step: 272/77, loss: 0.00028547868714667857 2023-01-24 01:07:46.990586: step: 276/77, loss: 3.34371070493944e-05 2023-01-24 
01:07:48.466875: step: 280/77, loss: 3.5840192140312865e-05 2023-01-24 01:07:49.856533: step: 284/77, loss: 0.00037121682544238865 2023-01-24 01:07:51.329935: step: 288/77, loss: 0.0007780568557791412 2023-01-24 01:07:52.775843: step: 292/77, loss: 0.00021716710762120783 2023-01-24 01:07:54.280835: step: 296/77, loss: 0.009425071999430656 2023-01-24 01:07:55.732269: step: 300/77, loss: 0.012034622021019459 2023-01-24 01:07:57.140293: step: 304/77, loss: 0.002567733870819211 2023-01-24 01:07:58.643199: step: 308/77, loss: 6.655389006482437e-05 2023-01-24 01:08:00.095156: step: 312/77, loss: 4.360734601505101e-05 2023-01-24 01:08:01.486713: step: 316/77, loss: 0.002691005589440465 2023-01-24 01:08:02.938702: step: 320/77, loss: 0.0007407565717585385 2023-01-24 01:08:04.373409: step: 324/77, loss: 1.1256095604039729e-05 2023-01-24 01:08:05.819929: step: 328/77, loss: 0.003093632636591792 2023-01-24 01:08:07.295011: step: 332/77, loss: 1.4860575902275741e-05 2023-01-24 01:08:08.731661: step: 336/77, loss: 0.00033171725226566195 2023-01-24 01:08:10.147547: step: 340/77, loss: 1.877544093531469e-07 2023-01-24 01:08:11.571671: step: 344/77, loss: 0.015202153474092484 2023-01-24 01:08:12.996088: step: 348/77, loss: 1.6539713669772027e-06 2023-01-24 01:08:14.468402: step: 352/77, loss: 0.0009299773373641074 2023-01-24 01:08:15.941161: step: 356/77, loss: 0.00010895145533140749 2023-01-24 01:08:17.388961: step: 360/77, loss: 0.013841778971254826 2023-01-24 01:08:18.846340: step: 364/77, loss: 1.5914132745820098e-05 2023-01-24 01:08:20.305561: step: 368/77, loss: 0.0004517743072938174 2023-01-24 01:08:21.812967: step: 372/77, loss: 0.00041712226811796427 2023-01-24 01:08:23.228478: step: 376/77, loss: 5.8034638641402125e-06 2023-01-24 01:08:24.643029: step: 380/77, loss: 0.00019174578483216465 2023-01-24 01:08:26.127056: step: 384/77, loss: 0.0004269965284038335 2023-01-24 01:08:27.581006: step: 388/77, loss: 5.146474904904608e-06 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4444444444444444, 'r': 0.03780718336483932, 'f1': 0.06968641114982578}, 'combined': 0.04945487242890862, 'epoch': 24} Test Chinese: {'template': {'p': 0.922077922077922, 'r': 0.5546875, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5833333333333334, 'r': 0.020608439646712464, 'f1': 0.03981042654028436}, 'combined': 0.02757600277424575, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4444444444444444, 'r': 0.03780718336483932, 'f1': 0.06968641114982578}, 'combined': 0.04945487242890862, 'epoch': 24} Test Korean: {'template': {'p': 0.922077922077922, 'r': 0.5546875, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5588235294117647, 'r': 0.018645731108930325, 'f1': 0.03608736942070276}, 'combined': 0.024997104671901423, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4444444444444444, 'r': 0.03780718336483932, 'f1': 0.06968641114982578}, 'combined': 0.04945487242890862, 'epoch': 24} Test Russian: {'template': {'p': 0.922077922077922, 'r': 0.5546875, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5833333333333334, 'r': 0.020608439646712464, 'f1': 0.03981042654028436}, 'combined': 0.02757600277424575, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} 
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 01:10:13.157148: step: 4/77, loss: 0.030350100249052048 2023-01-24 01:10:14.559590: step: 8/77, loss: 3.5907471556129167e-06 2023-01-24 01:10:15.969560: step: 12/77, loss: 0.0317850299179554 2023-01-24 01:10:17.486563: step: 16/77, loss: 7.020366319920868e-05 2023-01-24 01:10:18.955267: step: 20/77, loss: 2.1531625407078536e-06 2023-01-24 01:10:20.402876: step: 24/77, loss: 0.0002119422279065475 2023-01-24 01:10:21.882796: step: 28/77, loss: 2.0130776192672784e-06 2023-01-24 01:10:23.306975: step: 32/77, loss: 1.1276445547991898e-05 2023-01-24 01:10:24.732709: step: 36/77, loss: 0.00117277551908046 2023-01-24 01:10:26.183195: step: 40/77, loss: 0.002095460193231702 2023-01-24 01:10:27.616505: step: 44/77, loss: 0.002477210946381092 2023-01-24 01:10:29.144231: step: 48/77, loss: 0.00013856086297892034 2023-01-24 01:10:30.583076: step: 52/77, loss: 0.00027021500864066184 2023-01-24 01:10:32.071639: step: 56/77, loss: 0.0034453249536454678 2023-01-24 
01:10:33.576487: step: 60/77, loss: 6.887513791298261e-06 2023-01-24 01:10:35.042166: step: 64/77, loss: 1.2799639534932794e-06 2023-01-24 01:10:36.433406: step: 68/77, loss: 0.041449591517448425 2023-01-24 01:10:37.942106: step: 72/77, loss: 9.353303903480992e-05 2023-01-24 01:10:39.370614: step: 76/77, loss: 1.1487300071166828e-05 2023-01-24 01:10:40.761746: step: 80/77, loss: 0.0002724641526583582 2023-01-24 01:10:42.232488: step: 84/77, loss: 0.0012550866231322289 2023-01-24 01:10:43.729183: step: 88/77, loss: 0.0004702557052951306 2023-01-24 01:10:45.235693: step: 92/77, loss: 5.4410127631854266e-05 2023-01-24 01:10:46.672664: step: 96/77, loss: 0.0006709980079904199 2023-01-24 01:10:48.132689: step: 100/77, loss: 0.001236169831827283 2023-01-24 01:10:49.611743: step: 104/77, loss: 3.693071994348429e-05 2023-01-24 01:10:51.063423: step: 108/77, loss: 0.0002465422439854592 2023-01-24 01:10:52.559347: step: 112/77, loss: 0.0004932255251333117 2023-01-24 01:10:53.971669: step: 116/77, loss: 0.00036463479045778513 2023-01-24 01:10:55.386505: step: 120/77, loss: 5.476816659211181e-05 2023-01-24 01:10:56.833795: step: 124/77, loss: 2.9448181521729566e-05 2023-01-24 01:10:58.294386: step: 128/77, loss: 0.0026145465672016144 2023-01-24 01:10:59.775302: step: 132/77, loss: 0.03979451209306717 2023-01-24 01:11:01.282956: step: 136/77, loss: 3.248447058012971e-07 2023-01-24 01:11:02.743640: step: 140/77, loss: 0.0006305769784376025 2023-01-24 01:11:04.222478: step: 144/77, loss: 5.022615732741542e-05 2023-01-24 01:11:05.666955: step: 148/77, loss: 0.01937961019575596 2023-01-24 01:11:07.170881: step: 152/77, loss: 1.3635562027047854e-05 2023-01-24 01:11:08.626328: step: 156/77, loss: 7.278964403667487e-06 2023-01-24 01:11:10.039129: step: 160/77, loss: 6.216685142135248e-05 2023-01-24 01:11:11.511163: step: 164/77, loss: 1.2665967119573907e-07 2023-01-24 01:11:12.987996: step: 168/77, loss: 0.011341569945216179 2023-01-24 01:11:14.431491: step: 172/77, loss: 1.9737020920729265e-05 2023-01-24 01:11:15.907184: step: 176/77, loss: 5.379262120186468e-07 2023-01-24 01:11:17.378226: step: 180/77, loss: 3.039822900063882e-07 2023-01-24 01:11:18.944836: step: 184/77, loss: 0.0010536059271544218 2023-01-24 01:11:20.411670: step: 188/77, loss: 7.659011771465885e-07 2023-01-24 01:11:21.870464: step: 192/77, loss: 0.00901062786579132 2023-01-24 01:11:23.304287: step: 196/77, loss: 0.0001946162956301123 2023-01-24 01:11:24.838216: step: 200/77, loss: 0.001380533562041819 2023-01-24 01:11:26.273773: step: 204/77, loss: 2.5109788111876696e-05 2023-01-24 01:11:27.757148: step: 208/77, loss: 1.0168966582568828e-05 2023-01-24 01:11:29.179964: step: 212/77, loss: 8.078421524260193e-05 2023-01-24 01:11:30.607795: step: 216/77, loss: 0.004088700283318758 2023-01-24 01:11:32.071765: step: 220/77, loss: 2.3739892640151083e-05 2023-01-24 01:11:33.507480: step: 224/77, loss: 0.0006305679562501609 2023-01-24 01:11:34.918940: step: 228/77, loss: 0.0006910674856044352 2023-01-24 01:11:36.357115: step: 232/77, loss: 0.000501536822412163 2023-01-24 01:11:37.768871: step: 236/77, loss: 0.00013173221668694168 2023-01-24 01:11:39.221424: step: 240/77, loss: 0.01899598352611065 2023-01-24 01:11:40.659768: step: 244/77, loss: 5.1375267503317446e-05 2023-01-24 01:11:42.090125: step: 248/77, loss: 8.276186417788267e-05 2023-01-24 01:11:43.592064: step: 252/77, loss: 0.0019188302103430033 2023-01-24 01:11:45.153925: step: 256/77, loss: 1.1533292081367108e-06 2023-01-24 01:11:46.633612: step: 260/77, loss: 3.2556363294133916e-06 
2023-01-24 01:11:48.105902: step: 264/77, loss: 4.795873974217102e-05 2023-01-24 01:11:49.594294: step: 268/77, loss: 6.93992551532574e-05 2023-01-24 01:11:51.083946: step: 272/77, loss: 0.07327759265899658 2023-01-24 01:11:52.508980: step: 276/77, loss: 0.0050186216831207275 2023-01-24 01:11:53.954247: step: 280/77, loss: 1.8968220274473424e-06 2023-01-24 01:11:55.451734: step: 284/77, loss: 1.6182351600946276e-06 2023-01-24 01:11:56.936184: step: 288/77, loss: 0.007353936322033405 2023-01-24 01:11:58.470541: step: 292/77, loss: 2.384172717029287e-07 2023-01-24 01:11:59.946112: step: 296/77, loss: 0.00028624635888263583 2023-01-24 01:12:01.356919: step: 300/77, loss: 0.000721846881788224 2023-01-24 01:12:02.816763: step: 304/77, loss: 2.752255750237964e-05 2023-01-24 01:12:04.230452: step: 308/77, loss: 0.011221768334507942 2023-01-24 01:12:05.702630: step: 312/77, loss: 8.016594392756815e-07 2023-01-24 01:12:07.181021: step: 316/77, loss: 0.0003219220379833132 2023-01-24 01:12:08.640227: step: 320/77, loss: 0.000156089459778741 2023-01-24 01:12:10.119546: step: 324/77, loss: 5.267148299026303e-05 2023-01-24 01:12:11.526877: step: 328/77, loss: 1.6732965377741493e-06 2023-01-24 01:12:12.983495: step: 332/77, loss: 7.914335583336651e-05 2023-01-24 01:12:14.416209: step: 336/77, loss: 0.0009551231632940471 2023-01-24 01:12:15.927689: step: 340/77, loss: 4.798146164830541e-07 2023-01-24 01:12:17.340642: step: 344/77, loss: 8.940444331528852e-07 2023-01-24 01:12:18.825925: step: 348/77, loss: 3.7869740481255576e-05 2023-01-24 01:12:20.232694: step: 352/77, loss: 0.000730576110072434 2023-01-24 01:12:21.690448: step: 356/77, loss: 9.28007375478046e-06 2023-01-24 01:12:23.117179: step: 360/77, loss: 0.011991672217845917 2023-01-24 01:12:24.588166: step: 364/77, loss: 5.795432662125677e-05 2023-01-24 01:12:26.036314: step: 368/77, loss: 2.2798698751103075e-07 2023-01-24 01:12:27.538648: step: 372/77, loss: 2.812264210660942e-05 2023-01-24 01:12:28.969520: step: 376/77, loss: 0.02710641361773014 2023-01-24 01:12:30.442123: step: 380/77, loss: 1.861944838310592e-05 2023-01-24 01:12:31.900852: step: 384/77, loss: 0.0006321167456917465 2023-01-24 01:12:33.261565: step: 388/77, loss: 0.00018395755614619702 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.050676305101166295, 'epoch': 25} Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5546875, 'f1': 0.6960784313725491}, 'slot': {'p': 0.6216216216216216, 'r': 0.022571148184494603, 'f1': 0.043560606060606064}, 'combined': 0.030321598336304226, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.050676305101166295, 'epoch': 25} Test Korean: {'template': {'p': 0.9333333333333333, 'r': 0.546875, 'f1': 0.689655172413793}, 'slot': {'p': 0.5675675675675675, 'r': 0.020608439646712464, 'f1': 0.03977272727272728}, 'combined': 0.0274294670846395, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.050676305101166295, 'epoch': 25} Test Russian: {'template': {'p': 0.9342105263157895, 'r': 0.5546875, 'f1': 0.6960784313725491}, 
'slot': {'p': 0.5897435897435898, 'r': 0.022571148184494603, 'f1': 0.04347826086956522}, 'combined': 0.030264279624893444, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 01:14:18.981065: step: 4/77, loss: 1.8648388504516333e-05 2023-01-24 01:14:20.458909: step: 8/77, loss: 0.003614019602537155 2023-01-24 01:14:21.984062: step: 12/77, loss: 0.0036987748462706804 2023-01-24 01:14:23.403801: step: 16/77, loss: 8.188489118765574e-06 2023-01-24 01:14:24.818154: step: 20/77, loss: 0.00017946727166417986 2023-01-24 01:14:26.304667: step: 24/77, loss: 1.4870868653815705e-05 2023-01-24 01:14:27.761245: step: 28/77, loss: 0.0009488090872764587 2023-01-24 01:14:29.251797: step: 32/77, loss: 4.1778181184781715e-05 2023-01-24 01:14:30.738800: step: 36/77, loss: 0.0010264035081490874 2023-01-24 01:14:32.225512: step: 
40/77, loss: 7.91398124420084e-05 2023-01-24 01:14:33.602863: step: 44/77, loss: 8.268310921266675e-06 2023-01-24 01:14:35.052297: step: 48/77, loss: 2.724805563047994e-05 2023-01-24 01:14:36.557357: step: 52/77, loss: 6.878773274365813e-05 2023-01-24 01:14:37.992298: step: 56/77, loss: 2.7371258966013556e-06 2023-01-24 01:14:39.450641: step: 60/77, loss: 1.9934646843466908e-05 2023-01-24 01:14:40.919832: step: 64/77, loss: 0.00021441919670905918 2023-01-24 01:14:42.411969: step: 68/77, loss: 0.0003429249918553978 2023-01-24 01:14:43.832908: step: 72/77, loss: 3.063391432078788e-06 2023-01-24 01:14:45.273556: step: 76/77, loss: 7.694316082051955e-06 2023-01-24 01:14:46.719179: step: 80/77, loss: 0.0010704833548516035 2023-01-24 01:14:48.159759: step: 84/77, loss: 0.000800688227172941 2023-01-24 01:14:49.581966: step: 88/77, loss: 0.00019355997210368514 2023-01-24 01:14:51.063692: step: 92/77, loss: 8.405933840549551e-06 2023-01-24 01:14:52.458311: step: 96/77, loss: 1.4901097245001438e-07 2023-01-24 01:14:53.925721: step: 100/77, loss: 3.2093255867948756e-05 2023-01-24 01:14:55.416747: step: 104/77, loss: 1.7448739981773542e-06 2023-01-24 01:14:56.780030: step: 108/77, loss: 7.703179107920732e-06 2023-01-24 01:14:58.159651: step: 112/77, loss: 0.001553960028104484 2023-01-24 01:14:59.609505: step: 116/77, loss: 8.19562373521876e-08 2023-01-24 01:15:01.060668: step: 120/77, loss: 7.058346091071144e-05 2023-01-24 01:15:02.502926: step: 124/77, loss: 0.0005340041243471205 2023-01-24 01:15:04.030713: step: 128/77, loss: 0.018589282408356667 2023-01-24 01:15:05.529051: step: 132/77, loss: 0.0001769806258380413 2023-01-24 01:15:07.066840: step: 136/77, loss: 1.1324689239700092e-06 2023-01-24 01:15:08.505453: step: 140/77, loss: 0.0002581391599960625 2023-01-24 01:15:10.035047: step: 144/77, loss: 0.0298842191696167 2023-01-24 01:15:11.455097: step: 148/77, loss: 3.725285324662764e-08 2023-01-24 01:15:12.916599: step: 152/77, loss: 0.0011672412510961294 2023-01-24 01:15:14.381287: step: 156/77, loss: 6.6999982664128765e-06 2023-01-24 01:15:15.803642: step: 160/77, loss: 0.0001974831393454224 2023-01-24 01:15:17.199901: step: 164/77, loss: 0.0011818100465461612 2023-01-24 01:15:18.734909: step: 168/77, loss: 0.12559227645397186 2023-01-24 01:15:20.196385: step: 172/77, loss: 0.00018192394054494798 2023-01-24 01:15:21.672503: step: 176/77, loss: 3.462537642917596e-05 2023-01-24 01:15:23.181525: step: 180/77, loss: 1.5296547644538805e-05 2023-01-24 01:15:24.582109: step: 184/77, loss: 3.6714420275529847e-05 2023-01-24 01:15:26.004427: step: 188/77, loss: 0.00013234214566182345 2023-01-24 01:15:27.450948: step: 192/77, loss: 3.090263362537371e-06 2023-01-24 01:15:28.905805: step: 196/77, loss: 0.014518280513584614 2023-01-24 01:15:30.331978: step: 200/77, loss: 0.000251075136475265 2023-01-24 01:15:31.763955: step: 204/77, loss: 0.04231609031558037 2023-01-24 01:15:33.198481: step: 208/77, loss: 2.1233238385320874e-06 2023-01-24 01:15:34.591269: step: 212/77, loss: 3.2484297207702184e-07 2023-01-24 01:15:36.126560: step: 216/77, loss: 3.600172931328416e-05 2023-01-24 01:15:37.551572: step: 220/77, loss: 2.236615046058432e-06 2023-01-24 01:15:39.037965: step: 224/77, loss: 4.7323348553618416e-05 2023-01-24 01:15:40.425741: step: 228/77, loss: 0.0005846671992912889 2023-01-24 01:15:41.936405: step: 232/77, loss: 3.072257413805346e-06 2023-01-24 01:15:43.441386: step: 236/77, loss: 9.84782527666539e-05 2023-01-24 01:15:44.849360: step: 240/77, loss: 0.004049063194543123 2023-01-24 01:15:46.356523: step: 
244/77, loss: 6.586230369975965e-07 2023-01-24 01:15:47.810923: step: 248/77, loss: 0.0015713156899437308 2023-01-24 01:15:49.264287: step: 252/77, loss: 1.6120588043122552e-05 2023-01-24 01:15:50.719495: step: 256/77, loss: 1.328440794168273e-05 2023-01-24 01:15:52.221808: step: 260/77, loss: 0.0005180090083740652 2023-01-24 01:15:53.666621: step: 264/77, loss: 0.0010391356190666556 2023-01-24 01:15:55.019177: step: 268/77, loss: 3.50178343069274e-05 2023-01-24 01:15:56.438455: step: 272/77, loss: 2.1308568420863594e-07 2023-01-24 01:15:57.902143: step: 276/77, loss: 0.0014618869172409177 2023-01-24 01:15:59.356797: step: 280/77, loss: 0.0007588414591737092 2023-01-24 01:16:00.836209: step: 284/77, loss: 0.06243320554494858 2023-01-24 01:16:02.273339: step: 288/77, loss: 9.618224794394337e-06 2023-01-24 01:16:03.733954: step: 292/77, loss: 9.983750715036876e-08 2023-01-24 01:16:05.165566: step: 296/77, loss: 0.00012859278649557382 2023-01-24 01:16:06.652234: step: 300/77, loss: 2.0861565985796915e-07 2023-01-24 01:16:08.080228: step: 304/77, loss: 4.0165257814805955e-05 2023-01-24 01:16:09.564956: step: 308/77, loss: 1.2351800251053646e-05 2023-01-24 01:16:11.062862: step: 312/77, loss: 2.354362891310302e-07 2023-01-24 01:16:12.555535: step: 316/77, loss: 0.012203582562506199 2023-01-24 01:16:14.042292: step: 320/77, loss: 0.05889744311571121 2023-01-24 01:16:15.484689: step: 324/77, loss: 7.20866046322044e-06 2023-01-24 01:16:16.923042: step: 328/77, loss: 1.6941910416790051e-06 2023-01-24 01:16:18.316695: step: 332/77, loss: 0.008220874704420567 2023-01-24 01:16:19.782038: step: 336/77, loss: 0.02381768450140953 2023-01-24 01:16:21.298275: step: 340/77, loss: 3.486860578050255e-07 2023-01-24 01:16:22.783786: step: 344/77, loss: 2.9442082905006828e-06 2023-01-24 01:16:24.230561: step: 348/77, loss: 0.0016949096461758018 2023-01-24 01:16:25.652039: step: 352/77, loss: 4.277956577425357e-06 2023-01-24 01:16:27.117417: step: 356/77, loss: 2.846114171006775e-07 2023-01-24 01:16:28.600402: step: 360/77, loss: 7.524974989792099e-07 2023-01-24 01:16:30.075927: step: 364/77, loss: 5.088774560135789e-05 2023-01-24 01:16:31.547484: step: 368/77, loss: 0.0001697741390671581 2023-01-24 01:16:32.980545: step: 372/77, loss: 0.009770647622644901 2023-01-24 01:16:34.448357: step: 376/77, loss: 0.00020992070494685322 2023-01-24 01:16:35.856984: step: 380/77, loss: 3.278218798641319e-07 2023-01-24 01:16:37.317908: step: 384/77, loss: 2.631887764437124e-05 2023-01-24 01:16:38.742219: step: 388/77, loss: 0.000318748876452446 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 26} Test Chinese: {'template': {'p': 0.9558823529411765, 'r': 0.5078125, 'f1': 0.6632653061224489}, 'slot': {'p': 0.6470588235294118, 'r': 0.021589793915603533, 'f1': 0.04178537511870845}, 'combined': 0.02771478961955152, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 26} Test Korean: {'template': {'p': 0.9558823529411765, 'r': 0.5078125, 'f1': 0.6632653061224489}, 'slot': {'p': 0.6470588235294118, 'r': 0.021589793915603533, 'f1': 0.04178537511870845}, 'combined': 0.02771478961955152, 'epoch': 26} Dev 
Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 26} Test Russian: {'template': {'p': 0.9558823529411765, 'r': 0.5078125, 'f1': 0.6632653061224489}, 'slot': {'p': 0.6470588235294118, 'r': 0.021589793915603533, 'f1': 0.04178537511870845}, 'combined': 0.02771478961955152, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 01:18:24.385556: step: 4/77, loss: 3.606061227401369e-07 2023-01-24 01:18:25.851968: step: 8/77, loss: 2.7565567961573834e-06 2023-01-24 01:18:27.368041: step: 12/77, loss: 1.0385904261056567e-06 2023-01-24 01:18:28.855205: step: 16/77, loss: 7.437886961270124e-05 2023-01-24 01:18:30.260156: step: 20/77, loss: 
9.661864896770567e-05 2023-01-24 01:18:31.714419: step: 24/77, loss: 3.726641079992987e-05 2023-01-24 01:18:33.191086: step: 28/77, loss: 5.066294761491008e-07 2023-01-24 01:18:34.623602: step: 32/77, loss: 1.8862918295781128e-05 2023-01-24 01:18:36.075465: step: 36/77, loss: 1.8645439922693186e-05 2023-01-24 01:18:37.494253: step: 40/77, loss: 9.87338262348203e-06 2023-01-24 01:18:38.962734: step: 44/77, loss: 0.01699930801987648 2023-01-24 01:18:40.384925: step: 48/77, loss: 1.1026840951444683e-07 2023-01-24 01:18:41.882134: step: 52/77, loss: 0.0002957044052891433 2023-01-24 01:18:43.352407: step: 56/77, loss: 1.7434240362490527e-07 2023-01-24 01:18:44.778056: step: 60/77, loss: 0.00021177512826398015 2023-01-24 01:18:46.193713: step: 64/77, loss: 3.229175854357891e-05 2023-01-24 01:18:47.675866: step: 68/77, loss: 0.046003349125385284 2023-01-24 01:18:49.157025: step: 72/77, loss: 2.8995225420658244e-06 2023-01-24 01:18:50.629669: step: 76/77, loss: 0.1521517038345337 2023-01-24 01:18:52.149167: step: 80/77, loss: 0.0002786411496344954 2023-01-24 01:18:53.613111: step: 84/77, loss: 1.475212059176556e-07 2023-01-24 01:18:55.031764: step: 88/77, loss: 1.1026831714389118e-07 2023-01-24 01:18:56.444989: step: 92/77, loss: 0.0024277365300804377 2023-01-24 01:18:57.891290: step: 96/77, loss: 0.0009051822707988322 2023-01-24 01:18:59.341724: step: 100/77, loss: 0.006404219660907984 2023-01-24 01:19:00.809580: step: 104/77, loss: 0.10981199145317078 2023-01-24 01:19:02.302674: step: 108/77, loss: 0.006981880869716406 2023-01-24 01:19:03.797123: step: 112/77, loss: 0.0007049435516819358 2023-01-24 01:19:05.255893: step: 116/77, loss: 0.0004604582500178367 2023-01-24 01:19:06.823668: step: 120/77, loss: 0.00011910688772331923 2023-01-24 01:19:08.273654: step: 124/77, loss: 1.693453850748483e-05 2023-01-24 01:19:09.810542: step: 128/77, loss: 1.7974909496842884e-05 2023-01-24 01:19:11.227270: step: 132/77, loss: 4.464502490009181e-05 2023-01-24 01:19:12.682684: step: 136/77, loss: 0.0034254807978868484 2023-01-24 01:19:14.236934: step: 140/77, loss: 5.8066685596713796e-05 2023-01-24 01:19:15.701389: step: 144/77, loss: 0.006067907437682152 2023-01-24 01:19:17.167346: step: 148/77, loss: 0.00030065476312302053 2023-01-24 01:19:18.591233: step: 152/77, loss: 4.232639548717998e-05 2023-01-24 01:19:20.047224: step: 156/77, loss: 2.042884716502158e-06 2023-01-24 01:19:21.488764: step: 160/77, loss: 0.02161801978945732 2023-01-24 01:19:22.970192: step: 164/77, loss: 2.4465957721986342e-06 2023-01-24 01:19:24.387980: step: 168/77, loss: 7.546792403445579e-06 2023-01-24 01:19:25.827290: step: 172/77, loss: 0.017119623720645905 2023-01-24 01:19:27.225117: step: 176/77, loss: 0.0001016731548588723 2023-01-24 01:19:28.674485: step: 180/77, loss: 4.181843905826099e-05 2023-01-24 01:19:30.110499: step: 184/77, loss: 0.19686707854270935 2023-01-24 01:19:31.571105: step: 188/77, loss: 0.008008151315152645 2023-01-24 01:19:33.084034: step: 192/77, loss: 1.138431343861157e-06 2023-01-24 01:19:34.496514: step: 196/77, loss: 0.0228792242705822 2023-01-24 01:19:35.954614: step: 200/77, loss: 0.0002090672787744552 2023-01-24 01:19:37.466285: step: 204/77, loss: 3.613301032601157e-06 2023-01-24 01:19:38.911559: step: 208/77, loss: 0.0005431215977296233 2023-01-24 01:19:40.316150: step: 212/77, loss: 1.569067080708919e-06 2023-01-24 01:19:41.815882: step: 216/77, loss: 0.0012952117249369621 2023-01-24 01:19:43.339251: step: 220/77, loss: 2.108344915541238e-06 2023-01-24 01:19:44.848748: step: 224/77, loss: 
0.00017514584760647267 2023-01-24 01:19:46.331488: step: 228/77, loss: 0.0004233909712638706 2023-01-24 01:19:47.750444: step: 232/77, loss: 4.133952097618021e-05 2023-01-24 01:19:49.250008: step: 236/77, loss: 8.803592209005728e-06 2023-01-24 01:19:50.620230: step: 240/77, loss: 1.7240215584024554e-06 2023-01-24 01:19:52.034875: step: 244/77, loss: 0.0004245509917382151 2023-01-24 01:19:53.419235: step: 248/77, loss: 1.5050139268169005e-07 2023-01-24 01:19:54.948173: step: 252/77, loss: 0.022179974243044853 2023-01-24 01:19:56.383006: step: 256/77, loss: 0.0001255650568054989 2023-01-24 01:19:57.865753: step: 260/77, loss: 4.008350060757948e-07 2023-01-24 01:19:59.333291: step: 264/77, loss: 0.0027482016012072563 2023-01-24 01:20:00.793049: step: 268/77, loss: 0.00022981772781349719 2023-01-24 01:20:02.197260: step: 272/77, loss: 7.01972112437943e-06 2023-01-24 01:20:03.673248: step: 276/77, loss: 3.4765056625474244e-05 2023-01-24 01:20:05.116154: step: 280/77, loss: 0.004884883761405945 2023-01-24 01:20:06.519449: step: 284/77, loss: 9.70681958278874e-06 2023-01-24 01:20:07.971298: step: 288/77, loss: 0.011655522510409355 2023-01-24 01:20:09.505381: step: 292/77, loss: 6.033683803252643e-06 2023-01-24 01:20:11.002169: step: 296/77, loss: 0.00023782583593856543 2023-01-24 01:20:12.488112: step: 300/77, loss: 0.004693306982517242 2023-01-24 01:20:13.904946: step: 304/77, loss: 0.00043961359187960625 2023-01-24 01:20:15.358968: step: 308/77, loss: 0.009037474170327187 2023-01-24 01:20:16.792882: step: 312/77, loss: 9.109323582379147e-05 2023-01-24 01:20:18.224100: step: 316/77, loss: 5.065477125754114e-06 2023-01-24 01:20:19.680055: step: 320/77, loss: 2.5830046070041135e-05 2023-01-24 01:20:21.132438: step: 324/77, loss: 4.08288713060756e-07 2023-01-24 01:20:22.622161: step: 328/77, loss: 5.0793074478860945e-05 2023-01-24 01:20:24.034719: step: 332/77, loss: 2.865168426069431e-05 2023-01-24 01:20:25.447490: step: 336/77, loss: 2.9721433747909032e-05 2023-01-24 01:20:26.878803: step: 340/77, loss: 0.0006815999513491988 2023-01-24 01:20:28.387219: step: 344/77, loss: 5.092594801681116e-05 2023-01-24 01:20:29.897061: step: 348/77, loss: 1.8298146642337088e-06 2023-01-24 01:20:31.470824: step: 352/77, loss: 1.0752620255516376e-05 2023-01-24 01:20:32.957113: step: 356/77, loss: 3.539942917996086e-05 2023-01-24 01:20:34.454204: step: 360/77, loss: 0.0002795817272271961 2023-01-24 01:20:35.896806: step: 364/77, loss: 0.00022478778555523604 2023-01-24 01:20:37.380769: step: 368/77, loss: 0.0007004109211266041 2023-01-24 01:20:38.850097: step: 372/77, loss: 0.00017398054478690028 2023-01-24 01:20:40.338509: step: 376/77, loss: 1.4692169543195632e-06 2023-01-24 01:20:41.841292: step: 380/77, loss: 4.7235914735210827e-07 2023-01-24 01:20:43.273171: step: 384/77, loss: 0.001804694184102118 2023-01-24 01:20:44.744546: step: 388/77, loss: 0.00010899404878728092 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050587710162178244, 'epoch': 27} Test Chinese: {'template': {'p': 0.92, 'r': 0.5390625, 'f1': 0.6798029556650247}, 'slot': {'p': 0.575, 'r': 0.022571148184494603, 'f1': 0.04343720491029273}, 'combined': 0.02952874028384432, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.46511627906976744, 'r': 
0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050587710162178244, 'epoch': 27} Test Korean: {'template': {'p': 0.92, 'r': 0.5390625, 'f1': 0.6798029556650247}, 'slot': {'p': 0.575, 'r': 0.022571148184494603, 'f1': 0.04343720491029273}, 'combined': 0.02952874028384432, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050587710162178244, 'epoch': 27} Test Russian: {'template': {'p': 0.92, 'r': 0.5390625, 'f1': 0.6798029556650247}, 'slot': {'p': 0.575, 'r': 0.022571148184494603, 'f1': 0.04343720491029273}, 'combined': 0.02952874028384432, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 01:22:30.284883: step: 4/77, loss: 
0.013731489889323711 2023-01-24 01:22:31.726952: step: 8/77, loss: 0.0010407003574073315 2023-01-24 01:22:33.188764: step: 12/77, loss: 0.0005618635332211852 2023-01-24 01:22:34.659452: step: 16/77, loss: 0.0029245370533317327 2023-01-24 01:22:36.065474: step: 20/77, loss: 2.4019689135457156e-06 2023-01-24 01:22:37.521671: step: 24/77, loss: 5.255473297438584e-05 2023-01-24 01:22:38.941051: step: 28/77, loss: 0.007973378524184227 2023-01-24 01:22:40.417318: step: 32/77, loss: 0.00712230708450079 2023-01-24 01:22:41.899916: step: 36/77, loss: 1.300935309700435e-05 2023-01-24 01:22:43.382357: step: 40/77, loss: 3.725287456290971e-08 2023-01-24 01:22:44.861602: step: 44/77, loss: 0.0003985797520726919 2023-01-24 01:22:46.348385: step: 48/77, loss: 3.1143270007305546e-07 2023-01-24 01:22:47.843179: step: 52/77, loss: 4.842793828174763e-07 2023-01-24 01:22:49.266262: step: 56/77, loss: 2.2053367501939647e-05 2023-01-24 01:22:50.771055: step: 60/77, loss: 0.00310569372959435 2023-01-24 01:22:52.187278: step: 64/77, loss: 0.0004489465500228107 2023-01-24 01:22:53.647248: step: 68/77, loss: 4.373181582195684e-06 2023-01-24 01:22:55.089255: step: 72/77, loss: 0.047484856098890305 2023-01-24 01:22:56.505987: step: 76/77, loss: 0.001078817993402481 2023-01-24 01:22:57.920332: step: 80/77, loss: 0.0004845151852350682 2023-01-24 01:22:59.385361: step: 84/77, loss: 5.875883289263584e-06 2023-01-24 01:23:00.859508: step: 88/77, loss: 4.23568781116046e-06 2023-01-24 01:23:02.303347: step: 92/77, loss: 0.003169774077832699 2023-01-24 01:23:03.731213: step: 96/77, loss: 0.001381977228447795 2023-01-24 01:23:05.151350: step: 100/77, loss: 0.00012675896869041026 2023-01-24 01:23:06.589931: step: 104/77, loss: 0.00028056855080649257 2023-01-24 01:23:08.055133: step: 108/77, loss: 2.276324448757805e-05 2023-01-24 01:23:09.571413: step: 112/77, loss: 0.0013658979441970587 2023-01-24 01:23:10.995609: step: 116/77, loss: 0.03549131751060486 2023-01-24 01:23:12.486335: step: 120/77, loss: 3.8398309698095545e-05 2023-01-24 01:23:13.991933: step: 124/77, loss: 1.266596001414655e-07 2023-01-24 01:23:15.423290: step: 128/77, loss: 0.06559224426746368 2023-01-24 01:23:16.858464: step: 132/77, loss: 7.129681307560531e-06 2023-01-24 01:23:18.278775: step: 136/77, loss: 5.930523343522509e-07 2023-01-24 01:23:19.719934: step: 140/77, loss: 0.017023207619786263 2023-01-24 01:23:21.172386: step: 144/77, loss: 0.0009497597930021584 2023-01-24 01:23:22.620184: step: 148/77, loss: 0.0005004429258406162 2023-01-24 01:23:24.058840: step: 152/77, loss: 0.0023720364551991224 2023-01-24 01:23:25.526339: step: 156/77, loss: 3.3378313446519314e-07 2023-01-24 01:23:26.948207: step: 160/77, loss: 3.427231547448173e-07 2023-01-24 01:23:28.382949: step: 164/77, loss: 0.004157646559178829 2023-01-24 01:23:29.799759: step: 168/77, loss: 0.002858922118321061 2023-01-24 01:23:31.225810: step: 172/77, loss: 9.284650332119782e-06 2023-01-24 01:23:32.655355: step: 176/77, loss: 7.88212400948396e-06 2023-01-24 01:23:34.144160: step: 180/77, loss: 0.0003945681673940271 2023-01-24 01:23:35.667580: step: 184/77, loss: 3.2046635169535875e-05 2023-01-24 01:23:37.128414: step: 188/77, loss: 0.0011122300056740642 2023-01-24 01:23:38.640715: step: 192/77, loss: 1.002837279884261e-06 2023-01-24 01:23:40.120985: step: 196/77, loss: 0.06045089662075043 2023-01-24 01:23:41.657930: step: 200/77, loss: 1.2665967119573907e-07 2023-01-24 01:23:43.140692: step: 204/77, loss: 0.0015796440420672297 2023-01-24 01:23:44.647245: step: 208/77, loss: 
1.553771289763972e-05 2023-01-24 01:23:46.111134: step: 212/77, loss: 1.7567413124197628e-06 2023-01-24 01:23:47.600037: step: 216/77, loss: 2.616386609588517e-06 2023-01-24 01:23:49.010626: step: 220/77, loss: 0.0005074563669040799 2023-01-24 01:23:50.528960: step: 224/77, loss: 1.072302802640479e-05 2023-01-24 01:23:51.944595: step: 228/77, loss: 0.0009682394447736442 2023-01-24 01:23:53.416249: step: 232/77, loss: 3.7219822843326256e-05 2023-01-24 01:23:54.851257: step: 236/77, loss: 4.753395899115276e-07 2023-01-24 01:23:56.371696: step: 240/77, loss: 5.6268167099915445e-05 2023-01-24 01:23:57.811250: step: 244/77, loss: 1.765011802490335e-05 2023-01-24 01:23:59.245855: step: 248/77, loss: 6.603108340641484e-05 2023-01-24 01:24:00.722416: step: 252/77, loss: 4.001375418738462e-05 2023-01-24 01:24:02.137712: step: 256/77, loss: 1.2814932404126012e-07 2023-01-24 01:24:03.597113: step: 260/77, loss: 3.8165017031133175e-05 2023-01-24 01:24:05.012964: step: 264/77, loss: 2.2879685275256634e-05 2023-01-24 01:24:06.553719: step: 268/77, loss: 0.0009290733723901212 2023-01-24 01:24:08.023476: step: 272/77, loss: 2.5331964081942715e-08 2023-01-24 01:24:09.488963: step: 276/77, loss: 0.0009572876733727753 2023-01-24 01:24:10.924634: step: 280/77, loss: 2.2842411908641225e-06 2023-01-24 01:24:12.335457: step: 284/77, loss: 0.0002019900275627151 2023-01-24 01:24:13.772386: step: 288/77, loss: 0.004656798206269741 2023-01-24 01:24:15.229379: step: 292/77, loss: 7.271652293638908e-07 2023-01-24 01:24:16.635973: step: 296/77, loss: 5.6624305244667994e-08 2023-01-24 01:24:18.070224: step: 300/77, loss: 0.0018320352537557483 2023-01-24 01:24:19.564056: step: 304/77, loss: 0.0008556064567528665 2023-01-24 01:24:21.039915: step: 308/77, loss: 2.3556367523269728e-05 2023-01-24 01:24:22.495586: step: 312/77, loss: 0.14063824713230133 2023-01-24 01:24:23.919602: step: 316/77, loss: 5.176966806175187e-05 2023-01-24 01:24:25.389924: step: 320/77, loss: 1.275466911465628e-05 2023-01-24 01:24:26.855054: step: 324/77, loss: 6.16900535987952e-07 2023-01-24 01:24:28.249972: step: 328/77, loss: 0.00014278925664257258 2023-01-24 01:24:29.612575: step: 332/77, loss: 7.105967142706504e-06 2023-01-24 01:24:31.068208: step: 336/77, loss: 0.001324579818174243 2023-01-24 01:24:32.576097: step: 340/77, loss: 4.3302843550918624e-05 2023-01-24 01:24:33.998903: step: 344/77, loss: 7.405750466205063e-07 2023-01-24 01:24:35.502248: step: 348/77, loss: 0.00011049366003135219 2023-01-24 01:24:36.898686: step: 352/77, loss: 7.489207200706005e-05 2023-01-24 01:24:38.387787: step: 356/77, loss: 2.342394509469159e-06 2023-01-24 01:24:39.853955: step: 360/77, loss: 2.3192773369373754e-05 2023-01-24 01:24:41.345207: step: 364/77, loss: 0.0006508184014819562 2023-01-24 01:24:42.843502: step: 368/77, loss: 0.0004297401465009898 2023-01-24 01:24:44.326922: step: 372/77, loss: 0.00012195734598208219 2023-01-24 01:24:45.829182: step: 376/77, loss: 2.36168352785171e-06 2023-01-24 01:24:47.296680: step: 380/77, loss: 2.2057285605114885e-05 2023-01-24 01:24:48.708011: step: 384/77, loss: 1.3709006907447474e-07 2023-01-24 01:24:50.130242: step: 388/77, loss: 0.00016052668797783554 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 28} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 
0.5703125, 'f1': 0.7087378640776699}, 'slot': {'p': 0.6216216216216216, 'r': 0.022571148184494603, 'f1': 0.043560606060606064}, 'combined': 0.030873050897322746, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 28} Test Korean: {'template': {'p': 0.9125, 'r': 0.5703125, 'f1': 0.7019230769230769}, 'slot': {'p': 0.6216216216216216, 'r': 0.022571148184494603, 'f1': 0.043560606060606064}, 'combined': 0.03057619463869464, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 28} Test Russian: {'template': {'p': 0.9240506329113924, 'r': 0.5703125, 'f1': 0.7053140096618359}, 'slot': {'p': 0.6216216216216216, 'r': 0.022571148184494603, 'f1': 0.043560606060606064}, 'combined': 0.030723905723905733, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 
0.04301075268817204, 'epoch': 2} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 01:26:35.736559: step: 4/77, loss: 1.817933679149064e-07 2023-01-24 01:26:37.170559: step: 8/77, loss: 0.040769945830106735 2023-01-24 01:26:38.598746: step: 12/77, loss: 1.5795183117006673e-07 2023-01-24 01:26:40.034418: step: 16/77, loss: 1.4990278032200877e-06 2023-01-24 01:26:41.483765: step: 20/77, loss: 0.001043134368956089 2023-01-24 01:26:42.966251: step: 24/77, loss: 0.00025510074920021 2023-01-24 01:26:44.430187: step: 28/77, loss: 0.00031373612000606954 2023-01-24 01:26:45.925390: step: 32/77, loss: 1.9087267446593614e-06 2023-01-24 01:26:47.340097: step: 36/77, loss: 0.09458360075950623 2023-01-24 01:26:48.819939: step: 40/77, loss: 3.2746251235948876e-05 2023-01-24 01:26:50.304430: step: 44/77, loss: 0.00016612530453130603 2023-01-24 01:26:51.702768: step: 48/77, loss: 2.814584604493575e-06 2023-01-24 01:26:53.143224: step: 52/77, loss: 0.001565206446684897 2023-01-24 01:26:54.480624: step: 56/77, loss: 5.4112715588416904e-05 2023-01-24 01:26:55.946409: step: 60/77, loss: 0.0010413212003186345 2023-01-24 01:26:57.411797: step: 64/77, loss: 3.0024616535229143e-06 2023-01-24 01:26:58.884467: step: 68/77, loss: 0.02369655668735504 2023-01-24 01:27:00.297589: step: 72/77, loss: 0.008374444209039211 2023-01-24 01:27:01.724274: step: 76/77, loss: 6.269232471822761e-06 2023-01-24 01:27:03.203032: step: 80/77, loss: 3.87479412893299e-05 2023-01-24 01:27:04.681884: step: 84/77, loss: 2.682207878024201e-08 2023-01-24 01:27:06.185132: step: 88/77, loss: 1.1026813950820724e-07 2023-01-24 01:27:07.581426: step: 92/77, loss: 3.655839100247249e-05 2023-01-24 01:27:09.024213: step: 96/77, loss: 1.411835546605289e-05 2023-01-24 01:27:10.474894: step: 100/77, loss: 1.626298217161093e-05 2023-01-24 01:27:11.966487: step: 104/77, loss: 0.001862027682363987 2023-01-24 01:27:13.409287: step: 108/77, loss: 2.682184572222468e-07 2023-01-24 01:27:14.887038: step: 112/77, loss: 6.273245389820659e-07 2023-01-24 01:27:16.303604: step: 116/77, loss: 0.020532015711069107 2023-01-24 01:27:17.749582: step: 120/77, loss: 2.0443840185180306e-06 2023-01-24 01:27:19.243849: step: 124/77, loss: 4.589517175190849e-07 2023-01-24 01:27:20.716711: step: 128/77, loss: 2.2947628508518392e-07 2023-01-24 01:27:22.146115: step: 132/77, loss: 0.0004681869759224355 2023-01-24 01:27:23.560375: step: 136/77, loss: 2.6970980115947896e-07 2023-01-24 01:27:25.066831: step: 140/77, loss: 2.552435717007029e-06 2023-01-24 01:27:26.533112: step: 144/77, loss: 1.7097081581596285e-05 2023-01-24 01:27:27.998490: step: 148/77, loss: 1.3411013810582517e-07 2023-01-24 01:27:29.468962: step: 152/77, loss: 1.5874025848461315e-05 2023-01-24 01:27:31.015346: step: 156/77, loss: 2.0995241811760934e-06 2023-01-24 01:27:32.454207: step: 160/77, loss: 1.5233533304126468e-05 2023-01-24 01:27:33.962059: step: 164/77, loss: 0.002553701400756836 2023-01-24 01:27:35.427902: step: 168/77, loss: 5.59375221200753e-05 2023-01-24 01:27:36.863578: step: 172/77, loss: 9.596681593393441e-06 2023-01-24 01:27:38.280732: step: 176/77, loss: 0.022904738783836365 2023-01-24 01:27:39.750611: step: 180/77, loss: 1.7314723663730547e-06 2023-01-24 01:27:41.224355: step: 184/77, loss: 4.137518772040494e-06 2023-01-24 01:27:42.752767: step: 188/77, loss: 
6.556387575074041e-07 2023-01-24 01:27:44.192092: step: 192/77, loss: 8.6360860223067e-06 2023-01-24 01:27:45.638212: step: 196/77, loss: 0.001840737764723599 2023-01-24 01:27:47.114035: step: 200/77, loss: 2.980230462412692e-08 2023-01-24 01:27:48.575046: step: 204/77, loss: 1.0654099241946824e-05 2023-01-24 01:27:50.081132: step: 208/77, loss: 1.0002829185395967e-05 2023-01-24 01:27:51.507744: step: 212/77, loss: 0.00041803918429650366 2023-01-24 01:27:52.983643: step: 216/77, loss: 0.00032258350984193385 2023-01-24 01:27:54.454198: step: 220/77, loss: 7.789167284499854e-06 2023-01-24 01:27:55.898994: step: 224/77, loss: 4.469942268769955e-06 2023-01-24 01:27:57.329098: step: 228/77, loss: 0.00010209907486569136 2023-01-24 01:27:58.816953: step: 232/77, loss: 1.5556329344690312e-06 2023-01-24 01:28:00.319629: step: 236/77, loss: 1.7775905689632054e-06 2023-01-24 01:28:01.753015: step: 240/77, loss: 0.001098861452192068 2023-01-24 01:28:03.222737: step: 244/77, loss: 0.00037572160363197327 2023-01-24 01:28:04.688651: step: 248/77, loss: 1.2218926315199496e-07 2023-01-24 01:28:06.159856: step: 252/77, loss: 0.004170549102127552 2023-01-24 01:28:07.656412: step: 256/77, loss: 1.2397180171319633e-06 2023-01-24 01:28:09.101495: step: 260/77, loss: 0.00014179742720443755 2023-01-24 01:28:10.476120: step: 264/77, loss: 0.0004720209108199924 2023-01-24 01:28:11.917362: step: 268/77, loss: 0.0011638924479484558 2023-01-24 01:28:13.389554: step: 272/77, loss: 0.0005042596021667123 2023-01-24 01:28:14.871153: step: 276/77, loss: 4.947142997480114e-07 2023-01-24 01:28:16.342070: step: 280/77, loss: 0.0010161111131310463 2023-01-24 01:28:17.860555: step: 284/77, loss: 0.0016872090054675937 2023-01-24 01:28:19.368750: step: 288/77, loss: 7.378399914159672e-06 2023-01-24 01:28:20.788127: step: 292/77, loss: 2.7567045890464215e-07 2023-01-24 01:28:22.278147: step: 296/77, loss: 0.005593928974121809 2023-01-24 01:28:23.763362: step: 300/77, loss: 2.3092803530744277e-05 2023-01-24 01:28:25.231304: step: 304/77, loss: 0.0003652371233329177 2023-01-24 01:28:26.737272: step: 308/77, loss: 0.003939785063266754 2023-01-24 01:28:28.171127: step: 312/77, loss: 0.0016041402705013752 2023-01-24 01:28:29.696998: step: 316/77, loss: 0.3785771131515503 2023-01-24 01:28:31.165505: step: 320/77, loss: 5.214817520027282e-06 2023-01-24 01:28:32.573558: step: 324/77, loss: 5.930572797296918e-07 2023-01-24 01:28:34.026268: step: 328/77, loss: 8.526707460987382e-06 2023-01-24 01:28:35.494120: step: 332/77, loss: 2.276741042805952e-06 2023-01-24 01:28:36.985495: step: 336/77, loss: 1.1235166539336205e-06 2023-01-24 01:28:38.482069: step: 340/77, loss: 1.3290504284668714e-05 2023-01-24 01:28:39.970287: step: 344/77, loss: 0.1279335618019104 2023-01-24 01:28:41.399614: step: 348/77, loss: 1.597791924723424e-05 2023-01-24 01:28:42.849696: step: 352/77, loss: 0.00090602453565225 2023-01-24 01:28:44.308098: step: 356/77, loss: 3.4596016575960675e-06 2023-01-24 01:28:45.817537: step: 360/77, loss: 3.472837488516234e-05 2023-01-24 01:28:47.268321: step: 364/77, loss: 1.6152355328813428e-06 2023-01-24 01:28:48.757839: step: 368/77, loss: 9.981415132642724e-06 2023-01-24 01:28:50.246276: step: 372/77, loss: 9.092828258872032e-05 2023-01-24 01:28:51.718384: step: 376/77, loss: 7.450569228240056e-08 2023-01-24 01:28:53.120950: step: 380/77, loss: 1.962437181646237e-06 2023-01-24 01:28:54.640825: step: 384/77, loss: 1.1756699223042233e-06 2023-01-24 01:28:56.116066: step: 388/77, loss: 6.934347038622946e-05 
==================================================
Loss: 0.008
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9102564102564102, 'r': 0.5546875, 'f1': 0.6893203883495146}, 'slot': {'p': 0.46511627906976744, 'r': 0.019627085377821395, 'f1': 0.037664783427495296}, 'combined': 0.025963103139341418, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 29}
Test Korean: {'template': {'p': 0.9078947368421053, 'r': 0.5390625, 'f1': 0.6764705882352942}, 'slot': {'p': 0.5, 'r': 0.019627085377821395, 'f1': 0.03777148253068933}, 'combined': 0.025551297006054546, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 29}
Test Russian: {'template': {'p': 0.9102564102564102, 'r': 0.5546875, 'f1': 0.6893203883495146}, 'slot': {'p': 0.47619047619047616, 'r': 0.019627085377821395, 'f1': 0.037700282752120645}, 'combined': 0.02598757354757831, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2}
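Note on the result dictionaries: each 'f1' is the harmonic mean of the corresponding 'p' and 'r', and 'combined' matches the product of the template F1 and the slot F1 (for Dev Chinese at epoch 29: 0.7368421052631579 * 0.07005253940455342 ≈ 0.05161766061388146). The short sketch below reproduces that scoring arithmetic; it is inferred from the logged values, not taken from the repository's evaluation code.

# Sketch of the scoring arithmetic inferred from the logged dictionaries above.
def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall (0.0 if both are zero)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(result: dict) -> float:
    """'combined' appears to be template F1 multiplied by slot F1."""
    return result['template']['f1'] * result['slot']['f1']

dev_chinese_epoch29 = {
    'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579},
    'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342},
}
assert abs(f1(1.0, 0.5833333333333334) - dev_chinese_epoch29['template']['f1']) < 1e-9
assert abs(combined_score(dev_chinese_epoch29) - 0.05161766061388146) < 1e-9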