Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024])
>>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024])
>>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024])
>>> xlmr.embeddings.LayerNorm.weight, .bias: torch.Size([1024]) each
>>> xlmr.encoder.layer.0 through xlmr.encoder.layer.23, with identical shapes in all 24 layers:
        attention.self.query/.key/.value.weight: torch.Size([1024, 1024]); .bias: torch.Size([1024])
        attention.output.dense.weight: torch.Size([1024, 1024]); .bias: torch.Size([1024])
        attention.output.LayerNorm.weight, .bias: torch.Size([1024]) each
        intermediate.dense.weight: torch.Size([4096, 1024]); .bias: torch.Size([4096])
        output.dense.weight: torch.Size([1024, 4096]); .bias: torch.Size([1024])
        output.LayerNorm.weight, .bias: torch.Size([1024]) each
>>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]); .bias: torch.Size([1024])
>>> trans_rep.weight: torch.Size([1024, 2048]); .bias: torch.Size([1024])
For each template type T in {Corruplate, Cybercrimeplate, Disasterplate, Displacementplate, Epidemiplate, Etiplate, Protestplate, Terrorplate}, with identical shapes for all eight types:
>>> hidden_ffns.T.layers.0.weight: torch.Size([768, 1024]); .bias: torch.Size([768])
>>> template_classifiers.T.layers.0.weight: torch.Size([450, 768]); .bias: torch.Size([450]); layers.1.weight: torch.Size([2, 450]); .bias: torch.Size([2])
>>> type_classifiers.T.layers.0.weight: torch.Size([450, 768]); .bias: torch.Size([450]); layers.1.weight: torch.Size([6, 450]); .bias: torch.Size([6])
>>> completion_classifiers.T.layers.0.weight: torch.Size([450, 768]); .bias: torch.Size([450]); layers.1.weight: torch.Size([4, 450]); .bias: torch.Size([4])
>>> overtime_classifiers.T.layers.0.weight: torch.Size([450, 768]); .bias: torch.Size([450]); layers.1.weight: torch.Size([2, 450]); .bias: torch.Size([2])
>>> coordinated_classifiers.T.layers.0.weight: torch.Size([450, 768]); .bias: torch.Size([450]); layers.1.weight: torch.Size([2, 450]); .bias: torch.Size([2])
n_trainable_params: 582185936, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
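The parameter report above is standard PyTorch bookkeeping. train.py itself is not shown in this log, so the following is only a minimal sketch of how a dump in this format is typically produced; `model` and the function name are illustrative. Summing numel() over the shapes listed above does reproduce the reported total of 582,185,936 exactly.

```python
import torch


def report_params(model: torch.nn.Module) -> None:
    """Sketch: print every trainable parameter and the totals in the log's format."""
    n_trainable, n_nontrainable = 0, 0
    print("> trainable params:")
    for name, p in model.named_parameters():
        if p.requires_grad:
            print(f">>> {name}: {p.size()}")  # e.g. ">>> trans_rep.weight: torch.Size([1024, 2048])"
            n_trainable += p.numel()          # element count, e.g. 1024 * 2048
        else:
            n_nontrainable += p.numel()
    print(f"n_trainable_params: {n_trainable}, n_nontrainable_params: {n_nontrainable}")
```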
****************************** Epoch: 0 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:04:00.903126: step: 4/77, loss: 1.0617469549179077 2023-01-23 22:04:02.155398: step: 8/77, loss: 1.071832537651062 2023-01-23 22:04:03.382375: step: 12/77, loss: 1.0500397682189941 2023-01-23 22:04:04.670474: step: 16/77, loss: 1.0569696426391602 2023-01-23 22:04:05.985771: step: 20/77, loss: 1.036795973777771 2023-01-23 22:04:07.331821: step: 24/77, loss: 1.053377628326416 2023-01-23 22:04:08.639100: step: 28/77, loss: 1.0394463539123535 2023-01-23 22:04:09.961615: step: 32/77, loss: 1.0370593070983887 2023-01-23 22:04:11.263645: step: 36/77, loss: 1.0463190078735352 2023-01-23 22:04:12.559324: step: 40/77, loss: 1.0341830253601074 2023-01-23 22:04:13.807928: step: 44/77, loss: 1.01943039894104 2023-01-23 22:04:15.024007: step: 48/77, loss: 1.0161728858947754 2023-01-23 22:04:16.256979: step: 52/77, loss: 0.9935738444328308 2023-01-23 22:04:17.509116: step: 56/77, loss: 0.9947496652603149 2023-01-23 22:04:18.765315: step: 60/77, loss: 0.9947984218597412 2023-01-23 22:04:20.098953: step: 64/77, loss: 0.9740759134292603 2023-01-23 22:04:21.366519: step: 68/77, loss: 0.9578840732574463 2023-01-23 22:04:22.660934: step: 72/77, loss: 0.958991527557373 2023-01-23 22:04:23.964896: step: 76/77, loss: 0.9292556047439575 2023-01-23 22:04:25.291515: step: 80/77, loss: 0.9261636734008789 2023-01-23 22:04:26.554726: step: 84/77, loss: 0.9224743247032166 2023-01-23 22:04:27.856871: step: 88/77, loss: 0.9078954458236694 2023-01-23 22:04:29.184360: step: 92/77, loss: 0.8906626105308533 2023-01-23 22:04:30.446981: step: 96/77, loss: 0.8689224720001221 2023-01-23 22:04:31.741281: step: 100/77, loss: 0.8581384420394897 2023-01-23 22:04:33.038901: step: 104/77, loss: 0.8440836668014526 2023-01-23 22:04:34.335053: step: 108/77, loss: 0.8239055871963501 2023-01-23 22:04:35.614219: step: 112/77, loss: 0.7871434688568115 2023-01-23 22:04:36.882775: step: 116/77, loss: 0.7866687178611755 2023-01-23 22:04:38.175420: step: 120/77, loss: 0.8152998685836792 2023-01-23 22:04:39.454259: step: 124/77, loss: 0.7473165988922119 2023-01-23 22:04:40.809344: step: 128/77, loss: 0.7465535402297974 2023-01-23 22:04:42.086620: step: 132/77, loss: 0.7344512939453125 2023-01-23 22:04:43.393513: step: 136/77, loss: 0.6562771797180176 2023-01-23 22:04:44.703697: step: 140/77, loss: 0.7067196369171143 2023-01-23 22:04:46.037524: step: 144/77, loss: 0.6306025981903076 2023-01-23 22:04:47.328146: step: 148/77, loss: 0.6918684244155884 2023-01-23 22:04:48.696390: step: 152/77, loss: 0.5625179409980774 2023-01-23 22:04:50.013769: step: 156/77, loss: 0.5566755533218384 2023-01-23 22:04:51.326813: step: 160/77, loss: 0.5231382846832275 2023-01-23 22:04:52.674159: step: 164/77, loss: 0.5328485369682312 2023-01-23 22:04:53.918455: step: 168/77, loss: 0.5300477147102356 2023-01-23 22:04:55.201267: step: 172/77, loss: 0.4442155957221985 2023-01-23 22:04:56.489866: step: 176/77, loss: 0.4701414108276367 2023-01-23 22:04:57.779441: step: 180/77, loss: 0.45259642601013184 2023-01-23 22:04:59.049134: step: 184/77, loss: 0.3861841559410095 2023-01-23 22:05:00.306768: step: 188/77, loss: 0.4100555181503296 2023-01-23 22:05:01.596889: step: 192/77, loss: 0.3761431574821472 2023-01-23 22:05:02.934198: step: 196/77, loss: 0.48926883935928345 2023-01-23 22:05:04.250796: step: 200/77, loss: 0.3657160997390747 2023-01-23 22:05:05.562735: step: 204/77, loss: 0.314441055059433 2023-01-23 22:05:06.869621: step: 208/77, 
loss: 0.2680244743824005 2023-01-23 22:05:08.167975: step: 212/77, loss: 0.2731962502002716 2023-01-23 22:05:09.437679: step: 216/77, loss: 0.22471654415130615 2023-01-23 22:05:10.751169: step: 220/77, loss: 0.26468920707702637 2023-01-23 22:05:12.079957: step: 224/77, loss: 0.20208311080932617 2023-01-23 22:05:13.397995: step: 228/77, loss: 0.24795040488243103 2023-01-23 22:05:14.700027: step: 232/77, loss: 0.23075196146965027 2023-01-23 22:05:15.966594: step: 236/77, loss: 0.20327256619930267 2023-01-23 22:05:17.284666: step: 240/77, loss: 0.17273639142513275 2023-01-23 22:05:18.608868: step: 244/77, loss: 0.1513095498085022 2023-01-23 22:05:19.896349: step: 248/77, loss: 0.17677460610866547 2023-01-23 22:05:21.189264: step: 252/77, loss: 0.10056942701339722 2023-01-23 22:05:22.453376: step: 256/77, loss: 0.09389781951904297 2023-01-23 22:05:23.753825: step: 260/77, loss: 0.11536524444818497 2023-01-23 22:05:25.041548: step: 264/77, loss: 0.17818987369537354 2023-01-23 22:05:26.332482: step: 268/77, loss: 0.23469536006450653 2023-01-23 22:05:27.662181: step: 272/77, loss: 0.16398277878761292 2023-01-23 22:05:28.966943: step: 276/77, loss: 0.27092573046684265 2023-01-23 22:05:30.204787: step: 280/77, loss: 0.15249253809452057 2023-01-23 22:05:31.464745: step: 284/77, loss: 0.14844149351119995 2023-01-23 22:05:32.778845: step: 288/77, loss: 0.034965530037879944 2023-01-23 22:05:34.166626: step: 292/77, loss: 0.19286976754665375 2023-01-23 22:05:35.448662: step: 296/77, loss: 0.13884218037128448 2023-01-23 22:05:36.750816: step: 300/77, loss: 0.05468413233757019 2023-01-23 22:05:38.043606: step: 304/77, loss: 0.2615947425365448 2023-01-23 22:05:39.332028: step: 308/77, loss: 0.06353268027305603 2023-01-23 22:05:40.618300: step: 312/77, loss: 0.3093520402908325 2023-01-23 22:05:41.942814: step: 316/77, loss: 0.13308614492416382 2023-01-23 22:05:43.193990: step: 320/77, loss: 0.0702965259552002 2023-01-23 22:05:44.492127: step: 324/77, loss: 0.09301605820655823 2023-01-23 22:05:45.763626: step: 328/77, loss: 0.0733005702495575 2023-01-23 22:05:47.017146: step: 332/77, loss: 0.10775697976350784 2023-01-23 22:05:48.299826: step: 336/77, loss: 0.08684641122817993 2023-01-23 22:05:49.611689: step: 340/77, loss: 0.07254372537136078 2023-01-23 22:05:50.910881: step: 344/77, loss: 0.11763302981853485 2023-01-23 22:05:52.167715: step: 348/77, loss: 0.16487447917461395 2023-01-23 22:05:53.458141: step: 352/77, loss: 0.08935951441526413 2023-01-23 22:05:54.747326: step: 356/77, loss: 0.08099295198917389 2023-01-23 22:05:56.035676: step: 360/77, loss: 0.07289375364780426 2023-01-23 22:05:57.318116: step: 364/77, loss: 0.07927817851305008 2023-01-23 22:05:58.644802: step: 368/77, loss: 0.06166777387261391 2023-01-23 22:05:59.931691: step: 372/77, loss: 0.1259939819574356 2023-01-23 22:06:01.220437: step: 376/77, loss: 0.36279356479644775 2023-01-23 22:06:02.472933: step: 380/77, loss: 0.035687074065208435 2023-01-23 22:06:03.804009: step: 384/77, loss: 0.11559747159481049 2023-01-23 22:06:05.083670: step: 388/77, loss: 0.12898707389831543 ================================================== Loss: 0.485 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} 
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:08:09.842413: step: 4/77, loss: 0.11158759146928787 2023-01-23 22:08:11.110301: step: 8/77, loss: 0.19867026805877686 2023-01-23 22:08:12.383480: step: 12/77, loss: 0.058240726590156555 2023-01-23 22:08:13.691971: step: 16/77, loss: 0.20178799331188202 2023-01-23 22:08:14.998854: step: 20/77, loss: 0.02973691187798977 2023-01-23 22:08:16.317099: step: 24/77, loss: 0.0995328426361084 2023-01-23 22:08:17.645317: step: 28/77, loss: 0.1865939050912857 2023-01-23 22:08:18.966317: step: 32/77, loss: 0.1807020604610443 2023-01-23 22:08:20.253224: step: 36/77, loss: 0.0407174676656723 2023-01-23 22:08:21.516780: step: 40/77, loss: 0.10385138541460037 2023-01-23 22:08:22.765581: step: 44/77, loss: 0.08694268763065338 2023-01-23 22:08:24.039748: step: 48/77, loss: 0.09504902362823486 2023-01-23 22:08:25.320273: step: 52/77, loss: 0.10106496512889862 2023-01-23 22:08:26.598887: step: 56/77, loss: 0.05474689230322838 2023-01-23 22:08:27.876404: step: 60/77, loss: 0.1368316113948822 2023-01-23 22:08:29.173451: step: 64/77, loss: 0.13929077982902527 2023-01-23 22:08:30.505589: step: 68/77, loss: 
0.2764717638492584 2023-01-23 22:08:31.754251: step: 72/77, loss: 0.08261242508888245 2023-01-23 22:08:33.006076: step: 76/77, loss: 0.1324632167816162 2023-01-23 22:08:34.307224: step: 80/77, loss: 0.15174496173858643 2023-01-23 22:08:35.623627: step: 84/77, loss: 0.11399096250534058 2023-01-23 22:08:36.859575: step: 88/77, loss: 0.07976777851581573 2023-01-23 22:08:38.143729: step: 92/77, loss: 0.04927085340023041 2023-01-23 22:08:39.393700: step: 96/77, loss: 0.1352429986000061 2023-01-23 22:08:40.674644: step: 100/77, loss: 0.07732612639665604 2023-01-23 22:08:42.020211: step: 104/77, loss: 0.09204569458961487 2023-01-23 22:08:43.295564: step: 108/77, loss: 0.14470499753952026 2023-01-23 22:08:44.585788: step: 112/77, loss: 0.08708120137453079 2023-01-23 22:08:45.885557: step: 116/77, loss: 0.21689417958259583 2023-01-23 22:08:47.182581: step: 120/77, loss: 0.18309366703033447 2023-01-23 22:08:48.478778: step: 124/77, loss: 0.10907326638698578 2023-01-23 22:08:49.762685: step: 128/77, loss: 0.06838659197092056 2023-01-23 22:08:51.005950: step: 132/77, loss: 0.09328626841306686 2023-01-23 22:08:52.267412: step: 136/77, loss: 0.06526198983192444 2023-01-23 22:08:53.548934: step: 140/77, loss: 0.10818645358085632 2023-01-23 22:08:54.832442: step: 144/77, loss: 0.1860373616218567 2023-01-23 22:08:56.085353: step: 148/77, loss: 0.07784507423639297 2023-01-23 22:08:57.362291: step: 152/77, loss: 0.08227992057800293 2023-01-23 22:08:58.610407: step: 156/77, loss: 0.08509774506092072 2023-01-23 22:08:59.877205: step: 160/77, loss: 0.04389530420303345 2023-01-23 22:09:01.149394: step: 164/77, loss: 0.08508370816707611 2023-01-23 22:09:02.449517: step: 168/77, loss: 0.10564348101615906 2023-01-23 22:09:03.778465: step: 172/77, loss: 0.20949102938175201 2023-01-23 22:09:05.080895: step: 176/77, loss: 0.054892972111701965 2023-01-23 22:09:06.390297: step: 180/77, loss: 0.2804702818393707 2023-01-23 22:09:07.602488: step: 184/77, loss: 0.029961321502923965 2023-01-23 22:09:08.947072: step: 188/77, loss: 0.03746054694056511 2023-01-23 22:09:10.245903: step: 192/77, loss: 0.10914406180381775 2023-01-23 22:09:11.552799: step: 196/77, loss: 0.04498276859521866 2023-01-23 22:09:12.809506: step: 200/77, loss: 0.08990252017974854 2023-01-23 22:09:14.057621: step: 204/77, loss: 0.09215635806322098 2023-01-23 22:09:15.346126: step: 208/77, loss: 0.2269093245267868 2023-01-23 22:09:16.629190: step: 212/77, loss: 0.25756344199180603 2023-01-23 22:09:17.930953: step: 216/77, loss: 0.1051335409283638 2023-01-23 22:09:19.252727: step: 220/77, loss: 0.07337190210819244 2023-01-23 22:09:20.490100: step: 224/77, loss: 0.10456050932407379 2023-01-23 22:09:21.780746: step: 228/77, loss: 0.07801325619220734 2023-01-23 22:09:23.107776: step: 232/77, loss: 0.13213995099067688 2023-01-23 22:09:24.399252: step: 236/77, loss: 0.10130394995212555 2023-01-23 22:09:25.718525: step: 240/77, loss: 0.16945858299732208 2023-01-23 22:09:26.978833: step: 244/77, loss: 0.03165304660797119 2023-01-23 22:09:28.212811: step: 248/77, loss: 0.10367722809314728 2023-01-23 22:09:29.446991: step: 252/77, loss: 0.12703952193260193 2023-01-23 22:09:30.742465: step: 256/77, loss: 0.0646631270647049 2023-01-23 22:09:32.036256: step: 260/77, loss: 0.09364254027605057 2023-01-23 22:09:33.300908: step: 264/77, loss: 0.09511947631835938 2023-01-23 22:09:34.586827: step: 268/77, loss: 0.08383557200431824 2023-01-23 22:09:35.885118: step: 272/77, loss: 0.07730644941329956 2023-01-23 22:09:37.174537: step: 276/77, loss: 0.1986403912305832 2023-01-23 
22:09:38.488729: step: 280/77, loss: 0.056749798357486725 2023-01-23 22:09:39.806165: step: 284/77, loss: 0.07241851091384888 2023-01-23 22:09:41.122320: step: 288/77, loss: 0.048592459410429 2023-01-23 22:09:42.430655: step: 292/77, loss: 0.1085418313741684 2023-01-23 22:09:43.742522: step: 296/77, loss: 0.06122410297393799 2023-01-23 22:09:45.049798: step: 300/77, loss: 0.07612358033657074 2023-01-23 22:09:46.340620: step: 304/77, loss: 0.047660645097494125 2023-01-23 22:09:47.589151: step: 308/77, loss: 0.08316922932863235 2023-01-23 22:09:48.845269: step: 312/77, loss: 0.11238834261894226 2023-01-23 22:09:50.137166: step: 316/77, loss: 0.1417194902896881 2023-01-23 22:09:51.362012: step: 320/77, loss: 0.12480755150318146 2023-01-23 22:09:52.639875: step: 324/77, loss: 0.05285458266735077 2023-01-23 22:09:53.948226: step: 328/77, loss: 0.16706568002700806 2023-01-23 22:09:55.224873: step: 332/77, loss: 0.13593162596225739 2023-01-23 22:09:56.538120: step: 336/77, loss: 0.07430271804332733 2023-01-23 22:09:57.815523: step: 340/77, loss: 0.05810857564210892 2023-01-23 22:09:59.096484: step: 344/77, loss: 0.1264181137084961 2023-01-23 22:10:00.393212: step: 348/77, loss: 0.05887303501367569 2023-01-23 22:10:01.696176: step: 352/77, loss: 0.1920332908630371 2023-01-23 22:10:03.015468: step: 356/77, loss: 0.03397180885076523 2023-01-23 22:10:04.317607: step: 360/77, loss: 0.061011020094156265 2023-01-23 22:10:05.592659: step: 364/77, loss: 0.06792081892490387 2023-01-23 22:10:06.860901: step: 368/77, loss: 0.10783128440380096 2023-01-23 22:10:08.180981: step: 372/77, loss: 0.05417155474424362 2023-01-23 22:10:09.480405: step: 376/77, loss: 0.0812927782535553 2023-01-23 22:10:10.784412: step: 380/77, loss: 0.06356360018253326 2023-01-23 22:10:12.075325: step: 384/77, loss: 0.07857591658830643 2023-01-23 22:10:13.371476: step: 388/77, loss: 0.05766603723168373 ================================================== Loss: 0.106 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 
0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:11:48.681087: step: 4/77, loss: 0.11322622001171112 2023-01-23 22:11:49.959884: step: 8/77, loss: 0.10179299116134644 2023-01-23 22:11:51.262360: step: 12/77, loss: 0.06799320876598358 2023-01-23 22:11:52.538742: step: 16/77, loss: 0.15302878618240356 2023-01-23 22:11:53.795146: step: 20/77, loss: 0.11456996202468872 2023-01-23 22:11:55.099221: step: 24/77, loss: 0.06971030682325363 2023-01-23 22:11:56.409016: step: 28/77, loss: 0.09485167264938354 2023-01-23 22:11:57.702142: step: 32/77, loss: 0.09777399897575378 2023-01-23 22:11:58.988326: step: 36/77, loss: 0.04542500525712967 2023-01-23 22:12:00.254656: step: 40/77, loss: 0.10130670666694641 2023-01-23 22:12:01.526187: step: 44/77, loss: 0.04343476518988609 2023-01-23 22:12:02.812298: step: 48/77, loss: 0.2175489366054535 2023-01-23 22:12:04.104930: step: 52/77, loss: 0.04697784036397934 2023-01-23 22:12:05.366353: step: 56/77, loss: 0.11778730154037476 2023-01-23 22:12:06.621882: step: 60/77, loss: 0.056548308581113815 2023-01-23 22:12:07.897911: step: 64/77, loss: 0.01581401564180851 2023-01-23 22:12:09.140275: step: 68/77, loss: 0.05202740058302879 2023-01-23 22:12:10.354500: step: 72/77, loss: 0.06093277037143707 2023-01-23 22:12:11.623763: step: 76/77, loss: 0.02446526102721691 2023-01-23 22:12:12.908856: step: 80/77, loss: 0.06041073054075241 2023-01-23 22:12:14.154089: step: 84/77, loss: 0.04046632722020149 2023-01-23 22:12:15.443088: step: 88/77, loss: 0.07183364033699036 2023-01-23 22:12:16.759744: step: 92/77, loss: 0.10985290259122849 2023-01-23 22:12:18.016748: step: 96/77, loss: 0.06694649904966354 2023-01-23 22:12:19.321207: step: 100/77, loss: 0.04028739407658577 2023-01-23 22:12:20.550631: step: 104/77, loss: 0.07630635052919388 2023-01-23 22:12:21.850013: step: 108/77, loss: 0.03472098335623741 2023-01-23 22:12:23.117050: step: 112/77, loss: 0.12241032719612122 2023-01-23 22:12:24.426832: step: 116/77, loss: 0.02490338310599327 2023-01-23 22:12:25.706158: step: 120/77, loss: 0.1250462681055069 2023-01-23 22:12:26.986957: step: 124/77, loss: 0.05550193041563034 2023-01-23 22:12:28.276115: step: 128/77, loss: 0.01295685488730669 2023-01-23 22:12:29.542071: step: 132/77, loss: 0.03971627354621887 2023-01-23 22:12:30.812866: step: 136/77, loss: 0.10844653099775314 2023-01-23 22:12:32.114523: step: 140/77, loss: 0.0835656076669693 2023-01-23 22:12:33.462249: step: 
144/77, loss: 0.04878070205450058 2023-01-23 22:12:34.743678: step: 148/77, loss: 0.0661730170249939 2023-01-23 22:12:35.999372: step: 152/77, loss: 0.07241643965244293 2023-01-23 22:12:37.266970: step: 156/77, loss: 0.01513383537530899 2023-01-23 22:12:38.569081: step: 160/77, loss: 0.021579544991254807 2023-01-23 22:12:39.876598: step: 164/77, loss: 0.04571843147277832 2023-01-23 22:12:41.178509: step: 168/77, loss: 0.05205213278532028 2023-01-23 22:12:42.445823: step: 172/77, loss: 0.07319924980401993 2023-01-23 22:12:43.699886: step: 176/77, loss: 0.0070552583783864975 2023-01-23 22:12:44.974228: step: 180/77, loss: 0.03356878086924553 2023-01-23 22:12:46.224640: step: 184/77, loss: 0.017069164663553238 2023-01-23 22:12:47.536423: step: 188/77, loss: 0.06316450238227844 2023-01-23 22:12:48.740644: step: 192/77, loss: 0.05764845013618469 2023-01-23 22:12:50.008782: step: 196/77, loss: 0.018638404086232185 2023-01-23 22:12:51.277129: step: 200/77, loss: 0.047206778079271317 2023-01-23 22:12:52.505428: step: 204/77, loss: 0.012387819588184357 2023-01-23 22:12:53.764617: step: 208/77, loss: 0.018886670470237732 2023-01-23 22:12:55.026300: step: 212/77, loss: 0.040599655359983444 2023-01-23 22:12:56.297391: step: 216/77, loss: 0.062156952917575836 2023-01-23 22:12:57.653128: step: 220/77, loss: 0.08114401996135712 2023-01-23 22:12:58.999792: step: 224/77, loss: 0.03974404186010361 2023-01-23 22:13:00.305386: step: 228/77, loss: 0.07013162225484848 2023-01-23 22:13:01.602817: step: 232/77, loss: 0.029353676363825798 2023-01-23 22:13:02.932024: step: 236/77, loss: 0.027635207399725914 2023-01-23 22:13:04.223874: step: 240/77, loss: 0.08846844732761383 2023-01-23 22:13:05.515107: step: 244/77, loss: 0.11539971828460693 2023-01-23 22:13:06.804282: step: 248/77, loss: 0.037433214485645294 2023-01-23 22:13:08.094058: step: 252/77, loss: 0.07446795701980591 2023-01-23 22:13:09.384548: step: 256/77, loss: 0.039659544825553894 2023-01-23 22:13:10.701982: step: 260/77, loss: 0.01527109369635582 2023-01-23 22:13:11.985629: step: 264/77, loss: 0.23154425621032715 2023-01-23 22:13:13.237738: step: 268/77, loss: 0.10503697395324707 2023-01-23 22:13:14.496922: step: 272/77, loss: 0.041720081120729446 2023-01-23 22:13:15.774850: step: 276/77, loss: 0.08117479085922241 2023-01-23 22:13:17.086347: step: 280/77, loss: 0.06630122661590576 2023-01-23 22:13:18.390310: step: 284/77, loss: 0.1733831912279129 2023-01-23 22:13:19.717549: step: 288/77, loss: 0.0781242698431015 2023-01-23 22:13:21.014477: step: 292/77, loss: 0.06188240647315979 2023-01-23 22:13:22.365568: step: 296/77, loss: 0.051947221159935 2023-01-23 22:13:23.661654: step: 300/77, loss: 0.040972497314214706 2023-01-23 22:13:24.981522: step: 304/77, loss: 0.03663957864046097 2023-01-23 22:13:26.302960: step: 308/77, loss: 0.07159535586833954 2023-01-23 22:13:27.644727: step: 312/77, loss: 0.15917694568634033 2023-01-23 22:13:28.997329: step: 316/77, loss: 0.037154968827962875 2023-01-23 22:13:30.233663: step: 320/77, loss: 0.07271397113800049 2023-01-23 22:13:31.532283: step: 324/77, loss: 0.1220267117023468 2023-01-23 22:13:32.833991: step: 328/77, loss: 0.03455435484647751 2023-01-23 22:13:34.176840: step: 332/77, loss: 0.019315311685204506 2023-01-23 22:13:35.451931: step: 336/77, loss: 0.05523783713579178 2023-01-23 22:13:36.826237: step: 340/77, loss: 0.03926079720258713 2023-01-23 22:13:38.146471: step: 344/77, loss: 0.10926296561956406 2023-01-23 22:13:39.459993: step: 348/77, loss: 0.0364389531314373 2023-01-23 22:13:40.742647: step: 352/77, 
loss: 0.015954257920384407 2023-01-23 22:13:42.059165: step: 356/77, loss: 0.25136813521385193 2023-01-23 22:13:43.415913: step: 360/77, loss: 0.013067019172012806 2023-01-23 22:13:44.733509: step: 364/77, loss: 0.005912239663302898 2023-01-23 22:13:46.076874: step: 368/77, loss: 0.06499234586954117 2023-01-23 22:13:47.336145: step: 372/77, loss: 0.06031108647584915 2023-01-23 22:13:48.636409: step: 376/77, loss: 0.07589991390705109 2023-01-23 22:13:49.940018: step: 380/77, loss: 0.01958923414349556 2023-01-23 22:13:51.254090: step: 384/77, loss: 0.01762222871184349 2023-01-23 22:13:52.601419: step: 388/77, loss: 0.008108420297503471 ================================================== Loss: 0.065 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test Chinese: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test Korean: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test Russian: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
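Note on the metrics above: epoch 2 is the first epoch with non-zero evaluation scores (epoch 1 scored 0.0 across the board), which is why all three "New best ... model" checkpoints fire at once. The logged numbers are internally consistent: every f1 is the harmonic mean of its p and r, and every 'combined' value is the product of the template f1 and the slot f1. The short Python sketch below re-derives the epoch-2 Dev Chinese figures; it is inferred from the numbers in this log, not taken from the project's evaluation code.

def f1(p, r):
    # Harmonic mean of precision and recall; taken as 0.0 when p + r == 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Epoch 2, Dev Chinese, as logged above:
template_f1 = f1(1.0, 0.5666666666666667)   # ~ 0.7234042553191489
slot_f1 = f1(0.5, 0.035916824196597356)     # ~ 0.0670194003527337
combined = template_f1 * slot_f1            # ~ 0.048482119404105226
assert abs(combined - 0.048482119404105226) < 1e-12

The same relation holds for every Dev/Test/Sample block in this section (e.g. Test Chinese: 0.7142857142857142 * 0.03008849557522124 = 0.021491782553729452). For pulling these curves out of the raw log, a minimal parsing sketch follows; the file name train.log is hypothetical, and the regexes assume the exact line formats seen here. One quirk worth noting: the step counter runs past its printed denominator (it reaches step: 388/77 each epoch), so the denominator evidently does not share the counter's unit; the patterns below simply ignore it.

import re

# Per-batch losses; the optional exponent handles occasional scientific
# notation in the log (e.g. "loss: 6.96962742949836e-05").
STEP = re.compile(r"step: (\d+)/\d+, loss: ([0-9.]+(?:e-?\d+)?)")
# Per-epoch summary only: the capital "L" does not match the step lines.
EPOCH_LOSS = re.compile(r"Loss: ([0-9.]+)")

step_losses, epoch_losses = [], []
with open("train.log", encoding="utf-8") as fh:  # hypothetical file name
    for line in fh:
        step_losses += [(int(s), float(v)) for s, v in STEP.findall(line)]
        epoch_losses += [float(x) for x in EPOCH_LOSS.findall(line)]

print(f"{len(step_losses)} step entries; per-epoch losses: {epoch_losses}")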
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Chinese: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Korean: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Russian: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:15:50.406393: step: 4/77, loss: 0.015968766063451767 2023-01-23 22:15:51.701341: step: 8/77, loss: 0.01351113896816969 2023-01-23 22:15:52.977194: step: 12/77, loss: 0.08549536019563675 2023-01-23 22:15:54.213508: step: 16/77, loss: 0.03732670471072197 2023-01-23 22:15:55.505597: step: 20/77, loss: 0.005760162137448788 2023-01-23 22:15:56.788448: step: 24/77, loss: 0.06140680983662605 2023-01-23 22:15:58.130707: step: 28/77, loss: 0.009903283789753914 2023-01-23 22:15:59.386291: step: 32/77, loss: 0.028609666973352432 2023-01-23 22:16:00.685442: step: 36/77, loss: 0.025533368811011314 2023-01-23 22:16:01.970599: step: 40/77, loss: 0.02272067219018936 2023-01-23 22:16:03.301434: step: 44/77, loss: 0.06468820571899414 2023-01-23 22:16:04.589703: step: 48/77, loss: 0.026076534762978554 2023-01-23 22:16:05.888291: step: 52/77, loss: 0.029940230771899223 2023-01-23 22:16:07.201392: step: 56/77, loss: 0.08760607987642288 2023-01-23 22:16:08.500205: step: 60/77, loss: 0.12449557334184647 2023-01-23 22:16:09.793412: step: 64/77, loss: 0.02922476828098297 2023-01-23 22:16:11.122807: step: 68/77, loss: 0.005582435987889767 2023-01-23 22:16:12.385994: step: 72/77, loss: 0.012879885733127594 2023-01-23 22:16:13.653733: step: 76/77, loss: 0.025143388658761978 2023-01-23 22:16:14.940526: step: 80/77, loss: 
0.009446179494261742 2023-01-23 22:16:16.189250: step: 84/77, loss: 0.04552457481622696 2023-01-23 22:16:17.463245: step: 88/77, loss: 0.02507692202925682 2023-01-23 22:16:18.751147: step: 92/77, loss: 0.025074133649468422 2023-01-23 22:16:20.044487: step: 96/77, loss: 0.01472495123744011 2023-01-23 22:16:21.347946: step: 100/77, loss: 0.03009127266705036 2023-01-23 22:16:22.620354: step: 104/77, loss: 0.1456586718559265 2023-01-23 22:16:23.932688: step: 108/77, loss: 0.008024273440241814 2023-01-23 22:16:25.222751: step: 112/77, loss: 0.04816785454750061 2023-01-23 22:16:26.538826: step: 116/77, loss: 0.01610010489821434 2023-01-23 22:16:27.796766: step: 120/77, loss: 0.022976523265242577 2023-01-23 22:16:29.063078: step: 124/77, loss: 0.040475912392139435 2023-01-23 22:16:30.324453: step: 128/77, loss: 0.055941663682460785 2023-01-23 22:16:31.591736: step: 132/77, loss: 0.027231454849243164 2023-01-23 22:16:32.874735: step: 136/77, loss: 0.06407906860113144 2023-01-23 22:16:34.177763: step: 140/77, loss: 0.020403718575835228 2023-01-23 22:16:35.447863: step: 144/77, loss: 0.054121024906635284 2023-01-23 22:16:36.657937: step: 148/77, loss: 0.019535942003130913 2023-01-23 22:16:37.940127: step: 152/77, loss: 0.02978493645787239 2023-01-23 22:16:39.225359: step: 156/77, loss: 0.007111942861229181 2023-01-23 22:16:40.516746: step: 160/77, loss: 0.05967719852924347 2023-01-23 22:16:41.805312: step: 164/77, loss: 0.050702475011348724 2023-01-23 22:16:43.136290: step: 168/77, loss: 0.07628966867923737 2023-01-23 22:16:44.421505: step: 172/77, loss: 0.016380734741687775 2023-01-23 22:16:45.781020: step: 176/77, loss: 0.09666801989078522 2023-01-23 22:16:47.099846: step: 180/77, loss: 0.011515635997056961 2023-01-23 22:16:48.430568: step: 184/77, loss: 0.014797764830291271 2023-01-23 22:16:49.723518: step: 188/77, loss: 0.014288538135588169 2023-01-23 22:16:51.011767: step: 192/77, loss: 0.046548232436180115 2023-01-23 22:16:52.343584: step: 196/77, loss: 0.10158401727676392 2023-01-23 22:16:53.667969: step: 200/77, loss: 0.020895320922136307 2023-01-23 22:16:54.984710: step: 204/77, loss: 0.0929812490940094 2023-01-23 22:16:56.270945: step: 208/77, loss: 0.06224067509174347 2023-01-23 22:16:57.614587: step: 212/77, loss: 0.010834470391273499 2023-01-23 22:16:58.883036: step: 216/77, loss: 0.020457429811358452 2023-01-23 22:17:00.183072: step: 220/77, loss: 0.04090658575296402 2023-01-23 22:17:01.462758: step: 224/77, loss: 0.0852619856595993 2023-01-23 22:17:02.734376: step: 228/77, loss: 0.054530490189790726 2023-01-23 22:17:04.067148: step: 232/77, loss: 0.04404990002512932 2023-01-23 22:17:05.355565: step: 236/77, loss: 0.028828389942646027 2023-01-23 22:17:06.614637: step: 240/77, loss: 0.05302087962627411 2023-01-23 22:17:07.916474: step: 244/77, loss: 0.051580313593149185 2023-01-23 22:17:09.212679: step: 248/77, loss: 0.14568468928337097 2023-01-23 22:17:10.485450: step: 252/77, loss: 0.009636200964450836 2023-01-23 22:17:11.803382: step: 256/77, loss: 0.012510381639003754 2023-01-23 22:17:13.076213: step: 260/77, loss: 0.02064824104309082 2023-01-23 22:17:14.388268: step: 264/77, loss: 0.08655968308448792 2023-01-23 22:17:15.713683: step: 268/77, loss: 0.03449642285704613 2023-01-23 22:17:16.999921: step: 272/77, loss: 0.021431952714920044 2023-01-23 22:17:18.313643: step: 276/77, loss: 0.016396211460232735 2023-01-23 22:17:19.591506: step: 280/77, loss: 0.015242512337863445 2023-01-23 22:17:20.901297: step: 284/77, loss: 0.3615311086177826 2023-01-23 22:17:22.171114: step: 288/77, 
loss: 0.08149930089712143 2023-01-23 22:17:23.497133: step: 292/77, loss: 0.026354094967246056 2023-01-23 22:17:24.799182: step: 296/77, loss: 0.008077921345829964 2023-01-23 22:17:26.112087: step: 300/77, loss: 0.13474395871162415 2023-01-23 22:17:27.414314: step: 304/77, loss: 0.005618092138320208 2023-01-23 22:17:28.702130: step: 308/77, loss: 0.21946494281291962 2023-01-23 22:17:29.993872: step: 312/77, loss: 0.04267258942127228 2023-01-23 22:17:31.211765: step: 316/77, loss: 0.015815965831279755 2023-01-23 22:17:32.544778: step: 320/77, loss: 0.03485114499926567 2023-01-23 22:17:33.885924: step: 324/77, loss: 0.026810774579644203 2023-01-23 22:17:35.175346: step: 328/77, loss: 0.01187300868332386 2023-01-23 22:17:36.478898: step: 332/77, loss: 0.05864952877163887 2023-01-23 22:17:37.765759: step: 336/77, loss: 0.030480477958917618 2023-01-23 22:17:39.032687: step: 340/77, loss: 0.06372788548469543 2023-01-23 22:17:40.305025: step: 344/77, loss: 0.05656794458627701 2023-01-23 22:17:41.611855: step: 348/77, loss: 0.0045874156057834625 2023-01-23 22:17:42.893518: step: 352/77, loss: 0.05805297940969467 2023-01-23 22:17:44.135718: step: 356/77, loss: 0.044932737946510315 2023-01-23 22:17:45.424883: step: 360/77, loss: 0.01705370470881462 2023-01-23 22:17:46.724126: step: 364/77, loss: 0.15946604311466217 2023-01-23 22:17:48.008620: step: 368/77, loss: 0.006360077764838934 2023-01-23 22:17:49.288210: step: 372/77, loss: 0.04374031722545624 2023-01-23 22:17:50.563453: step: 376/77, loss: 0.006530561950057745 2023-01-23 22:17:51.876810: step: 380/77, loss: 0.0038705565966665745 2023-01-23 22:17:53.145756: step: 384/77, loss: 0.06794550269842148 2023-01-23 22:17:54.445970: step: 388/77, loss: 0.15991713106632233 ================================================== Loss: 0.047 -------------------- Dev Chinese: {'template': {'p': 0.9090909090909091, 'r': 0.5, 'f1': 0.6451612903225806}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04535404501388967, 'epoch': 3} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5772357723577236, 'f1': 0.7171717171717171}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.021464646464646464, 'epoch': 3} Dev Korean: {'template': {'p': 0.90625, 'r': 0.48333333333333334, 'f1': 0.6304347826086957}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04431878963857263, 'epoch': 3} Test Korean: {'template': {'p': 0.9466666666666667, 'r': 0.5772357723577236, 'f1': 0.7171717171717171}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.021464646464646464, 'epoch': 3} Dev Russian: {'template': {'p': 0.90625, 'r': 0.48333333333333334, 'f1': 0.6304347826086957}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04431878963857263, 'epoch': 3} Test Russian: {'template': {'p': 0.9466666666666667, 'r': 0.5772357723577236, 'f1': 0.7171717171717171}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.021464646464646464, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.021621621621621623, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 
'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.025806451612903226, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Chinese: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Korean: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Russian: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:19:30.394799: step: 4/77, loss: 0.1591966152191162 2023-01-23 22:19:31.653642: step: 8/77, loss: 0.0036322043742984533 2023-01-23 22:19:32.941461: step: 12/77, loss: 0.0035585726145654917 2023-01-23 22:19:34.166662: step: 16/77, loss: 0.03981819003820419 2023-01-23 22:19:35.472674: step: 20/77, loss: 0.012272844091057777 2023-01-23 22:19:36.778857: step: 24/77, loss: 0.03815988078713417 2023-01-23 22:19:38.075260: step: 28/77, loss: 0.07955527305603027 2023-01-23 22:19:39.431274: step: 32/77, loss: 0.03334498777985573 2023-01-23 22:19:40.744195: step: 36/77, loss: 0.02782886102795601 2023-01-23 22:19:42.016124: step: 40/77, loss: 0.05034760758280754 2023-01-23 22:19:43.335396: step: 44/77, loss: 0.046723365783691406 2023-01-23 22:19:44.589494: step: 48/77, loss: 0.04478670284152031 2023-01-23 22:19:45.874240: step: 52/77, loss: 0.00868067517876625 2023-01-23 22:19:47.109726: step: 56/77, loss: 0.0023823282681405544 2023-01-23 22:19:48.401242: step: 60/77, loss: 0.12383562326431274 2023-01-23 22:19:49.685808: step: 64/77, loss: 0.016625147312879562 2023-01-23 22:19:50.972459: step: 68/77, loss: 0.09449809789657593 2023-01-23 22:19:52.256179: step: 72/77, loss: 0.026286687701940536 
2023-01-23 22:19:53.483705: step: 76/77, loss: 0.010752486996352673 2023-01-23 22:19:54.787631: step: 80/77, loss: 0.025785956531763077 2023-01-23 22:19:56.041193: step: 84/77, loss: 0.00048810700536705554 2023-01-23 22:19:57.322701: step: 88/77, loss: 0.03618944063782692 2023-01-23 22:19:58.573624: step: 92/77, loss: 0.010275091975927353 2023-01-23 22:19:59.870008: step: 96/77, loss: 0.019382059574127197 2023-01-23 22:20:01.185311: step: 100/77, loss: 0.022107046097517014 2023-01-23 22:20:02.486833: step: 104/77, loss: 0.03271938115358353 2023-01-23 22:20:03.833469: step: 108/77, loss: 0.02639208734035492 2023-01-23 22:20:05.093780: step: 112/77, loss: 0.07708106935024261 2023-01-23 22:20:06.346471: step: 116/77, loss: 0.01799575239419937 2023-01-23 22:20:07.598149: step: 120/77, loss: 0.035872627049684525 2023-01-23 22:20:08.882841: step: 124/77, loss: 0.005020597018301487 2023-01-23 22:20:10.173371: step: 128/77, loss: 0.048189785331487656 2023-01-23 22:20:11.524520: step: 132/77, loss: 0.012047644704580307 2023-01-23 22:20:12.852060: step: 136/77, loss: 0.06587105244398117 2023-01-23 22:20:14.172589: step: 140/77, loss: 0.17298981547355652 2023-01-23 22:20:15.488717: step: 144/77, loss: 0.03656620532274246 2023-01-23 22:20:16.775186: step: 148/77, loss: 0.03557945042848587 2023-01-23 22:20:18.097459: step: 152/77, loss: 0.0015012272633612156 2023-01-23 22:20:19.386070: step: 156/77, loss: 0.011846780776977539 2023-01-23 22:20:20.663277: step: 160/77, loss: 0.06310739368200302 2023-01-23 22:20:21.952393: step: 164/77, loss: 0.016693251207470894 2023-01-23 22:20:23.266783: step: 168/77, loss: 0.03192953020334244 2023-01-23 22:20:24.563374: step: 172/77, loss: 0.027641138061881065 2023-01-23 22:20:25.882742: step: 176/77, loss: 0.03455713763833046 2023-01-23 22:20:27.206307: step: 180/77, loss: 0.02429923042654991 2023-01-23 22:20:28.529578: step: 184/77, loss: 0.004264523275196552 2023-01-23 22:20:29.836836: step: 188/77, loss: 0.014208164997398853 2023-01-23 22:20:31.112334: step: 192/77, loss: 0.01190916821360588 2023-01-23 22:20:32.393018: step: 196/77, loss: 0.056420039385557175 2023-01-23 22:20:33.690256: step: 200/77, loss: 0.018400974571704865 2023-01-23 22:20:35.008042: step: 204/77, loss: 0.04778168350458145 2023-01-23 22:20:36.278752: step: 208/77, loss: 0.011265124194324017 2023-01-23 22:20:37.553735: step: 212/77, loss: 0.005715106148272753 2023-01-23 22:20:38.835602: step: 216/77, loss: 0.012743368744850159 2023-01-23 22:20:40.097063: step: 220/77, loss: 0.03328631818294525 2023-01-23 22:20:41.362143: step: 224/77, loss: 0.13442906737327576 2023-01-23 22:20:42.648480: step: 228/77, loss: 0.03768271580338478 2023-01-23 22:20:43.927773: step: 232/77, loss: 0.15616267919540405 2023-01-23 22:20:45.225813: step: 236/77, loss: 0.010896775871515274 2023-01-23 22:20:46.488109: step: 240/77, loss: 0.02437608689069748 2023-01-23 22:20:47.792787: step: 244/77, loss: 0.0066629135981202126 2023-01-23 22:20:49.092904: step: 248/77, loss: 0.014826871454715729 2023-01-23 22:20:50.396040: step: 252/77, loss: 0.06286932528018951 2023-01-23 22:20:51.624296: step: 256/77, loss: 0.03753984346985817 2023-01-23 22:20:52.870084: step: 260/77, loss: 0.024473311379551888 2023-01-23 22:20:54.163365: step: 264/77, loss: 0.02880875952541828 2023-01-23 22:20:55.472066: step: 268/77, loss: 0.0316791832447052 2023-01-23 22:20:56.744848: step: 272/77, loss: 0.03705769032239914 2023-01-23 22:20:58.033826: step: 276/77, loss: 0.0716656893491745 2023-01-23 22:20:59.313498: step: 280/77, loss: 
0.011389000341296196 2023-01-23 22:21:00.616251: step: 284/77, loss: 0.027336813509464264 2023-01-23 22:21:01.868066: step: 288/77, loss: 0.033267825841903687 2023-01-23 22:21:03.194363: step: 292/77, loss: 0.026131270453333855 2023-01-23 22:21:04.445736: step: 296/77, loss: 0.00949370302259922 2023-01-23 22:21:05.724327: step: 300/77, loss: 0.02307109721004963 2023-01-23 22:21:07.011734: step: 304/77, loss: 0.056111834943294525 2023-01-23 22:21:08.342333: step: 308/77, loss: 0.05641761049628258 2023-01-23 22:21:09.592596: step: 312/77, loss: 0.02172068879008293 2023-01-23 22:21:10.929476: step: 316/77, loss: 0.1652591973543167 2023-01-23 22:21:12.308322: step: 320/77, loss: 0.021472467109560966 2023-01-23 22:21:13.628668: step: 324/77, loss: 0.012137623503804207 2023-01-23 22:21:14.918892: step: 328/77, loss: 0.01814662106335163 2023-01-23 22:21:16.254689: step: 332/77, loss: 0.020530683919787407 2023-01-23 22:21:17.560537: step: 336/77, loss: 0.018007539212703705 2023-01-23 22:21:18.817295: step: 340/77, loss: 0.0415189191699028 2023-01-23 22:21:20.105138: step: 344/77, loss: 0.01093345694243908 2023-01-23 22:21:21.401811: step: 348/77, loss: 0.026807358488440514 2023-01-23 22:21:22.662261: step: 352/77, loss: 0.013095011003315449 2023-01-23 22:21:23.931938: step: 356/77, loss: 0.02017892897129059 2023-01-23 22:21:25.275635: step: 360/77, loss: 0.06183590739965439 2023-01-23 22:21:26.532255: step: 364/77, loss: 0.11347439140081406 2023-01-23 22:21:27.832702: step: 368/77, loss: 0.08657459169626236 2023-01-23 22:21:29.196927: step: 372/77, loss: 0.02488943189382553 2023-01-23 22:21:30.435355: step: 376/77, loss: 0.002923357766121626 2023-01-23 22:21:31.741858: step: 380/77, loss: 0.0434151217341423 2023-01-23 22:21:33.018659: step: 384/77, loss: 0.06214475631713867 2023-01-23 22:21:34.332584: step: 388/77, loss: 0.043125901371240616 ================================================== Loss: 0.038 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 4} Test Chinese: {'template': {'p': 0.9846153846153847, 'r': 0.5203252032520326, 'f1': 0.6808510638297873}, 'slot': {'p': 0.4838709677419355, 'r': 0.013636363636363636, 'f1': 0.026525198938992044}, 'combined': 0.01805970991590948, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 4} Test Korean: {'template': {'p': 0.9846153846153847, 'r': 0.5203252032520326, 'f1': 0.6808510638297873}, 'slot': {'p': 0.4838709677419355, 'r': 0.013636363636363636, 'f1': 0.026525198938992044}, 'combined': 0.01805970991590948, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 4} Test Russian: {'template': {'p': 0.9848484848484849, 'r': 0.5284552845528455, 'f1': 0.6878306878306879}, 'slot': {'p': 0.5151515151515151, 'r': 0.015454545454545455, 'f1': 0.030008826125330977}, 'combined': 0.020640991514777923, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 
'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Chinese: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Korean: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Russian: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:23:10.329081: step: 4/77, loss: 0.05281934514641762 2023-01-23 22:23:11.544873: step: 8/77, loss: 0.10034886747598648 2023-01-23 22:23:12.869946: step: 12/77, loss: 0.008094294928014278 2023-01-23 22:23:14.152889: step: 16/77, loss: 0.007729171775281429 2023-01-23 22:23:15.406842: step: 20/77, loss: 0.0249926894903183 2023-01-23 22:23:16.716246: step: 24/77, loss: 0.014677442610263824 2023-01-23 22:23:17.958177: step: 28/77, loss: 0.015226608142256737 2023-01-23 22:23:19.253483: step: 32/77, loss: 0.09592887759208679 2023-01-23 22:23:20.518774: step: 36/77, loss: 0.038398392498493195 2023-01-23 22:23:21.786045: step: 40/77, loss: 0.014437681064009666 2023-01-23 22:23:23.023015: step: 44/77, loss: 0.02251637540757656 2023-01-23 22:23:24.337447: step: 48/77, loss: 0.09548071026802063 2023-01-23 22:23:25.611380: step: 52/77, loss: 0.05408954620361328 2023-01-23 22:23:26.872033: step: 56/77, loss: 0.012811540625989437 2023-01-23 22:23:28.165743: step: 60/77, loss: 0.06582754850387573 2023-01-23 22:23:29.422055: step: 64/77, loss: 0.011044980026781559 2023-01-23 22:23:30.735360: step: 68/77, loss: 0.01578996330499649 2023-01-23 22:23:32.042389: step: 
72/77, loss: 0.03434739634394646 2023-01-23 22:23:33.331690: step: 76/77, loss: 0.02023492194712162 2023-01-23 22:23:34.591238: step: 80/77, loss: 0.005539305973798037 2023-01-23 22:23:35.837599: step: 84/77, loss: 0.012604881078004837 2023-01-23 22:23:37.126092: step: 88/77, loss: 0.04681181162595749 2023-01-23 22:23:38.425306: step: 92/77, loss: 0.02078882046043873 2023-01-23 22:23:39.739688: step: 96/77, loss: 0.040004126727581024 2023-01-23 22:23:40.977323: step: 100/77, loss: 0.034155894070863724 2023-01-23 22:23:42.300869: step: 104/77, loss: 0.01139921136200428 2023-01-23 22:23:43.611091: step: 108/77, loss: 0.006441830191761255 2023-01-23 22:23:44.859708: step: 112/77, loss: 0.0597098171710968 2023-01-23 22:23:46.132672: step: 116/77, loss: 0.012973377481102943 2023-01-23 22:23:47.397953: step: 120/77, loss: 0.07683604210615158 2023-01-23 22:23:48.685200: step: 124/77, loss: 0.053127095103263855 2023-01-23 22:23:49.982710: step: 128/77, loss: 0.01308278739452362 2023-01-23 22:23:51.239643: step: 132/77, loss: 0.01676439866423607 2023-01-23 22:23:52.527587: step: 136/77, loss: 0.11097963154315948 2023-01-23 22:23:53.794471: step: 140/77, loss: 0.04553266242146492 2023-01-23 22:23:55.060723: step: 144/77, loss: 0.05289888381958008 2023-01-23 22:23:56.339844: step: 148/77, loss: 0.01658434234559536 2023-01-23 22:23:57.614218: step: 152/77, loss: 0.07442338019609451 2023-01-23 22:23:58.926590: step: 156/77, loss: 0.05406294763088226 2023-01-23 22:24:00.201839: step: 160/77, loss: 0.010441828519105911 2023-01-23 22:24:01.452283: step: 164/77, loss: 0.0038215755484998226 2023-01-23 22:24:02.729387: step: 168/77, loss: 0.0016422360204160213 2023-01-23 22:24:04.014625: step: 172/77, loss: 0.07169732451438904 2023-01-23 22:24:05.305000: step: 176/77, loss: 0.01746036671102047 2023-01-23 22:24:06.595508: step: 180/77, loss: 0.04688744246959686 2023-01-23 22:24:07.931939: step: 184/77, loss: 0.023346032947301865 2023-01-23 22:24:09.216682: step: 188/77, loss: 0.022755956277251244 2023-01-23 22:24:10.491210: step: 192/77, loss: 0.00830506905913353 2023-01-23 22:24:11.753101: step: 196/77, loss: 0.006505207624286413 2023-01-23 22:24:13.037335: step: 200/77, loss: 0.034292690455913544 2023-01-23 22:24:14.374673: step: 204/77, loss: 0.1433955430984497 2023-01-23 22:24:15.615162: step: 208/77, loss: 0.011057563126087189 2023-01-23 22:24:16.908152: step: 212/77, loss: 0.02051542140543461 2023-01-23 22:24:18.146954: step: 216/77, loss: 0.015034375712275505 2023-01-23 22:24:19.415929: step: 220/77, loss: 0.02420666441321373 2023-01-23 22:24:20.738658: step: 224/77, loss: 0.07697256654500961 2023-01-23 22:24:21.988073: step: 228/77, loss: 0.03689153492450714 2023-01-23 22:24:23.254445: step: 232/77, loss: 0.015925971791148186 2023-01-23 22:24:24.500761: step: 236/77, loss: 0.0585017055273056 2023-01-23 22:24:25.805382: step: 240/77, loss: 0.009359755553305149 2023-01-23 22:24:27.035598: step: 244/77, loss: 0.021430548280477524 2023-01-23 22:24:28.347483: step: 248/77, loss: 0.018590224906802177 2023-01-23 22:24:29.654743: step: 252/77, loss: 0.01646292582154274 2023-01-23 22:24:30.984165: step: 256/77, loss: 0.049518853425979614 2023-01-23 22:24:32.299539: step: 260/77, loss: 0.02466382458806038 2023-01-23 22:24:33.641770: step: 264/77, loss: 0.007570900954306126 2023-01-23 22:24:34.959529: step: 268/77, loss: 0.1567116677761078 2023-01-23 22:24:36.285540: step: 272/77, loss: 0.044765837490558624 2023-01-23 22:24:37.562111: step: 276/77, loss: 0.04036088287830353 2023-01-23 22:24:38.881434: step: 
280/77, loss: 0.0008329019183292985 2023-01-23 22:24:40.206821: step: 284/77, loss: 0.12199905514717102 2023-01-23 22:24:41.496307: step: 288/77, loss: 0.045698583126068115 2023-01-23 22:24:42.825761: step: 292/77, loss: 0.02223210595548153 2023-01-23 22:24:44.114140: step: 296/77, loss: 0.07185053825378418 2023-01-23 22:24:45.427260: step: 300/77, loss: 0.025253664702177048 2023-01-23 22:24:46.688892: step: 304/77, loss: 0.035008110105991364 2023-01-23 22:24:47.969369: step: 308/77, loss: 0.07302802056074142 2023-01-23 22:24:49.239009: step: 312/77, loss: 0.07924443483352661 2023-01-23 22:24:50.483305: step: 316/77, loss: 0.019673509523272514 2023-01-23 22:24:51.754498: step: 320/77, loss: 0.07078725099563599 2023-01-23 22:24:53.003934: step: 324/77, loss: 0.014910933561623096 2023-01-23 22:24:54.285753: step: 328/77, loss: 0.02365684136748314 2023-01-23 22:24:55.539311: step: 332/77, loss: 0.032946422696113586 2023-01-23 22:24:56.863438: step: 336/77, loss: 0.023019496351480484 2023-01-23 22:24:58.167002: step: 340/77, loss: 0.05330086126923561 2023-01-23 22:24:59.479076: step: 344/77, loss: 0.08135435730218887 2023-01-23 22:25:00.827004: step: 348/77, loss: 0.08608993887901306 2023-01-23 22:25:02.136542: step: 352/77, loss: 0.028688719496130943 2023-01-23 22:25:03.437896: step: 356/77, loss: 0.006268288008868694 2023-01-23 22:25:04.749781: step: 360/77, loss: 0.03752407804131508 2023-01-23 22:25:05.983243: step: 364/77, loss: 0.012255517765879631 2023-01-23 22:25:07.279680: step: 368/77, loss: 0.18901900947093964 2023-01-23 22:25:08.575573: step: 372/77, loss: 0.017559271305799484 2023-01-23 22:25:09.834392: step: 376/77, loss: 0.017930801957845688 2023-01-23 22:25:11.104481: step: 380/77, loss: 0.0486404225230217 2023-01-23 22:25:12.414444: step: 384/77, loss: 0.021391339600086212 2023-01-23 22:25:13.720492: step: 388/77, loss: 0.010606233961880207 ================================================== Loss: 0.039 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 
'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:27:06.220843: step: 4/77, loss: 0.014090805314481258 2023-01-23 22:27:07.485955: step: 8/77, loss: 0.037653181701898575 2023-01-23 22:27:08.793103: step: 12/77, loss: 0.012905780225992203 2023-01-23 22:27:10.057525: step: 16/77, loss: 0.12145353853702545 2023-01-23 22:27:11.374515: step: 20/77, loss: 0.004029234871268272 2023-01-23 22:27:12.688114: step: 24/77, loss: 0.0458088181912899 2023-01-23 22:27:13.955769: step: 28/77, loss: 0.013678375631570816 2023-01-23 22:27:15.248694: step: 32/77, loss: 0.016728755086660385 2023-01-23 22:27:16.510314: step: 36/77, loss: 0.00028109041159041226 2023-01-23 22:27:17.811085: step: 40/77, loss: 0.010670517571270466 2023-01-23 22:27:19.066684: step: 44/77, loss: 0.05759980529546738 2023-01-23 22:27:20.316267: step: 48/77, loss: 0.030156835913658142 2023-01-23 22:27:21.545995: step: 52/77, loss: 0.013267605565488338 2023-01-23 22:27:22.821214: step: 56/77, loss: 0.056651029735803604 2023-01-23 
22:27:24.126203: step: 60/77, loss: 0.07792923599481583 2023-01-23 22:27:25.407164: step: 64/77, loss: 0.048105549067258835 2023-01-23 22:27:26.689838: step: 68/77, loss: 0.0016152573516592383 2023-01-23 22:27:27.998977: step: 72/77, loss: 0.01203523576259613 2023-01-23 22:27:29.247368: step: 76/77, loss: 0.00038057187339290977 2023-01-23 22:27:30.524249: step: 80/77, loss: 0.00727054663002491 2023-01-23 22:27:31.782364: step: 84/77, loss: 0.008634903468191624 2023-01-23 22:27:33.021140: step: 88/77, loss: 0.05545460432767868 2023-01-23 22:27:34.271818: step: 92/77, loss: 0.08746394515037537 2023-01-23 22:27:35.591075: step: 96/77, loss: 0.0012042783200740814 2023-01-23 22:27:36.866149: step: 100/77, loss: 0.0012032882077619433 2023-01-23 22:27:38.157201: step: 104/77, loss: 0.03924532234668732 2023-01-23 22:27:39.431807: step: 108/77, loss: 0.07420468330383301 2023-01-23 22:27:40.692602: step: 112/77, loss: 0.031061705201864243 2023-01-23 22:27:41.933763: step: 116/77, loss: 0.010193225927650928 2023-01-23 22:27:43.216962: step: 120/77, loss: 0.02527455799281597 2023-01-23 22:27:44.441026: step: 124/77, loss: 0.10996196419000626 2023-01-23 22:27:45.680452: step: 128/77, loss: 0.022913135588169098 2023-01-23 22:27:46.968596: step: 132/77, loss: 0.04355445131659508 2023-01-23 22:27:48.305796: step: 136/77, loss: 0.08999864012002945 2023-01-23 22:27:49.574826: step: 140/77, loss: 0.1155029907822609 2023-01-23 22:27:50.885771: step: 144/77, loss: 0.015417231246829033 2023-01-23 22:27:52.162755: step: 148/77, loss: 0.0026104964781552553 2023-01-23 22:27:53.431567: step: 152/77, loss: 0.015371415764093399 2023-01-23 22:27:54.719720: step: 156/77, loss: 0.028408939018845558 2023-01-23 22:27:56.034549: step: 160/77, loss: 0.013671678490936756 2023-01-23 22:27:57.270222: step: 164/77, loss: 0.0016819187439978123 2023-01-23 22:27:58.569031: step: 168/77, loss: 0.016836147755384445 2023-01-23 22:27:59.861768: step: 172/77, loss: 0.028055887669324875 2023-01-23 22:28:01.140170: step: 176/77, loss: 0.02009519934654236 2023-01-23 22:28:02.422837: step: 180/77, loss: 0.02400626242160797 2023-01-23 22:28:03.709445: step: 184/77, loss: 0.020713580772280693 2023-01-23 22:28:05.000031: step: 188/77, loss: 0.021156713366508484 2023-01-23 22:28:06.296758: step: 192/77, loss: 0.004837479908019304 2023-01-23 22:28:07.592789: step: 196/77, loss: 0.017320267856121063 2023-01-23 22:28:08.881936: step: 200/77, loss: 0.020907748490571976 2023-01-23 22:28:10.196217: step: 204/77, loss: 0.04583645239472389 2023-01-23 22:28:11.478492: step: 208/77, loss: 0.014641058631241322 2023-01-23 22:28:12.744334: step: 212/77, loss: 0.022127849981188774 2023-01-23 22:28:14.052970: step: 216/77, loss: 0.036513473838567734 2023-01-23 22:28:15.392402: step: 220/77, loss: 0.03856131061911583 2023-01-23 22:28:16.689748: step: 224/77, loss: 0.005931971129029989 2023-01-23 22:28:17.969596: step: 228/77, loss: 0.020555173978209496 2023-01-23 22:28:19.215919: step: 232/77, loss: 0.1392797976732254 2023-01-23 22:28:20.514167: step: 236/77, loss: 0.0072579397819936275 2023-01-23 22:28:21.759243: step: 240/77, loss: 0.016458049416542053 2023-01-23 22:28:23.067400: step: 244/77, loss: 0.017515050247311592 2023-01-23 22:28:24.324717: step: 248/77, loss: 0.015963345766067505 2023-01-23 22:28:25.612111: step: 252/77, loss: 0.04388893023133278 2023-01-23 22:28:26.866144: step: 256/77, loss: 0.011263003572821617 2023-01-23 22:28:28.109574: step: 260/77, loss: 0.051796067506074905 2023-01-23 22:28:29.426656: step: 264/77, loss: 0.010814322158694267 
2023-01-23 22:28:30.628026: step: 268/77, loss: 0.014244206249713898 2023-01-23 22:28:31.848516: step: 272/77, loss: 0.01440565288066864 2023-01-23 22:28:33.109657: step: 276/77, loss: 0.017506180331110954 2023-01-23 22:28:34.409953: step: 280/77, loss: 0.02340465411543846 2023-01-23 22:28:35.698838: step: 284/77, loss: 0.054062407463788986 2023-01-23 22:28:36.955923: step: 288/77, loss: 0.01714850217103958 2023-01-23 22:28:38.244534: step: 292/77, loss: 0.02229364961385727 2023-01-23 22:28:39.557642: step: 296/77, loss: 0.014405068010091782 2023-01-23 22:28:40.850301: step: 300/77, loss: 0.06514809280633926 2023-01-23 22:28:42.149789: step: 304/77, loss: 0.029408197849988937 2023-01-23 22:28:43.462809: step: 308/77, loss: 0.010319838300347328 2023-01-23 22:28:44.704784: step: 312/77, loss: 0.010362434200942516 2023-01-23 22:28:45.985631: step: 316/77, loss: 0.002917802194133401 2023-01-23 22:28:47.316889: step: 320/77, loss: 0.018622087314724922 2023-01-23 22:28:48.614071: step: 324/77, loss: 0.024601642042398453 2023-01-23 22:28:49.872785: step: 328/77, loss: 0.0031394544057548046 2023-01-23 22:28:51.140270: step: 332/77, loss: 0.00596038531512022 2023-01-23 22:28:52.424170: step: 336/77, loss: 0.02685163915157318 2023-01-23 22:28:53.737818: step: 340/77, loss: 0.019619952887296677 2023-01-23 22:28:55.067157: step: 344/77, loss: 0.003369607264176011 2023-01-23 22:28:56.362212: step: 348/77, loss: 0.03238417208194733 2023-01-23 22:28:57.686191: step: 352/77, loss: 0.03758778050541878 2023-01-23 22:28:58.919732: step: 356/77, loss: 0.006597156636416912 2023-01-23 22:29:00.210501: step: 360/77, loss: 0.007597966585308313 2023-01-23 22:29:01.522395: step: 364/77, loss: 0.024710290133953094 2023-01-23 22:29:02.831307: step: 368/77, loss: 0.0800224095582962 2023-01-23 22:29:04.120542: step: 372/77, loss: 0.11172007769346237 2023-01-23 22:29:05.400866: step: 376/77, loss: 0.029980309307575226 2023-01-23 22:29:06.714265: step: 380/77, loss: 0.03759705647826195 2023-01-23 22:29:08.055207: step: 384/77, loss: 0.012880472466349602 2023-01-23 22:29:09.363376: step: 388/77, loss: 0.006357981823384762 ================================================== Loss: 0.029 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Chinese: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.46875, 'r': 0.013636363636363636, 'f1': 0.026501766784452298}, 'combined': 0.019777437898844997, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Korean: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.46875, 'r': 0.013636363636363636, 'f1': 0.026501766784452298}, 'combined': 0.019777437898844997, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Russian: {'template': {'p': 0.974025974025974, 'r': 0.6097560975609756, 'f1': 0.7500000000000001}, 'slot': {'p': 0.5, 'r': 0.013636363636363636, 'f1': 0.026548672566371678}, 'combined': 0.01991150442477876, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 
'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:30:45.069642: step: 4/77, loss: 0.00365395937114954 2023-01-23 22:30:46.327855: step: 8/77, loss: 0.006277657113969326 2023-01-23 22:30:47.608374: step: 12/77, loss: 0.020647669211030006 2023-01-23 22:30:48.892043: step: 16/77, loss: 0.019607748836278915 2023-01-23 22:30:50.124837: step: 20/77, loss: 0.022213328629732132 2023-01-23 22:30:51.398811: step: 24/77, loss: 0.011937039904296398 2023-01-23 22:30:52.657266: step: 28/77, loss: 0.02456793561577797 2023-01-23 22:30:53.939480: step: 32/77, loss: 0.04214583709836006 2023-01-23 22:30:55.184157: step: 36/77, loss: 0.05198121815919876 2023-01-23 22:30:56.446903: step: 40/77, loss: 0.012112004682421684 2023-01-23 22:30:57.719511: step: 44/77, loss: 0.0383678674697876 2023-01-23 22:30:58.998918: step: 48/77, loss: 0.020618222653865814 2023-01-23 
22:31:00.261669: step: 52/77, loss: 0.00845943484455347 2023-01-23 22:31:01.507634: step: 56/77, loss: 0.001100810943171382 2023-01-23 22:31:02.760894: step: 60/77, loss: 0.030928371474146843 2023-01-23 22:31:04.038867: step: 64/77, loss: 0.01059103012084961 2023-01-23 22:31:05.366706: step: 68/77, loss: 0.015934430062770844 2023-01-23 22:31:06.606784: step: 72/77, loss: 0.002905278466641903 2023-01-23 22:31:07.889059: step: 76/77, loss: 0.0014717906014993787 2023-01-23 22:31:09.185641: step: 80/77, loss: 0.014231668785214424 2023-01-23 22:31:10.483048: step: 84/77, loss: 0.01598450541496277 2023-01-23 22:31:11.790237: step: 88/77, loss: 6.96962742949836e-05 2023-01-23 22:31:13.127739: step: 92/77, loss: 0.0290484931319952 2023-01-23 22:31:14.371414: step: 96/77, loss: 0.027346597984433174 2023-01-23 22:31:15.674535: step: 100/77, loss: 0.01010982133448124 2023-01-23 22:31:16.916404: step: 104/77, loss: 0.028047332540154457 2023-01-23 22:31:18.233994: step: 108/77, loss: 0.003457102458924055 2023-01-23 22:31:19.568364: step: 112/77, loss: 0.03708508610725403 2023-01-23 22:31:20.849324: step: 116/77, loss: 0.016202857717871666 2023-01-23 22:31:22.182360: step: 120/77, loss: 0.04042280465364456 2023-01-23 22:31:23.454746: step: 124/77, loss: 0.040032271295785904 2023-01-23 22:31:24.789549: step: 128/77, loss: 0.05625339224934578 2023-01-23 22:31:26.101841: step: 132/77, loss: 0.034802548587322235 2023-01-23 22:31:27.363935: step: 136/77, loss: 0.0036582592874765396 2023-01-23 22:31:28.652119: step: 140/77, loss: 0.00691894581541419 2023-01-23 22:31:29.881781: step: 144/77, loss: 0.013843704015016556 2023-01-23 22:31:31.166456: step: 148/77, loss: 0.005941564217209816 2023-01-23 22:31:32.462149: step: 152/77, loss: 0.06688333302736282 2023-01-23 22:31:33.738329: step: 156/77, loss: 0.01571185328066349 2023-01-23 22:31:35.004318: step: 160/77, loss: 0.0064134192653000355 2023-01-23 22:31:36.310110: step: 164/77, loss: 0.0006494708359241486 2023-01-23 22:31:37.581694: step: 168/77, loss: 0.00016186948050744832 2023-01-23 22:31:38.886963: step: 172/77, loss: 0.03260376676917076 2023-01-23 22:31:40.172071: step: 176/77, loss: 0.016576889902353287 2023-01-23 22:31:41.497235: step: 180/77, loss: 0.08860062062740326 2023-01-23 22:31:42.744601: step: 184/77, loss: 0.004775453824549913 2023-01-23 22:31:44.014380: step: 188/77, loss: 0.10617873817682266 2023-01-23 22:31:45.289519: step: 192/77, loss: 0.014482242986559868 2023-01-23 22:31:46.576301: step: 196/77, loss: 0.037338465452194214 2023-01-23 22:31:47.857814: step: 200/77, loss: 0.011880840174853802 2023-01-23 22:31:49.141127: step: 204/77, loss: 0.02243220992386341 2023-01-23 22:31:50.419985: step: 208/77, loss: 0.006865553557872772 2023-01-23 22:31:51.742659: step: 212/77, loss: 0.023941833525896072 2023-01-23 22:31:53.044929: step: 216/77, loss: 0.048299212008714676 2023-01-23 22:31:54.349593: step: 220/77, loss: 0.04769997298717499 2023-01-23 22:31:55.601755: step: 224/77, loss: 0.0007127886055968702 2023-01-23 22:31:56.888629: step: 228/77, loss: 0.0004685519670601934 2023-01-23 22:31:58.203950: step: 232/77, loss: 0.020813144743442535 2023-01-23 22:31:59.483030: step: 236/77, loss: 0.005600340198725462 2023-01-23 22:32:00.799843: step: 240/77, loss: 0.10227024555206299 2023-01-23 22:32:02.144110: step: 244/77, loss: 0.015351877547800541 2023-01-23 22:32:03.467370: step: 248/77, loss: 0.0005788762355223298 2023-01-23 22:32:04.734908: step: 252/77, loss: 0.003927960526198149 2023-01-23 22:32:06.040542: step: 256/77, loss: 0.02673269994556904 
2023-01-23 22:32:07.301367: step: 260/77, loss: 0.039880044758319855 2023-01-23 22:32:08.594338: step: 264/77, loss: 0.03469827026128769 2023-01-23 22:32:09.843243: step: 268/77, loss: 0.011209025979042053 2023-01-23 22:32:11.109926: step: 272/77, loss: 0.013481708243489265 2023-01-23 22:32:12.430904: step: 276/77, loss: 0.02489202469587326 2023-01-23 22:32:13.780111: step: 280/77, loss: 0.018024705350399017 2023-01-23 22:32:15.084560: step: 284/77, loss: 0.006658963393419981 2023-01-23 22:32:16.398646: step: 288/77, loss: 0.032713882625103 2023-01-23 22:32:17.682607: step: 292/77, loss: 0.013792910613119602 2023-01-23 22:32:18.987070: step: 296/77, loss: 0.02276519313454628 2023-01-23 22:32:20.256379: step: 300/77, loss: 0.013171669095754623 2023-01-23 22:32:21.524495: step: 304/77, loss: 0.0017072930932044983 2023-01-23 22:32:22.779994: step: 308/77, loss: 0.00681102741509676 2023-01-23 22:32:24.022211: step: 312/77, loss: 0.019400490447878838 2023-01-23 22:32:25.342424: step: 316/77, loss: 0.029169730842113495 2023-01-23 22:32:26.680836: step: 320/77, loss: 0.0014584583695977926 2023-01-23 22:32:27.991319: step: 324/77, loss: 0.04748348891735077 2023-01-23 22:32:29.321292: step: 328/77, loss: 0.013542639091610909 2023-01-23 22:32:30.564164: step: 332/77, loss: 0.03388283774256706 2023-01-23 22:32:31.902798: step: 336/77, loss: 0.00780377397313714 2023-01-23 22:32:33.208682: step: 340/77, loss: 0.039747536182403564 2023-01-23 22:32:34.524916: step: 344/77, loss: 0.014999479055404663 2023-01-23 22:32:35.806840: step: 348/77, loss: 0.023499522358179092 2023-01-23 22:32:37.108951: step: 352/77, loss: 0.2161271721124649 2023-01-23 22:32:38.437014: step: 356/77, loss: 0.018633641302585602 2023-01-23 22:32:39.717914: step: 360/77, loss: 0.017180128023028374 2023-01-23 22:32:41.035818: step: 364/77, loss: 0.14562822878360748 2023-01-23 22:32:42.339436: step: 368/77, loss: 0.04934248328208923 2023-01-23 22:32:43.618773: step: 372/77, loss: 0.004744932055473328 2023-01-23 22:32:44.898870: step: 376/77, loss: 0.03900888189673424 2023-01-23 22:32:46.210245: step: 380/77, loss: 0.014300035312771797 2023-01-23 22:32:47.505341: step: 384/77, loss: 0.02475110813975334 2023-01-23 22:32:48.818737: step: 388/77, loss: 0.0035867562983185053 ================================================== Loss: 0.025 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9390243902439024, 'r': 0.6260162601626016, 'f1': 0.751219512195122}, 'slot': {'p': 0.4594594594594595, 'r': 0.015454545454545455, 'f1': 0.029903254177660512}, 'combined': 0.022463908016388874, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9390243902439024, 'r': 0.6260162601626016, 'f1': 0.751219512195122}, 'slot': {'p': 0.4594594594594595, 'r': 0.015454545454545455, 'f1': 0.029903254177660512}, 'combined': 0.022463908016388874, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.9390243902439024, 'r': 0.6260162601626016, 'f1': 0.751219512195122}, 
'slot': {'p': 0.4594594594594595, 'r': 0.015454545454545455, 'f1': 0.029903254177660512}, 'combined': 0.022463908016388874, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:34:24.726318: step: 4/77, loss: 0.010056269355118275 2023-01-23 22:34:25.963620: step: 8/77, loss: 0.018890120089054108 2023-01-23 22:34:27.258851: step: 12/77, loss: 0.017161967232823372 2023-01-23 22:34:28.498161: step: 16/77, loss: 0.02055450901389122 2023-01-23 22:34:29.699518: step: 20/77, loss: 0.024720754474401474 2023-01-23 22:34:30.943089: step: 24/77, loss: 0.010491413995623589 2023-01-23 22:34:32.213006: step: 28/77, loss: 0.0037658684886991978 2023-01-23 22:34:33.544455: step: 32/77, loss: 0.038764867931604385 2023-01-23 22:34:34.840258: step: 36/77, loss: 0.03026004135608673 2023-01-23 22:34:36.072299: step: 
40/77, loss: 0.02923579327762127 2023-01-23 22:34:37.394391: step: 44/77, loss: 0.04140114411711693 2023-01-23 22:34:38.631329: step: 48/77, loss: 0.033918458968400955 2023-01-23 22:34:39.905088: step: 52/77, loss: 0.00033601350151002407 2023-01-23 22:34:41.186530: step: 56/77, loss: 0.0021884054876863956 2023-01-23 22:34:42.468153: step: 60/77, loss: 0.005278467666357756 2023-01-23 22:34:43.734475: step: 64/77, loss: 0.07367675006389618 2023-01-23 22:34:44.991667: step: 68/77, loss: 0.05771408602595329 2023-01-23 22:34:46.270274: step: 72/77, loss: 0.08574645966291428 2023-01-23 22:34:47.553902: step: 76/77, loss: 0.013951526023447514 2023-01-23 22:34:48.798535: step: 80/77, loss: 0.016018124297261238 2023-01-23 22:34:50.066621: step: 84/77, loss: 0.00187723059207201 2023-01-23 22:34:51.351489: step: 88/77, loss: 0.002696490380913019 2023-01-23 22:34:52.663034: step: 92/77, loss: 0.013174856081604958 2023-01-23 22:34:53.986303: step: 96/77, loss: 0.005178231745958328 2023-01-23 22:34:55.298478: step: 100/77, loss: 0.019260739907622337 2023-01-23 22:34:56.621014: step: 104/77, loss: 0.05311047285795212 2023-01-23 22:34:57.871289: step: 108/77, loss: 0.01540825143456459 2023-01-23 22:34:59.145198: step: 112/77, loss: 0.006137767806649208 2023-01-23 22:35:00.395014: step: 116/77, loss: 0.01354296412318945 2023-01-23 22:35:01.742905: step: 120/77, loss: 0.016296926885843277 2023-01-23 22:35:03.037808: step: 124/77, loss: 0.0025672190822660923 2023-01-23 22:35:04.314444: step: 128/77, loss: 0.0595506876707077 2023-01-23 22:35:05.595179: step: 132/77, loss: 0.01555662415921688 2023-01-23 22:35:06.860642: step: 136/77, loss: 0.0031445412896573544 2023-01-23 22:35:08.147677: step: 140/77, loss: 0.0343557633459568 2023-01-23 22:35:09.402344: step: 144/77, loss: 0.00534120062366128 2023-01-23 22:35:10.665164: step: 148/77, loss: 0.015882568433880806 2023-01-23 22:35:11.890804: step: 152/77, loss: 0.0034859557636082172 2023-01-23 22:35:13.174378: step: 156/77, loss: 0.0897187888622284 2023-01-23 22:35:14.417128: step: 160/77, loss: 0.03130680322647095 2023-01-23 22:35:15.711443: step: 164/77, loss: 0.005163657478988171 2023-01-23 22:35:17.028600: step: 168/77, loss: 0.014301144517958164 2023-01-23 22:35:18.336337: step: 172/77, loss: 0.002605058718472719 2023-01-23 22:35:19.604881: step: 176/77, loss: 0.004895820282399654 2023-01-23 22:35:20.863596: step: 180/77, loss: 0.0005318675539456308 2023-01-23 22:35:22.134794: step: 184/77, loss: 0.02352241799235344 2023-01-23 22:35:23.402232: step: 188/77, loss: 0.009760452434420586 2023-01-23 22:35:24.699631: step: 192/77, loss: 0.0005129415076225996 2023-01-23 22:35:25.968249: step: 196/77, loss: 0.04506408050656319 2023-01-23 22:35:27.235440: step: 200/77, loss: 0.009796293452382088 2023-01-23 22:35:28.568910: step: 204/77, loss: 0.00035782958730123937 2023-01-23 22:35:29.876611: step: 208/77, loss: 0.031156811863183975 2023-01-23 22:35:31.203807: step: 212/77, loss: 0.038962021470069885 2023-01-23 22:35:32.468364: step: 216/77, loss: 0.003285457845777273 2023-01-23 22:35:33.790131: step: 220/77, loss: 0.030111942440271378 2023-01-23 22:35:35.084669: step: 224/77, loss: 0.0034210169687867165 2023-01-23 22:35:36.292979: step: 228/77, loss: 0.003368095261976123 2023-01-23 22:35:37.588808: step: 232/77, loss: 0.028834929689764977 2023-01-23 22:35:38.904874: step: 236/77, loss: 0.005496030207723379 2023-01-23 22:35:40.159325: step: 240/77, loss: 0.0867997258901596 2023-01-23 22:35:41.430839: step: 244/77, loss: 0.013414930552244186 2023-01-23 
22:35:42.726329: step: 248/77, loss: 0.11154910922050476 2023-01-23 22:35:44.036842: step: 252/77, loss: 0.01679971069097519 2023-01-23 22:35:45.343338: step: 256/77, loss: 0.016359906643629074 2023-01-23 22:35:46.629097: step: 260/77, loss: 0.03159204125404358 2023-01-23 22:35:47.924265: step: 264/77, loss: 0.006481696851551533 2023-01-23 22:35:49.238343: step: 268/77, loss: 0.0011871152091771364 2023-01-23 22:35:50.517909: step: 272/77, loss: 0.02756771445274353 2023-01-23 22:35:51.738421: step: 276/77, loss: 0.006530491169542074 2023-01-23 22:35:53.014321: step: 280/77, loss: 0.04043383151292801 2023-01-23 22:35:54.281109: step: 284/77, loss: 0.03230508789420128 2023-01-23 22:35:55.599701: step: 288/77, loss: 0.014443744905292988 2023-01-23 22:35:56.843245: step: 292/77, loss: 0.009209951385855675 2023-01-23 22:35:58.083120: step: 296/77, loss: 0.019323663786053658 2023-01-23 22:35:59.406885: step: 300/77, loss: 0.09706917405128479 2023-01-23 22:36:00.666152: step: 304/77, loss: 0.017132218927145004 2023-01-23 22:36:01.955078: step: 308/77, loss: 0.008376318961381912 2023-01-23 22:36:03.248453: step: 312/77, loss: 0.018071360886096954 2023-01-23 22:36:04.545265: step: 316/77, loss: 0.021176446229219437 2023-01-23 22:36:05.804245: step: 320/77, loss: 0.008609825745224953 2023-01-23 22:36:07.101833: step: 324/77, loss: 0.015605449676513672 2023-01-23 22:36:08.372928: step: 328/77, loss: 0.027690229937434196 2023-01-23 22:36:09.650602: step: 332/77, loss: 0.10342463850975037 2023-01-23 22:36:10.914466: step: 336/77, loss: 0.07532121986150742 2023-01-23 22:36:12.217745: step: 340/77, loss: 0.0045474437065422535 2023-01-23 22:36:13.500871: step: 344/77, loss: 0.014486493542790413 2023-01-23 22:36:14.770005: step: 348/77, loss: 0.015550438314676285 2023-01-23 22:36:16.066718: step: 352/77, loss: 0.0368066281080246 2023-01-23 22:36:17.324197: step: 356/77, loss: 0.007086616940796375 2023-01-23 22:36:18.598338: step: 360/77, loss: 0.017183110117912292 2023-01-23 22:36:19.906770: step: 364/77, loss: 0.011694397777318954 2023-01-23 22:36:21.154590: step: 368/77, loss: 0.011689573526382446 2023-01-23 22:36:22.402197: step: 372/77, loss: 0.08766619861125946 2023-01-23 22:36:23.693411: step: 376/77, loss: 0.0017116167582571507 2023-01-23 22:36:24.943772: step: 380/77, loss: 0.0179583802819252 2023-01-23 22:36:26.202172: step: 384/77, loss: 0.0024103238247334957 2023-01-23 22:36:27.516710: step: 388/77, loss: 0.004919702652841806 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9746835443037974, 'r': 0.6260162601626016, 'f1': 0.7623762376237624}, 'slot': {'p': 0.4146341463414634, 'r': 0.015454545454545455, 'f1': 0.02979842243645925}, 'combined': 0.02271760918423131, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.9746835443037974, 'r': 0.6260162601626016, 'f1': 0.7623762376237624}, 'slot': {'p': 0.4146341463414634, 'r': 0.015454545454545455, 'f1': 0.02979842243645925}, 'combined': 0.02271760918423131, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9746835443037974, 'r': 0.6260162601626016, 'f1': 0.7623762376237624}, 'slot': {'p': 0.4146341463414634, 'r': 0.015454545454545455, 'f1': 0.02979842243645925}, 'combined': 0.02271760918423131, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:38:03.557251: step: 4/77, loss: 0.009951209649443626 2023-01-23 22:38:04.892717: step: 8/77, loss: 0.01025527622550726 2023-01-23 22:38:06.155505: step: 12/77, loss: 0.010599697008728981 2023-01-23 22:38:07.421032: step: 16/77, loss: 2.8813841709052213e-05 2023-01-23 22:38:08.730170: step: 20/77, loss: 0.015449351631104946 2023-01-23 22:38:09.996727: step: 24/77, loss: 0.013126425445079803 2023-01-23 22:38:11.288214: step: 28/77, 
loss: 0.024598799645900726 2023-01-23 22:38:12.556417: step: 32/77, loss: 0.015080241486430168 2023-01-23 22:38:13.813909: step: 36/77, loss: 0.04678387939929962 2023-01-23 22:38:15.081684: step: 40/77, loss: 0.012904101982712746 2023-01-23 22:38:16.372027: step: 44/77, loss: 0.015326356515288353 2023-01-23 22:38:17.642512: step: 48/77, loss: 0.021616067737340927 2023-01-23 22:38:18.912342: step: 52/77, loss: 0.016074180603027344 2023-01-23 22:38:20.143427: step: 56/77, loss: 0.013141414150595665 2023-01-23 22:38:21.443653: step: 60/77, loss: 0.05112868547439575 2023-01-23 22:38:22.703365: step: 64/77, loss: 0.0026340484619140625 2023-01-23 22:38:23.956240: step: 68/77, loss: 0.002100118901580572 2023-01-23 22:38:25.234617: step: 72/77, loss: 0.17738491296768188 2023-01-23 22:38:26.512819: step: 76/77, loss: 0.037238575518131256 2023-01-23 22:38:27.768549: step: 80/77, loss: 0.01159091666340828 2023-01-23 22:38:29.053455: step: 84/77, loss: 8.563547453377396e-05 2023-01-23 22:38:30.346865: step: 88/77, loss: 0.0001513077295385301 2023-01-23 22:38:31.630259: step: 92/77, loss: 0.02696680650115013 2023-01-23 22:38:32.890824: step: 96/77, loss: 0.0179781224578619 2023-01-23 22:38:34.161046: step: 100/77, loss: 0.06407640874385834 2023-01-23 22:38:35.482305: step: 104/77, loss: 0.0012260295916348696 2023-01-23 22:38:36.751576: step: 108/77, loss: 0.007572871632874012 2023-01-23 22:38:37.987713: step: 112/77, loss: 0.005561821162700653 2023-01-23 22:38:39.292913: step: 116/77, loss: 0.010646567679941654 2023-01-23 22:38:40.620138: step: 120/77, loss: 0.012919003143906593 2023-01-23 22:38:41.858645: step: 124/77, loss: 0.012604203075170517 2023-01-23 22:38:43.126706: step: 128/77, loss: 0.05541830509901047 2023-01-23 22:38:44.417356: step: 132/77, loss: 0.03553691506385803 2023-01-23 22:38:45.695689: step: 136/77, loss: 0.01568058505654335 2023-01-23 22:38:47.018949: step: 140/77, loss: 0.03287587687373161 2023-01-23 22:38:48.277444: step: 144/77, loss: 0.0027599541936069727 2023-01-23 22:38:49.595676: step: 148/77, loss: 0.01692170463502407 2023-01-23 22:38:50.845629: step: 152/77, loss: 0.08928981423377991 2023-01-23 22:38:52.165177: step: 156/77, loss: 0.04975442215800285 2023-01-23 22:38:53.459861: step: 160/77, loss: 0.02008040063083172 2023-01-23 22:38:54.731235: step: 164/77, loss: 0.0019231241894885898 2023-01-23 22:38:56.037906: step: 168/77, loss: 0.0244793388992548 2023-01-23 22:38:57.297300: step: 172/77, loss: 0.010686839930713177 2023-01-23 22:38:58.560154: step: 176/77, loss: 0.0025486641097813845 2023-01-23 22:38:59.836531: step: 180/77, loss: 0.0068518416956067085 2023-01-23 22:39:01.107780: step: 184/77, loss: 0.014136513695120811 2023-01-23 22:39:02.425132: step: 188/77, loss: 0.03422325477004051 2023-01-23 22:39:03.769558: step: 192/77, loss: 0.0019678741227835417 2023-01-23 22:39:05.088469: step: 196/77, loss: 0.003199803875759244 2023-01-23 22:39:06.338878: step: 200/77, loss: 0.00686542596668005 2023-01-23 22:39:07.636132: step: 204/77, loss: 0.00038556900108233094 2023-01-23 22:39:08.942299: step: 208/77, loss: 0.00295876432210207 2023-01-23 22:39:10.212373: step: 212/77, loss: 8.276679000118747e-05 2023-01-23 22:39:11.495182: step: 216/77, loss: 0.009483201429247856 2023-01-23 22:39:12.763745: step: 220/77, loss: 0.009370522573590279 2023-01-23 22:39:14.030395: step: 224/77, loss: 0.025101710110902786 2023-01-23 22:39:15.357414: step: 228/77, loss: 0.020489763468503952 2023-01-23 22:39:16.638197: step: 232/77, loss: 0.0010126195847988129 2023-01-23 22:39:17.944812: step: 
236/77, loss: 0.02698575146496296 2023-01-23 22:39:19.290867: step: 240/77, loss: 0.04893007129430771 2023-01-23 22:39:20.571200: step: 244/77, loss: 0.012933338060975075 2023-01-23 22:39:21.841530: step: 248/77, loss: 0.016066405922174454 2023-01-23 22:39:23.107402: step: 252/77, loss: 0.00607153307646513 2023-01-23 22:39:24.462562: step: 256/77, loss: 0.0004491469881031662 2023-01-23 22:39:25.805071: step: 260/77, loss: 0.14309868216514587 2023-01-23 22:39:27.075536: step: 264/77, loss: 0.01501578837633133 2023-01-23 22:39:28.392340: step: 268/77, loss: 0.006420220248401165 2023-01-23 22:39:29.716016: step: 272/77, loss: 0.028947584331035614 2023-01-23 22:39:30.996601: step: 276/77, loss: 0.03736587241292 2023-01-23 22:39:32.305805: step: 280/77, loss: 0.0055717648938298225 2023-01-23 22:39:33.575650: step: 284/77, loss: 0.017412912100553513 2023-01-23 22:39:34.858055: step: 288/77, loss: 0.01638699881732464 2023-01-23 22:39:36.138818: step: 292/77, loss: 0.003690283978357911 2023-01-23 22:39:37.410229: step: 296/77, loss: 0.0013558377977460623 2023-01-23 22:39:38.710738: step: 300/77, loss: 0.018798088654875755 2023-01-23 22:39:40.008611: step: 304/77, loss: 0.004140047822147608 2023-01-23 22:39:41.268992: step: 308/77, loss: 0.007700146175920963 2023-01-23 22:39:42.525578: step: 312/77, loss: 0.017628833651542664 2023-01-23 22:39:43.786772: step: 316/77, loss: 0.016042888164520264 2023-01-23 22:39:45.070657: step: 320/77, loss: 0.01528505515307188 2023-01-23 22:39:46.388322: step: 324/77, loss: 0.02525682933628559 2023-01-23 22:39:47.718350: step: 328/77, loss: 0.02757682465016842 2023-01-23 22:39:49.004034: step: 332/77, loss: 0.10388769954442978 2023-01-23 22:39:50.335081: step: 336/77, loss: 0.012050546705722809 2023-01-23 22:39:51.658230: step: 340/77, loss: 0.07141268998384476 2023-01-23 22:39:52.953261: step: 344/77, loss: 0.01598818600177765 2023-01-23 22:39:54.260646: step: 348/77, loss: 0.017102017998695374 2023-01-23 22:39:55.519257: step: 352/77, loss: 0.0203529205173254 2023-01-23 22:39:56.741537: step: 356/77, loss: 0.05472177267074585 2023-01-23 22:39:58.026068: step: 360/77, loss: 0.10159629583358765 2023-01-23 22:39:59.318407: step: 364/77, loss: 0.011427942663431168 2023-01-23 22:40:00.577735: step: 368/77, loss: 0.047470469027757645 2023-01-23 22:40:01.921508: step: 372/77, loss: 0.00397944450378418 2023-01-23 22:40:03.241721: step: 376/77, loss: 0.030490266159176826 2023-01-23 22:40:04.548493: step: 380/77, loss: 0.02138015814125538 2023-01-23 22:40:05.847984: step: 384/77, loss: 0.049235884100198746 2023-01-23 22:40:07.142232: step: 388/77, loss: 0.013251261785626411 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9634146341463414, 'r': 0.6422764227642277, 'f1': 0.7707317073170733}, 'slot': {'p': 0.5588235294117647, 'r': 0.017272727272727273, 'f1': 0.03350970017636684}, 'combined': 0.025826988428614448, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 9} Test Korean: {'template': {'p': 0.9753086419753086, 'r': 0.6422764227642277, 'f1': 0.7745098039215687}, 'slot': {'p': 0.5757575757575758, 'r': 0.017272727272727273, 'f1': 
0.033539276257722864}, 'combined': 0.025976498278040258, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.9753086419753086, 'r': 0.6422764227642277, 'f1': 0.7745098039215687}, 'slot': {'p': 0.5757575757575758, 'r': 0.017272727272727273, 'f1': 0.033539276257722864}, 'combined': 0.025976498278040258, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:41:42.889118: step: 4/77, loss: 0.03768238425254822 2023-01-23 22:41:44.196775: step: 8/77, loss: 0.028346769511699677 2023-01-23 22:41:45.464851: step: 12/77, loss: 0.00744956498965621 2023-01-23 22:41:46.754769: step: 16/77, loss: 0.048924703150987625 2023-01-23 22:41:48.071914: step: 20/77, loss: 
0.03151417896151543 2023-01-23 22:41:49.356027: step: 24/77, loss: 0.002606735099107027 2023-01-23 22:41:50.599225: step: 28/77, loss: 0.0024922217708081007 2023-01-23 22:41:51.935832: step: 32/77, loss: 0.048660993576049805 2023-01-23 22:41:53.241182: step: 36/77, loss: 0.004283279180526733 2023-01-23 22:41:54.512297: step: 40/77, loss: 0.04208429902791977 2023-01-23 22:41:55.804980: step: 44/77, loss: 0.01696396991610527 2023-01-23 22:41:57.064470: step: 48/77, loss: 0.010859989561140537 2023-01-23 22:41:58.336540: step: 52/77, loss: 0.006402358412742615 2023-01-23 22:41:59.600074: step: 56/77, loss: 0.0019209606107324362 2023-01-23 22:42:00.920567: step: 60/77, loss: 0.0024111694656312466 2023-01-23 22:42:02.244956: step: 64/77, loss: 0.00038403054350055754 2023-01-23 22:42:03.587650: step: 68/77, loss: 0.0019636116921901703 2023-01-23 22:42:04.866275: step: 72/77, loss: 0.015139022842049599 2023-01-23 22:42:06.183406: step: 76/77, loss: 0.018672920763492584 2023-01-23 22:42:07.502279: step: 80/77, loss: 0.0020270387176424265 2023-01-23 22:42:08.739430: step: 84/77, loss: 0.009439261630177498 2023-01-23 22:42:10.031244: step: 88/77, loss: 0.008923723362386227 2023-01-23 22:42:11.267826: step: 92/77, loss: 0.00011983791046077386 2023-01-23 22:42:12.567493: step: 96/77, loss: 0.006050101015716791 2023-01-23 22:42:13.905819: step: 100/77, loss: 0.0179508775472641 2023-01-23 22:42:15.174543: step: 104/77, loss: 0.017849788069725037 2023-01-23 22:42:16.420091: step: 108/77, loss: 0.017486661672592163 2023-01-23 22:42:17.712825: step: 112/77, loss: 0.007766797207295895 2023-01-23 22:42:18.987365: step: 116/77, loss: 0.019981278106570244 2023-01-23 22:42:20.284104: step: 120/77, loss: 0.002379945944994688 2023-01-23 22:42:21.512410: step: 124/77, loss: 0.011582271195948124 2023-01-23 22:42:22.796007: step: 128/77, loss: 0.040890343487262726 2023-01-23 22:42:24.051891: step: 132/77, loss: 0.019269846379756927 2023-01-23 22:42:25.359219: step: 136/77, loss: 0.03777790069580078 2023-01-23 22:42:26.636103: step: 140/77, loss: 0.00588800897821784 2023-01-23 22:42:27.908516: step: 144/77, loss: 0.04820588603615761 2023-01-23 22:42:29.187426: step: 148/77, loss: 0.009185411036014557 2023-01-23 22:42:30.472938: step: 152/77, loss: 0.002547596348449588 2023-01-23 22:42:31.750903: step: 156/77, loss: 0.030245959758758545 2023-01-23 22:42:33.079532: step: 160/77, loss: 0.0031875655986368656 2023-01-23 22:42:34.325893: step: 164/77, loss: 0.053960613906383514 2023-01-23 22:42:35.625444: step: 168/77, loss: 0.009313058108091354 2023-01-23 22:42:36.903308: step: 172/77, loss: 0.03616435080766678 2023-01-23 22:42:38.226662: step: 176/77, loss: 0.006441659759730101 2023-01-23 22:42:39.583997: step: 180/77, loss: 0.012755388393998146 2023-01-23 22:42:40.850605: step: 184/77, loss: 0.01913641393184662 2023-01-23 22:42:42.183182: step: 188/77, loss: 0.011171862483024597 2023-01-23 22:42:43.452760: step: 192/77, loss: 0.05577773600816727 2023-01-23 22:42:44.737120: step: 196/77, loss: 0.014128005132079124 2023-01-23 22:42:45.997073: step: 200/77, loss: 0.0018707435810938478 2023-01-23 22:42:47.257969: step: 204/77, loss: 0.0009870977373793721 2023-01-23 22:42:48.495280: step: 208/77, loss: 0.015966808423399925 2023-01-23 22:42:49.805574: step: 212/77, loss: 0.003523885505273938 2023-01-23 22:42:51.083576: step: 216/77, loss: 0.0007100008078850806 2023-01-23 22:42:52.431324: step: 220/77, loss: 0.009846840053796768 2023-01-23 22:42:53.693623: step: 224/77, loss: 0.003518306650221348 2023-01-23 22:42:54.992693: 
step: 228/77, loss: 0.013546239584684372 2023-01-23 22:42:56.238855: step: 232/77, loss: 0.008653189055621624 2023-01-23 22:42:57.561307: step: 236/77, loss: 0.0033769761212170124 2023-01-23 22:42:58.855648: step: 240/77, loss: 0.009792322292923927 2023-01-23 22:43:00.128788: step: 244/77, loss: 0.07129838317632675 2023-01-23 22:43:01.426914: step: 248/77, loss: 0.5841067433357239 2023-01-23 22:43:02.693116: step: 252/77, loss: 0.0452253594994545 2023-01-23 22:43:03.980621: step: 256/77, loss: 0.014134548604488373 2023-01-23 22:43:05.294849: step: 260/77, loss: 0.007768142968416214 2023-01-23 22:43:06.546748: step: 264/77, loss: 0.006193614564836025 2023-01-23 22:43:07.886585: step: 268/77, loss: 0.035763513296842575 2023-01-23 22:43:09.159236: step: 272/77, loss: 0.0006083827465772629 2023-01-23 22:43:10.460966: step: 276/77, loss: 0.07475440204143524 2023-01-23 22:43:11.748239: step: 280/77, loss: 0.0016128707211464643 2023-01-23 22:43:13.055369: step: 284/77, loss: 0.0022124836686998606 2023-01-23 22:43:14.327376: step: 288/77, loss: 0.03743232414126396 2023-01-23 22:43:15.656249: step: 292/77, loss: 0.008901816792786121 2023-01-23 22:43:16.902080: step: 296/77, loss: 0.0004599709063768387 2023-01-23 22:43:18.195844: step: 300/77, loss: 0.019326256588101387 2023-01-23 22:43:19.498873: step: 304/77, loss: 0.05005129799246788 2023-01-23 22:43:20.781329: step: 308/77, loss: 0.021847281605005264 2023-01-23 22:43:22.035575: step: 312/77, loss: 0.01302673015743494 2023-01-23 22:43:23.275471: step: 316/77, loss: 0.03336362540721893 2023-01-23 22:43:24.572861: step: 320/77, loss: 0.035148534923791885 2023-01-23 22:43:25.853998: step: 324/77, loss: 0.02668309211730957 2023-01-23 22:43:27.110356: step: 328/77, loss: 0.010160962119698524 2023-01-23 22:43:28.364909: step: 332/77, loss: 0.018455471843481064 2023-01-23 22:43:29.668411: step: 336/77, loss: 0.0010307406773790717 2023-01-23 22:43:30.952101: step: 340/77, loss: 0.0395384207367897 2023-01-23 22:43:32.196258: step: 344/77, loss: 0.031024860218167305 2023-01-23 22:43:33.539720: step: 348/77, loss: 0.007934034802019596 2023-01-23 22:43:34.849232: step: 352/77, loss: 0.020924028009176254 2023-01-23 22:43:36.123716: step: 356/77, loss: 0.007340208627283573 2023-01-23 22:43:37.369506: step: 360/77, loss: 0.026930680498480797 2023-01-23 22:43:38.625023: step: 364/77, loss: 0.017858322709798813 2023-01-23 22:43:39.958054: step: 368/77, loss: 0.018936611711978912 2023-01-23 22:43:41.229715: step: 372/77, loss: 0.0034472201950848103 2023-01-23 22:43:42.564042: step: 376/77, loss: 0.03166472539305687 2023-01-23 22:43:43.855516: step: 380/77, loss: 0.0009081726893782616 2023-01-23 22:43:45.206759: step: 384/77, loss: 0.02587328478693962 2023-01-23 22:43:46.480435: step: 388/77, loss: 0.0031165271066129208 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9594594594594594, 'r': 0.5772357723577236, 'f1': 0.7208121827411168}, 'slot': {'p': 0.5483870967741935, 'r': 0.015454545454545455, 'f1': 0.030061892130857647}, 'combined': 0.021668978084171505, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: 
{'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5161290322580645, 'r': 0.014545454545454545, 'f1': 0.028293545534924847}, 'combined': 0.020209675382089173, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9594594594594594, 'r': 0.5772357723577236, 'f1': 0.7208121827411168}, 'slot': {'p': 0.5151515151515151, 'r': 0.015454545454545455, 'f1': 0.030008826125330977}, 'combined': 0.02163072746089847, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:45:21.791369: step: 4/77, loss: 0.00913262739777565 2023-01-23 22:45:23.033394: step: 
8/77, loss: 0.023702571168541908 2023-01-23 22:45:24.324274: step: 12/77, loss: 7.023927173577249e-05 2023-01-23 22:45:25.665845: step: 16/77, loss: 0.015598413534462452 2023-01-23 22:45:26.972296: step: 20/77, loss: 0.017305485904216766 2023-01-23 22:45:28.287224: step: 24/77, loss: 0.05371349677443504 2023-01-23 22:45:29.585010: step: 28/77, loss: 0.011336155235767365 2023-01-23 22:45:30.902009: step: 32/77, loss: 0.0017175760585814714 2023-01-23 22:45:32.158332: step: 36/77, loss: 0.00044242324656806886 2023-01-23 22:45:33.486416: step: 40/77, loss: 0.00024273883900605142 2023-01-23 22:45:34.763967: step: 44/77, loss: 0.03868889808654785 2023-01-23 22:45:36.051221: step: 48/77, loss: 0.004320107400417328 2023-01-23 22:45:37.336572: step: 52/77, loss: 0.00013211331679485738 2023-01-23 22:45:38.608706: step: 56/77, loss: 0.014553715474903584 2023-01-23 22:45:39.900127: step: 60/77, loss: 0.0039396206848323345 2023-01-23 22:45:41.198657: step: 64/77, loss: 0.008579259738326073 2023-01-23 22:45:42.493904: step: 68/77, loss: 0.02406681329011917 2023-01-23 22:45:43.832638: step: 72/77, loss: 0.026409517973661423 2023-01-23 22:45:45.105137: step: 76/77, loss: 0.02277933806180954 2023-01-23 22:45:46.407768: step: 80/77, loss: 0.014436050318181515 2023-01-23 22:45:47.719567: step: 84/77, loss: 0.012562789022922516 2023-01-23 22:45:49.025731: step: 88/77, loss: 0.00025714567163959146 2023-01-23 22:45:50.271874: step: 92/77, loss: 0.0011654815170913935 2023-01-23 22:45:51.559469: step: 96/77, loss: 7.128174183890224e-05 2023-01-23 22:45:52.814706: step: 100/77, loss: 0.0016908040270209312 2023-01-23 22:45:54.106480: step: 104/77, loss: 0.05296008661389351 2023-01-23 22:45:55.361736: step: 108/77, loss: 0.0006149305845610797 2023-01-23 22:45:56.589831: step: 112/77, loss: 0.07239086925983429 2023-01-23 22:45:57.874917: step: 116/77, loss: 0.005651980172842741 2023-01-23 22:45:59.092382: step: 120/77, loss: 0.025895126163959503 2023-01-23 22:46:00.379158: step: 124/77, loss: 0.03647073358297348 2023-01-23 22:46:01.645020: step: 128/77, loss: 3.863106030621566e-05 2023-01-23 22:46:02.950572: step: 132/77, loss: 0.009307922795414925 2023-01-23 22:46:04.211322: step: 136/77, loss: 0.0012862995499745011 2023-01-23 22:46:05.509897: step: 140/77, loss: 0.011451378464698792 2023-01-23 22:46:06.756867: step: 144/77, loss: 0.0032769562676548958 2023-01-23 22:46:08.059176: step: 148/77, loss: 0.005432584322988987 2023-01-23 22:46:09.304141: step: 152/77, loss: 0.024685127660632133 2023-01-23 22:46:10.597041: step: 156/77, loss: 0.013838456943631172 2023-01-23 22:46:11.860468: step: 160/77, loss: 0.0025978866033256054 2023-01-23 22:46:13.129604: step: 164/77, loss: 0.008826863020658493 2023-01-23 22:46:14.437336: step: 168/77, loss: 0.005616775713860989 2023-01-23 22:46:15.744871: step: 172/77, loss: 0.008297703228890896 2023-01-23 22:46:17.036793: step: 176/77, loss: 0.009488863870501518 2023-01-23 22:46:18.311243: step: 180/77, loss: 0.0032529172021895647 2023-01-23 22:46:19.589668: step: 184/77, loss: 0.0705006793141365 2023-01-23 22:46:20.868182: step: 188/77, loss: 0.008520006202161312 2023-01-23 22:46:22.170529: step: 192/77, loss: 0.0003739010135177523 2023-01-23 22:46:23.435305: step: 196/77, loss: 0.009315535426139832 2023-01-23 22:46:24.689797: step: 200/77, loss: 0.004182401578873396 2023-01-23 22:46:25.960577: step: 204/77, loss: 0.0474054291844368 2023-01-23 22:46:27.226934: step: 208/77, loss: 0.05022357776761055 2023-01-23 22:46:28.473145: step: 212/77, loss: 0.011805780231952667 2023-01-23 
22:46:29.725392: step: 216/77, loss: 0.062389228492975235 2023-01-23 22:46:31.037636: step: 220/77, loss: 0.06036899983882904 2023-01-23 22:46:32.317544: step: 224/77, loss: 0.0013627760345116258 2023-01-23 22:46:33.618946: step: 228/77, loss: 0.04754606634378433 2023-01-23 22:46:34.901674: step: 232/77, loss: 0.004517616704106331 2023-01-23 22:46:36.176483: step: 236/77, loss: 6.662487430730835e-05 2023-01-23 22:46:37.515388: step: 240/77, loss: 0.01598535105586052 2023-01-23 22:46:38.857157: step: 244/77, loss: 0.024257568642497063 2023-01-23 22:46:40.146914: step: 248/77, loss: 0.024137431755661964 2023-01-23 22:46:41.383605: step: 252/77, loss: 0.024435149505734444 2023-01-23 22:46:42.672861: step: 256/77, loss: 0.007032178808003664 2023-01-23 22:46:43.971253: step: 260/77, loss: 0.005066059995442629 2023-01-23 22:46:45.282835: step: 264/77, loss: 0.0007375497953034937 2023-01-23 22:46:46.574786: step: 268/77, loss: 0.0025754738599061966 2023-01-23 22:46:47.830150: step: 272/77, loss: 0.012038455344736576 2023-01-23 22:46:49.106260: step: 276/77, loss: 0.03843052312731743 2023-01-23 22:46:50.384041: step: 280/77, loss: 0.01894669234752655 2023-01-23 22:46:51.647074: step: 284/77, loss: 0.015208638273179531 2023-01-23 22:46:52.956335: step: 288/77, loss: 0.006000806577503681 2023-01-23 22:46:54.253318: step: 292/77, loss: 0.0017294568242505193 2023-01-23 22:46:55.524584: step: 296/77, loss: 0.0027547322679311037 2023-01-23 22:46:56.757296: step: 300/77, loss: 0.002827903488650918 2023-01-23 22:46:58.021314: step: 304/77, loss: 0.024109508842229843 2023-01-23 22:46:59.295537: step: 308/77, loss: 0.03593812137842178 2023-01-23 22:47:00.526285: step: 312/77, loss: 0.003940297290682793 2023-01-23 22:47:01.779592: step: 316/77, loss: 0.04253112152218819 2023-01-23 22:47:03.040605: step: 320/77, loss: 0.056184787303209305 2023-01-23 22:47:04.327191: step: 324/77, loss: 0.0005866457941010594 2023-01-23 22:47:05.591624: step: 328/77, loss: 0.0031070266850292683 2023-01-23 22:47:06.834046: step: 332/77, loss: 0.004228291101753712 2023-01-23 22:47:08.035823: step: 336/77, loss: 0.013488009572029114 2023-01-23 22:47:09.322571: step: 340/77, loss: 0.00031433472759090364 2023-01-23 22:47:10.627280: step: 344/77, loss: 0.01137051172554493 2023-01-23 22:47:11.911259: step: 348/77, loss: 0.02159648761153221 2023-01-23 22:47:13.160920: step: 352/77, loss: 0.0018602788913995028 2023-01-23 22:47:14.459243: step: 356/77, loss: 8.241426985478029e-05 2023-01-23 22:47:15.794940: step: 360/77, loss: 0.008178231306374073 2023-01-23 22:47:17.106976: step: 364/77, loss: 0.053727056831121445 2023-01-23 22:47:18.356609: step: 368/77, loss: 0.08346982300281525 2023-01-23 22:47:19.671874: step: 372/77, loss: 0.008330187760293484 2023-01-23 22:47:20.960241: step: 376/77, loss: 0.10757150501012802 2023-01-23 22:47:22.236200: step: 380/77, loss: 0.07443412393331528 2023-01-23 22:47:23.519676: step: 384/77, loss: 0.0016130568692460656 2023-01-23 22:47:24.832120: step: 388/77, loss: 0.03187939152121544 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9397590361445783, 'r': 0.6341463414634146, 'f1': 0.7572815533980584}, 'slot': {'p': 0.47619047619047616, 'r': 0.01818181818181818, 'f1': 0.0350262697022767}, 'combined': 0.026524747929879446, 'epoch': 
11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9397590361445783, 'r': 0.6341463414634146, 'f1': 0.7572815533980584}, 'slot': {'p': 0.46511627906976744, 'r': 0.01818181818181818, 'f1': 0.034995625546806644}, 'combined': 0.026501541676222512, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9397590361445783, 'r': 0.6341463414634146, 'f1': 0.7572815533980584}, 'slot': {'p': 0.46511627906976744, 'r': 0.01818181818181818, 'f1': 0.034995625546806644}, 'combined': 0.026501541676222512, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:49:00.375652: step: 4/77, loss: 0.026703253388404846 2023-01-23 22:49:01.624335: step: 8/77, loss: 6.608450348721817e-05 2023-01-23 22:49:02.851139: step: 12/77, loss: 0.0021594602148979902 2023-01-23 22:49:04.105013: step: 16/77, loss: 0.0006289744051173329 2023-01-23 22:49:05.380424: step: 20/77, loss: 0.018942205235362053 2023-01-23 22:49:06.694742: step: 24/77, loss: 0.013272013515233994 2023-01-23 22:49:07.954467: step: 28/77, loss: 0.01688517816364765 2023-01-23 22:49:09.226141: step: 32/77, loss: 0.01106287818402052 2023-01-23 22:49:10.484196: step: 36/77, loss: 0.032344695180654526 2023-01-23 22:49:11.773044: step: 40/77, loss: 0.0002201721363235265 2023-01-23 22:49:13.068642: step: 44/77, loss: 0.0014146651374176145 2023-01-23 22:49:14.328286: step: 48/77, loss: 0.011268083937466145 2023-01-23 22:49:15.652825: step: 52/77, loss: 0.01278699655085802 2023-01-23 22:49:16.972661: step: 56/77, loss: 0.012389368377625942 2023-01-23 22:49:18.196742: step: 60/77, loss: 0.02331053465604782 2023-01-23 22:49:19.429780: step: 64/77, loss: 0.01478915847837925 2023-01-23 22:49:20.717160: step: 68/77, loss: 0.006758468225598335 2023-01-23 22:49:22.001051: step: 72/77, loss: 0.00636525172740221 2023-01-23 22:49:23.307692: step: 76/77, loss: 0.0365261435508728 2023-01-23 22:49:24.610597: step: 80/77, loss: 0.007071449421346188 2023-01-23 22:49:25.862487: step: 84/77, loss: 0.0159720741212368 2023-01-23 22:49:27.191393: step: 88/77, loss: 3.108736564172432e-05 2023-01-23 22:49:28.477161: step: 92/77, loss: 0.002529098652303219 2023-01-23 22:49:29.770190: step: 96/77, loss: 0.01439334824681282 2023-01-23 22:49:31.013963: step: 100/77, loss: 0.01917685568332672 2023-01-23 22:49:32.316901: step: 104/77, loss: 0.004402280319482088 2023-01-23 22:49:33.577600: step: 108/77, loss: 0.004489831626415253 2023-01-23 22:49:34.865702: step: 112/77, loss: 0.04092923924326897 2023-01-23 22:49:36.113204: step: 116/77, loss: 0.039381761103868484 2023-01-23 22:49:37.338897: step: 120/77, loss: 0.013376251794397831 2023-01-23 22:49:38.630141: step: 124/77, loss: 0.0016149815637618303 2023-01-23 22:49:39.899008: step: 128/77, loss: 0.00484588835388422 2023-01-23 22:49:41.164546: step: 132/77, loss: 0.03189440071582794 2023-01-23 22:49:42.450999: step: 136/77, loss: 0.00017303042113780975 2023-01-23 22:49:43.773489: step: 140/77, loss: 0.00938489194959402 2023-01-23 22:49:45.061068: step: 144/77, loss: 0.01806403324007988 2023-01-23 22:49:46.345375: step: 148/77, loss: 1.3843055057805032e-06 2023-01-23 22:49:47.639709: step: 152/77, loss: 0.0012917781714349985 2023-01-23 22:49:48.908277: step: 156/77, loss: 0.00010156808275496587 2023-01-23 22:49:50.208595: step: 160/77, loss: 5.2645496907643974e-05 2023-01-23 22:49:51.472763: step: 164/77, loss: 0.018050380051136017 2023-01-23 22:49:52.782167: step: 168/77, loss: 0.0017785239033401012 2023-01-23 22:49:54.045237: step: 172/77, loss: 0.004214688669890165 2023-01-23 22:49:55.293604: step: 176/77, loss: 0.004522663075476885 2023-01-23 22:49:56.611293: step: 180/77, loss: 0.004108669701963663 2023-01-23 22:49:57.874178: step: 184/77, loss: 0.005246539134532213 2023-01-23 22:49:59.150304: step: 188/77, loss: 0.03378375247120857 2023-01-23 22:50:00.433238: step: 192/77, loss: 0.01473064161837101 2023-01-23 22:50:01.719838: step: 196/77, loss: 0.12296944111585617 2023-01-23 22:50:03.022571: step: 200/77, loss: 0.038474295288324356 
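The command line repeated at each epoch header sets --batch_size 10 with --accumulate_step 4, i.e. an effective batch of 40 examples per optimizer update, and one loss entry is printed per four-step increment. A minimal sketch of that update pattern under a standard PyTorch loop; the HF-style `model(**batch).loss` and everything else here are assumptions, not taken from the actual train.py.

```python
def train_epoch(model, loader, optimizer, accumulate_step=4):
    """Gradient accumulation: step the optimizer once per `accumulate_step` micro-batches."""
    model.train()
    optimizer.zero_grad()
    for i, batch in enumerate(loader, start=1):
        loss = model(**batch).loss / accumulate_step  # scale so accumulated grads average
        loss.backward()                               # gradients accumulate across micro-batches
        if i % accumulate_step == 0:
            optimizer.step()
            optimizer.zero_grad()
            # a "step: i/..., loss: ..." line would be printed at this cadence;
            # remainder micro-batches at epoch end are ignored here for brevity
```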
2023-01-23 22:50:04.338804: step: 204/77, loss: 0.027129890397191048 2023-01-23 22:50:05.660734: step: 208/77, loss: 2.7496336770127527e-05 2023-01-23 22:50:06.953646: step: 212/77, loss: 0.0018721886444836855 2023-01-23 22:50:08.232484: step: 216/77, loss: 7.349484076257795e-05 2023-01-23 22:50:09.547192: step: 220/77, loss: 0.028560999780893326 2023-01-23 22:50:10.825127: step: 224/77, loss: 0.00273231347091496 2023-01-23 22:50:12.110624: step: 228/77, loss: 0.017860732972621918 2023-01-23 22:50:13.367063: step: 232/77, loss: 0.0005846356507390738 2023-01-23 22:50:14.679130: step: 236/77, loss: 0.016033103689551353 2023-01-23 22:50:15.973013: step: 240/77, loss: 5.783687447546981e-05 2023-01-23 22:50:17.306872: step: 244/77, loss: 0.012645246461033821 2023-01-23 22:50:18.583240: step: 248/77, loss: 0.01751253381371498 2023-01-23 22:50:19.850493: step: 252/77, loss: 0.0007793352706357837 2023-01-23 22:50:21.185754: step: 256/77, loss: 0.034364163875579834 2023-01-23 22:50:22.415486: step: 260/77, loss: 0.010526706464588642 2023-01-23 22:50:23.698505: step: 264/77, loss: 0.008218883536756039 2023-01-23 22:50:24.944191: step: 268/77, loss: 0.023118719458580017 2023-01-23 22:50:26.241529: step: 272/77, loss: 0.0068781073205173016 2023-01-23 22:50:27.570501: step: 276/77, loss: 0.004687698557972908 2023-01-23 22:50:28.895021: step: 280/77, loss: 0.052164994180202484 2023-01-23 22:50:30.168749: step: 284/77, loss: 0.0036531335208564997 2023-01-23 22:50:31.481528: step: 288/77, loss: 0.019436553120613098 2023-01-23 22:50:32.737145: step: 292/77, loss: 0.00011274051939835772 2023-01-23 22:50:34.033969: step: 296/77, loss: 0.01647758297622204 2023-01-23 22:50:35.302742: step: 300/77, loss: 0.009985197335481644 2023-01-23 22:50:36.645451: step: 304/77, loss: 0.0005102159921079874 2023-01-23 22:50:37.954298: step: 308/77, loss: 0.007778570055961609 2023-01-23 22:50:39.220578: step: 312/77, loss: 6.139573088148609e-05 2023-01-23 22:50:40.499711: step: 316/77, loss: 7.233645737869665e-05 2023-01-23 22:50:41.792039: step: 320/77, loss: 0.0017407573759555817 2023-01-23 22:50:43.110799: step: 324/77, loss: 0.011609593406319618 2023-01-23 22:50:44.384042: step: 328/77, loss: 0.00018634925072547048 2023-01-23 22:50:45.659930: step: 332/77, loss: 0.003662088653072715 2023-01-23 22:50:46.957973: step: 336/77, loss: 0.00013480721099767834 2023-01-23 22:50:48.276685: step: 340/77, loss: 0.02257470041513443 2023-01-23 22:50:49.601286: step: 344/77, loss: 0.04621454328298569 2023-01-23 22:50:50.905973: step: 348/77, loss: 0.0009833829244598746 2023-01-23 22:50:52.191718: step: 352/77, loss: 0.0029590395279228687 2023-01-23 22:50:53.466116: step: 356/77, loss: 0.02674620971083641 2023-01-23 22:50:54.785011: step: 360/77, loss: 0.019447756931185722 2023-01-23 22:50:56.039832: step: 364/77, loss: 0.0001418082683812827 2023-01-23 22:50:57.341257: step: 368/77, loss: 0.014620725065469742 2023-01-23 22:50:58.614413: step: 372/77, loss: 0.026280973106622696 2023-01-23 22:50:59.888847: step: 376/77, loss: 0.00032260813168250024 2023-01-23 22:51:01.141119: step: 380/77, loss: 0.0002889384631998837 2023-01-23 22:51:02.426305: step: 384/77, loss: 0.009093235246837139 2023-01-23 22:51:03.701998: step: 388/77, loss: 0.0010739528806880116 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} 
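The evaluation dictionaries are internally consistent: each f1 is the usual harmonic mean of p and r, and each combined value equals template f1 times slot f1 (e.g. 0.7368421052631579 x 0.07029876977152899 = 0.05179909351586346, the Dev Chinese figure just above). A small check of that relationship; the function names are illustrative and the product rule for combined is inferred from the numbers, not from the source code.

```python
def f1(p, r):
    """Harmonic mean of precision and recall (0 when both are 0)."""
    return 2 * p * r / (p + r) if p + r > 0 else 0.0

def combined(template, slot):
    """Combined score as it appears in the log: template F1 * slot F1 (inferred)."""
    return template["f1"] * slot["f1"]

# Reproduce the epoch-12 Dev Chinese line:
template = {"p": 1.0, "r": 0.5833333333333334}
template["f1"] = f1(template["p"], template["r"])  # 0.7368421052631579
slot = {"p": 0.5, "r": 0.03780718336483932}
slot["f1"] = f1(slot["p"], slot["r"])              # 0.07029876977152899
print(combined(template, slot))                    # ~0.05179909351586346
```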
Test Chinese: {'template': {'p': 0.9863013698630136, 'r': 0.5853658536585366, 'f1': 0.7346938775510202}, 'slot': {'p': 0.4878048780487805, 'r': 0.01818181818181818, 'f1': 0.035056967572304996}, 'combined': 0.025756139440877134, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Korean: {'template': {'p': 0.9863013698630136, 'r': 0.5853658536585366, 'f1': 0.7346938775510202}, 'slot': {'p': 0.5, 'r': 0.019090909090909092, 'f1': 0.03677758318739055}, 'combined': 0.027020265198899173, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.9863013698630136, 'r': 0.5853658536585366, 'f1': 0.7346938775510202}, 'slot': {'p': 0.47619047619047616, 'r': 0.01818181818181818, 'f1': 0.0350262697022767}, 'combined': 0.02573358590371349, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:52:39.251304: step: 4/77, loss: 2.2610862288274802e-05 2023-01-23 22:52:40.522726: step: 8/77, loss: 0.005637861788272858 2023-01-23 22:52:41.807037: step: 12/77, loss: 0.003688807599246502 2023-01-23 22:52:43.098214: step: 16/77, loss: 0.02665168233215809 2023-01-23 22:52:44.387683: step: 20/77, loss: 0.016970016062259674 2023-01-23 22:52:45.701716: step: 24/77, loss: 1.4817988812865224e-05 2023-01-23 22:52:46.998742: step: 28/77, loss: 6.925305933691561e-05 2023-01-23 22:52:48.287947: step: 32/77, loss: 0.015504427254199982 2023-01-23 22:52:49.519855: step: 36/77, loss: 5.249499008641578e-05 2023-01-23 22:52:50.770179: step: 40/77, loss: 0.008829350583255291 2023-01-23 22:52:51.964725: step: 44/77, loss: 2.025206595135387e-05 2023-01-23 22:52:53.229313: step: 48/77, loss: 4.0971288399305195e-05 2023-01-23 22:52:54.562695: step: 52/77, loss: 0.021271025761961937 2023-01-23 22:52:55.873477: step: 56/77, loss: 0.0010673669166862965 2023-01-23 22:52:57.161095: step: 60/77, loss: 0.0002164973266189918 2023-01-23 22:52:58.423545: step: 64/77, loss: 0.06440609693527222 2023-01-23 22:52:59.709777: step: 68/77, loss: 0.03388819843530655 2023-01-23 22:53:00.986813: step: 72/77, loss: 0.014041759073734283 2023-01-23 22:53:02.294427: step: 76/77, loss: 0.023477237671613693 2023-01-23 22:53:03.579234: step: 80/77, loss: 0.0043355543166399 2023-01-23 22:53:04.821529: step: 84/77, loss: 0.007156530395150185 2023-01-23 22:53:06.083000: step: 88/77, loss: 0.00291500985622406 2023-01-23 22:53:07.372965: step: 92/77, loss: 0.007723457179963589 2023-01-23 22:53:08.606017: step: 96/77, loss: 0.00468220841139555 2023-01-23 22:53:09.861927: step: 100/77, loss: 5.247871013125405e-05 2023-01-23 22:53:11.128418: step: 104/77, loss: 4.394134066387778e-06 2023-01-23 22:53:12.417871: step: 108/77, loss: 6.272006430663168e-05 2023-01-23 22:53:13.683211: step: 112/77, loss: 0.00835472159087658 2023-01-23 22:53:14.917602: step: 116/77, loss: 0.05480552464723587 2023-01-23 22:53:16.229792: step: 120/77, loss: 0.022276371717453003 2023-01-23 22:53:17.580046: step: 124/77, loss: 0.002669721841812134 2023-01-23 22:53:18.870532: step: 128/77, loss: 0.09122525900602341 2023-01-23 22:53:20.192502: step: 132/77, loss: 0.010938310995697975 2023-01-23 22:53:21.488575: step: 136/77, loss: 4.7641540732001886e-05 2023-01-23 22:53:22.817122: step: 140/77, loss: 8.525445446139202e-05 2023-01-23 22:53:24.093097: step: 144/77, loss: 0.008205310441553593 2023-01-23 22:53:25.403543: step: 148/77, loss: 0.02473701536655426 2023-01-23 22:53:26.687842: step: 152/77, loss: 0.025928953662514687 2023-01-23 22:53:27.948673: step: 156/77, loss: 0.0324617400765419 2023-01-23 22:53:29.223379: step: 160/77, loss: 0.0030810926109552383 2023-01-23 22:53:30.568822: step: 164/77, loss: 0.022286182269454002 2023-01-23 22:53:31.858872: step: 168/77, loss: 0.021500881761312485 2023-01-23 22:53:33.185432: step: 172/77, loss: 0.009073024615645409 2023-01-23 22:53:34.446044: step: 176/77, loss: 0.0035487054847180843 2023-01-23 22:53:35.745460: step: 180/77, loss: 0.002166708232834935 2023-01-23 22:53:37.031427: step: 184/77, loss: 0.01816055364906788 2023-01-23 22:53:38.339742: step: 
188/77, loss: 0.05229934677481651 2023-01-23 22:53:39.640240: step: 192/77, loss: 0.004905913025140762 2023-01-23 22:53:40.888779: step: 196/77, loss: 0.04279464855790138 2023-01-23 22:53:42.158922: step: 200/77, loss: 1.9669514017550682e-07 2023-01-23 22:53:43.459870: step: 204/77, loss: 0.025499440729618073 2023-01-23 22:53:44.734559: step: 208/77, loss: 0.009133759886026382 2023-01-23 22:53:46.060793: step: 212/77, loss: 0.011703042313456535 2023-01-23 22:53:47.352265: step: 216/77, loss: 0.0017015681369230151 2023-01-23 22:53:48.647527: step: 220/77, loss: 0.03688368201255798 2023-01-23 22:53:49.923797: step: 224/77, loss: 0.03645056113600731 2023-01-23 22:53:51.206230: step: 228/77, loss: 0.022262584418058395 2023-01-23 22:53:52.493984: step: 232/77, loss: 0.00811818242073059 2023-01-23 22:53:53.812181: step: 236/77, loss: 0.05509474128484726 2023-01-23 22:53:55.083963: step: 240/77, loss: 0.08739019930362701 2023-01-23 22:53:56.368693: step: 244/77, loss: 0.03550776094198227 2023-01-23 22:53:57.659303: step: 248/77, loss: 0.0076720230281353 2023-01-23 22:53:58.938341: step: 252/77, loss: 0.03817453980445862 2023-01-23 22:54:00.267228: step: 256/77, loss: 0.00017829393618740141 2023-01-23 22:54:01.585071: step: 260/77, loss: 1.9056222299695946e-05 2023-01-23 22:54:02.856221: step: 264/77, loss: 0.040975864976644516 2023-01-23 22:54:04.170195: step: 268/77, loss: 0.0011579713318496943 2023-01-23 22:54:05.415071: step: 272/77, loss: 0.025947313755750656 2023-01-23 22:54:06.656771: step: 276/77, loss: 0.0001307231286773458 2023-01-23 22:54:07.904452: step: 280/77, loss: 0.03271069750189781 2023-01-23 22:54:09.183393: step: 284/77, loss: 0.015176949091255665 2023-01-23 22:54:10.451087: step: 288/77, loss: 0.0007837112061679363 2023-01-23 22:54:11.755053: step: 292/77, loss: 0.05998732149600983 2023-01-23 22:54:13.061437: step: 296/77, loss: 0.022658394649624825 2023-01-23 22:54:14.303435: step: 300/77, loss: 0.007944842800498009 2023-01-23 22:54:15.599681: step: 304/77, loss: 0.03138115629553795 2023-01-23 22:54:16.905840: step: 308/77, loss: 0.003645453369244933 2023-01-23 22:54:18.182580: step: 312/77, loss: 0.0005130755598656833 2023-01-23 22:54:19.477837: step: 316/77, loss: 0.008485173806548119 2023-01-23 22:54:20.747563: step: 320/77, loss: 8.350548159796745e-05 2023-01-23 22:54:22.025853: step: 324/77, loss: 0.014045147225260735 2023-01-23 22:54:23.288052: step: 328/77, loss: 0.002589387120679021 2023-01-23 22:54:24.577951: step: 332/77, loss: 1.4230943634174764e-05 2023-01-23 22:54:25.902172: step: 336/77, loss: 0.00667186314240098 2023-01-23 22:54:27.176206: step: 340/77, loss: 0.01891062594950199 2023-01-23 22:54:28.470540: step: 344/77, loss: 0.04488614574074745 2023-01-23 22:54:29.759019: step: 348/77, loss: 0.019665377214550972 2023-01-23 22:54:31.090568: step: 352/77, loss: 0.007895408198237419 2023-01-23 22:54:32.419091: step: 356/77, loss: 0.001936072949320078 2023-01-23 22:54:33.722988: step: 360/77, loss: 0.036314986646175385 2023-01-23 22:54:34.974648: step: 364/77, loss: 0.002707503968849778 2023-01-23 22:54:36.235267: step: 368/77, loss: 0.006123952567577362 2023-01-23 22:54:37.526094: step: 372/77, loss: 0.05331498384475708 2023-01-23 22:54:38.840040: step: 376/77, loss: 0.009307156316936016 2023-01-23 22:54:40.110641: step: 380/77, loss: 0.00793666671961546 2023-01-23 22:54:41.395938: step: 384/77, loss: 0.07914161682128906 2023-01-23 22:54:42.734244: step: 388/77, loss: 0.04725559428334236 ================================================== Loss: 0.018 
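Two details of the training loop can be read off the step lines above: the step counter advances by 4 per entry, matching --accumulate_step 4 from the command line (so with --batch_size 10, each optimizer update sees an effective batch of about 40 examples), and the end-of-epoch "Loss" line is plausibly the mean of the logged step losses. A hypothetical sketch of such a loop; the function signature and the assumption that the model's forward pass returns the loss are mine, not train.py's:

def train_epoch(model, loader, optimizer, accumulate_step=4):
    model.train()
    step_losses = []
    for i, batch in enumerate(loader, start=1):
        loss = model(**batch)                # assumes forward() returns the loss
        (loss / accumulate_step).backward()  # scale so accumulated grads average
        if i % accumulate_step == 0:         # fires at steps 4, 8, ..., 388
            optimizer.step()
            optimizer.zero_grad()
            step_losses.append(loss.item())
            # the fixed "/77" denominator is copied from the log as-is;
            # its meaning is not recoverable from the log alone
            print(f"step: {i}/77, loss: {loss.item()}")
    # plausibly the source of the end-of-epoch summary line
    print(f"Loss: {sum(step_losses) / len(step_losses):.3f}")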
-------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.95, 'r': 0.6178861788617886, 'f1': 0.748768472906404}, 'slot': {'p': 0.5, 'r': 0.025454545454545455, 'f1': 0.04844290657439446}, 'combined': 0.036272521178856945, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.95, 'r': 0.6178861788617886, 'f1': 0.748768472906404}, 'slot': {'p': 0.49122807017543857, 'r': 0.025454545454545455, 'f1': 0.0484010371650821}, 'combined': 0.036241170685184634, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.95, 'r': 0.6178861788617886, 'f1': 0.748768472906404}, 'slot': {'p': 0.5087719298245614, 'r': 0.026363636363636363, 'f1': 0.05012964563526361}, 'combined': 0.037535498209655516, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 
'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:56:18.657458: step: 4/77, loss: 0.004430257249623537 2023-01-23 22:56:19.964421: step: 8/77, loss: 0.0018078071298077703 2023-01-23 22:56:21.207279: step: 12/77, loss: 0.0023698084987699986 2023-01-23 22:56:22.471968: step: 16/77, loss: 0.015932057052850723 2023-01-23 22:56:23.785956: step: 20/77, loss: 0.00013169035082682967 2023-01-23 22:56:25.015543: step: 24/77, loss: 0.0037448059301823378 2023-01-23 22:56:26.304093: step: 28/77, loss: 0.03657084330916405 2023-01-23 22:56:27.531609: step: 32/77, loss: 0.04973209649324417 2023-01-23 22:56:28.816452: step: 36/77, loss: 0.026404602453112602 2023-01-23 22:56:30.101701: step: 40/77, loss: 0.00232500908896327 2023-01-23 22:56:31.361753: step: 44/77, loss: 0.01054262463003397 2023-01-23 22:56:32.587260: step: 48/77, loss: 0.008980286307632923 2023-01-23 22:56:33.846639: step: 52/77, loss: 0.01709909178316593 2023-01-23 22:56:35.118758: step: 56/77, loss: 0.0010604806011542678 2023-01-23 22:56:36.417917: step: 60/77, loss: 0.0008716561715118587 2023-01-23 22:56:37.657873: step: 64/77, loss: 0.0012432102812454104 2023-01-23 22:56:38.970313: step: 68/77, loss: 0.05664519965648651 2023-01-23 22:56:40.267475: step: 72/77, loss: 0.06241287291049957 2023-01-23 22:56:41.545004: step: 76/77, loss: 0.00022028060629963875 2023-01-23 22:56:42.822736: step: 80/77, loss: 0.08272923529148102 2023-01-23 22:56:44.138347: step: 84/77, loss: 0.010699925944209099 2023-01-23 22:56:45.397472: step: 88/77, loss: 0.05161707103252411 2023-01-23 22:56:46.687426: step: 92/77, loss: 0.005692289210855961 2023-01-23 22:56:48.020846: step: 96/77, loss: 0.0022551261354237795 2023-01-23 22:56:49.300656: step: 100/77, loss: 0.0010835323482751846 2023-01-23 22:56:50.573469: step: 104/77, loss: 0.00023391967988573015 2023-01-23 22:56:51.849694: step: 108/77, loss: 0.04434645548462868 2023-01-23 22:56:53.153234: step: 112/77, loss: 0.013929794542491436 2023-01-23 22:56:54.459158: step: 116/77, loss: 0.012213967740535736 2023-01-23 22:56:55.760060: step: 120/77, loss: 0.04906335845589638 2023-01-23 22:56:57.081871: step: 124/77, loss: 0.004624365828931332 2023-01-23 22:56:58.352389: step: 128/77, loss: 0.0008969166083261371 2023-01-23 22:56:59.670422: step: 132/77, loss: 0.041873492300510406 2023-01-23 22:57:00.953811: step: 136/77, loss: 0.0015139449387788773 2023-01-23 22:57:02.170592: step: 140/77, loss: 0.008322593756020069 2023-01-23 22:57:03.491789: step: 144/77, loss: 0.03902991861104965 2023-01-23 22:57:04.803470: step: 148/77, loss: 0.016263600438833237 2023-01-23 22:57:06.122737: step: 152/77, loss: 0.009588202461600304 2023-01-23 22:57:07.398546: step: 156/77, loss: 0.00011158635606989264 2023-01-23 22:57:08.662453: step: 160/77, loss: 0.020438387989997864 2023-01-23 22:57:09.955328: step: 164/77, loss: 0.04943925142288208 2023-01-23 22:57:11.225703: step: 168/77, loss: 0.008242144249379635 2023-01-23 22:57:12.514224: step: 172/77, loss: 0.008970575407147408 2023-01-23 22:57:13.767299: step: 176/77, loss: 3.817386459559202e-05 
2023-01-23 22:57:15.056055: step: 180/77, loss: 0.04439555108547211 2023-01-23 22:57:16.361230: step: 184/77, loss: 0.007772160694003105 2023-01-23 22:57:17.628826: step: 188/77, loss: 0.0020781750790774822 2023-01-23 22:57:18.960427: step: 192/77, loss: 1.6847297956701368e-05 2023-01-23 22:57:20.224478: step: 196/77, loss: 0.021913882344961166 2023-01-23 22:57:21.552632: step: 200/77, loss: 0.0016695652157068253 2023-01-23 22:57:22.867302: step: 204/77, loss: 0.007512289099395275 2023-01-23 22:57:24.167127: step: 208/77, loss: 0.0001379475143039599 2023-01-23 22:57:25.443647: step: 212/77, loss: 0.012302246876060963 2023-01-23 22:57:26.698563: step: 216/77, loss: 0.0013374080881476402 2023-01-23 22:57:27.961010: step: 220/77, loss: 0.0029978842940181494 2023-01-23 22:57:29.215337: step: 224/77, loss: 0.012731088325381279 2023-01-23 22:57:30.502812: step: 228/77, loss: 0.0037896474823355675 2023-01-23 22:57:31.805574: step: 232/77, loss: 0.007226157002151012 2023-01-23 22:57:33.079307: step: 236/77, loss: 0.010333065874874592 2023-01-23 22:57:34.345758: step: 240/77, loss: 0.025543566793203354 2023-01-23 22:57:35.586406: step: 244/77, loss: 0.003890481311827898 2023-01-23 22:57:36.913400: step: 248/77, loss: 0.0061135608702898026 2023-01-23 22:57:38.162818: step: 252/77, loss: 0.0002315741148777306 2023-01-23 22:57:39.435571: step: 256/77, loss: 0.022473495453596115 2023-01-23 22:57:40.724217: step: 260/77, loss: 0.0070287445560097694 2023-01-23 22:57:41.994172: step: 264/77, loss: 0.0042348294518888 2023-01-23 22:57:43.275532: step: 268/77, loss: 0.008537882007658482 2023-01-23 22:57:44.604123: step: 272/77, loss: 0.00023712392430752516 2023-01-23 22:57:45.879773: step: 276/77, loss: 0.016111375764012337 2023-01-23 22:57:47.186359: step: 280/77, loss: 0.023547686636447906 2023-01-23 22:57:48.532224: step: 284/77, loss: 0.09503351151943207 2023-01-23 22:57:49.803416: step: 288/77, loss: 7.513206946896389e-05 2023-01-23 22:57:51.102703: step: 292/77, loss: 0.004765697754919529 2023-01-23 22:57:52.404386: step: 296/77, loss: 0.003875546855852008 2023-01-23 22:57:53.713675: step: 300/77, loss: 0.0038535785861313343 2023-01-23 22:57:55.016814: step: 304/77, loss: 0.003971458412706852 2023-01-23 22:57:56.345588: step: 308/77, loss: 0.020400844514369965 2023-01-23 22:57:57.620631: step: 312/77, loss: 0.00289251864887774 2023-01-23 22:57:58.970314: step: 316/77, loss: 0.0013335293624550104 2023-01-23 22:58:00.277441: step: 320/77, loss: 6.220525392564014e-06 2023-01-23 22:58:01.571456: step: 324/77, loss: 0.00018553411064203829 2023-01-23 22:58:02.935282: step: 328/77, loss: 0.02815510518848896 2023-01-23 22:58:04.220334: step: 332/77, loss: 1.5467263665414066e-06 2023-01-23 22:58:05.478088: step: 336/77, loss: 0.022374749183654785 2023-01-23 22:58:06.744669: step: 340/77, loss: 6.251667946344241e-05 2023-01-23 22:58:08.022414: step: 344/77, loss: 0.0003971634723711759 2023-01-23 22:58:09.319209: step: 348/77, loss: 0.048008985817432404 2023-01-23 22:58:10.624140: step: 352/77, loss: 0.06184585392475128 2023-01-23 22:58:11.907548: step: 356/77, loss: 0.000282044435152784 2023-01-23 22:58:13.207874: step: 360/77, loss: 0.000977915246039629 2023-01-23 22:58:14.461566: step: 364/77, loss: 0.055823519825935364 2023-01-23 22:58:15.791704: step: 368/77, loss: 0.0415327325463295 2023-01-23 22:58:17.087013: step: 372/77, loss: 0.08297859877347946 2023-01-23 22:58:18.396051: step: 376/77, loss: 0.016023965552449226 2023-01-23 22:58:19.637672: step: 380/77, loss: 0.01686960645020008 2023-01-23 
22:58:20.964011: step: 384/77, loss: 0.0008093234500847757 2023-01-23 22:58:22.234927: step: 388/77, loss: 0.04213680326938629 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 14} Test Chinese: {'template': {'p': 0.9240506329113924, 'r': 0.5934959349593496, 'f1': 0.7227722772277229}, 'slot': {'p': 0.43636363636363634, 'r': 0.02181818181818182, 'f1': 0.041558441558441565}, 'combined': 0.030037289443230045, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 14} Test Korean: {'template': {'p': 0.925, 'r': 0.6016260162601627, 'f1': 0.7290640394088671}, 'slot': {'p': 0.43636363636363634, 'r': 0.02181818181818182, 'f1': 0.041558441558441565}, 'combined': 0.03029876527413474, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 14} Test Russian: {'template': {'p': 0.925, 'r': 0.6016260162601627, 'f1': 0.7290640394088671}, 'slot': {'p': 0.43636363636363634, 'r': 0.02181818181818182, 'f1': 0.041558441558441565}, 'combined': 0.03029876527413474, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 
0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:59:57.731686: step: 4/77, loss: 0.0011530212359502912 2023-01-23 22:59:59.018565: step: 8/77, loss: 0.006455153692513704 2023-01-23 23:00:00.290129: step: 12/77, loss: 0.03983420133590698 2023-01-23 23:00:01.595095: step: 16/77, loss: 0.011238010600209236 2023-01-23 23:00:02.875310: step: 20/77, loss: 0.0026525515131652355 2023-01-23 23:00:04.189852: step: 24/77, loss: 0.012394571676850319 2023-01-23 23:00:05.492206: step: 28/77, loss: 0.01089528203010559 2023-01-23 23:00:06.760862: step: 32/77, loss: 0.002410109620541334 2023-01-23 23:00:08.040718: step: 36/77, loss: 0.07191843539476395 2023-01-23 23:00:09.318868: step: 40/77, loss: 0.04111922159790993 2023-01-23 23:00:10.546523: step: 44/77, loss: 0.006325423717498779 2023-01-23 23:00:11.816327: step: 48/77, loss: 0.0018762395484372973 2023-01-23 23:00:13.142589: step: 52/77, loss: 0.016316326335072517 2023-01-23 23:00:14.426158: step: 56/77, loss: 0.021863887086510658 2023-01-23 23:00:15.720644: step: 60/77, loss: 0.004629537463188171 2023-01-23 23:00:17.011810: step: 64/77, loss: 0.05812714248895645 2023-01-23 23:00:18.339609: step: 68/77, loss: 0.0006646717665717006 2023-01-23 23:00:19.596753: step: 72/77, loss: 0.025584321469068527 2023-01-23 23:00:20.844297: step: 76/77, loss: 0.005306669045239687 2023-01-23 23:00:22.103515: step: 80/77, loss: 0.00021504539472516626 2023-01-23 23:00:23.381639: step: 84/77, loss: 0.03863883763551712 2023-01-23 23:00:24.616807: step: 88/77, loss: 0.013125048018991947 2023-01-23 23:00:25.930321: step: 92/77, loss: 0.0073471637442708015 2023-01-23 23:00:27.216902: step: 96/77, loss: 0.0024326976854354143 2023-01-23 23:00:28.524288: step: 100/77, loss: 0.014627272263169289 2023-01-23 23:00:29.784053: step: 104/77, loss: 0.03934137895703316 2023-01-23 23:00:31.097316: step: 108/77, loss: 0.0004098295175936073 2023-01-23 23:00:32.369814: step: 112/77, loss: 0.0012081761378794909 2023-01-23 23:00:33.632423: step: 116/77, loss: 0.008219428360462189 2023-01-23 23:00:34.910630: step: 120/77, loss: 0.0010092520387843251 2023-01-23 23:00:36.122018: step: 124/77, loss: 0.02881784364581108 2023-01-23 23:00:37.371059: step: 128/77, loss: 0.00022574173635803163 2023-01-23 23:00:38.659464: step: 132/77, loss: 0.014033297076821327 2023-01-23 23:00:39.917031: step: 136/77, loss: 0.00023875743499957025 2023-01-23 23:00:41.198735: step: 140/77, loss: 0.012619758024811745 2023-01-23 23:00:42.480154: step: 144/77, loss: 0.007678681518882513 2023-01-23 23:00:43.727946: step: 148/77, loss: 0.0004265752504579723 2023-01-23 23:00:45.009634: step: 152/77, loss: 0.047454044222831726 2023-01-23 23:00:46.269786: step: 156/77, loss: 0.0020707775838673115 2023-01-23 23:00:47.558667: step: 160/77, loss: 3.5891003790311515e-05 2023-01-23 23:00:48.898502: step: 164/77, loss: 0.014533540233969688 2023-01-23 23:00:50.123338: step: 168/77, loss: 0.0010304294992238283 
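An aside on the recurring "Current best result" blocks: they keep pointing at epoch 5 even though later epochs tie its dev 'combined' score exactly (epochs 12 and 13 both reproduce 0.05179909351586346), and even though epoch 13's Test Chinese combined (0.0363) beats epoch 5's (0.0311). Both observations are consistent with selecting on the dev score using a strictly-greater comparison. An illustrative tracker, not taken from train.py:

best = {"combined": float("-inf"), "epoch": None}

def maybe_update_best(dev_result):
    """Keep the earliest epoch that achieves the highest dev combined score."""
    global best
    if dev_result["combined"] > best["combined"]:  # '>' so a tie never displaces epoch 5
        best = dev_result
        return True
    return False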
2023-01-23 23:00:51.382774: step: 172/77, loss: 0.0004204391734674573 2023-01-23 23:00:52.658376: step: 176/77, loss: 0.0028548361733555794 2023-01-23 23:00:53.925962: step: 180/77, loss: 0.0018736727070063353 2023-01-23 23:00:55.203445: step: 184/77, loss: 5.218083970248699e-06 2023-01-23 23:00:56.473462: step: 188/77, loss: 0.012196572497487068 2023-01-23 23:00:57.710595: step: 192/77, loss: 0.03711218386888504 2023-01-23 23:00:59.007639: step: 196/77, loss: 0.019275743514299393 2023-01-23 23:01:00.296042: step: 200/77, loss: 0.001391243189573288 2023-01-23 23:01:01.582325: step: 204/77, loss: 0.004645006265491247 2023-01-23 23:01:02.853249: step: 208/77, loss: 0.00014529861800838262 2023-01-23 23:01:04.126046: step: 212/77, loss: 0.0007189132156781852 2023-01-23 23:01:05.405360: step: 216/77, loss: 0.0036826361902058125 2023-01-23 23:01:06.727489: step: 220/77, loss: 0.016211818903684616 2023-01-23 23:01:08.037190: step: 224/77, loss: 0.008424910716712475 2023-01-23 23:01:09.303986: step: 228/77, loss: 0.005087016150355339 2023-01-23 23:01:10.600886: step: 232/77, loss: 0.0021495078690350056 2023-01-23 23:01:11.865202: step: 236/77, loss: 0.016692375764250755 2023-01-23 23:01:13.154495: step: 240/77, loss: 0.0010836259461939335 2023-01-23 23:01:14.450734: step: 244/77, loss: 8.891993638826534e-05 2023-01-23 23:01:15.723789: step: 248/77, loss: 0.0019844304770231247 2023-01-23 23:01:17.023105: step: 252/77, loss: 0.008615804836153984 2023-01-23 23:01:18.288411: step: 256/77, loss: 0.00010830286919372156 2023-01-23 23:01:19.540445: step: 260/77, loss: 0.009233257733285427 2023-01-23 23:01:20.874419: step: 264/77, loss: 0.01212453655898571 2023-01-23 23:01:22.155998: step: 268/77, loss: 0.0026560192927718163 2023-01-23 23:01:23.426416: step: 272/77, loss: 0.006347885821014643 2023-01-23 23:01:24.702935: step: 276/77, loss: 0.0009034523391164839 2023-01-23 23:01:26.020406: step: 280/77, loss: 0.003021764103323221 2023-01-23 23:01:27.330777: step: 284/77, loss: 0.006079188548028469 2023-01-23 23:01:28.604242: step: 288/77, loss: 0.038373105227947235 2023-01-23 23:01:29.882910: step: 292/77, loss: 0.0031433424446731806 2023-01-23 23:01:31.184967: step: 296/77, loss: 0.02784053236246109 2023-01-23 23:01:32.484915: step: 300/77, loss: 0.0005230466485954821 2023-01-23 23:01:33.811745: step: 304/77, loss: 0.00455853994935751 2023-01-23 23:01:35.073338: step: 308/77, loss: 0.0012094294652342796 2023-01-23 23:01:36.377798: step: 312/77, loss: 0.0003353256033733487 2023-01-23 23:01:37.660120: step: 316/77, loss: 0.00011404056567698717 2023-01-23 23:01:38.944763: step: 320/77, loss: 0.1456245481967926 2023-01-23 23:01:40.230936: step: 324/77, loss: 0.0008838959038257599 2023-01-23 23:01:41.509039: step: 328/77, loss: 0.04460209980607033 2023-01-23 23:01:42.781691: step: 332/77, loss: 0.0077811977826058865 2023-01-23 23:01:44.057715: step: 336/77, loss: 0.000999920885078609 2023-01-23 23:01:45.328808: step: 340/77, loss: 0.0008804184617474675 2023-01-23 23:01:46.609463: step: 344/77, loss: 0.03426405042409897 2023-01-23 23:01:47.909722: step: 348/77, loss: 0.00249478523619473 2023-01-23 23:01:49.237802: step: 352/77, loss: 0.028345339000225067 2023-01-23 23:01:50.525376: step: 356/77, loss: 1.6141852029250003e-05 2023-01-23 23:01:51.836934: step: 360/77, loss: 0.004599923733621836 2023-01-23 23:01:53.073637: step: 364/77, loss: 0.0015322489198297262 2023-01-23 23:01:54.380831: step: 368/77, loss: 0.029466429725289345 2023-01-23 23:01:55.727040: step: 372/77, loss: 0.0002610405208542943 2023-01-23 
23:01:57.018609: step: 376/77, loss: 0.00028324045706540346 2023-01-23 23:01:58.268498: step: 380/77, loss: 0.00011046537838410586 2023-01-23 23:01:59.566336: step: 384/77, loss: 0.01054134126752615 2023-01-23 23:02:00.882636: step: 388/77, loss: 0.0020199620630592108 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Chinese: {'template': {'p': 0.8974358974358975, 'r': 0.5691056910569106, 'f1': 0.6965174129353233}, 'slot': {'p': 0.4583333333333333, 'r': 0.02, 'f1': 0.03832752613240418}, 'combined': 0.026695789345953156, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Korean: {'template': {'p': 0.8974358974358975, 'r': 0.5691056910569106, 'f1': 0.6965174129353233}, 'slot': {'p': 0.45652173913043476, 'r': 0.019090909090909092, 'f1': 0.03664921465968587}, 'combined': 0.025526816180875725, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Russian: {'template': {'p': 0.8961038961038961, 'r': 0.5609756097560976, 'f1': 0.69}, 'slot': {'p': 0.46808510638297873, 'r': 0.02, 'f1': 0.03836094158674804}, 'combined': 0.026469049694856146, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 
1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:03:36.304067: step: 4/77, loss: 0.006552703212946653 2023-01-23 23:03:37.608202: step: 8/77, loss: 0.0009742131805978715 2023-01-23 23:03:38.904442: step: 12/77, loss: 0.0017733649583533406 2023-01-23 23:03:40.248298: step: 16/77, loss: 0.03460262715816498 2023-01-23 23:03:41.552975: step: 20/77, loss: 0.013664139434695244 2023-01-23 23:03:42.849943: step: 24/77, loss: 6.580878107342869e-05 2023-01-23 23:03:44.127116: step: 28/77, loss: 0.011652027256786823 2023-01-23 23:03:45.381734: step: 32/77, loss: 0.02981100231409073 2023-01-23 23:03:46.650967: step: 36/77, loss: 0.018231457099318504 2023-01-23 23:03:47.915813: step: 40/77, loss: 0.009962860494852066 2023-01-23 23:03:49.140728: step: 44/77, loss: 0.01126846019178629 2023-01-23 23:03:50.390825: step: 48/77, loss: 1.236806383531075e-05 2023-01-23 23:03:51.677380: step: 52/77, loss: 0.0025686314329504967 2023-01-23 23:03:52.954870: step: 56/77, loss: 0.022444654256105423 2023-01-23 23:03:54.238928: step: 60/77, loss: 8.586797775933519e-06 2023-01-23 23:03:55.512434: step: 64/77, loss: 6.882055458845571e-06 2023-01-23 23:03:56.819080: step: 68/77, loss: 0.004656787030398846 2023-01-23 23:03:58.077701: step: 72/77, loss: 0.005976193118840456 2023-01-23 23:03:59.334585: step: 76/77, loss: 0.003050893312320113 2023-01-23 23:04:00.631726: step: 80/77, loss: 0.11868611723184586 2023-01-23 23:04:01.916901: step: 84/77, loss: 0.022080106660723686 2023-01-23 23:04:03.181152: step: 88/77, loss: 0.005611381493508816 2023-01-23 23:04:04.478884: step: 92/77, loss: 0.0006953682750463486 2023-01-23 23:04:05.821125: step: 96/77, loss: 0.003470786614343524 2023-01-23 23:04:07.138103: step: 100/77, loss: 0.00038696586852893233 2023-01-23 23:04:08.354654: step: 104/77, loss: 0.04702460765838623 2023-01-23 23:04:09.619874: step: 108/77, loss: 0.00037538877222687006 2023-01-23 23:04:10.952632: step: 112/77, loss: 0.0009814082877710462 2023-01-23 23:04:12.221598: step: 116/77, loss: 0.00010789869702421129 2023-01-23 23:04:13.533935: step: 120/77, loss: 0.02089921198785305 2023-01-23 23:04:14.853497: step: 124/77, loss: 0.014739819802343845 2023-01-23 23:04:16.111934: step: 128/77, loss: 0.001180226681753993 2023-01-23 23:04:17.410974: step: 132/77, loss: 0.04211696237325668 2023-01-23 23:04:18.676471: step: 136/77, loss: 3.5734439734369516e-05 2023-01-23 23:04:19.983837: step: 140/77, loss: 0.04853532090783119 2023-01-23 23:04:21.229956: step: 144/77, loss: 0.004052129108458757 2023-01-23 23:04:22.510710: step: 148/77, loss: 0.0004372121475171298 2023-01-23 23:04:23.780412: step: 152/77, loss: 0.04190046712756157 2023-01-23 23:04:25.072478: step: 156/77, loss: 0.0018922454910352826 
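The command line repeated at each epoch header pins two learning rates, --xlmr_learning_rate 2e-5 for the XLM-R encoder and --learning_rate 2e-4 for the rest of the model, which ordinarily maps onto two optimizer parameter groups: the pretrained encoder is fine-tuned gently while the freshly initialized task layers move faster. A sketch under that assumption; treating "xlmr." as the prefix of encoder parameter names is also an assumption here, and the helper itself is illustrative rather than train.py's code:

from torch.optim import AdamW

def build_optimizer(model, xlmr_lr=2e-5, head_lr=2e-4):
    xlmr_params, head_params = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue  # skip frozen parameters
        (xlmr_params if name.startswith("xlmr.") else head_params).append(param)
    return AdamW([
        {"params": xlmr_params, "lr": xlmr_lr},  # --xlmr_learning_rate
        {"params": head_params, "lr": head_lr},  # --learning_rate
    ])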
2023-01-23 23:04:26.352878: step: 160/77, loss: 0.009773260913789272 2023-01-23 23:04:27.642067: step: 164/77, loss: 0.001528030028566718 2023-01-23 23:04:28.935518: step: 168/77, loss: 0.02765164151787758 2023-01-23 23:04:30.266120: step: 172/77, loss: 0.0005780403153039515 2023-01-23 23:04:31.584692: step: 176/77, loss: 0.0038615819066762924 2023-01-23 23:04:32.848072: step: 180/77, loss: 0.014504548162221909 2023-01-23 23:04:34.151721: step: 184/77, loss: 0.007875760085880756 2023-01-23 23:04:35.436320: step: 188/77, loss: 0.0010432120179757476 2023-01-23 23:04:36.686292: step: 192/77, loss: 1.9311471533001168e-06 2023-01-23 23:04:37.991695: step: 196/77, loss: 0.008792483247816563 2023-01-23 23:04:39.272987: step: 200/77, loss: 0.033647846430540085 2023-01-23 23:04:40.581704: step: 204/77, loss: 8.60294239828363e-05 2023-01-23 23:04:41.893016: step: 208/77, loss: 0.0026480434462428093 2023-01-23 23:04:43.171350: step: 212/77, loss: 0.0073967669159173965 2023-01-23 23:04:44.484765: step: 216/77, loss: 0.027375243604183197 2023-01-23 23:04:45.743602: step: 220/77, loss: 0.0054856291972100735 2023-01-23 23:04:46.991732: step: 224/77, loss: 0.024888327345252037 2023-01-23 23:04:48.258638: step: 228/77, loss: 0.0061912983655929565 2023-01-23 23:04:49.592162: step: 232/77, loss: 0.004951823502779007 2023-01-23 23:04:50.927827: step: 236/77, loss: 0.002676447154954076 2023-01-23 23:04:52.245218: step: 240/77, loss: 4.45177975052502e-05 2023-01-23 23:04:53.488178: step: 244/77, loss: 0.010543622076511383 2023-01-23 23:04:54.780924: step: 248/77, loss: 0.02687632292509079 2023-01-23 23:04:56.051016: step: 252/77, loss: 0.0023010619916021824 2023-01-23 23:04:57.332173: step: 256/77, loss: 0.07297077775001526 2023-01-23 23:04:58.640479: step: 260/77, loss: 0.0007688794867135584 2023-01-23 23:04:59.923082: step: 264/77, loss: 7.96151434769854e-05 2023-01-23 23:05:01.252233: step: 268/77, loss: 0.00010676166857592762 2023-01-23 23:05:02.559214: step: 272/77, loss: 0.011700259521603584 2023-01-23 23:05:03.846957: step: 276/77, loss: 0.010706624016165733 2023-01-23 23:05:05.152116: step: 280/77, loss: 0.00010317780106561258 2023-01-23 23:05:06.418883: step: 284/77, loss: 0.027786539867520332 2023-01-23 23:05:07.738709: step: 288/77, loss: 0.001600670162588358 2023-01-23 23:05:09.052372: step: 292/77, loss: 0.0003801693383138627 2023-01-23 23:05:10.338483: step: 296/77, loss: 0.0010835555149242282 2023-01-23 23:05:11.627869: step: 300/77, loss: 0.0013048271648585796 2023-01-23 23:05:12.866700: step: 304/77, loss: 0.00392378494143486 2023-01-23 23:05:14.149086: step: 308/77, loss: 2.722800854826346e-05 2023-01-23 23:05:15.449843: step: 312/77, loss: 0.01019936054944992 2023-01-23 23:05:16.767471: step: 316/77, loss: 0.04807935282588005 2023-01-23 23:05:18.047522: step: 320/77, loss: 3.8343645428540185e-05 2023-01-23 23:05:19.354811: step: 324/77, loss: 0.00013135296467225999 2023-01-23 23:05:20.660613: step: 328/77, loss: 0.012444362044334412 2023-01-23 23:05:21.928672: step: 332/77, loss: 0.0005562487640418112 2023-01-23 23:05:23.188368: step: 336/77, loss: 0.0027817520312964916 2023-01-23 23:05:24.521562: step: 340/77, loss: 0.04739411547780037 2023-01-23 23:05:25.794753: step: 344/77, loss: 0.015705594792962074 2023-01-23 23:05:27.075370: step: 348/77, loss: 0.014253773726522923 2023-01-23 23:05:28.375248: step: 352/77, loss: 0.0039000764954835176 2023-01-23 23:05:29.681007: step: 356/77, loss: 0.04627962410449982 2023-01-23 23:05:30.933862: step: 360/77, loss: 0.033766452223062515 2023-01-23 
23:05:32.166693: step: 364/77, loss: 0.0040741246193647385 2023-01-23 23:05:33.505818: step: 368/77, loss: 0.002382186008617282 2023-01-23 23:05:34.758744: step: 372/77, loss: 0.0462690070271492 2023-01-23 23:05:36.020679: step: 376/77, loss: 0.000892925076186657 2023-01-23 23:05:37.300388: step: 380/77, loss: 0.01855640299618244 2023-01-23 23:05:38.593211: step: 384/77, loss: 0.0007892133435234427 2023-01-23 23:05:39.920068: step: 388/77, loss: 0.05648082494735718 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16} Test Chinese: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.02214788732394366, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16} Test Korean: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.02233550557073786, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16} Test Russian: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.4594594594594595, 'r': 0.015454545454545455, 'f1': 0.029903254177660512}, 'combined': 0.02212840809146878, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': 
{'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:07:16.071397: step: 4/77, loss: 0.0003037060087081045 2023-01-23 23:07:17.356249: step: 8/77, loss: 0.00038242657319642603 2023-01-23 23:07:18.671920: step: 12/77, loss: 0.0013528617564588785 2023-01-23 23:07:19.972438: step: 16/77, loss: 0.00010484598897164688 2023-01-23 23:07:21.224533: step: 20/77, loss: 0.004785007331520319 2023-01-23 23:07:22.526004: step: 24/77, loss: 0.00010676060628611594 2023-01-23 23:07:23.750116: step: 28/77, loss: 0.00020604948804248124 2023-01-23 23:07:25.053127: step: 32/77, loss: 0.0016604478005319834 2023-01-23 23:07:26.335225: step: 36/77, loss: 0.0016582023818045855 2023-01-23 23:07:27.632654: step: 40/77, loss: 0.0004796137218363583 2023-01-23 23:07:28.883593: step: 44/77, loss: 0.008982490748167038 2023-01-23 23:07:30.088253: step: 48/77, loss: 0.005666470155119896 2023-01-23 23:07:31.358993: step: 52/77, loss: 0.013005124405026436 2023-01-23 23:07:32.643390: step: 56/77, loss: 0.00043898681178689003 2023-01-23 23:07:33.892135: step: 60/77, loss: 0.010772701352834702 2023-01-23 23:07:35.140040: step: 64/77, loss: 0.014273944310843945 2023-01-23 23:07:36.369735: step: 68/77, loss: 0.009036684408783913 2023-01-23 23:07:37.673857: step: 72/77, loss: 0.0006658356287516654 2023-01-23 23:07:38.960692: step: 76/77, loss: 1.80274473677855e-05 2023-01-23 23:07:40.203341: step: 80/77, loss: 0.00024116590793710202 2023-01-23 23:07:41.478144: step: 84/77, loss: 0.014361845329403877 2023-01-23 23:07:42.743970: step: 88/77, loss: 0.02260422147810459 2023-01-23 23:07:43.990399: step: 92/77, loss: 0.0029067930299788713 2023-01-23 23:07:45.257600: step: 96/77, loss: 0.0024856228847056627 2023-01-23 23:07:46.552230: step: 100/77, loss: 1.2357354535197373e-05 2023-01-23 23:07:47.849024: step: 104/77, loss: 0.0018393194768577814 2023-01-23 23:07:49.131479: step: 108/77, loss: 4.3470787204569206e-05 2023-01-23 23:07:50.389927: step: 112/77, loss: 0.012341336347162724 2023-01-23 23:07:51.670592: step: 116/77, loss: 0.004242464900016785 2023-01-23 23:07:52.961826: step: 120/77, loss: 0.0019005483482033014 2023-01-23 23:07:54.228296: step: 124/77, loss: 8.54067548061721e-06 2023-01-23 23:07:55.475748: step: 128/77, loss: 0.00011833629105240107 2023-01-23 23:07:56.794385: step: 132/77, loss: 0.04586326330900192 2023-01-23 23:07:58.012360: step: 136/77, loss: 0.012901647947728634 2023-01-23 23:07:59.274542: step: 140/77, loss: 0.0001440030027879402 2023-01-23 23:08:00.553763: step: 144/77, loss: 0.00040081614861264825 2023-01-23 23:08:01.876167: 
step: 148/77, loss: 0.02381657063961029 2023-01-23 23:08:03.175621: step: 152/77, loss: 0.0023007721174508333 2023-01-23 23:08:04.484582: step: 156/77, loss: 0.0388503298163414 2023-01-23 23:08:05.764634: step: 160/77, loss: 0.0034463191404938698 2023-01-23 23:08:07.079485: step: 164/77, loss: 0.0004672359791584313 2023-01-23 23:08:08.369566: step: 168/77, loss: 0.0018541706958785653 2023-01-23 23:08:09.644523: step: 172/77, loss: 0.0327320471405983 2023-01-23 23:08:10.896698: step: 176/77, loss: 0.0024690069258213043 2023-01-23 23:08:12.173460: step: 180/77, loss: 0.0010510309366509318 2023-01-23 23:08:13.427874: step: 184/77, loss: 0.0008923964924179018 2023-01-23 23:08:14.702787: step: 188/77, loss: 0.00029543269192799926 2023-01-23 23:08:15.973290: step: 192/77, loss: 0.0028764773160219193 2023-01-23 23:08:17.252286: step: 196/77, loss: 1.1312491551507264e-05 2023-01-23 23:08:18.576897: step: 200/77, loss: 0.009550292044878006 2023-01-23 23:08:19.868436: step: 204/77, loss: 0.0052166227251291275 2023-01-23 23:08:21.150608: step: 208/77, loss: 0.0008433779585175216 2023-01-23 23:08:22.421157: step: 212/77, loss: 2.461548319843132e-06 2023-01-23 23:08:23.713218: step: 216/77, loss: 0.0022772536613047123 2023-01-23 23:08:24.977634: step: 220/77, loss: 0.0036285670939832926 2023-01-23 23:08:26.211896: step: 224/77, loss: 0.032222650945186615 2023-01-23 23:08:27.438370: step: 228/77, loss: 6.528257654281333e-05 2023-01-23 23:08:28.736699: step: 232/77, loss: 0.010111379437148571 2023-01-23 23:08:30.018864: step: 236/77, loss: 0.003799677127972245 2023-01-23 23:08:31.308376: step: 240/77, loss: 0.026927510276436806 2023-01-23 23:08:32.635162: step: 244/77, loss: 0.009405246004462242 2023-01-23 23:08:33.948123: step: 248/77, loss: 0.032634347677230835 2023-01-23 23:08:35.225038: step: 252/77, loss: 0.0018777992809191346 2023-01-23 23:08:36.493563: step: 256/77, loss: 0.06961376965045929 2023-01-23 23:08:37.767573: step: 260/77, loss: 0.015779821202158928 2023-01-23 23:08:39.067843: step: 264/77, loss: 0.0018593231216073036 2023-01-23 23:08:40.339084: step: 268/77, loss: 3.989012475358322e-05 2023-01-23 23:08:41.631812: step: 272/77, loss: 2.904075699916575e-05 2023-01-23 23:08:42.908826: step: 276/77, loss: 1.4655574887001421e-05 2023-01-23 23:08:44.186248: step: 280/77, loss: 0.006797629874199629 2023-01-23 23:08:45.456846: step: 284/77, loss: 0.0001961684611160308 2023-01-23 23:08:46.725705: step: 288/77, loss: 0.04722617566585541 2023-01-23 23:08:48.061682: step: 292/77, loss: 0.0010816743597388268 2023-01-23 23:08:49.353409: step: 296/77, loss: 1.5971452739904635e-05 2023-01-23 23:08:50.606034: step: 300/77, loss: 0.03634996712207794 2023-01-23 23:08:51.885634: step: 304/77, loss: 0.004210877697914839 2023-01-23 23:08:53.245041: step: 308/77, loss: 0.0038256642874330282 2023-01-23 23:08:54.480368: step: 312/77, loss: 0.0038271218072623014 2023-01-23 23:08:55.762549: step: 316/77, loss: 0.052637092769145966 2023-01-23 23:08:57.098793: step: 320/77, loss: 0.0030049553606659174 2023-01-23 23:08:58.431658: step: 324/77, loss: 0.0036819763481616974 2023-01-23 23:08:59.667734: step: 328/77, loss: 0.007940493524074554 2023-01-23 23:09:00.997350: step: 332/77, loss: 0.014108079485595226 2023-01-23 23:09:02.331141: step: 336/77, loss: 0.00021898458362556994 2023-01-23 23:09:03.637595: step: 340/77, loss: 7.66866924095666e-06 2023-01-23 23:09:04.846378: step: 344/77, loss: 0.035794321447610855 2023-01-23 23:09:06.159746: step: 348/77, loss: 0.0010580236557871103 2023-01-23 23:09:07.437384: step: 
352/77, loss: 0.0006705737905576825 2023-01-23 23:09:08.704762: step: 356/77, loss: 8.463625272270292e-05 2023-01-23 23:09:09.951264: step: 360/77, loss: 0.03241058066487312 2023-01-23 23:09:11.201286: step: 364/77, loss: 0.0008318339241668582 2023-01-23 23:09:12.407008: step: 368/77, loss: 0.008623465895652771 2023-01-23 23:09:13.712187: step: 372/77, loss: 0.0006912436801940203 2023-01-23 23:09:15.009985: step: 376/77, loss: 0.007522268686443567 2023-01-23 23:09:16.315197: step: 380/77, loss: 0.010126580484211445 2023-01-23 23:09:17.609141: step: 384/77, loss: 0.00013385726197157055 2023-01-23 23:09:18.892261: step: 388/77, loss: 0.0001453287695767358 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.9512195121951219, 'r': 0.6341463414634146, 'f1': 0.7609756097560976}, 'slot': {'p': 0.5555555555555556, 'r': 0.022727272727272728, 'f1': 0.04366812227074236}, 'combined': 0.03323037597188199, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.9512195121951219, 'r': 0.6341463414634146, 'f1': 0.7609756097560976}, 'slot': {'p': 0.5555555555555556, 'r': 0.022727272727272728, 'f1': 0.04366812227074236}, 'combined': 0.03323037597188199, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.9506172839506173, 'r': 0.6260162601626016, 'f1': 0.7549019607843136}, 'slot': {'p': 0.5681818181818182, 'r': 0.022727272727272728, 'f1': 0.04370629370629371}, 'combined': 0.03299396681749623, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 
'epoch': 5}
Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5}
******************************
Epoch: 18
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 23:10:55 to 23:12:58: steps 4/77 through 388/77 (97 loss records, one every 4 steps); per-step losses between ~1.2e-07 and ~0.31
==================================================
Loss: 0.013
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18}
Test Chinese: {'template': {'p': 0.9625, 'r': 0.6260162601626016, 'f1': 0.7586206896551725}, 'slot': {'p': 0.49056603773584906, 'r': 0.023636363636363636, 'f1': 0.0450997398091934}, 'combined': 0.034213595717319134, 'epoch': 18}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18}
Test Korean: {'template': {'p': 0.9625, 'r': 0.6260162601626016, 'f1': 0.7586206896551725}, 'slot': {'p': 0.48148148148148145, 'r': 0.023636363636363636, 'f1': 0.045060658578856154}, 'combined': 0.03418394788740812, 'epoch': 18}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18}
Test Russian: {'template': {'p': 0.9629629629629629, 'r': 0.6341463414634146, 'f1': 0.7647058823529412}, 'slot': {'p': 0.48148148148148145, 'r': 0.023636363636363636, 'f1': 0.045060658578856154}, 'combined': 0.03445815067794883, 'epoch': 18}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5}
--------------------
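Each evaluation record above has the same shape: precision/recall/F1 for template matching, the same for slot filling, and a 'combined' score. From the logged numbers themselves, each F1 is the usual harmonic mean of p and r, and 'combined' is the product of the template and slot F1 values. A minimal sketch (not the project's code) that reproduces the epoch-18 dev figures:

```python
# Sketch only: how the logged dev metrics above relate to their p/r parts.
def f1(p: float, r: float) -> float:
    """Standard F1: harmonic mean of precision and recall."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.5833333333333334)   # 0.7368421052631579, as logged
slot_f1 = f1(0.5, 0.03780718336483932)      # 0.07029876977152899, as logged
combined = template_f1 * slot_f1            # 0.05179909351586346, as logged
assert abs(combined - 0.05179909351586346) < 1e-12
```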
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5}
******************************
Epoch: 19
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 23:14:34 to 23:16:38: steps 4/77 through 388/77 (97 loss records, one every 4 steps); per-step losses between ~2.1e-08 and ~0.060
==================================================
Loss: 0.008
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 19}
Test Chinese: {'template': {'p': 0.9594594594594594, 'r': 0.5772357723577236, 'f1': 0.7208121827411168}, 'slot': {'p': 0.48, 'r': 0.02181818181818182, 'f1': 0.041739130434782605}, 'combined': 0.03008607371441183, 'epoch': 19}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 19}
Test Korean: {'template': {'p': 0.96, 'r': 0.5853658536585366, 'f1': 0.7272727272727272}, 'slot': {'p': 0.48, 'r': 0.02181818181818182, 'f1': 0.041739130434782605}, 'combined': 0.030355731225296435, 'epoch': 19}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 19}
Test Russian: {'template': {'p': 0.96, 'r': 0.5853658536585366, 'f1': 0.7272727272727272}, 'slot': {'p': 0.48, 'r': 0.02181818181818182, 'f1': 0.041739130434782605}, 'combined': 0.030355731225296435, 'epoch': 19}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19}
==================================================
Current best result: unchanged from epoch 5 (identical to the block printed after Epoch 18)
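The "Current best result" block has not changed since epoch 5: the dev combined score recorded there (0.05179909351586346) is never beaten, e.g. epoch 19's dev combined is 0.051708217913204055, slightly lower. A hedged sketch of the bookkeeping that produces such a block (names are illustrative, not the project's code):

```python
# Illustrative bookkeeping, not train.py's actual code: keep the eval dicts
# from whichever epoch had the highest dev 'combined' score.
best = {"dev_combined": float("-inf"), "epoch": None, "results": None}

def update_best(epoch: int, dev_combined: float, results: dict) -> None:
    if dev_combined > best["dev_combined"]:
        best.update(dev_combined=dev_combined, epoch=epoch, results=results)

update_best(5, 0.05179909351586346, {"note": "epoch-5 eval dicts"})
update_best(19, 0.051708217913204055, {"note": "epoch-19 eval dicts"})
assert best["epoch"] == 5  # epoch 19 does not beat epoch 5, as in the log
```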
******************************
Epoch: 20
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 23:18:15 to 23:20:19: steps 4/77 through 388/77 (97 loss records, one every 4 steps); per-step losses between ~5.6e-07 and ~0.082
==================================================
Loss: 0.006
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20}
Test Chinese: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.4375, 'r': 0.019090909090909092, 'f1': 0.036585365853658534}, 'combined': 0.026938478340007278, 'epoch': 20}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20}
Test Korean: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.4583333333333333, 'r': 0.02, 'f1': 0.03832752613240418}, 'combined': 0.02822126302286477, 'epoch': 20}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20}
Test Russian: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.4666666666666667, 'r': 0.019090909090909092, 'f1': 0.036681222707423584}, 'combined': 0.027009059505963634, 'epoch': 20}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20}
==================================================
Current best result: unchanged from epoch 5 (identical to the block printed after Epoch 18)
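Because the raw log interleaves hundreds of `timestamp: step: N/77, loss: x` records, it is easier to analyze after parsing. A small sketch, with the regex written against the record format visible above (this is analysis tooling, not part of the training code):

```python
import re

# Matches records like "2023-01-23 23:18:15.548319: step: 4/77, loss: 0.0002076..."
RECORD = re.compile(
    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/\d+, loss: ([\d.e+-]+)"
)

def parse_steps(log_text: str):
    """Yield (timestamp, step, loss) tuples from raw log text."""
    for ts, step, loss in RECORD.findall(log_text):
        yield ts, int(step), float(loss)

line = "2023-01-23 23:18:15.548319: step: 4/77, loss: 0.00020769896218553185"
assert next(parse_steps(line))[1:] == (4, 0.00020769896218553185)
```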
******************************
Epoch: 21
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 23:21:54 to 23:23:57: steps 4/77 through 388/77 (97 loss records, one every 4 steps); per-step losses between ~5.0e-07 and ~0.14
==================================================
Loss: 0.008
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21}
Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5772357723577236, 'f1': 0.71356783919598}, 'slot': {'p': 0.44, 'r': 0.02, 'f1': 0.03826086956521739}, 'combined': 0.027301726021411406, 'epoch': 21}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21}
Test Korean: {'template': {'p': 0.9342105263157895, 'r': 0.5772357723577236, 'f1': 0.71356783919598}, 'slot': {'p': 0.44, 'r': 0.02, 'f1': 0.03826086956521739}, 'combined': 0.027301726021411406, 'epoch': 21}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21}
Test Russian: {'template': {'p': 0.9342105263157895, 'r': 0.5772357723577236, 'f1': 0.71356783919598}, 'slot': {'p': 0.4489795918367347, 'r': 0.02, 'f1': 0.038294168842471714}, 'combined': 0.02732548731472856, 'epoch': 21}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21}
==================================================
Current best result: unchanged from epoch 5 (identical to the block printed after Epoch 18)
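Losses are logged every 4 steps, which matches `--accumulate_step 4`: with `--batch_size 10`, the optimizer only steps once per 4 batches, for an effective batch of 40 examples. A generic PyTorch-style sketch of that pattern (assumed, since train.py's loop is not shown; the model and data here are stand-ins):

```python
import torch

# Generic gradient-accumulation sketch (assumed pattern; train.py is not shown).
# With batch_size=10 and accumulate_step=4, each optimizer step averages
# gradients over an effective batch of 40 examples.
accumulate_step = 4
model = torch.nn.Linear(8, 1)                     # stand-in for the real model
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
batches = [torch.randn(10, 8) for _ in range(8)]  # stand-in dataloader, batch_size=10

for i, x in enumerate(batches, start=1):
    loss = model(x).pow(2).mean()                 # stand-in loss
    (loss / accumulate_step).backward()           # scale so grads average over 4 batches
    if i % accumulate_step == 0:                  # matches losses logged every 4 steps
        optimizer.step()
        optimizer.zero_grad()
```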
******************************
Epoch: 22
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 23:25:33 to 23:27:38: steps 4/77 through 388/77 (97 loss records, one every 4 steps); per-step losses between ~1.6e-07 and ~0.078
==================================================
Loss: 0.007
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22}
Test Chinese: {'template': {'p': 0.974025974025974, 'r': 0.6097560975609756, 'f1': 0.7500000000000001}, 'slot': {'p': 0.5, 'r': 0.02, 'f1': 0.038461538461538464}, 'combined': 0.02884615384615385, 'epoch': 22}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22}
Test Korean: {'template': {'p': 0.974025974025974, 'r': 0.6097560975609756, 'f1': 0.7500000000000001}, 'slot': {'p': 0.5, 'r': 0.019090909090909092, 'f1': 0.03677758318739055}, 'combined': 0.027583187390542916, 'epoch': 22}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22}
Test Russian: {'template': {'p': 0.974025974025974, 'r': 0.6097560975609756, 'f1': 0.7500000000000001}, 'slot': {'p': 0.5116279069767442, 'r': 0.02, 'f1': 0.03849518810148731}, 'combined': 0.02887139107611549, 'epoch': 22}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 22}
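The command passes two learning rates, `--xlmr_learning_rate 2e-5` and `--learning_rate 2e-4`, which suggests separate optimizer parameter groups for the pretrained XLM-R encoder and for the task head. A hedged sketch of that setup (the exact split is an assumption from the flag names, and the head module here is a stand-in):

```python
import torch
from transformers import XLMRobertaModel

xlmr = XLMRobertaModel.from_pretrained("xlm-roberta-large")
head = torch.nn.Linear(1024, 450)  # stand-in for the event head (--event_hidden_num 450)

# Two parameter groups: a small LR for the pretrained encoder,
# a larger one for the randomly initialized head.
optimizer = torch.optim.AdamW([
    {"params": xlmr.parameters(), "lr": 2e-5},  # --xlmr_learning_rate
    {"params": head.parameters(), "lr": 2e-4},  # --learning_rate
])
```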
==================================================
Current best result: unchanged from epoch 5 (identical to the block printed after Epoch 18)
******************************
Epoch: 23
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 23:29:14 to 23:31:18: steps 4/77 through 388/77 (97 loss records, one every 4 steps); per-step losses between 0.0 and ~0.062 (one step logged an exact 0.0)
==================================================
Loss: 0.006
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23}
Test Chinese: {'template': {'p': 0.9620253164556962, 'r': 0.6178861788617886, 'f1': 0.7524752475247525}, 'slot': {'p': 0.5106382978723404, 'r': 0.02181818181818182, 'f1': 0.041848299912816043}, 'combined': 0.03148980983538633, 'epoch': 23}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23}
Test Korean: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.5111111111111111, 'r': 0.02090909090909091, 'f1': 0.04017467248908297}, 'combined': 0.029981098872449975, 'epoch': 23}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23}
Test Russian: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.5217391304347826, 'r': 0.02181818181818182, 'f1': 0.041884816753926704}, 'combined': 0.031257325935766196, 'epoch': 23}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23}
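By epoch 23 the best dev result is still the one from epoch 5, i.e. 18 epochs without improvement, yet the run continues toward --max_epoch 30. Had early stopping been wanted, a standard patience rule would have ended the run much sooner; a generic sketch with illustrative scores (this is not a feature shown in this log):

```python
# Generic patience-based early stopping (not a feature of this train.py).
dev_scores = [0.030, 0.041, 0.048, 0.050, 0.0518, 0.0517, 0.0517,
              0.0516, 0.0517, 0.0517]  # illustrative dev 'combined' per epoch
patience = 5
best_score, since_best = float("-inf"), 0

for epoch, dev_combined in enumerate(dev_scores, start=1):
    if dev_combined > best_score:
        best_score, since_best = dev_combined, 0
    else:
        since_best += 1
    if since_best >= patience:
        print(f"stopping at epoch {epoch}: no dev improvement in {patience} epochs")
        break
```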
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23}
==================================================
Current best result: unchanged from epoch 5 (identical to the block printed after Epoch 18)
******************************
Epoch: 24
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 23:32:54 to 23:34:58: steps 4/77 through 388/77 (97 loss records, one every 4 steps); per-step losses between ~1.1e-07 and ~0.083
==================================================
Loss: 0.006
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24}
Test Chinese: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.45098039215686275, 'r': 0.02090909090909091, 'f1': 0.03996524761077324}, 'combined': 0.029574283231972198, 'epoch': 24}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24}
Test Korean: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.46, 'r': 0.02090909090909091, 'f1': 0.04}, 'combined': 0.029850746268656716, 'epoch': 24}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24}
Test Russian: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.46, 'r': 0.02090909090909091, 'f1': 0.04}, 'combined': 0.029850746268656716, 'epoch':
24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:36:37.179729: step: 4/77, loss: 0.005719621200114489 2023-01-23 23:36:38.462382: step: 8/77, loss: 0.008860756643116474 2023-01-23 23:36:39.773585: step: 12/77, loss: 0.0028032902628183365 2023-01-23 23:36:41.067363: step: 16/77, loss: 6.109435730650148e-07 2023-01-23 23:36:42.373097: step: 20/77, loss: 2.298797517141793e-05 2023-01-23 23:36:43.668893: step: 24/77, loss: 6.934415523573989e-06 2023-01-23 23:36:44.955418: step: 28/77, loss: 0.00017443645629100502 2023-01-23 23:36:46.281488: step: 32/77, loss: 3.634882523328997e-05 2023-01-23 23:36:47.550060: step: 36/77, loss: 7.09361211193027e-06 2023-01-23 23:36:48.865432: step: 40/77, loss: 0.016919534653425217 2023-01-23 23:36:50.163762: step: 44/77, loss: 8.22349829832092e-06 2023-01-23 23:36:51.406887: step: 48/77, loss: 0.0002756982285063714 2023-01-23 
23:36:52.689008: step: 52/77, loss: 5.055268957221415e-06 2023-01-23 23:36:53.966779: step: 56/77, loss: 0.0003850968205370009 2023-01-23 23:36:55.291668: step: 60/77, loss: 0.0004995691706426442 2023-01-23 23:36:56.590611: step: 64/77, loss: 0.001346847042441368 2023-01-23 23:36:57.863793: step: 68/77, loss: 0.0004229228070471436 2023-01-23 23:36:59.095241: step: 72/77, loss: 0.03281763941049576 2023-01-23 23:37:00.363778: step: 76/77, loss: 0.038519665598869324 2023-01-23 23:37:01.675141: step: 80/77, loss: 0.009992430917918682 2023-01-23 23:37:02.964182: step: 84/77, loss: 0.002286661881953478 2023-01-23 23:37:04.221755: step: 88/77, loss: 0.0002374086616327986 2023-01-23 23:37:05.536257: step: 92/77, loss: 8.106205496005714e-05 2023-01-23 23:37:06.806142: step: 96/77, loss: 0.0010453971335664392 2023-01-23 23:37:08.146318: step: 100/77, loss: 0.0001758149592205882 2023-01-23 23:37:09.441708: step: 104/77, loss: 0.009996388107538223 2023-01-23 23:37:10.768739: step: 108/77, loss: 0.0004022825451102108 2023-01-23 23:37:12.033176: step: 112/77, loss: 0.004816494882106781 2023-01-23 23:37:13.319231: step: 116/77, loss: 0.005425009410828352 2023-01-23 23:37:14.605711: step: 120/77, loss: 2.0414493917542131e-07 2023-01-23 23:37:15.913271: step: 124/77, loss: 9.027479973156005e-05 2023-01-23 23:37:17.171258: step: 128/77, loss: 0.06450998783111572 2023-01-23 23:37:18.488786: step: 132/77, loss: 9.049760410562158e-05 2023-01-23 23:37:19.755217: step: 136/77, loss: 5.903685814701021e-05 2023-01-23 23:37:21.024816: step: 140/77, loss: 4.318053015595069e-06 2023-01-23 23:37:22.312275: step: 144/77, loss: 2.9786574486934114e-06 2023-01-23 23:37:23.638921: step: 148/77, loss: 3.1425793167727534e-06 2023-01-23 23:37:24.936956: step: 152/77, loss: 9.415208478458226e-05 2023-01-23 23:37:26.256130: step: 156/77, loss: 5.3753981774207205e-05 2023-01-23 23:37:27.560666: step: 160/77, loss: 0.0004957958590239286 2023-01-23 23:37:28.852926: step: 164/77, loss: 0.0008796628098934889 2023-01-23 23:37:30.156895: step: 168/77, loss: 0.002826994052156806 2023-01-23 23:37:31.442688: step: 172/77, loss: 0.00016862266056705266 2023-01-23 23:37:32.696203: step: 176/77, loss: 3.725287101019603e-08 2023-01-23 23:37:33.976448: step: 180/77, loss: 0.00611914461478591 2023-01-23 23:37:35.285581: step: 184/77, loss: 0.0009060569573193789 2023-01-23 23:37:36.660795: step: 188/77, loss: 8.879319648258388e-05 2023-01-23 23:37:37.914834: step: 192/77, loss: 0.00410617096349597 2023-01-23 23:37:39.229513: step: 196/77, loss: 0.001179800252430141 2023-01-23 23:37:40.514614: step: 200/77, loss: 1.5466710010514362e-06 2023-01-23 23:37:41.745992: step: 204/77, loss: 0.00045527133625000715 2023-01-23 23:37:43.040216: step: 208/77, loss: 0.006096724420785904 2023-01-23 23:37:44.398890: step: 212/77, loss: 0.05618688464164734 2023-01-23 23:37:45.719758: step: 216/77, loss: 0.0006208279519341886 2023-01-23 23:37:47.047071: step: 220/77, loss: 1.430508973498945e-07 2023-01-23 23:37:48.409465: step: 224/77, loss: 4.76835111840046e-07 2023-01-23 23:37:49.739299: step: 228/77, loss: 0.0012825513258576393 2023-01-23 23:37:51.048035: step: 232/77, loss: 0.00010251560888718814 2023-01-23 23:37:52.333834: step: 236/77, loss: 0.00014983654546085745 2023-01-23 23:37:53.612543: step: 240/77, loss: 0.0007250534254126251 2023-01-23 23:37:54.924283: step: 244/77, loss: 3.565517909009941e-05 2023-01-23 23:37:56.223727: step: 248/77, loss: 3.2824344089021906e-05 2023-01-23 23:37:57.509505: step: 252/77, loss: 0.0004473893204703927 2023-01-23 
23:37:58.821283: step: 256/77, loss: 0.0002924882574006915 2023-01-23 23:38:00.074640: step: 260/77, loss: 0.0029194727540016174 2023-01-23 23:38:01.372237: step: 264/77, loss: 0.00955183431506157 2023-01-23 23:38:02.677726: step: 268/77, loss: 0.027585407719016075 2023-01-23 23:38:03.957620: step: 272/77, loss: 1.8800383259076625e-05 2023-01-23 23:38:05.250474: step: 276/77, loss: 0.0003292300389148295 2023-01-23 23:38:06.528873: step: 280/77, loss: 6.807313184253871e-05 2023-01-23 23:38:07.854111: step: 284/77, loss: 0.001798221142962575 2023-01-23 23:38:09.189350: step: 288/77, loss: 0.0032155895605683327 2023-01-23 23:38:10.473424: step: 292/77, loss: 1.2091225471522193e-05 2023-01-23 23:38:11.786086: step: 296/77, loss: 0.00014940323308110237 2023-01-23 23:38:13.062385: step: 300/77, loss: 0.018790228292346 2023-01-23 23:38:14.385512: step: 304/77, loss: 4.5596917175316776e-07 2023-01-23 23:38:15.717019: step: 308/77, loss: 5.038719609729014e-05 2023-01-23 23:38:17.066981: step: 312/77, loss: 0.00027558201691135764 2023-01-23 23:38:18.419685: step: 316/77, loss: 0.0001998369989451021 2023-01-23 23:38:19.705498: step: 320/77, loss: 0.00041272686212323606 2023-01-23 23:38:21.004363: step: 324/77, loss: 6.429352652048692e-06 2023-01-23 23:38:22.316187: step: 328/77, loss: 1.4448265574174002e-05 2023-01-23 23:38:23.628706: step: 332/77, loss: 0.0008989330381155014 2023-01-23 23:38:24.959003: step: 336/77, loss: 0.009158177301287651 2023-01-23 23:38:26.248027: step: 340/77, loss: 9.308809239882976e-06 2023-01-23 23:38:27.560041: step: 344/77, loss: 0.0002032999909715727 2023-01-23 23:38:28.864679: step: 348/77, loss: 8.418882089245017e-07 2023-01-23 23:38:30.198932: step: 352/77, loss: 9.536737621829161e-08 2023-01-23 23:38:31.467259: step: 356/77, loss: 0.0013869872782379389 2023-01-23 23:38:32.766318: step: 360/77, loss: 0.00016957623302005231 2023-01-23 23:38:34.111801: step: 364/77, loss: 0.0033187230583280325 2023-01-23 23:38:35.421219: step: 368/77, loss: 0.00044626800809055567 2023-01-23 23:38:36.714772: step: 372/77, loss: 0.0005619988078251481 2023-01-23 23:38:38.006205: step: 376/77, loss: 4.350573817646364e-06 2023-01-23 23:38:39.286162: step: 380/77, loss: 0.002264243783429265 2023-01-23 23:38:40.563197: step: 384/77, loss: 1.1637284842436202e-06 2023-01-23 23:38:41.893269: step: 388/77, loss: 0.026438845321536064 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.05152741994847258, 'epoch': 25} Test Chinese: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.5, 'r': 0.02, 'f1': 0.038461538461538464}, 'combined': 0.028319938767699962, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.05152741994847258, 'epoch': 25} Test Korean: {'template': {'p': 0.9493670886075949, 'r': 0.6097560975609756, 'f1': 0.7425742574257426}, 'slot': {'p': 0.5116279069767442, 'r': 0.02, 'f1': 0.03849518810148731}, 'combined': 0.02858553571892622, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.05152741994847258, 'epoch': 
25} Test Russian: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.4888888888888889, 'r': 0.02, 'f1': 0.03842794759825328}, 'combined': 0.028295205196723804, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:40:20.424980: step: 4/77, loss: 7.405794235637586e-07 2023-01-23 23:40:21.733154: step: 8/77, loss: 4.420810910232831e-06 2023-01-23 23:40:23.078344: step: 12/77, loss: 0.009897311218082905 2023-01-23 23:40:24.356758: step: 16/77, loss: 6.879275588289602e-06 2023-01-23 23:40:25.718331: step: 20/77, loss: 0.001125571085140109 2023-01-23 23:40:27.011767: step: 24/77, loss: 5.025583959650248e-05 2023-01-23 23:40:28.377440: step: 28/77, loss: 0.03362608328461647 2023-01-23 23:40:29.689329: step: 32/77, loss: 0.00027511155349202454 
2023-01-23 23:40:31.029720: step: 36/77, loss: 0.012906921096146107 2023-01-23 23:40:32.365431: step: 40/77, loss: 6.1619521147804335e-06 2023-01-23 23:40:33.594943: step: 44/77, loss: 1.0552851563261356e-05 2023-01-23 23:40:34.875440: step: 48/77, loss: 0.0006020730943419039 2023-01-23 23:40:36.184513: step: 52/77, loss: 0.005116917658597231 2023-01-23 23:40:37.512696: step: 56/77, loss: 1.1251551768509671e-05 2023-01-23 23:40:38.822874: step: 60/77, loss: 0.024727124720811844 2023-01-23 23:40:40.137139: step: 64/77, loss: 8.761744538787752e-07 2023-01-23 23:40:41.448292: step: 68/77, loss: 0.006382717750966549 2023-01-23 23:40:42.697508: step: 72/77, loss: 0.0001176731166196987 2023-01-23 23:40:43.984015: step: 76/77, loss: 8.199903095373884e-05 2023-01-23 23:40:45.311961: step: 80/77, loss: 0.007674494292587042 2023-01-23 23:40:46.631907: step: 84/77, loss: 0.08472293615341187 2023-01-23 23:40:47.962418: step: 88/77, loss: 1.7702009245113004e-06 2023-01-23 23:40:49.230623: step: 92/77, loss: 0.0014696570578962564 2023-01-23 23:40:50.552122: step: 96/77, loss: 4.095823442185065e-06 2023-01-23 23:40:51.832285: step: 100/77, loss: 5.805840191897005e-05 2023-01-23 23:40:53.096219: step: 104/77, loss: 0.003786920104175806 2023-01-23 23:40:54.365269: step: 108/77, loss: 0.012779447250068188 2023-01-23 23:40:55.639549: step: 112/77, loss: 1.1473886019075508e-07 2023-01-23 23:40:56.918377: step: 116/77, loss: 4.9697860958985984e-05 2023-01-23 23:40:58.233897: step: 120/77, loss: 3.697064676089212e-05 2023-01-23 23:40:59.468773: step: 124/77, loss: 0.0015872609801590443 2023-01-23 23:41:00.737169: step: 128/77, loss: 0.006903848610818386 2023-01-23 23:41:02.017106: step: 132/77, loss: 0.005575467366725206 2023-01-23 23:41:03.312101: step: 136/77, loss: 0.005671120248734951 2023-01-23 23:41:04.616968: step: 140/77, loss: 4.039318810100667e-05 2023-01-23 23:41:05.899417: step: 144/77, loss: 0.00020730840333271772 2023-01-23 23:41:07.175923: step: 148/77, loss: 9.252969903172925e-06 2023-01-23 23:41:08.464337: step: 152/77, loss: 0.0001278579729842022 2023-01-23 23:41:09.776890: step: 156/77, loss: 0.027770182117819786 2023-01-23 23:41:11.043659: step: 160/77, loss: 4.7683688109145805e-08 2023-01-23 23:41:12.323713: step: 164/77, loss: 0.00022923552023712546 2023-01-23 23:41:13.612199: step: 168/77, loss: 2.0663854229496792e-05 2023-01-23 23:41:14.921355: step: 172/77, loss: 4.5862248953199014e-05 2023-01-23 23:41:16.208697: step: 176/77, loss: 0.0016720297280699015 2023-01-23 23:41:17.461336: step: 180/77, loss: 0.00010752508387668058 2023-01-23 23:41:18.784903: step: 184/77, loss: 0.0006299956585280597 2023-01-23 23:41:20.064588: step: 188/77, loss: 0.011298703029751778 2023-01-23 23:41:21.348851: step: 192/77, loss: 6.590948032680899e-05 2023-01-23 23:41:22.616093: step: 196/77, loss: 5.185305781196803e-06 2023-01-23 23:41:23.906441: step: 200/77, loss: 2.2649717834610783e-07 2023-01-23 23:41:25.213969: step: 204/77, loss: 0.0001983733382076025 2023-01-23 23:41:26.522611: step: 208/77, loss: 3.112400372629054e-05 2023-01-23 23:41:27.788230: step: 212/77, loss: 2.7714126190403476e-05 2023-01-23 23:41:29.165431: step: 216/77, loss: 0.00013845518697053194 2023-01-23 23:41:30.496808: step: 220/77, loss: 3.977232699980959e-05 2023-01-23 23:41:31.804057: step: 224/77, loss: 0.000929908303078264 2023-01-23 23:41:33.102435: step: 228/77, loss: 1.7579246559762396e-05 2023-01-23 23:41:34.403468: step: 232/77, loss: 0.0007625438156537712 2023-01-23 23:41:35.722047: step: 236/77, loss: 
4.9652739107841626e-05 2023-01-23 23:41:37.039831: step: 240/77, loss: 1.2576102790262667e-06 2023-01-23 23:41:38.343600: step: 244/77, loss: 8.551467908546329e-05 2023-01-23 23:41:39.610007: step: 248/77, loss: 0.000155415793415159 2023-01-23 23:41:40.927686: step: 252/77, loss: 7.673896789128776e-07 2023-01-23 23:41:42.206355: step: 256/77, loss: 1.6112244338728487e-05 2023-01-23 23:41:43.533908: step: 260/77, loss: 0.020630180835723877 2023-01-23 23:41:44.807764: step: 264/77, loss: 0.0011570448987185955 2023-01-23 23:41:46.060320: step: 268/77, loss: 6.015104190737475e-06 2023-01-23 23:41:47.346885: step: 272/77, loss: 3.4272662929879516e-08 2023-01-23 23:41:48.647792: step: 276/77, loss: 0.018049897626042366 2023-01-23 23:41:49.980671: step: 280/77, loss: 0.00012221011274959892 2023-01-23 23:41:51.266874: step: 284/77, loss: 0.009114380925893784 2023-01-23 23:41:52.505457: step: 288/77, loss: 0.0005977398250252008 2023-01-23 23:41:53.800039: step: 292/77, loss: 0.002121040364727378 2023-01-23 23:41:55.053399: step: 296/77, loss: 1.7078427845262922e-05 2023-01-23 23:41:56.350759: step: 300/77, loss: 1.7234993720194325e-05 2023-01-23 23:41:57.677399: step: 304/77, loss: 2.008646333706565e-06 2023-01-23 23:41:59.011843: step: 308/77, loss: 1.3609464986075182e-05 2023-01-23 23:42:00.330953: step: 312/77, loss: 1.9376695490791462e-05 2023-01-23 23:42:01.590147: step: 316/77, loss: 0.0002871832111850381 2023-01-23 23:42:02.864360: step: 320/77, loss: 8.973329386208206e-06 2023-01-23 23:42:04.173782: step: 324/77, loss: 0.0001281930017285049 2023-01-23 23:42:05.471157: step: 328/77, loss: 0.006611789111047983 2023-01-23 23:42:06.737470: step: 332/77, loss: 6.0470832977443933e-05 2023-01-23 23:42:08.036735: step: 336/77, loss: 0.00381635595113039 2023-01-23 23:42:09.354937: step: 340/77, loss: 1.8114980775862932e-05 2023-01-23 23:42:10.656041: step: 344/77, loss: 0.0001864713995018974 2023-01-23 23:42:11.937428: step: 348/77, loss: 0.017800893634557724 2023-01-23 23:42:13.251800: step: 352/77, loss: 0.008395765908062458 2023-01-23 23:42:14.501939: step: 356/77, loss: 1.6391246049352048e-07 2023-01-23 23:42:15.778777: step: 360/77, loss: 0.0011608715867623687 2023-01-23 23:42:17.108122: step: 364/77, loss: 2.5806618850765517e-06 2023-01-23 23:42:18.417164: step: 368/77, loss: 0.00028230599127709866 2023-01-23 23:42:19.731499: step: 372/77, loss: 6.524189757328713e-06 2023-01-23 23:42:21.013889: step: 376/77, loss: 5.015195711166598e-06 2023-01-23 23:42:22.275312: step: 380/77, loss: 0.00017462043615523726 2023-01-23 23:42:23.567167: step: 384/77, loss: 2.010021489695646e-06 2023-01-23 23:42:24.862123: step: 388/77, loss: 9.521669562673196e-05 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 26} Test Chinese: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.02955553819037448, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 26} Test Korean: {'template': {'p': 0.9493670886075949, 'r': 0.6097560975609756, 'f1': 0.7425742574257426}, 'slot': {'p': 0.48936170212765956, 'r': 
0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.02978065897261042, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 26} Test Russian: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.02952977050232707, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:44:03.207039: step: 4/77, loss: 4.3786039896076545e-05 2023-01-23 23:44:04.538051: step: 8/77, loss: 1.3262005893466267e-07 2023-01-23 23:44:05.862491: step: 12/77, loss: 0.004400265868753195 2023-01-23 23:44:07.107545: step: 16/77, loss: 1.0698581718315836e-06 2023-01-23 
23:44:08.441117: step: 20/77, loss: 0.023978853598237038 2023-01-23 23:44:09.720015: step: 24/77, loss: 4.8583251555101015e-06 2023-01-23 23:44:10.990076: step: 28/77, loss: 0.03092852607369423 2023-01-23 23:44:12.271688: step: 32/77, loss: 0.0003409860364627093 2023-01-23 23:44:13.579382: step: 36/77, loss: 6.979777390370145e-05 2023-01-23 23:44:14.844619: step: 40/77, loss: 0.0005804693792015314 2023-01-23 23:44:16.197531: step: 44/77, loss: 0.011256477795541286 2023-01-23 23:44:17.456545: step: 48/77, loss: 1.1070699656556826e-05 2023-01-23 23:44:18.768114: step: 52/77, loss: 3.316633183203521e-06 2023-01-23 23:44:20.045689: step: 56/77, loss: 3.561252924555447e-06 2023-01-23 23:44:21.296742: step: 60/77, loss: 9.22357571653265e-07 2023-01-23 23:44:22.586979: step: 64/77, loss: 1.6862717529875226e-05 2023-01-23 23:44:23.872305: step: 68/77, loss: 0.00019232039630878717 2023-01-23 23:44:25.160531: step: 72/77, loss: 0.016592321917414665 2023-01-23 23:44:26.441932: step: 76/77, loss: 1.5529300071648322e-05 2023-01-23 23:44:27.784666: step: 80/77, loss: 1.6067247997852974e-05 2023-01-23 23:44:29.046020: step: 84/77, loss: 8.976113167591393e-05 2023-01-23 23:44:30.348535: step: 88/77, loss: 0.04106712341308594 2023-01-23 23:44:31.634921: step: 92/77, loss: 1.9371507065102378e-08 2023-01-23 23:44:33.002429: step: 96/77, loss: 2.0861591565335402e-07 2023-01-23 23:44:34.269594: step: 100/77, loss: 2.2351732908987287e-08 2023-01-23 23:44:35.583665: step: 104/77, loss: 0.0007628971361555159 2023-01-23 23:44:36.847828: step: 108/77, loss: 2.9802318390892424e-09 2023-01-23 23:44:38.184858: step: 112/77, loss: 2.0861618210687993e-08 2023-01-23 23:44:39.493672: step: 116/77, loss: 0.0005382683593779802 2023-01-23 23:44:40.803498: step: 120/77, loss: 2.1457547916270414e-07 2023-01-23 23:44:42.107352: step: 124/77, loss: 3.502124309306964e-05 2023-01-23 23:44:43.459633: step: 128/77, loss: 3.145249866065569e-05 2023-01-23 23:44:44.703168: step: 132/77, loss: 0.00019145975238643587 2023-01-23 23:44:46.021543: step: 136/77, loss: 0.0002411496825516224 2023-01-23 23:44:47.326455: step: 140/77, loss: 0.0012163245119154453 2023-01-23 23:44:48.596580: step: 144/77, loss: 0.00010317091800970957 2023-01-23 23:44:49.926492: step: 148/77, loss: 4.620007985067787e-06 2023-01-23 23:44:51.230099: step: 152/77, loss: 8.60757427290082e-05 2023-01-23 23:44:52.511568: step: 156/77, loss: 6.705515431804088e-08 2023-01-23 23:44:53.809799: step: 160/77, loss: 0.00013355020200833678 2023-01-23 23:44:55.125558: step: 164/77, loss: 0.005990986712276936 2023-01-23 23:44:56.417951: step: 168/77, loss: 0.00027787365252152085 2023-01-23 23:44:57.659511: step: 172/77, loss: 0.04725373163819313 2023-01-23 23:44:58.946430: step: 176/77, loss: 7.853271381463856e-05 2023-01-23 23:45:00.269106: step: 180/77, loss: 0.00018676927720662206 2023-01-23 23:45:01.505801: step: 184/77, loss: 0.006754858419299126 2023-01-23 23:45:02.835225: step: 188/77, loss: 8.532601350452751e-05 2023-01-23 23:45:04.142853: step: 192/77, loss: 0.0007004007347859442 2023-01-23 23:45:05.440544: step: 196/77, loss: 0.0011839126236736774 2023-01-23 23:45:06.707002: step: 200/77, loss: 0.0004985693376511335 2023-01-23 23:45:07.986767: step: 204/77, loss: 1.244201371264353e-06 2023-01-23 23:45:09.276824: step: 208/77, loss: 0.08877816051244736 2023-01-23 23:45:10.529085: step: 212/77, loss: 1.2874467074652785e-06 2023-01-23 23:45:11.873155: step: 216/77, loss: 6.705516852889559e-08 2023-01-23 23:45:13.158238: step: 220/77, loss: 1.627982419449836e-05 2023-01-23 
23:45:14.470982: step: 224/77, loss: 6.169013317958161e-07 2023-01-23 23:45:15.770560: step: 228/77, loss: 5.7242756156483665e-05 2023-01-23 23:45:17.054985: step: 232/77, loss: 0.00034215141204185784 2023-01-23 23:45:18.331105: step: 236/77, loss: 0.0015011318027973175 2023-01-23 23:45:19.674185: step: 240/77, loss: 0.0012336316285654902 2023-01-23 23:45:20.974135: step: 244/77, loss: 0.00022003508638590574 2023-01-23 23:45:22.242664: step: 248/77, loss: 0.01599096693098545 2023-01-23 23:45:23.534494: step: 252/77, loss: 9.920450247591361e-05 2023-01-23 23:45:24.838589: step: 256/77, loss: 0.00031245272839441895 2023-01-23 23:45:26.152444: step: 260/77, loss: 1.2031691767333541e-05 2023-01-23 23:45:27.454838: step: 264/77, loss: 9.238696208058172e-08 2023-01-23 23:45:28.759169: step: 268/77, loss: 6.817108805989847e-05 2023-01-23 23:45:30.085941: step: 272/77, loss: 9.894110917230137e-07 2023-01-23 23:45:31.431753: step: 276/77, loss: 0.0005339889321476221 2023-01-23 23:45:32.692442: step: 280/77, loss: 0.00012003527081105858 2023-01-23 23:45:33.980430: step: 284/77, loss: 1.629951293580234e-05 2023-01-23 23:45:35.307312: step: 288/77, loss: 7.010930858086795e-05 2023-01-23 23:45:36.637581: step: 292/77, loss: 1.5854463981668232e-06 2023-01-23 23:45:37.913147: step: 296/77, loss: 1.568728475831449e-05 2023-01-23 23:45:39.246476: step: 300/77, loss: 0.003171185264363885 2023-01-23 23:45:40.608580: step: 304/77, loss: 4.264489234628854e-06 2023-01-23 23:45:41.875558: step: 308/77, loss: 0.007752659730613232 2023-01-23 23:45:43.187480: step: 312/77, loss: 4.3374842789489776e-05 2023-01-23 23:45:44.494689: step: 316/77, loss: 0.00010408271919004619 2023-01-23 23:45:45.843290: step: 320/77, loss: 0.0005090486956760287 2023-01-23 23:45:47.111774: step: 324/77, loss: 9.227226109942421e-05 2023-01-23 23:45:48.366754: step: 328/77, loss: 0.003174896351993084 2023-01-23 23:45:49.653970: step: 332/77, loss: 0.0017001423984766006 2023-01-23 23:45:50.932579: step: 336/77, loss: 0.00042848309385590255 2023-01-23 23:45:52.265999: step: 340/77, loss: 0.0005391308222897351 2023-01-23 23:45:53.535918: step: 344/77, loss: 0.0006425658357329667 2023-01-23 23:45:54.893418: step: 348/77, loss: 1.4663381080026738e-05 2023-01-23 23:45:56.162591: step: 352/77, loss: 5.437660729512572e-05 2023-01-23 23:45:57.477774: step: 356/77, loss: 0.14871054887771606 2023-01-23 23:45:58.761272: step: 360/77, loss: 2.2351734685344127e-08 2023-01-23 23:46:00.036654: step: 364/77, loss: 0.0003183669177815318 2023-01-23 23:46:01.391822: step: 368/77, loss: 2.8474557893787278e-06 2023-01-23 23:46:02.660021: step: 372/77, loss: 8.405734115513042e-05 2023-01-23 23:46:03.991777: step: 376/77, loss: 3.2782395464892033e-07 2023-01-23 23:46:05.284268: step: 380/77, loss: 3.4272460425199824e-07 2023-01-23 23:46:06.596297: step: 384/77, loss: 4.10609118262073e-06 2023-01-23 23:46:07.911582: step: 388/77, loss: 0.018402086570858955 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Chinese: {'template': {'p': 0.9518072289156626, 'r': 0.6422764227642277, 'f1': 0.7669902912621358}, 'slot': {'p': 0.5, 'r': 0.022727272727272728, 'f1': 0.04347826086956522}, 'combined': 0.03334740396791895, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 
0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04929577464788733, 'epoch': 27} Test Korean: {'template': {'p': 0.9634146341463414, 'r': 0.6422764227642277, 'f1': 0.7707317073170733}, 'slot': {'p': 0.5102040816326531, 'r': 0.022727272727272728, 'f1': 0.043516100957354226}, 'combined': 0.03353923878664375, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Russian: {'template': {'p': 0.9634146341463414, 'r': 0.6422764227642277, 'f1': 0.7707317073170733}, 'slot': {'p': 0.5102040816326531, 'r': 0.022727272727272728, 'f1': 0.043516100957354226}, 'combined': 0.03353923878664375, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:47:46.386111: 
step: 4/77, loss: 1.582411414347007e-06 2023-01-23 23:47:47.675516: step: 8/77, loss: 0.00010717161057982594 2023-01-23 23:47:48.942348: step: 12/77, loss: 1.4007048321218463e-07 2023-01-23 23:47:50.250393: step: 16/77, loss: 8.115675154840574e-05 2023-01-23 23:47:51.537062: step: 20/77, loss: 0.03191979229450226 2023-01-23 23:47:52.844184: step: 24/77, loss: 1.1622893225649022e-07 2023-01-23 23:47:54.177434: step: 28/77, loss: 3.9084603486116976e-05 2023-01-23 23:47:55.446645: step: 32/77, loss: 0.002637272235006094 2023-01-23 23:47:56.704214: step: 36/77, loss: 0.05554957687854767 2023-01-23 23:47:58.005549: step: 40/77, loss: 3.560824188753031e-05 2023-01-23 23:47:59.280647: step: 44/77, loss: 1.0132781369520671e-07 2023-01-23 23:48:00.588547: step: 48/77, loss: 0.000232343387324363 2023-01-23 23:48:01.856123: step: 52/77, loss: 1.0445479574627825e-06 2023-01-23 23:48:03.168190: step: 56/77, loss: 1.3514809324988164e-06 2023-01-23 23:48:04.465015: step: 60/77, loss: 1.9591614545788616e-05 2023-01-23 23:48:05.778861: step: 64/77, loss: 0.04506542533636093 2023-01-23 23:48:07.126812: step: 68/77, loss: 4.386712680570781e-05 2023-01-23 23:48:08.400231: step: 72/77, loss: 7.815002390998416e-06 2023-01-23 23:48:09.734530: step: 76/77, loss: 3.305016434751451e-05 2023-01-23 23:48:11.031789: step: 80/77, loss: 0.0001534569018986076 2023-01-23 23:48:12.388583: step: 84/77, loss: 0.0012685380643233657 2023-01-23 23:48:13.657578: step: 88/77, loss: 7.361089160440315e-07 2023-01-23 23:48:14.979023: step: 92/77, loss: 2.9308637294889195e-06 2023-01-23 23:48:16.308728: step: 96/77, loss: 0.0008253856794908643 2023-01-23 23:48:17.572304: step: 100/77, loss: 0.00027290164143778384 2023-01-23 23:48:18.852384: step: 104/77, loss: 0.0004043147200718522 2023-01-23 23:48:20.164742: step: 108/77, loss: 0.00016128386778291315 2023-01-23 23:48:21.455474: step: 112/77, loss: 0.005868277978152037 2023-01-23 23:48:22.760735: step: 116/77, loss: 1.4603088516196294e-07 2023-01-23 23:48:24.046443: step: 120/77, loss: 0.0019091747235506773 2023-01-23 23:48:25.377476: step: 124/77, loss: 0.011760505847632885 2023-01-23 23:48:26.711276: step: 128/77, loss: 0.00010481792560312897 2023-01-23 23:48:28.029765: step: 132/77, loss: 1.921913462865632e-05 2023-01-23 23:48:29.338280: step: 136/77, loss: 2.227662662335206e-06 2023-01-23 23:48:30.643787: step: 140/77, loss: 5.334582056093495e-07 2023-01-23 23:48:32.002005: step: 144/77, loss: 3.588026402212563e-06 2023-01-23 23:48:33.319089: step: 148/77, loss: 0.00016857915034051985 2023-01-23 23:48:34.615992: step: 152/77, loss: 0.031219307333230972 2023-01-23 23:48:35.957769: step: 156/77, loss: 0.0020155508536845446 2023-01-23 23:48:37.258183: step: 160/77, loss: 1.321755553362891e-05 2023-01-23 23:48:38.528010: step: 164/77, loss: 0.018532564863562584 2023-01-23 23:48:39.858106: step: 168/77, loss: 1.5903380699455738e-05 2023-01-23 23:48:41.194111: step: 172/77, loss: 0.024859033524990082 2023-01-23 23:48:42.509943: step: 176/77, loss: 2.0681964087998495e-06 2023-01-23 23:48:43.772779: step: 180/77, loss: 2.6700056423578644e-06 2023-01-23 23:48:45.083244: step: 184/77, loss: 1.8923403786175186e-06 2023-01-23 23:48:46.388702: step: 188/77, loss: 1.3589453828899423e-06 2023-01-23 23:48:47.639077: step: 192/77, loss: 0.04262559115886688 2023-01-23 23:48:48.991325: step: 196/77, loss: 8.77968250279082e-06 2023-01-23 23:48:50.276690: step: 200/77, loss: 0.0001077373162843287 2023-01-23 23:48:51.608895: step: 204/77, loss: 0.00020571955246850848 2023-01-23 23:48:52.923122: step: 
208/77, loss: 0.0005280431942082942 2023-01-23 23:48:54.249410: step: 212/77, loss: 1.048697049554903e-05 2023-01-23 23:48:55.561875: step: 216/77, loss: 4.216946035739966e-05 2023-01-23 23:48:56.821417: step: 220/77, loss: 4.079658083355753e-06 2023-01-23 23:48:58.108762: step: 224/77, loss: 0.0004498214984778315 2023-01-23 23:48:59.419758: step: 228/77, loss: 0.0011945064179599285 2023-01-23 23:49:00.706258: step: 232/77, loss: 8.898941814550199e-06 2023-01-23 23:49:01.994267: step: 236/77, loss: 4.5448163632499927e-07 2023-01-23 23:49:03.278839: step: 240/77, loss: 7.256832645907707e-07 2023-01-23 23:49:04.582647: step: 244/77, loss: 0.009154018014669418 2023-01-23 23:49:05.890479: step: 248/77, loss: 9.619673801353201e-05 2023-01-23 23:49:07.201176: step: 252/77, loss: 0.00031977289472706616 2023-01-23 23:49:08.533520: step: 256/77, loss: 0.050523921847343445 2023-01-23 23:49:09.857749: step: 260/77, loss: 1.8461626041244017e-06 2023-01-23 23:49:11.160864: step: 264/77, loss: 9.738103835843503e-05 2023-01-23 23:49:12.438735: step: 268/77, loss: 1.4901155864777138e-08 2023-01-23 23:49:13.755981: step: 272/77, loss: 1.162021635536803e-05 2023-01-23 23:49:15.044704: step: 276/77, loss: 0.0044382489286363125 2023-01-23 23:49:16.304723: step: 280/77, loss: 2.4317955649166834e-06 2023-01-23 23:49:17.636541: step: 284/77, loss: 1.6182344779736013e-06 2023-01-23 23:49:18.977637: step: 288/77, loss: 3.710630699060857e-05 2023-01-23 23:49:20.323258: step: 292/77, loss: 2.1636074052366894e-06 2023-01-23 23:49:21.635333: step: 296/77, loss: 0.004715841729193926 2023-01-23 23:49:22.924037: step: 300/77, loss: 9.745041325004422e-07 2023-01-23 23:49:24.161847: step: 304/77, loss: 0.00031040236353874207 2023-01-23 23:49:25.450263: step: 308/77, loss: 2.852852048818022e-05 2023-01-23 23:49:26.702956: step: 312/77, loss: 0.0031233031768351793 2023-01-23 23:49:27.973708: step: 316/77, loss: 0.0001433442666893825 2023-01-23 23:49:29.293382: step: 320/77, loss: 0.006575982552021742 2023-01-23 23:49:30.611178: step: 324/77, loss: 7.856477168388665e-06 2023-01-23 23:49:31.909411: step: 328/77, loss: 0.00030488992342725396 2023-01-23 23:49:33.216083: step: 332/77, loss: 2.0770582978002494e-06 2023-01-23 23:49:34.472722: step: 336/77, loss: 1.2665948645462777e-07 2023-01-23 23:49:35.776830: step: 340/77, loss: 3.27336965710856e-06 2023-01-23 23:49:37.033422: step: 344/77, loss: 4.828689634450711e-05 2023-01-23 23:49:38.332315: step: 348/77, loss: 0.037822216749191284 2023-01-23 23:49:39.582820: step: 352/77, loss: 7.748373172944412e-05 2023-01-23 23:49:40.856710: step: 356/77, loss: 1.3292006769916043e-05 2023-01-23 23:49:42.186368: step: 360/77, loss: 0.00017053502961061895 2023-01-23 23:49:43.473038: step: 364/77, loss: 1.1916748917428777e-05 2023-01-23 23:49:44.795884: step: 368/77, loss: 0.019462842494249344 2023-01-23 23:49:46.111929: step: 372/77, loss: 4.768004146171734e-06 2023-01-23 23:49:47.397164: step: 376/77, loss: 1.578027513460256e-05 2023-01-23 23:49:48.691284: step: 380/77, loss: 0.011081244796514511 2023-01-23 23:49:50.004578: step: 384/77, loss: 0.052583497017621994 2023-01-23 23:49:51.307255: step: 388/77, loss: 3.352726878347312e-07 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 28} Test Chinese: {'template': {'p': 0.9743589743589743, 'r': 
0.6178861788617886, 'f1': 0.7562189054726368}, 'slot': {'p': 0.5609756097560976, 'r': 0.02090909090909091, 'f1': 0.040315512708150744}, 'combined': 0.030487352893725936, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04839676356008391, 'epoch': 28} Test Korean: {'template': {'p': 0.9743589743589743, 'r': 0.6178861788617886, 'f1': 0.7562189054726368}, 'slot': {'p': 0.5609756097560976, 'r': 0.02090909090909091, 'f1': 0.040315512708150744}, 'combined': 0.030487352893725936, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04839676356008391, 'epoch': 28} Test Russian: {'template': {'p': 0.9743589743589743, 'r': 0.6178861788617886, 'f1': 0.7562189054726368}, 'slot': {'p': 0.5609756097560976, 'r': 0.02090909090909091, 'f1': 0.040315512708150744}, 'combined': 0.030487352893725936, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 
'epoch': 5} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:51:29.985580: step: 4/77, loss: 3.144855872960761e-05 2023-01-23 23:51:31.277181: step: 8/77, loss: 5.751747949034325e-07 2023-01-23 23:51:32.575635: step: 12/77, loss: 8.017163054319099e-05 2023-01-23 23:51:33.862515: step: 16/77, loss: 0.022102270275354385 2023-01-23 23:51:35.133749: step: 20/77, loss: 2.7567057259147987e-07 2023-01-23 23:51:36.411349: step: 24/77, loss: 4.811163307749666e-06 2023-01-23 23:51:37.677136: step: 28/77, loss: 8.746854973651352e-07 2023-01-23 23:51:38.994423: step: 32/77, loss: 6.541539505633409e-07 2023-01-23 23:51:40.320434: step: 36/77, loss: 7.522347732447088e-05 2023-01-23 23:51:41.604671: step: 40/77, loss: 0.0008213004330173135 2023-01-23 23:51:42.873824: step: 44/77, loss: 2.7119872925140953e-07 2023-01-23 23:51:44.196142: step: 48/77, loss: 3.8976580981398e-05 2023-01-23 23:51:45.457209: step: 52/77, loss: 0.024472558870911598 2023-01-23 23:51:46.752428: step: 56/77, loss: 0.002236069878563285 2023-01-23 23:51:48.086534: step: 60/77, loss: 0.0008119684061966836 2023-01-23 23:51:49.379463: step: 64/77, loss: 0.00016473043069709092 2023-01-23 23:51:50.689650: step: 68/77, loss: 1.0659737199603114e-05 2023-01-23 23:51:51.966448: step: 72/77, loss: 6.331157783279195e-05 2023-01-23 23:51:53.303497: step: 76/77, loss: 8.398948921239935e-06 2023-01-23 23:51:54.605166: step: 80/77, loss: 1.9222457581236085e-07 2023-01-23 23:51:55.874799: step: 84/77, loss: 0.00012843337026424706 2023-01-23 23:51:57.194498: step: 88/77, loss: 2.4824919819366187e-05 2023-01-23 23:51:58.519981: step: 92/77, loss: 7.043376172077842e-06 2023-01-23 23:51:59.889297: step: 96/77, loss: 0.002021679887548089 2023-01-23 23:52:01.208906: step: 100/77, loss: 4.277397238183767e-06 2023-01-23 23:52:02.476890: step: 104/77, loss: 0.01951354369521141 2023-01-23 23:52:03.787590: step: 108/77, loss: 0.0237126424908638 2023-01-23 23:52:05.032959: step: 112/77, loss: 2.9460461519192904e-05 2023-01-23 23:52:06.330025: step: 116/77, loss: 4.008111318398733e-06 2023-01-23 23:52:07.663592: step: 120/77, loss: 0.00021640595514327288 2023-01-23 23:52:09.006727: step: 124/77, loss: 0.013219022192060947 2023-01-23 23:52:10.307241: step: 128/77, loss: 1.1595335308811627e-05 2023-01-23 23:52:11.607946: step: 132/77, loss: 0.00018398706743028015 2023-01-23 23:52:12.938268: step: 136/77, loss: 4.208624886814505e-05 2023-01-23 23:52:14.279547: step: 140/77, loss: 7.261715381901013e-06 2023-01-23 23:52:15.639876: step: 144/77, loss: 0.02468251623213291 2023-01-23 23:52:16.942300: step: 148/77, loss: 4.6508765080943704e-05 2023-01-23 23:52:18.215590: step: 152/77, loss: 1.3247890819911845e-05 2023-01-23 23:52:19.525342: step: 156/77, loss: 2.968638546008151e-05 2023-01-23 23:52:20.830657: step: 160/77, loss: 0.0024238319601863623 2023-01-23 23:52:22.126994: step: 164/77, loss: 8.205627636925783e-06 2023-01-23 23:52:23.412080: step: 168/77, loss: 0.0014939033426344395 2023-01-23 23:52:24.702332: step: 172/77, loss: 9.685743407317204e-08 2023-01-23 23:52:26.014683: step: 176/77, loss: 1.5675155964345322e-06 2023-01-23 23:52:27.313005: step: 180/77, loss: 2.3959746613400057e-06 2023-01-23 23:52:28.563200: step: 184/77, loss: 0.00026503464323468506 2023-01-23 23:52:29.866401: step: 188/77, loss: 0.00022445156355388463 
2023-01-23 23:52:31.214079: step: 192/77, loss: 7.375857649094542e-07 2023-01-23 23:52:32.540753: step: 196/77, loss: 2.0861621763401672e-08 2023-01-23 23:52:33.853936: step: 200/77, loss: 0.0003767440211959183 2023-01-23 23:52:35.089153: step: 204/77, loss: 7.617295341333374e-05 2023-01-23 23:52:36.433086: step: 208/77, loss: 0.00012021034490317106 2023-01-23 23:52:37.762354: step: 212/77, loss: 0.005869630724191666 2023-01-23 23:52:39.051711: step: 216/77, loss: 0.00020114541985094547 2023-01-23 23:52:40.360494: step: 220/77, loss: 0.004399897996336222 2023-01-23 23:52:41.671310: step: 224/77, loss: 0.0016715697711333632 2023-01-23 23:52:42.973418: step: 228/77, loss: 0.00018241455836687237 2023-01-23 23:52:44.251671: step: 232/77, loss: 0.0009146218653768301 2023-01-23 23:52:45.566630: step: 236/77, loss: 0.0018498735735192895 2023-01-23 23:52:46.790696: step: 240/77, loss: 0.00022577299387194216 2023-01-23 23:52:48.132233: step: 244/77, loss: 0.05294421315193176 2023-01-23 23:52:49.490677: step: 248/77, loss: 0.0485006682574749 2023-01-23 23:52:50.771838: step: 252/77, loss: 5.894068635825533e-06 2023-01-23 23:52:52.031118: step: 256/77, loss: 1.2963988638148294e-07 2023-01-23 23:52:53.366886: step: 260/77, loss: 5.3644139796915624e-08 2023-01-23 23:52:54.608955: step: 264/77, loss: 0.002035744721069932 2023-01-23 23:52:55.910394: step: 268/77, loss: 0.017132868990302086 2023-01-23 23:52:57.175635: step: 272/77, loss: 3.955773718189448e-05 2023-01-23 23:52:58.452992: step: 276/77, loss: 3.992898200522177e-06 2023-01-23 23:52:59.724857: step: 280/77, loss: 0.0005716330488212407 2023-01-23 23:53:00.977059: step: 284/77, loss: 4.1489121940685436e-05 2023-01-23 23:53:02.279675: step: 288/77, loss: 5.015826172893867e-05 2023-01-23 23:53:03.640915: step: 292/77, loss: 9.168793621938676e-05 2023-01-23 23:53:04.945121: step: 296/77, loss: 1.1026848767414776e-07 2023-01-23 23:53:06.269370: step: 300/77, loss: 0.002865537302568555 2023-01-23 23:53:07.521068: step: 304/77, loss: 0.0002901027328334749 2023-01-23 23:53:08.791562: step: 308/77, loss: 0.0021394614595919847 2023-01-23 23:53:10.097248: step: 312/77, loss: 4.060235369252041e-06 2023-01-23 23:53:11.444334: step: 316/77, loss: 0.0009134943829849362 2023-01-23 23:53:12.774402: step: 320/77, loss: 1.3618976026918972e-06 2023-01-23 23:53:14.090467: step: 324/77, loss: 0.00023269267694558948 2023-01-23 23:53:15.392492: step: 328/77, loss: 9.249313734471798e-05 2023-01-23 23:53:16.726161: step: 332/77, loss: 7.471351273125038e-05 2023-01-23 23:53:18.037128: step: 336/77, loss: 0.00022001775505486876 2023-01-23 23:53:19.320811: step: 340/77, loss: 0.00011256830475758761 2023-01-23 23:53:20.619092: step: 344/77, loss: 4.7683684556432127e-08 2023-01-23 23:53:21.892290: step: 348/77, loss: 9.553597010381054e-06 2023-01-23 23:53:23.190399: step: 352/77, loss: 0.002149000996723771 2023-01-23 23:53:24.519435: step: 356/77, loss: 0.00040330199408344924 2023-01-23 23:53:25.819250: step: 360/77, loss: 4.79812626963394e-07 2023-01-23 23:53:27.161999: step: 364/77, loss: 1.3858060299298813e-07 2023-01-23 23:53:28.467825: step: 368/77, loss: 0.023484427481889725 2023-01-23 23:53:29.751286: step: 372/77, loss: 0.02371416613459587 2023-01-23 23:53:31.061752: step: 376/77, loss: 9.029912462210632e-07 2023-01-23 23:53:32.371968: step: 380/77, loss: 1.163185606856132e-05 2023-01-23 23:53:33.660114: step: 384/77, loss: 6.451639819715638e-06 2023-01-23 23:53:34.951016: step: 388/77, loss: 5.81145016553819e-08 ================================================== 
Loss: 0.003
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9605263157894737, 'r': 0.5934959349593496, 'f1': 0.7336683417085428}, 'slot': {'p': 0.575, 'r': 0.02090909090909091, 'f1': 0.04035087719298246}, 'combined': 0.029604161156660497, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Korean: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.5641025641025641, 'r': 0.02, 'f1': 0.038630377524143986}, 'combined': 0.028586479367866548, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Russian: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.575, 'r': 0.02090909090909091, 'f1': 0.04035087719298246}, 'combined': 0.029859649122807017, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5}
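(The scoring dicts above follow a consistent pattern: each 'f1' is the harmonic mean of its 'p' and 'r', and 'combined' equals template f1 times slot f1; e.g. Dev Chinese at epoch 29: 0.7096774... * 0.0637168... = 0.0452183.... "Current best result" then appears to track the epoch with the highest dev combined score, which is why epoch 5 still wins at epoch 29. A minimal sketch of that arithmetic, inferred from the logged numbers rather than taken from the evaluation code:)

def f1(p, r):
    # Standard F1: harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Dev Chinese, epoch 29 (values from the log above):
template_f1 = f1(1.0, 0.55)                  # 0.7096774193548387
slot_f1 = f1(0.5, 0.034026465028355386)      # 0.06371681415929203
combined = template_f1 * slot_f1             # 0.04521838424207822

# Best-checkpoint selection, as implied by "Current best result":
# keep whichever epoch maximizes the dev combined score.
results = [
    {'combined': 0.05179909351586346, 'epoch': 5},   # epoch-5 dev score
    {'combined': 0.04521838424207822, 'epoch': 29},  # epoch-29 dev score
]
best = max(results, key=lambda r: r['combined'])
print(best)  # {'combined': 0.05179909351586346, 'epoch': 5}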