Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trans_rep.weight: torch.Size([1024, 2048]) >>> trans_rep.bias: torch.Size([1024]) >>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768]) >>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Etiplate.layers.0.weight: 
torch.Size([450, 768]) >>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6]) >>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> 
completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([4]) >>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> 
overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) n_trainable_params: 582185936, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:33:22.229175: step: 4/77, loss: 1.0566054582595825 2023-01-22 07:33:23.479926: step: 8/77, loss: 1.0537757873535156 2023-01-22 07:33:24.762582: step: 12/77, loss: 1.0559892654418945 2023-01-22 07:33:26.094148: step: 16/77, loss: 1.0583343505859375 2023-01-22 07:33:27.403936: step: 20/77, loss: 1.0633549690246582 2023-01-22 07:33:28.758651: step: 24/77, loss: 1.0405185222625732 2023-01-22 07:33:30.041712: step: 28/77, loss: 1.0492514371871948 2023-01-22 07:33:31.368421: step: 32/77, loss: 1.0514004230499268 2023-01-22 07:33:32.692741: step: 36/77, loss: 1.0344562530517578 2023-01-22 07:33:34.040854: step: 40/77, loss: 1.0398215055465698 2023-01-22 07:33:35.381811: step: 44/77, loss: 1.018258810043335 2023-01-22 07:33:36.674161: step: 48/77, loss: 1.0067038536071777 2023-01-22 07:33:38.007989: step: 52/77, loss: 1.0088309049606323 2023-01-22 07:33:39.331541: step: 56/77, loss: 0.9948985576629639 2023-01-22 07:33:40.604612: step: 60/77, loss: 0.9983891248703003 2023-01-22 07:33:41.909122: step: 64/77, loss: 0.9850597977638245 2023-01-22 07:33:43.200773: step: 68/77, loss: 0.95908522605896 2023-01-22 07:33:44.512782: step: 72/77, loss: 0.9487576484680176 2023-01-22 07:33:45.842823: step: 76/77, loss: 0.9316009879112244 2023-01-22 07:33:47.172426: step: 80/77, loss: 0.9227492213249207 2023-01-22 07:33:48.495840: step: 84/77, loss: 0.9227690696716309 2023-01-22 07:33:49.778243: step: 88/77, loss: 0.8968497514724731 2023-01-22 07:33:51.071843: step: 92/77, loss: 0.8681602478027344 2023-01-22 07:33:52.341881: step: 96/77, loss: 0.8769820928573608 2023-01-22 07:33:53.648173: step: 100/77, loss: 0.881488025188446 2023-01-22 07:33:54.896719: step: 104/77, loss: 0.8428722620010376 2023-01-22 07:33:56.240614: step: 108/77, loss: 0.7961598634719849 2023-01-22 07:33:57.527331: step: 112/77, loss: 0.8437957763671875 2023-01-22 07:33:58.854329: step: 116/77, loss: 0.7791297435760498 2023-01-22 07:34:00.241821: step: 120/77, loss: 0.7618874907493591 2023-01-22 07:34:01.557732: step: 124/77, loss: 0.7741999626159668 2023-01-22 07:34:02.821588: step: 128/77, loss: 0.7475306987762451 2023-01-22 07:34:04.118590: step: 132/77, loss: 0.7159825563430786 2023-01-22 07:34:05.392920: step: 136/77, loss: 0.6631938815116882 2023-01-22 07:34:06.752337: step: 140/77, loss: 0.7139487266540527 2023-01-22 07:34:08.057723: step: 144/77, loss: 0.6545838713645935 2023-01-22 07:34:09.336377: step: 148/77, loss: 0.6513230800628662 2023-01-22 07:34:10.688407: step: 152/77, loss: 0.6200884580612183 2023-01-22 07:34:12.034639: step: 156/77, loss: 0.5634316205978394 2023-01-22 07:34:13.351085: step: 160/77, loss: 0.6044880747795105 2023-01-22 07:34:14.676877: step: 164/77, loss: 0.5412224531173706 2023-01-22 07:34:15.970272: step: 168/77, loss: 0.5273445844650269 2023-01-22 07:34:17.289100: step: 172/77, loss: 0.4999113082885742 2023-01-22 07:34:18.581586: step: 176/77, loss: 0.4900851547718048 2023-01-22 07:34:19.948784: step: 180/77, loss: 0.44046568870544434 2023-01-22 07:34:21.234963: step: 184/77, loss: 0.40524929761886597 2023-01-22 07:34:22.570119: step: 188/77, loss: 0.35899320244789124 2023-01-22 07:34:23.871435: step: 192/77, loss: 0.4494326412677765 2023-01-22 07:34:25.179732: step: 196/77, loss: 0.3220992684364319 2023-01-22 07:34:26.484472: step: 200/77, loss: 0.3402412533760071 2023-01-22 07:34:27.786110: step: 204/77, loss: 0.4437170624732971 2023-01-22 07:34:29.128496: step: 
208/77, loss: 0.24657674133777618 2023-01-22 07:34:30.453341: step: 212/77, loss: 0.2253047227859497 2023-01-22 07:34:31.746059: step: 216/77, loss: 0.2559199631214142 2023-01-22 07:34:33.035723: step: 220/77, loss: 0.2264961451292038 2023-01-22 07:34:34.368763: step: 224/77, loss: 0.23335587978363037 2023-01-22 07:34:35.688276: step: 228/77, loss: 0.23723739385604858 2023-01-22 07:34:37.002135: step: 232/77, loss: 0.20965644717216492 2023-01-22 07:34:38.296226: step: 236/77, loss: 0.27283912897109985 2023-01-22 07:34:39.566912: step: 240/77, loss: 0.119234099984169 2023-01-22 07:34:40.864159: step: 244/77, loss: 0.3087769150733948 2023-01-22 07:34:42.183419: step: 248/77, loss: 0.1239112913608551 2023-01-22 07:34:43.536528: step: 252/77, loss: 0.11339374631643295 2023-01-22 07:34:44.864010: step: 256/77, loss: 0.15533994138240814 2023-01-22 07:34:46.204284: step: 260/77, loss: 0.11168913543224335 2023-01-22 07:34:47.473415: step: 264/77, loss: 0.08665478974580765 2023-01-22 07:34:48.738780: step: 268/77, loss: 0.06983567774295807 2023-01-22 07:34:50.069275: step: 272/77, loss: 0.32832252979278564 2023-01-22 07:34:51.386302: step: 276/77, loss: 0.1965167075395584 2023-01-22 07:34:52.670602: step: 280/77, loss: 0.08166693896055222 2023-01-22 07:34:53.990948: step: 284/77, loss: 0.04782284051179886 2023-01-22 07:34:55.282648: step: 288/77, loss: 0.34814804792404175 2023-01-22 07:34:56.569931: step: 292/77, loss: 0.043856192380189896 2023-01-22 07:34:57.901184: step: 296/77, loss: 0.09605462104082108 2023-01-22 07:34:59.166292: step: 300/77, loss: 0.24693399667739868 2023-01-22 07:35:00.457472: step: 304/77, loss: 0.06090881675481796 2023-01-22 07:35:01.743967: step: 308/77, loss: 0.08499729633331299 2023-01-22 07:35:03.027593: step: 312/77, loss: 0.027350492775440216 2023-01-22 07:35:04.380580: step: 316/77, loss: 0.05126441642642021 2023-01-22 07:35:05.733690: step: 320/77, loss: 0.10123664885759354 2023-01-22 07:35:07.059188: step: 324/77, loss: 0.06947410106658936 2023-01-22 07:35:08.385809: step: 328/77, loss: 0.10531488060951233 2023-01-22 07:35:09.676749: step: 332/77, loss: 0.27845829725265503 2023-01-22 07:35:10.971656: step: 336/77, loss: 0.15196259319782257 2023-01-22 07:35:12.263521: step: 340/77, loss: 0.24861951172351837 2023-01-22 07:35:13.554456: step: 344/77, loss: 0.05084856599569321 2023-01-22 07:35:14.877071: step: 348/77, loss: 0.04008103907108307 2023-01-22 07:35:16.203889: step: 352/77, loss: 0.09764869511127472 2023-01-22 07:35:17.557428: step: 356/77, loss: 0.07287060469388962 2023-01-22 07:35:18.831970: step: 360/77, loss: 0.07838761061429977 2023-01-22 07:35:20.159474: step: 364/77, loss: 0.10594262182712555 2023-01-22 07:35:21.495718: step: 368/77, loss: 0.18141813576221466 2023-01-22 07:35:22.854035: step: 372/77, loss: 0.04820623993873596 2023-01-22 07:35:24.136364: step: 376/77, loss: 0.0767727792263031 2023-01-22 07:35:25.460615: step: 380/77, loss: 0.14602497220039368 2023-01-22 07:35:26.791115: step: 384/77, loss: 0.060212016105651855 2023-01-22 07:35:28.129428: step: 388/77, loss: 0.12397897988557816 ================================================== Loss: 0.484 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 
'epoch': 0} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:37:39.686271: step: 4/77, loss: 0.08628113567829132 2023-01-22 07:37:41.008532: step: 8/77, loss: 0.19332587718963623 2023-01-22 07:37:42.297351: step: 12/77, loss: 0.12840916216373444 2023-01-22 07:37:43.587205: step: 16/77, loss: 0.09008502960205078 2023-01-22 07:37:44.850834: step: 20/77, loss: 0.11896339058876038 2023-01-22 07:37:46.133666: step: 24/77, loss: 0.0842742994427681 2023-01-22 07:37:47.411921: step: 28/77, loss: 0.07516887784004211 2023-01-22 07:37:48.740161: step: 32/77, loss: 0.17610064148902893 2023-01-22 07:37:50.100146: step: 36/77, loss: 0.09164276719093323 2023-01-22 07:37:51.387110: step: 40/77, loss: 0.23728494346141815 2023-01-22 07:37:52.691439: step: 44/77, loss: 0.05573999136686325 2023-01-22 07:37:53.986592: step: 48/77, loss: 0.05871153622865677 2023-01-22 07:37:55.252902: step: 52/77, loss: 0.31232115626335144 2023-01-22 07:37:56.552329: step: 56/77, loss: 0.10275619477033615 2023-01-22 07:37:57.840255: step: 60/77, loss: 0.0885278731584549 2023-01-22 07:37:59.147052: step: 64/77, loss: 0.1984003782272339 2023-01-22 07:38:00.458840: step: 68/77, 
loss: 0.33665239810943604 2023-01-22 07:38:01.816312: step: 72/77, loss: 0.2795385718345642 2023-01-22 07:38:03.108199: step: 76/77, loss: 0.1644812524318695 2023-01-22 07:38:04.399776: step: 80/77, loss: 0.22791095077991486 2023-01-22 07:38:05.698699: step: 84/77, loss: 0.07969798147678375 2023-01-22 07:38:07.018152: step: 88/77, loss: 0.07784508913755417 2023-01-22 07:38:08.278668: step: 92/77, loss: 0.07529832422733307 2023-01-22 07:38:09.565886: step: 96/77, loss: 0.04053203761577606 2023-01-22 07:38:10.863870: step: 100/77, loss: 0.04332693666219711 2023-01-22 07:38:12.218227: step: 104/77, loss: 0.12364348769187927 2023-01-22 07:38:13.556218: step: 108/77, loss: 0.11022159457206726 2023-01-22 07:38:14.881765: step: 112/77, loss: 0.041119933128356934 2023-01-22 07:38:16.181083: step: 116/77, loss: 0.09332107752561569 2023-01-22 07:38:17.515181: step: 120/77, loss: 0.08409717679023743 2023-01-22 07:38:18.846746: step: 124/77, loss: 0.09579865634441376 2023-01-22 07:38:20.158319: step: 128/77, loss: 0.2133885622024536 2023-01-22 07:38:21.484803: step: 132/77, loss: 0.10296496748924255 2023-01-22 07:38:22.799268: step: 136/77, loss: 0.13981102406978607 2023-01-22 07:38:24.110443: step: 140/77, loss: 0.27962273359298706 2023-01-22 07:38:25.444446: step: 144/77, loss: 0.1111413836479187 2023-01-22 07:38:26.734941: step: 148/77, loss: 0.08607277274131775 2023-01-22 07:38:27.995742: step: 152/77, loss: 0.22137989103794098 2023-01-22 07:38:29.324140: step: 156/77, loss: 0.11324742436408997 2023-01-22 07:38:30.662633: step: 160/77, loss: 0.16135777533054352 2023-01-22 07:38:31.959881: step: 164/77, loss: 0.10355418920516968 2023-01-22 07:38:33.264106: step: 168/77, loss: 0.20681661367416382 2023-01-22 07:38:34.629861: step: 172/77, loss: 0.09634518623352051 2023-01-22 07:38:35.930575: step: 176/77, loss: 0.04597388207912445 2023-01-22 07:38:37.245088: step: 180/77, loss: 0.0687372162938118 2023-01-22 07:38:38.568697: step: 184/77, loss: 0.08381231129169464 2023-01-22 07:38:39.892764: step: 188/77, loss: 0.055870767682790756 2023-01-22 07:38:41.147322: step: 192/77, loss: 0.11192315816879272 2023-01-22 07:38:42.512658: step: 196/77, loss: 0.0834076777100563 2023-01-22 07:38:43.810720: step: 200/77, loss: 0.06754839420318604 2023-01-22 07:38:45.056188: step: 204/77, loss: 0.06366132944822311 2023-01-22 07:38:46.372195: step: 208/77, loss: 0.08090916275978088 2023-01-22 07:38:47.690094: step: 212/77, loss: 0.051006052643060684 2023-01-22 07:38:49.042084: step: 216/77, loss: 0.048985555768013 2023-01-22 07:38:50.355658: step: 220/77, loss: 0.04030236601829529 2023-01-22 07:38:51.596969: step: 224/77, loss: 0.10942952334880829 2023-01-22 07:38:52.952180: step: 228/77, loss: 0.05575104430317879 2023-01-22 07:38:54.226115: step: 232/77, loss: 0.057492777705192566 2023-01-22 07:38:55.563642: step: 236/77, loss: 0.12040974199771881 2023-01-22 07:38:56.913443: step: 240/77, loss: 0.10147053003311157 2023-01-22 07:38:58.167404: step: 244/77, loss: 0.15351510047912598 2023-01-22 07:38:59.486532: step: 248/77, loss: 0.2754939794540405 2023-01-22 07:39:00.791290: step: 252/77, loss: 0.08110547065734863 2023-01-22 07:39:02.089951: step: 256/77, loss: 0.11938928067684174 2023-01-22 07:39:03.464232: step: 260/77, loss: 0.0844007134437561 2023-01-22 07:39:04.813481: step: 264/77, loss: 0.20228734612464905 2023-01-22 07:39:06.132935: step: 268/77, loss: 0.19483360648155212 2023-01-22 07:39:07.426600: step: 272/77, loss: 0.09005934000015259 2023-01-22 07:39:08.738638: step: 276/77, loss: 0.10872792452573776 
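
The loss entries in this stream are reported every 4 steps, which lines up with the --accumulate_step 4 flag in the command; a plausible reading is that the script accumulates gradients over 4 batches of 10 documents before each optimizer update and logs the loss at that point. The snippet below is only a minimal sketch of such a loop under that assumption, not the actual train.py code: model, loader, optimizer, and the fixed 77 in the step counter are placeholder names used to illustrate the logging format.

    import datetime

    def train_one_epoch(model, loader, optimizer, accumulate_step=4, batches_per_epoch=77):
        # Sketch of a gradient-accumulation loop that would emit lines like
        # "2023-01-22 07:33:22.229175: step: 4/77, loss: 1.0566054582595825"
        model.train()
        optimizer.zero_grad()
        for i, batch in enumerate(loader, start=1):
            loss = model(**batch)                # assumed to return a scalar loss tensor
            (loss / accumulate_step).backward()  # scale so the update matches the full effective batch
            if i % accumulate_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                print(f"{datetime.datetime.now()}: step: {i}/{batches_per_epoch}, loss: {loss.item()}")
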
2023-01-22 07:39:10.026493: step: 280/77, loss: 0.035562627017498016 2023-01-22 07:39:11.331006: step: 284/77, loss: 0.06048338860273361 2023-01-22 07:39:12.638084: step: 288/77, loss: 0.02235712856054306 2023-01-22 07:39:13.958900: step: 292/77, loss: 0.06341751664876938 2023-01-22 07:39:15.286400: step: 296/77, loss: 0.019283972680568695 2023-01-22 07:39:16.591457: step: 300/77, loss: 0.14400897920131683 2023-01-22 07:39:17.853310: step: 304/77, loss: 0.24055436253547668 2023-01-22 07:39:19.194936: step: 308/77, loss: 0.14787493646144867 2023-01-22 07:39:20.517173: step: 312/77, loss: 0.18950051069259644 2023-01-22 07:39:21.776418: step: 316/77, loss: 0.19316518306732178 2023-01-22 07:39:23.145252: step: 320/77, loss: 0.03579552844166756 2023-01-22 07:39:24.459262: step: 324/77, loss: 0.17878445982933044 2023-01-22 07:39:25.763477: step: 328/77, loss: 0.09883566200733185 2023-01-22 07:39:27.059928: step: 332/77, loss: 0.07325631380081177 2023-01-22 07:39:28.385855: step: 336/77, loss: 0.10381695628166199 2023-01-22 07:39:29.651717: step: 340/77, loss: 0.03180614113807678 2023-01-22 07:39:30.998383: step: 344/77, loss: 0.0712357610464096 2023-01-22 07:39:32.281769: step: 348/77, loss: 0.1428757905960083 2023-01-22 07:39:33.568838: step: 352/77, loss: 0.048817943781614304 2023-01-22 07:39:34.873929: step: 356/77, loss: 0.03985461965203285 2023-01-22 07:39:36.187032: step: 360/77, loss: 0.08410288393497467 2023-01-22 07:39:37.555245: step: 364/77, loss: 0.18013249337673187 2023-01-22 07:39:38.845402: step: 368/77, loss: 0.08539636433124542 2023-01-22 07:39:40.204030: step: 372/77, loss: 0.08473024517297745 2023-01-22 07:39:41.483433: step: 376/77, loss: 0.06411333382129669 2023-01-22 07:39:42.799211: step: 380/77, loss: 0.2369568943977356 2023-01-22 07:39:44.116145: step: 384/77, loss: 0.18589796125888824 2023-01-22 07:39:45.398268: step: 388/77, loss: 0.04241977632045746 ================================================== Loss: 0.116 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 
'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:41:27.710931: step: 4/77, loss: 0.05408244580030441 2023-01-22 07:41:29.026328: step: 8/77, loss: 0.08903515338897705 2023-01-22 07:41:30.348383: step: 12/77, loss: 0.10017208009958267 2023-01-22 07:41:31.673704: step: 16/77, loss: 0.2349729984998703 2023-01-22 07:41:32.984536: step: 20/77, loss: 0.09357035160064697 2023-01-22 07:41:34.299344: step: 24/77, loss: 0.05663056671619415 2023-01-22 07:41:35.594985: step: 28/77, loss: 0.027106977999210358 2023-01-22 07:41:36.877699: step: 32/77, loss: 0.051700297743082047 2023-01-22 07:41:38.173387: step: 36/77, loss: 0.16336293518543243 2023-01-22 07:41:39.501520: step: 40/77, loss: 0.1838156133890152 2023-01-22 07:41:40.810189: step: 44/77, loss: 0.08277752995491028 2023-01-22 07:41:42.153020: step: 48/77, loss: 0.04305151477456093 2023-01-22 07:41:43.441259: step: 52/77, loss: 0.08392839878797531 2023-01-22 07:41:44.726082: step: 56/77, loss: 0.12962347269058228 2023-01-22 07:41:46.028750: step: 60/77, loss: 0.013288728892803192 2023-01-22 07:41:47.301493: step: 64/77, loss: 0.08217846602201462 2023-01-22 07:41:48.586382: step: 68/77, loss: 0.02144557423889637 2023-01-22 07:41:49.935903: step: 72/77, loss: 0.027972429990768433 2023-01-22 07:41:51.201777: step: 76/77, loss: 0.058149661868810654 2023-01-22 07:41:52.439751: step: 80/77, loss: 0.07532479614019394 2023-01-22 07:41:53.717962: step: 84/77, loss: 0.03448043763637543 2023-01-22 07:41:55.018372: step: 88/77, loss: 0.03737421706318855 2023-01-22 07:41:56.336674: step: 92/77, loss: 0.041130442172288895 2023-01-22 07:41:57.672324: step: 96/77, loss: 0.07225271314382553 2023-01-22 07:41:58.988383: step: 100/77, loss: 0.0408330075442791 2023-01-22 07:42:00.335404: step: 104/77, loss: 0.04161320626735687 2023-01-22 07:42:01.630762: step: 108/77, loss: 0.07850818336009979 2023-01-22 07:42:02.934080: step: 112/77, loss: 0.08138226717710495 2023-01-22 07:42:04.284929: step: 116/77, loss: 0.10798808187246323 2023-01-22 07:42:05.628493: step: 120/77, loss: 0.10373760759830475 2023-01-22 07:42:06.983531: step: 124/77, loss: 0.033469058573246 2023-01-22 07:42:08.308336: step: 128/77, loss: 0.0710071548819542 2023-01-22 07:42:09.634925: step: 132/77, loss: 0.023496082052588463 2023-01-22 07:42:10.928568: step: 136/77, loss: 0.05800343304872513 2023-01-22 07:42:12.264715: step: 140/77, loss: 0.06916903704404831 2023-01-22 
07:42:13.561960: step: 144/77, loss: 0.10620550066232681 2023-01-22 07:42:14.846316: step: 148/77, loss: 0.038168005645275116 2023-01-22 07:42:16.115513: step: 152/77, loss: 0.028475288301706314 2023-01-22 07:42:17.419961: step: 156/77, loss: 0.0343252494931221 2023-01-22 07:42:18.707954: step: 160/77, loss: 0.06465528160333633 2023-01-22 07:42:19.987537: step: 164/77, loss: 0.015443643555045128 2023-01-22 07:42:21.318602: step: 168/77, loss: 0.024469029158353806 2023-01-22 07:42:22.623324: step: 172/77, loss: 0.0772266834974289 2023-01-22 07:42:23.956432: step: 176/77, loss: 0.07612720876932144 2023-01-22 07:42:25.280091: step: 180/77, loss: 0.01303707342594862 2023-01-22 07:42:26.603292: step: 184/77, loss: 0.028561269864439964 2023-01-22 07:42:27.939870: step: 188/77, loss: 0.03664770722389221 2023-01-22 07:42:29.274722: step: 192/77, loss: 0.009424678049981594 2023-01-22 07:42:30.582797: step: 196/77, loss: 0.09547540545463562 2023-01-22 07:42:31.811679: step: 200/77, loss: 0.11896775662899017 2023-01-22 07:42:33.119278: step: 204/77, loss: 0.09404917806386948 2023-01-22 07:42:34.480248: step: 208/77, loss: 0.08699438720941544 2023-01-22 07:42:35.750384: step: 212/77, loss: 0.028635360300540924 2023-01-22 07:42:37.044346: step: 216/77, loss: 0.017172960564494133 2023-01-22 07:42:38.374292: step: 220/77, loss: 0.06895173341035843 2023-01-22 07:42:39.702593: step: 224/77, loss: 0.10485464334487915 2023-01-22 07:42:41.001528: step: 228/77, loss: 0.08182030916213989 2023-01-22 07:42:42.342545: step: 232/77, loss: 0.021310996264219284 2023-01-22 07:42:43.618978: step: 236/77, loss: 0.05425971746444702 2023-01-22 07:42:44.930882: step: 240/77, loss: 0.05239848420023918 2023-01-22 07:42:46.299441: step: 244/77, loss: 0.01816706918179989 2023-01-22 07:42:47.625674: step: 248/77, loss: 0.016700895503163338 2023-01-22 07:42:48.924584: step: 252/77, loss: 0.05829840153455734 2023-01-22 07:42:50.216610: step: 256/77, loss: 0.01977475732564926 2023-01-22 07:42:51.525789: step: 260/77, loss: 0.01967601850628853 2023-01-22 07:42:52.820009: step: 264/77, loss: 0.11919214576482773 2023-01-22 07:42:54.136198: step: 268/77, loss: 0.04131526127457619 2023-01-22 07:42:55.440907: step: 272/77, loss: 0.006939525716006756 2023-01-22 07:42:56.783912: step: 276/77, loss: 0.0067922016605734825 2023-01-22 07:42:58.096840: step: 280/77, loss: 0.04663277789950371 2023-01-22 07:42:59.334063: step: 284/77, loss: 0.035179682075977325 2023-01-22 07:43:00.641917: step: 288/77, loss: 0.15610793232917786 2023-01-22 07:43:01.923705: step: 292/77, loss: 0.0425829254090786 2023-01-22 07:43:03.216023: step: 296/77, loss: 0.013804452493786812 2023-01-22 07:43:04.501576: step: 300/77, loss: 0.02332937717437744 2023-01-22 07:43:05.787534: step: 304/77, loss: 0.028147635981440544 2023-01-22 07:43:07.100976: step: 308/77, loss: 0.012575867585837841 2023-01-22 07:43:08.455083: step: 312/77, loss: 0.010541887953877449 2023-01-22 07:43:09.760561: step: 316/77, loss: 0.046181872487068176 2023-01-22 07:43:11.068809: step: 320/77, loss: 0.1516311913728714 2023-01-22 07:43:12.372303: step: 324/77, loss: 0.059479713439941406 2023-01-22 07:43:13.676183: step: 328/77, loss: 0.038284484297037125 2023-01-22 07:43:15.025585: step: 332/77, loss: 0.014213360846042633 2023-01-22 07:43:16.339885: step: 336/77, loss: 0.04028288275003433 2023-01-22 07:43:17.626901: step: 340/77, loss: 0.018666526302695274 2023-01-22 07:43:18.968910: step: 344/77, loss: 0.03541828691959381 2023-01-22 07:43:20.274460: step: 348/77, loss: 0.02697652578353882 2023-01-22 
07:43:21.536689: step: 352/77, loss: 0.04431845247745514 2023-01-22 07:43:22.844845: step: 356/77, loss: 0.020619817078113556 2023-01-22 07:43:24.139323: step: 360/77, loss: 0.05683548003435135 2023-01-22 07:43:25.397037: step: 364/77, loss: 0.026009604334831238 2023-01-22 07:43:26.712190: step: 368/77, loss: 0.022610411047935486 2023-01-22 07:43:27.975218: step: 372/77, loss: 0.01979699730873108 2023-01-22 07:43:29.260101: step: 376/77, loss: 0.016157492995262146 2023-01-22 07:43:30.593057: step: 380/77, loss: 0.033022940158843994 2023-01-22 07:43:31.880707: step: 384/77, loss: 0.03987208753824234 2023-01-22 07:43:33.266635: step: 388/77, loss: 0.028012875467538834 ================================================== Loss: 0.055 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Russian: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
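
Note on the evaluation dicts printed above: each language gets template and slot precision/recall/F1 plus a 'combined' value. The printed numbers are consistent with the usual F1 = 2pr/(p+r) and with combined = template_f1 * slot_f1 (e.g. epoch-2 Dev Chinese: 0.7368421... * 0.0702987... = 0.0517990...). The snippet below is a minimal sketch of that bookkeeping, plus a per-language best-checkpoint tracker of the kind suggested by the "New best ... model" / "Current best result" lines. The helper names and the selection criterion (dev 'combined') are illustrative assumptions, not taken from train.py; the log itself does not reveal the exact rule used to declare a new best (e.g. Russian is updated at epoch 3 even though its dev 'combined' drops slightly).

# Minimal sketch (not from train.py): reproduce the scoring fields seen in this log.
# Assumptions: 'f1' is the harmonic mean of p and r, and 'combined' is
# template_f1 * slot_f1 -- both consistent with the printed numbers.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall (0 when both are 0)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def score_entry(template_p, template_r, slot_p, slot_r, epoch):
    template_f1 = f1(template_p, template_r)
    slot_f1 = f1(slot_p, slot_r)
    return {
        "template": {"p": template_p, "r": template_r, "f1": template_f1},
        "slot": {"p": slot_p, "r": slot_r, "f1": slot_f1},
        "combined": template_f1 * slot_f1,  # e.g. 0.7368421... * 0.0702987... = 0.0517990...
        "epoch": epoch,
    }

# Hypothetical per-language best tracker, keyed here on the dev 'combined' score;
# the actual criterion used by train.py is not visible in this log.
best = {}  # language -> {"dev": ..., "test": ..., "sample": ...}

def maybe_update_best(language, dev, test, sample):
    prev = best.get(language)
    if prev is None or dev["combined"] > prev["dev"]["combined"]:
        best[language] = {"dev": dev, "test": test, "sample": sample}
        print(f"New best {language} model...")

# Example with the epoch-2 Dev Chinese values from this log:
dev_zh = score_entry(1.0, 0.5833333333333334, 0.5, 0.03780718336483932, epoch=2)
# dev_zh["combined"] -> 0.05179909351586346, as printed above.
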
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:45:42.916627: step: 4/77, loss: 0.019014548510313034 2023-01-22 07:45:44.167417: step: 8/77, loss: 0.0735415667295456 2023-01-22 07:45:45.473324: step: 12/77, loss: 0.04280354827642441 2023-01-22 07:45:46.801108: step: 16/77, loss: 0.029063032940030098 2023-01-22 07:45:48.073991: step: 20/77, loss: 0.010564737021923065 2023-01-22 07:45:49.423788: step: 24/77, loss: 0.021256964653730392 2023-01-22 07:45:50.746440: step: 28/77, loss: 0.030867785215377808 2023-01-22 07:45:52.050412: step: 32/77, loss: 0.026070473715662956 2023-01-22 07:45:53.323682: step: 36/77, loss: 0.005676222033798695 2023-01-22 07:45:54.622664: step: 40/77, loss: 0.0662388727068901 2023-01-22 07:45:55.893091: step: 44/77, loss: 0.01055849902331829 2023-01-22 07:45:57.124538: step: 48/77, loss: 0.056679219007492065 2023-01-22 07:45:58.404760: step: 52/77, loss: 0.00822590570896864 2023-01-22 07:45:59.728175: step: 56/77, loss: 0.10689514875411987 2023-01-22 07:46:01.109741: step: 60/77, loss: 0.02256307564675808 2023-01-22 07:46:02.404436: step: 64/77, loss: 0.07043624669313431 2023-01-22 07:46:03.741072: step: 68/77, loss: 0.04915996640920639 2023-01-22 07:46:05.047006: step: 72/77, loss: 0.048030249774456024 2023-01-22 07:46:06.379974: step: 76/77, loss: 0.0073328884318470955 2023-01-22 07:46:07.639026: step: 80/77, loss: 0.017796359956264496 2023-01-22 07:46:08.969944: step: 84/77, loss: 
0.00830540619790554 2023-01-22 07:46:10.315674: step: 88/77, loss: 0.04534914344549179 2023-01-22 07:46:11.616609: step: 92/77, loss: 0.0539388582110405 2023-01-22 07:46:12.955788: step: 96/77, loss: 0.03533366695046425 2023-01-22 07:46:14.243985: step: 100/77, loss: 0.02420997992157936 2023-01-22 07:46:15.569685: step: 104/77, loss: 0.00585087900981307 2023-01-22 07:46:16.876051: step: 108/77, loss: 0.014047197997570038 2023-01-22 07:46:18.168827: step: 112/77, loss: 0.0512496754527092 2023-01-22 07:46:19.509939: step: 116/77, loss: 0.049538418650627136 2023-01-22 07:46:20.802538: step: 120/77, loss: 0.03853524476289749 2023-01-22 07:46:22.105842: step: 124/77, loss: 0.04238808527588844 2023-01-22 07:46:23.428774: step: 128/77, loss: 0.05831409990787506 2023-01-22 07:46:24.749329: step: 132/77, loss: 0.06485219299793243 2023-01-22 07:46:26.035577: step: 136/77, loss: 0.04830478876829147 2023-01-22 07:46:27.383758: step: 140/77, loss: 0.016403838992118835 2023-01-22 07:46:28.684804: step: 144/77, loss: 0.022437868639826775 2023-01-22 07:46:29.975845: step: 148/77, loss: 0.13560843467712402 2023-01-22 07:46:31.295621: step: 152/77, loss: 0.009262963198125362 2023-01-22 07:46:32.626491: step: 156/77, loss: 0.06167292222380638 2023-01-22 07:46:33.950459: step: 160/77, loss: 0.053442005068063736 2023-01-22 07:46:35.264305: step: 164/77, loss: 0.05808179825544357 2023-01-22 07:46:36.550301: step: 168/77, loss: 0.0050662122666835785 2023-01-22 07:46:37.872851: step: 172/77, loss: 0.01068087387830019 2023-01-22 07:46:39.198678: step: 176/77, loss: 0.055597029626369476 2023-01-22 07:46:40.545952: step: 180/77, loss: 0.017954226583242416 2023-01-22 07:46:41.882185: step: 184/77, loss: 0.024529630318284035 2023-01-22 07:46:43.212025: step: 188/77, loss: 0.015078309923410416 2023-01-22 07:46:44.514103: step: 192/77, loss: 0.061848364770412445 2023-01-22 07:46:45.856113: step: 196/77, loss: 0.01075062993913889 2023-01-22 07:46:47.182216: step: 200/77, loss: 0.005327839404344559 2023-01-22 07:46:48.495833: step: 204/77, loss: 0.02629464492201805 2023-01-22 07:46:49.792820: step: 208/77, loss: 0.011574016883969307 2023-01-22 07:46:51.106024: step: 212/77, loss: 0.012719389982521534 2023-01-22 07:46:52.419038: step: 216/77, loss: 0.007977721281349659 2023-01-22 07:46:53.745628: step: 220/77, loss: 0.006969841662794352 2023-01-22 07:46:55.045791: step: 224/77, loss: 0.013916095718741417 2023-01-22 07:46:56.343221: step: 228/77, loss: 0.015030771493911743 2023-01-22 07:46:57.653284: step: 232/77, loss: 0.031597938388586044 2023-01-22 07:46:58.963118: step: 236/77, loss: 0.011530094780027866 2023-01-22 07:47:00.261620: step: 240/77, loss: 0.030616004019975662 2023-01-22 07:47:01.539752: step: 244/77, loss: 0.006037105806171894 2023-01-22 07:47:02.794755: step: 248/77, loss: 0.09851083904504776 2023-01-22 07:47:04.112556: step: 252/77, loss: 0.10384593158960342 2023-01-22 07:47:05.396564: step: 256/77, loss: 0.009898346848785877 2023-01-22 07:47:06.685613: step: 260/77, loss: 0.04109061509370804 2023-01-22 07:47:08.002856: step: 264/77, loss: 0.0414285734295845 2023-01-22 07:47:09.296287: step: 268/77, loss: 0.021689075976610184 2023-01-22 07:47:10.585614: step: 272/77, loss: 0.034337542951107025 2023-01-22 07:47:11.892150: step: 276/77, loss: 0.004574917256832123 2023-01-22 07:47:13.209018: step: 280/77, loss: 0.006308156065642834 2023-01-22 07:47:14.503070: step: 284/77, loss: 0.016056323423981667 2023-01-22 07:47:15.787209: step: 288/77, loss: 0.053165268152952194 2023-01-22 07:47:17.068977: step: 292/77, 
loss: 0.014298057183623314 2023-01-22 07:47:18.355265: step: 296/77, loss: 0.005691438913345337 2023-01-22 07:47:19.674137: step: 300/77, loss: 0.4969761371612549 2023-01-22 07:47:20.962989: step: 304/77, loss: 0.011333977803587914 2023-01-22 07:47:22.287815: step: 308/77, loss: 0.06957821547985077 2023-01-22 07:47:23.590596: step: 312/77, loss: 0.06074652075767517 2023-01-22 07:47:24.861598: step: 316/77, loss: 0.03135789930820465 2023-01-22 07:47:26.171069: step: 320/77, loss: 0.05328745394945145 2023-01-22 07:47:27.453938: step: 324/77, loss: 0.033600203692913055 2023-01-22 07:47:28.729704: step: 328/77, loss: 0.008021079003810883 2023-01-22 07:47:30.039384: step: 332/77, loss: 0.048412859439849854 2023-01-22 07:47:31.362970: step: 336/77, loss: 0.004309589043259621 2023-01-22 07:47:32.709398: step: 340/77, loss: 0.022743750363588333 2023-01-22 07:47:34.031045: step: 344/77, loss: 0.005848175846040249 2023-01-22 07:47:35.373678: step: 348/77, loss: 0.05571676418185234 2023-01-22 07:47:36.617288: step: 352/77, loss: 0.016229940578341484 2023-01-22 07:47:37.974470: step: 356/77, loss: 0.056965842843055725 2023-01-22 07:47:39.269856: step: 360/77, loss: 0.0034566251561045647 2023-01-22 07:47:40.574410: step: 364/77, loss: 0.015656542032957077 2023-01-22 07:47:41.903386: step: 368/77, loss: 0.023984838277101517 2023-01-22 07:47:43.218603: step: 372/77, loss: 0.03030308522284031 2023-01-22 07:47:44.527739: step: 376/77, loss: 0.01274532824754715 2023-01-22 07:47:45.791611: step: 380/77, loss: 0.01196263451129198 2023-01-22 07:47:47.059240: step: 384/77, loss: 0.044877029955387115 2023-01-22 07:47:48.413103: step: 388/77, loss: 0.0034370715729892254 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 3} Test Chinese: {'template': {'p': 0.9571428571428572, 'r': 0.5, 'f1': 0.6568627450980392}, 'slot': {'p': 0.6153846153846154, 'r': 0.014375561545372867, 'f1': 0.028094820017559263}, 'combined': 0.01845444059976932, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 3} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.5074626865671642, 'f1': 0.6634146341463415}, 'slot': {'p': 0.6296296296296297, 'r': 0.015274034141958671, 'f1': 0.02982456140350877}, 'combined': 0.01978605049208387, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 3} Test Russian: {'template': {'p': 0.9577464788732394, 'r': 0.5074626865671642, 'f1': 0.6634146341463415}, 'slot': {'p': 0.5769230769230769, 'r': 0.013477088948787063, 'f1': 0.02633889376646181}, 'combined': 0.017473607571896616, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 
0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 3} Test for Russian: {'template': {'p': 0.9577464788732394, 'r': 0.5074626865671642, 'f1': 0.6634146341463415}, 'slot': {'p': 0.5769230769230769, 'r': 0.013477088948787063, 'f1': 0.02633889376646181}, 'combined': 0.017473607571896616, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:49:37.686335: step: 4/77, loss: 0.24395133554935455 2023-01-22 07:49:38.949076: step: 8/77, loss: 0.042564891278743744 2023-01-22 07:49:40.252032: step: 12/77, loss: 0.009077097289264202 2023-01-22 07:49:41.550107: step: 16/77, loss: 0.038285721093416214 2023-01-22 07:49:42.872905: step: 20/77, loss: 0.0023387169931083918 2023-01-22 07:49:44.198771: step: 24/77, loss: 0.06897217780351639 2023-01-22 07:49:45.469311: step: 28/77, loss: 0.002033111173659563 2023-01-22 07:49:46.795672: step: 32/77, loss: 0.0338224321603775 2023-01-22 07:49:48.084569: step: 36/77, loss: 0.05583617091178894 2023-01-22 07:49:49.399213: step: 40/77, loss: 0.0054557062685489655 2023-01-22 07:49:50.722444: step: 44/77, loss: 0.06645769625902176 2023-01-22 07:49:52.029625: step: 48/77, loss: 0.03576688468456268 2023-01-22 07:49:53.386842: step: 52/77, loss: 0.05891217663884163 2023-01-22 07:49:54.715347: step: 56/77, loss: 0.010125461965799332 2023-01-22 07:49:55.973495: step: 60/77, loss: 0.06767314672470093 2023-01-22 07:49:57.292075: step: 64/77, loss: 0.03810075297951698 2023-01-22 07:49:58.643718: step: 68/77, loss: 0.015344061888754368 2023-01-22 07:49:59.988102: step: 72/77, loss: 0.02123776264488697 2023-01-22 
07:50:01.302792: step: 76/77, loss: 0.015418780036270618 2023-01-22 07:50:02.616833: step: 80/77, loss: 0.009558499790728092 2023-01-22 07:50:03.979782: step: 84/77, loss: 0.03018416091799736 2023-01-22 07:50:05.221810: step: 88/77, loss: 0.03399305045604706 2023-01-22 07:50:06.555925: step: 92/77, loss: 0.11518257856369019 2023-01-22 07:50:07.842570: step: 96/77, loss: 0.048495933413505554 2023-01-22 07:50:09.183891: step: 100/77, loss: 0.0012406861642375588 2023-01-22 07:50:10.458897: step: 104/77, loss: 0.038416363298892975 2023-01-22 07:50:11.754024: step: 108/77, loss: 0.06289390474557877 2023-01-22 07:50:13.113613: step: 112/77, loss: 0.00570332445204258 2023-01-22 07:50:14.440932: step: 116/77, loss: 0.0008757521864026785 2023-01-22 07:50:15.754472: step: 120/77, loss: 0.03343869373202324 2023-01-22 07:50:17.086826: step: 124/77, loss: 0.010786929167807102 2023-01-22 07:50:18.436243: step: 128/77, loss: 0.01748708076775074 2023-01-22 07:50:19.773988: step: 132/77, loss: 0.028637580573558807 2023-01-22 07:50:21.076872: step: 136/77, loss: 0.04966537281870842 2023-01-22 07:50:22.370949: step: 140/77, loss: 0.04209902510046959 2023-01-22 07:50:23.662432: step: 144/77, loss: 0.035914335399866104 2023-01-22 07:50:24.946790: step: 148/77, loss: 0.044010721147060394 2023-01-22 07:50:26.243577: step: 152/77, loss: 0.013883043080568314 2023-01-22 07:50:27.549214: step: 156/77, loss: 0.021576108410954475 2023-01-22 07:50:28.906365: step: 160/77, loss: 0.0158415324985981 2023-01-22 07:50:30.224017: step: 164/77, loss: 0.20719078183174133 2023-01-22 07:50:31.556260: step: 168/77, loss: 0.0026145929004997015 2023-01-22 07:50:32.895609: step: 172/77, loss: 0.011889268644154072 2023-01-22 07:50:34.145921: step: 176/77, loss: 0.07418306171894073 2023-01-22 07:50:35.453277: step: 180/77, loss: 0.04796472564339638 2023-01-22 07:50:36.758283: step: 184/77, loss: 0.13556009531021118 2023-01-22 07:50:38.035238: step: 188/77, loss: 0.008009164594113827 2023-01-22 07:50:39.411091: step: 192/77, loss: 0.03636176884174347 2023-01-22 07:50:40.742330: step: 196/77, loss: 0.0731358677148819 2023-01-22 07:50:42.089416: step: 200/77, loss: 0.06386769562959671 2023-01-22 07:50:43.457806: step: 204/77, loss: 0.009611038491129875 2023-01-22 07:50:44.776010: step: 208/77, loss: 0.012586880475282669 2023-01-22 07:50:46.083053: step: 212/77, loss: 0.010348862037062645 2023-01-22 07:50:47.444233: step: 216/77, loss: 0.01834016479551792 2023-01-22 07:50:48.763954: step: 220/77, loss: 0.05512729287147522 2023-01-22 07:50:50.082317: step: 224/77, loss: 0.019282493740320206 2023-01-22 07:50:51.418275: step: 228/77, loss: 0.01623130775988102 2023-01-22 07:50:52.714447: step: 232/77, loss: 0.006039030849933624 2023-01-22 07:50:54.042958: step: 236/77, loss: 0.010130537673830986 2023-01-22 07:50:55.372114: step: 240/77, loss: 0.00915328785777092 2023-01-22 07:50:56.629643: step: 244/77, loss: 0.019957851618528366 2023-01-22 07:50:57.994168: step: 248/77, loss: 0.037982597947120667 2023-01-22 07:50:59.307498: step: 252/77, loss: 0.0435798242688179 2023-01-22 07:51:00.616111: step: 256/77, loss: 0.022857429459691048 2023-01-22 07:51:01.930236: step: 260/77, loss: 0.003933214582502842 2023-01-22 07:51:03.268558: step: 264/77, loss: 0.04226960241794586 2023-01-22 07:51:04.558064: step: 268/77, loss: 0.0034107593819499016 2023-01-22 07:51:05.895041: step: 272/77, loss: 0.0026267259381711483 2023-01-22 07:51:07.203287: step: 276/77, loss: 0.008702527731657028 2023-01-22 07:51:08.450901: step: 280/77, loss: 0.020024804398417473 
2023-01-22 07:51:09.773847: step: 284/77, loss: 0.01928548887372017 2023-01-22 07:51:11.074126: step: 288/77, loss: 0.048891812562942505 2023-01-22 07:51:12.385873: step: 292/77, loss: 0.02976638823747635 2023-01-22 07:51:13.684886: step: 296/77, loss: 0.004443894140422344 2023-01-22 07:51:14.974033: step: 300/77, loss: 0.011118062771856785 2023-01-22 07:51:16.236115: step: 304/77, loss: 0.046159714460372925 2023-01-22 07:51:17.542819: step: 308/77, loss: 0.011610114015638828 2023-01-22 07:51:18.805195: step: 312/77, loss: 0.022255709394812584 2023-01-22 07:51:20.069290: step: 316/77, loss: 0.017545828595757484 2023-01-22 07:51:21.385601: step: 320/77, loss: 0.024126345291733742 2023-01-22 07:51:22.693483: step: 324/77, loss: 0.012279022485017776 2023-01-22 07:51:23.923346: step: 328/77, loss: 0.031908974051475525 2023-01-22 07:51:25.283927: step: 332/77, loss: 0.007514918223023415 2023-01-22 07:51:26.562863: step: 336/77, loss: 0.04214081913232803 2023-01-22 07:51:27.892771: step: 340/77, loss: 0.014605971053242683 2023-01-22 07:51:29.216386: step: 344/77, loss: 0.056578878313302994 2023-01-22 07:51:30.514305: step: 348/77, loss: 0.07047766447067261 2023-01-22 07:51:31.801870: step: 352/77, loss: 0.016497528180480003 2023-01-22 07:51:33.151244: step: 356/77, loss: 0.02124914340674877 2023-01-22 07:51:34.447745: step: 360/77, loss: 0.038115061819553375 2023-01-22 07:51:35.742052: step: 364/77, loss: 0.012755529955029488 2023-01-22 07:51:37.028767: step: 368/77, loss: 0.008632929995656013 2023-01-22 07:51:38.311893: step: 372/77, loss: 0.015460866503417492 2023-01-22 07:51:39.578894: step: 376/77, loss: 0.014853318221867085 2023-01-22 07:51:40.893192: step: 380/77, loss: 0.02519097365438938 2023-01-22 07:51:42.232577: step: 384/77, loss: 0.009393636137247086 2023-01-22 07:51:43.588485: step: 388/77, loss: 0.005673499777913094 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9855072463768116, 'r': 0.5074626865671642, 'f1': 0.6699507389162562}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.013031756194657506, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.9855072463768116, 'r': 0.5074626865671642, 'f1': 0.6699507389162562}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.013031756194657506, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 
'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:53:31.327359: step: 4/77, loss: 0.0173548124730587 2023-01-22 07:53:32.619362: step: 8/77, loss: 0.01768217794597149 2023-01-22 07:53:33.934158: step: 12/77, loss: 0.05099344998598099 2023-01-22 07:53:35.230605: step: 16/77, loss: 0.00381911126896739 2023-01-22 07:53:36.517498: step: 20/77, loss: 0.026492225006222725 2023-01-22 07:53:37.769502: step: 24/77, loss: 0.004659312777221203 2023-01-22 07:53:39.038952: step: 28/77, loss: 0.005350128747522831 2023-01-22 07:53:40.363385: step: 32/77, loss: 0.15542210638523102 2023-01-22 07:53:41.676835: step: 36/77, loss: 0.0018877113470807672 2023-01-22 07:53:42.940656: step: 40/77, loss: 0.012737736105918884 2023-01-22 07:53:44.225627: step: 44/77, loss: 0.1805691421031952 2023-01-22 07:53:45.515560: step: 48/77, loss: 0.004664191044867039 2023-01-22 07:53:46.827771: step: 52/77, loss: 0.020634517073631287 2023-01-22 07:53:48.161073: step: 56/77, loss: 0.07404866814613342 2023-01-22 07:53:49.504056: step: 60/77, loss: 0.029311828315258026 2023-01-22 07:53:50.801549: step: 64/77, loss: 0.01757962442934513 
2023-01-22 07:53:52.097127: step: 68/77, loss: 0.01078371237963438 2023-01-22 07:53:53.445784: step: 72/77, loss: 0.05148717761039734 2023-01-22 07:53:54.743534: step: 76/77, loss: 0.09286247938871384 2023-01-22 07:53:56.018796: step: 80/77, loss: 0.03365876525640488 2023-01-22 07:53:57.284287: step: 84/77, loss: 0.0045639462769031525 2023-01-22 07:53:58.599741: step: 88/77, loss: 0.08936309069395065 2023-01-22 07:53:59.886074: step: 92/77, loss: 0.040674157440662384 2023-01-22 07:54:01.208681: step: 96/77, loss: 0.013063786551356316 2023-01-22 07:54:02.493682: step: 100/77, loss: 0.018701188266277313 2023-01-22 07:54:03.778276: step: 104/77, loss: 0.016493361443281174 2023-01-22 07:54:05.079855: step: 108/77, loss: 0.010979781858623028 2023-01-22 07:54:06.366303: step: 112/77, loss: 0.010761067271232605 2023-01-22 07:54:07.688835: step: 116/77, loss: 0.002823087153956294 2023-01-22 07:54:09.009217: step: 120/77, loss: 0.01579410396516323 2023-01-22 07:54:10.311903: step: 124/77, loss: 0.017261408269405365 2023-01-22 07:54:11.628498: step: 128/77, loss: 0.0017213891260325909 2023-01-22 07:54:12.905131: step: 132/77, loss: 0.16353674232959747 2023-01-22 07:54:14.199037: step: 136/77, loss: 0.0066542369313538074 2023-01-22 07:54:15.498537: step: 140/77, loss: 0.015729660168290138 2023-01-22 07:54:16.806933: step: 144/77, loss: 0.04757007211446762 2023-01-22 07:54:18.075364: step: 148/77, loss: 0.0190782081335783 2023-01-22 07:54:19.368913: step: 152/77, loss: 0.019761735573410988 2023-01-22 07:54:20.618861: step: 156/77, loss: 0.16782283782958984 2023-01-22 07:54:21.876972: step: 160/77, loss: 0.03685488924384117 2023-01-22 07:54:23.209522: step: 164/77, loss: 0.026806870475411415 2023-01-22 07:54:24.561453: step: 168/77, loss: 0.09175720065832138 2023-01-22 07:54:25.908966: step: 172/77, loss: 0.04123001918196678 2023-01-22 07:54:27.179061: step: 176/77, loss: 0.01284695416688919 2023-01-22 07:54:28.524582: step: 180/77, loss: 0.01674005389213562 2023-01-22 07:54:29.827440: step: 184/77, loss: 0.0624990351498127 2023-01-22 07:54:31.123784: step: 188/77, loss: 0.01610327512025833 2023-01-22 07:54:32.406636: step: 192/77, loss: 0.10640712827444077 2023-01-22 07:54:33.687627: step: 196/77, loss: 0.02318374626338482 2023-01-22 07:54:34.951842: step: 200/77, loss: 0.00702270632609725 2023-01-22 07:54:36.216862: step: 204/77, loss: 0.06984404474496841 2023-01-22 07:54:37.514037: step: 208/77, loss: 0.04751594364643097 2023-01-22 07:54:38.833637: step: 212/77, loss: 0.0018362406408414245 2023-01-22 07:54:40.099419: step: 216/77, loss: 0.03405271843075752 2023-01-22 07:54:41.398943: step: 220/77, loss: 0.02549353428184986 2023-01-22 07:54:42.709061: step: 224/77, loss: 0.08385471999645233 2023-01-22 07:54:44.040980: step: 228/77, loss: 0.0008324494701810181 2023-01-22 07:54:45.385609: step: 232/77, loss: 0.012937519699335098 2023-01-22 07:54:46.662925: step: 236/77, loss: 0.000671170768328011 2023-01-22 07:54:48.007834: step: 240/77, loss: 0.007344944402575493 2023-01-22 07:54:49.352889: step: 244/77, loss: 0.0027922093868255615 2023-01-22 07:54:50.658513: step: 248/77, loss: 0.025115441530942917 2023-01-22 07:54:51.996928: step: 252/77, loss: 0.02576635032892227 2023-01-22 07:54:53.292037: step: 256/77, loss: 0.01716598868370056 2023-01-22 07:54:54.588273: step: 260/77, loss: 0.04405806213617325 2023-01-22 07:54:55.848016: step: 264/77, loss: 0.08885978162288666 2023-01-22 07:54:57.205640: step: 268/77, loss: 0.011347277089953423 2023-01-22 07:54:58.507568: step: 272/77, loss: 
0.0019689826294779778 2023-01-22 07:54:59.862257: step: 276/77, loss: 0.004658084828406572 2023-01-22 07:55:01.192412: step: 280/77, loss: 0.09460316598415375 2023-01-22 07:55:02.503614: step: 284/77, loss: 0.0011397113557904959 2023-01-22 07:55:03.823920: step: 288/77, loss: 0.021794844418764114 2023-01-22 07:55:05.111948: step: 292/77, loss: 0.008161050267517567 2023-01-22 07:55:06.447820: step: 296/77, loss: 0.03600213676691055 2023-01-22 07:55:07.778035: step: 300/77, loss: 0.018938438966870308 2023-01-22 07:55:09.092079: step: 304/77, loss: 0.006088805850595236 2023-01-22 07:55:10.402139: step: 308/77, loss: 0.008946132846176624 2023-01-22 07:55:11.678696: step: 312/77, loss: 0.009896229021251202 2023-01-22 07:55:13.030008: step: 316/77, loss: 0.02469654195010662 2023-01-22 07:55:14.397122: step: 320/77, loss: 0.0005184942274354398 2023-01-22 07:55:15.733064: step: 324/77, loss: 0.03663252666592598 2023-01-22 07:55:17.034219: step: 328/77, loss: 0.005019849166274071 2023-01-22 07:55:18.312467: step: 332/77, loss: 0.0011025880230590701 2023-01-22 07:55:19.609031: step: 336/77, loss: 0.0429910384118557 2023-01-22 07:55:20.889578: step: 340/77, loss: 0.028678392991423607 2023-01-22 07:55:22.180334: step: 344/77, loss: 0.04023951292037964 2023-01-22 07:55:23.484950: step: 348/77, loss: 0.006216683890670538 2023-01-22 07:55:24.797694: step: 352/77, loss: 0.01433058362454176 2023-01-22 07:55:26.076957: step: 356/77, loss: 0.016391603276133537 2023-01-22 07:55:27.377799: step: 360/77, loss: 0.021528517827391624 2023-01-22 07:55:28.762881: step: 364/77, loss: 0.005811501760035753 2023-01-22 07:55:30.071351: step: 368/77, loss: 0.0071782199665904045 2023-01-22 07:55:31.373889: step: 372/77, loss: 0.01076382864266634 2023-01-22 07:55:32.724014: step: 376/77, loss: 0.0397380106151104 2023-01-22 07:55:34.025039: step: 380/77, loss: 0.017241651192307472 2023-01-22 07:55:35.300434: step: 384/77, loss: 0.010189219377934933 2023-01-22 07:55:36.635668: step: 388/77, loss: 0.07457630336284637 ================================================== Loss: 0.031 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 5} Test Chinese: {'template': {'p': 0.8690476190476191, 'r': 0.5447761194029851, 'f1': 0.6697247706422019}, 'slot': {'p': 0.475, 'r': 0.017070979335130278, 'f1': 0.03295750216825672}, 'combined': 0.022072455580575606, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 5} Test Korean: {'template': {'p': 0.8765432098765432, 'r': 0.5298507462686567, 'f1': 0.6604651162790698}, 'slot': {'p': 0.5277777777777778, 'r': 0.017070979335130278, 'f1': 0.03307223672758921}, 'combined': 0.02184305867589613, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 5} Test Russian: {'template': {'p': 0.9, 'r': 0.5373134328358209, 'f1': 0.6728971962616822}, 'slot': {'p': 0.5135135135135135, 'r': 0.017070979335130278, 'f1': 0.03304347826086956}, 'combined': 0.022234863876472975, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 
0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:57:17.082194: step: 4/77, loss: 0.043663907796144485 2023-01-22 07:57:18.356000: step: 8/77, loss: 0.005766963120549917 2023-01-22 07:57:19.611427: step: 12/77, loss: 0.00684958603233099 2023-01-22 07:57:20.861313: step: 16/77, loss: 0.020765742287039757 2023-01-22 07:57:22.171353: step: 20/77, loss: 0.04183043912053108 2023-01-22 07:57:23.509507: step: 24/77, loss: 0.11025308072566986 2023-01-22 07:57:24.801551: step: 28/77, loss: 0.035530585795640945 2023-01-22 07:57:26.141084: step: 32/77, loss: 0.08741918206214905 2023-01-22 07:57:27.445644: step: 36/77, loss: 0.07130548357963562 2023-01-22 07:57:28.722037: step: 40/77, loss: 0.02018367126584053 2023-01-22 07:57:29.993056: step: 44/77, loss: 0.042092032730579376 2023-01-22 07:57:31.327144: step: 48/77, loss: 0.017223650589585304 2023-01-22 07:57:32.618802: step: 52/77, loss: 0.009587359614670277 2023-01-22 07:57:33.872732: step: 56/77, loss: 0.016000591218471527 
2023-01-22 07:57:35.159342: step: 60/77, loss: 0.01175003033131361 2023-01-22 07:57:36.450832: step: 64/77, loss: 0.025416970252990723 2023-01-22 07:57:37.777406: step: 68/77, loss: 0.022085528820753098 2023-01-22 07:57:39.135535: step: 72/77, loss: 0.09108749032020569 2023-01-22 07:57:40.451322: step: 76/77, loss: 0.032236192375421524 2023-01-22 07:57:41.751466: step: 80/77, loss: 0.011872172355651855 2023-01-22 07:57:43.079980: step: 84/77, loss: 0.00896656047552824 2023-01-22 07:57:44.396683: step: 88/77, loss: 0.06494638323783875 2023-01-22 07:57:45.669336: step: 92/77, loss: 0.041225992143154144 2023-01-22 07:57:46.941796: step: 96/77, loss: 0.02453225664794445 2023-01-22 07:57:48.248753: step: 100/77, loss: 0.09559760242700577 2023-01-22 07:57:49.521011: step: 104/77, loss: 0.015864849090576172 2023-01-22 07:57:50.800371: step: 108/77, loss: 0.013929332606494427 2023-01-22 07:57:52.111468: step: 112/77, loss: 0.023366829380393028 2023-01-22 07:57:53.384697: step: 116/77, loss: 0.09240493923425674 2023-01-22 07:57:54.695917: step: 120/77, loss: 0.021885264664888382 2023-01-22 07:57:55.978990: step: 124/77, loss: 0.001103585003875196 2023-01-22 07:57:57.276194: step: 128/77, loss: 0.01976066455245018 2023-01-22 07:57:58.597271: step: 132/77, loss: 0.00035930052399635315 2023-01-22 07:57:59.887552: step: 136/77, loss: 0.025959907099604607 2023-01-22 07:58:01.215774: step: 140/77, loss: 0.001159863080829382 2023-01-22 07:58:02.463115: step: 144/77, loss: 0.01121261902153492 2023-01-22 07:58:03.751442: step: 148/77, loss: 0.03004949912428856 2023-01-22 07:58:05.085604: step: 152/77, loss: 0.0026157714892178774 2023-01-22 07:58:06.426253: step: 156/77, loss: 0.003008859930559993 2023-01-22 07:58:07.721417: step: 160/77, loss: 0.014713558368384838 2023-01-22 07:58:08.999333: step: 164/77, loss: 0.053925808519124985 2023-01-22 07:58:10.298935: step: 168/77, loss: 0.054232362657785416 2023-01-22 07:58:11.588773: step: 172/77, loss: 0.008975783362984657 2023-01-22 07:58:12.900840: step: 176/77, loss: 0.0080420495942235 2023-01-22 07:58:14.210501: step: 180/77, loss: 0.026775937527418137 2023-01-22 07:58:15.511835: step: 184/77, loss: 0.0018847854807972908 2023-01-22 07:58:16.844443: step: 188/77, loss: 0.017605334520339966 2023-01-22 07:58:18.135289: step: 192/77, loss: 0.007907466031610966 2023-01-22 07:58:19.467175: step: 196/77, loss: 0.01664300262928009 2023-01-22 07:58:20.785217: step: 200/77, loss: 0.030148349702358246 2023-01-22 07:58:22.060244: step: 204/77, loss: 0.03769388794898987 2023-01-22 07:58:23.341053: step: 208/77, loss: 0.06115224212408066 2023-01-22 07:58:24.632248: step: 212/77, loss: 0.003218221478164196 2023-01-22 07:58:25.973523: step: 216/77, loss: 0.0035331116523593664 2023-01-22 07:58:27.229525: step: 220/77, loss: 0.04016996547579765 2023-01-22 07:58:28.538576: step: 224/77, loss: 0.01222273800522089 2023-01-22 07:58:29.852863: step: 228/77, loss: 0.002561622764915228 2023-01-22 07:58:31.136898: step: 232/77, loss: 0.019069360569119453 2023-01-22 07:58:32.436319: step: 236/77, loss: 0.004311387427151203 2023-01-22 07:58:33.723294: step: 240/77, loss: 0.035350359976291656 2023-01-22 07:58:35.047146: step: 244/77, loss: 0.061739981174468994 2023-01-22 07:58:36.385452: step: 248/77, loss: 0.07043258100748062 2023-01-22 07:58:37.689171: step: 252/77, loss: 0.023818643763661385 2023-01-22 07:58:39.010190: step: 256/77, loss: 0.025439318269491196 2023-01-22 07:58:40.336908: step: 260/77, loss: 0.002321291249245405 2023-01-22 07:58:41.615620: step: 264/77, loss: 
0.011257769539952278 2023-01-22 07:58:42.948825: step: 268/77, loss: 0.017980799078941345 2023-01-22 07:58:44.252449: step: 272/77, loss: 0.021800994873046875 2023-01-22 07:58:45.553691: step: 276/77, loss: 0.005132108926773071 2023-01-22 07:58:46.906405: step: 280/77, loss: 0.062424689531326294 2023-01-22 07:58:48.237002: step: 284/77, loss: 0.06942526996135712 2023-01-22 07:58:49.504857: step: 288/77, loss: 0.015416646376252174 2023-01-22 07:58:50.812504: step: 292/77, loss: 0.02062246948480606 2023-01-22 07:58:52.128289: step: 296/77, loss: 0.014743135310709476 2023-01-22 07:58:53.420202: step: 300/77, loss: 0.005518500227481127 2023-01-22 07:58:54.716815: step: 304/77, loss: 0.003828242188319564 2023-01-22 07:58:56.017890: step: 308/77, loss: 0.01607200689613819 2023-01-22 07:58:57.340512: step: 312/77, loss: 0.007805492728948593 2023-01-22 07:58:58.673222: step: 316/77, loss: 0.05071113258600235 2023-01-22 07:58:59.969196: step: 320/77, loss: 0.020300591364502907 2023-01-22 07:59:01.275403: step: 324/77, loss: 0.006258299574255943 2023-01-22 07:59:02.581440: step: 328/77, loss: 0.12374333292245865 2023-01-22 07:59:03.866619: step: 332/77, loss: 0.019471395760774612 2023-01-22 07:59:05.181517: step: 336/77, loss: 0.013056513853371143 2023-01-22 07:59:06.532159: step: 340/77, loss: 0.00616362364962697 2023-01-22 07:59:07.794944: step: 344/77, loss: 0.005581936798989773 2023-01-22 07:59:09.082935: step: 348/77, loss: 0.027472928166389465 2023-01-22 07:59:10.393507: step: 352/77, loss: 0.021108750253915787 2023-01-22 07:59:11.723435: step: 356/77, loss: 0.006503445096313953 2023-01-22 07:59:13.016452: step: 360/77, loss: 0.0004131326568312943 2023-01-22 07:59:14.334371: step: 364/77, loss: 0.01506958156824112 2023-01-22 07:59:15.585218: step: 368/77, loss: 0.00557693699374795 2023-01-22 07:59:16.906152: step: 372/77, loss: 0.03858831524848938 2023-01-22 07:59:18.241894: step: 376/77, loss: 0.006581415422260761 2023-01-22 07:59:19.573618: step: 380/77, loss: 0.029814526438713074 2023-01-22 07:59:20.908274: step: 384/77, loss: 0.009821525774896145 2023-01-22 07:59:22.240244: step: 388/77, loss: 0.0022138720378279686 ================================================== Loss: 0.026 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5298507462686567, 'f1': 0.6761904761904761}, 'slot': {'p': 0.6129032258064516, 'r': 0.017070979335130278, 'f1': 0.033216783216783216}, 'combined': 0.022460872460872458, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.6129032258064516, 'r': 0.017070979335130278, 'f1': 0.033216783216783216}, 'combined': 0.022875709196463912, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Russian: {'template': {'p': 0.935064935064935, 'r': 0.5373134328358209, 'f1': 0.6824644549763033}, 'slot': {'p': 0.6129032258064516, 'r': 0.017070979335130278, 'f1': 0.033216783216783216}, 'combined': 
0.022669273854107976, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:01:02.748643: step: 4/77, loss: 0.00011637747229542583 2023-01-22 08:01:04.054590: step: 8/77, loss: 0.0368674173951149 2023-01-22 08:01:05.334028: step: 12/77, loss: 0.02475127950310707 2023-01-22 08:01:06.672104: step: 16/77, loss: 0.01712360978126526 2023-01-22 08:01:07.976046: step: 20/77, loss: 0.012559721246361732 2023-01-22 08:01:09.288557: step: 24/77, loss: 0.00196480518206954 2023-01-22 08:01:10.675194: step: 28/77, loss: 0.005120502784848213 2023-01-22 08:01:12.030452: step: 32/77, loss: 0.10957054793834686 2023-01-22 08:01:13.357366: step: 36/77, loss: 0.01413258071988821 2023-01-22 08:01:14.646078: step: 40/77, loss: 0.014781583100557327 2023-01-22 08:01:15.924404: step: 44/77, loss: 0.02707362174987793 2023-01-22 08:01:17.224842: step: 48/77, loss: 
0.015094796195626259 2023-01-22 08:01:18.506070: step: 52/77, loss: 0.009382286109030247 2023-01-22 08:01:19.826546: step: 56/77, loss: 0.00782015547156334 2023-01-22 08:01:21.108871: step: 60/77, loss: 0.0029509845189750195 2023-01-22 08:01:22.400952: step: 64/77, loss: 0.0021931114606559277 2023-01-22 08:01:23.669247: step: 68/77, loss: 0.047177016735076904 2023-01-22 08:01:24.977866: step: 72/77, loss: 0.039223603904247284 2023-01-22 08:01:26.322404: step: 76/77, loss: 0.16369330883026123 2023-01-22 08:01:27.544290: step: 80/77, loss: 0.0793350338935852 2023-01-22 08:01:28.845090: step: 84/77, loss: 0.09823714941740036 2023-01-22 08:01:30.152372: step: 88/77, loss: 0.03174787759780884 2023-01-22 08:01:31.499568: step: 92/77, loss: 0.00029889249708503485 2023-01-22 08:01:32.798969: step: 96/77, loss: 0.007706539239734411 2023-01-22 08:01:34.100659: step: 100/77, loss: 0.06518713384866714 2023-01-22 08:01:35.355259: step: 104/77, loss: 0.023982468992471695 2023-01-22 08:01:36.649181: step: 108/77, loss: 0.007779199630022049 2023-01-22 08:01:37.952265: step: 112/77, loss: 0.021028827875852585 2023-01-22 08:01:39.252155: step: 116/77, loss: 0.012213161215186119 2023-01-22 08:01:40.542094: step: 120/77, loss: 5.075455555925146e-05 2023-01-22 08:01:41.884569: step: 124/77, loss: 0.022922176867723465 2023-01-22 08:01:43.225763: step: 128/77, loss: 0.004472864791750908 2023-01-22 08:01:44.536522: step: 132/77, loss: 0.011102437973022461 2023-01-22 08:01:45.856683: step: 136/77, loss: 0.0611458458006382 2023-01-22 08:01:47.153399: step: 140/77, loss: 2.13903040275909e-05 2023-01-22 08:01:48.420788: step: 144/77, loss: 0.011816516518592834 2023-01-22 08:01:49.695392: step: 148/77, loss: 0.037825245410203934 2023-01-22 08:01:50.963312: step: 152/77, loss: 0.029810432344675064 2023-01-22 08:01:52.237794: step: 156/77, loss: 0.015813257545232773 2023-01-22 08:01:53.559895: step: 160/77, loss: 0.025386273860931396 2023-01-22 08:01:54.888690: step: 164/77, loss: 0.12720629572868347 2023-01-22 08:01:56.229547: step: 168/77, loss: 0.022320548072457314 2023-01-22 08:01:57.530874: step: 172/77, loss: 0.006334016565233469 2023-01-22 08:01:58.830965: step: 176/77, loss: 0.01868707872927189 2023-01-22 08:02:00.182194: step: 180/77, loss: 0.021900024265050888 2023-01-22 08:02:01.506005: step: 184/77, loss: 0.01332312822341919 2023-01-22 08:02:02.834524: step: 188/77, loss: 0.0190892331302166 2023-01-22 08:02:04.159496: step: 192/77, loss: 0.053591687232255936 2023-01-22 08:02:05.405378: step: 196/77, loss: 0.03223322704434395 2023-01-22 08:02:06.797918: step: 200/77, loss: 0.012321964837610722 2023-01-22 08:02:08.113740: step: 204/77, loss: 0.05096723884344101 2023-01-22 08:02:09.407702: step: 208/77, loss: 0.012583065778017044 2023-01-22 08:02:10.719846: step: 212/77, loss: 0.22834046185016632 2023-01-22 08:02:12.019081: step: 216/77, loss: 0.006164837162941694 2023-01-22 08:02:13.296422: step: 220/77, loss: 0.04228643327951431 2023-01-22 08:02:14.583139: step: 224/77, loss: 0.014050657860934734 2023-01-22 08:02:15.867947: step: 228/77, loss: 0.029714161530137062 2023-01-22 08:02:17.195131: step: 232/77, loss: 0.01477883756160736 2023-01-22 08:02:18.526205: step: 236/77, loss: 0.030889861285686493 2023-01-22 08:02:19.837673: step: 240/77, loss: 0.014133702963590622 2023-01-22 08:02:21.177578: step: 244/77, loss: 0.0030390899628400803 2023-01-22 08:02:22.478193: step: 248/77, loss: 0.00498980050906539 2023-01-22 08:02:23.758267: step: 252/77, loss: 0.031213590875267982 2023-01-22 08:02:25.015532: step: 
256/77, loss: 0.004602812230587006 2023-01-22 08:02:26.316369: step: 260/77, loss: 0.04868490993976593 2023-01-22 08:02:27.607077: step: 264/77, loss: 0.0032033356837928295 2023-01-22 08:02:28.906347: step: 268/77, loss: 0.024175122380256653 2023-01-22 08:02:30.212385: step: 272/77, loss: 0.011258386075496674 2023-01-22 08:02:31.540422: step: 276/77, loss: 0.0016684290021657944 2023-01-22 08:02:32.845336: step: 280/77, loss: 0.000780187314376235 2023-01-22 08:02:34.176757: step: 284/77, loss: 0.009816921316087246 2023-01-22 08:02:35.497320: step: 288/77, loss: 0.005105116404592991 2023-01-22 08:02:36.795724: step: 292/77, loss: 0.013287038542330265 2023-01-22 08:02:38.113937: step: 296/77, loss: 0.025341173633933067 2023-01-22 08:02:39.417542: step: 300/77, loss: 0.003929181955754757 2023-01-22 08:02:40.746022: step: 304/77, loss: 0.017698541283607483 2023-01-22 08:02:42.033326: step: 308/77, loss: 0.022486358880996704 2023-01-22 08:02:43.344520: step: 312/77, loss: 0.003166760317981243 2023-01-22 08:02:44.655097: step: 316/77, loss: 0.009733240120112896 2023-01-22 08:02:45.981412: step: 320/77, loss: 0.003576915245503187 2023-01-22 08:02:47.284039: step: 324/77, loss: 0.039995431900024414 2023-01-22 08:02:48.623711: step: 328/77, loss: 0.06405875086784363 2023-01-22 08:02:49.949556: step: 332/77, loss: 0.03697868436574936 2023-01-22 08:02:51.248324: step: 336/77, loss: 8.15482489997521e-05 2023-01-22 08:02:52.589043: step: 340/77, loss: 0.0017763026989996433 2023-01-22 08:02:53.892300: step: 344/77, loss: 0.0001419158943463117 2023-01-22 08:02:55.207380: step: 348/77, loss: 0.0014657576102763414 2023-01-22 08:02:56.487731: step: 352/77, loss: 0.009428405202925205 2023-01-22 08:02:57.787554: step: 356/77, loss: 0.06886311620473862 2023-01-22 08:02:59.077914: step: 360/77, loss: 0.03953403979539871 2023-01-22 08:03:00.386014: step: 364/77, loss: 0.026917902752757072 2023-01-22 08:03:01.732650: step: 368/77, loss: 0.017108548432588577 2023-01-22 08:03:03.066419: step: 372/77, loss: 0.006558586843311787 2023-01-22 08:03:04.382187: step: 376/77, loss: 0.004070008639246225 2023-01-22 08:03:05.700747: step: 380/77, loss: 0.011937337927520275 2023-01-22 08:03:07.026544: step: 384/77, loss: 0.025983836501836777 2023-01-22 08:03:08.374574: step: 388/77, loss: 0.013943709433078766 ================================================== Loss: 0.026 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9024390243902439, 'r': 0.5522388059701493, 'f1': 0.6851851851851852}, 'slot': {'p': 0.42857142857142855, 'r': 0.018867924528301886, 'f1': 0.03614457831325301}, 'combined': 0.024765729585006693, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.925, 'r': 0.5522388059701493, 'f1': 0.6915887850467289}, 'slot': {'p': 0.45652173913043476, 'r': 0.018867924528301886, 'f1': 0.0362381363244176}, 'combined': 0.025061888672961705, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.925, 'r': 0.5522388059701493, 'f1': 
0.6915887850467289}, 'slot': {'p': 0.4666666666666667, 'r': 0.018867924528301886, 'f1': 0.03626943005181347}, 'combined': 0.025083531063870994, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:04:51.203619: step: 4/77, loss: 0.04400216042995453 2023-01-22 08:04:52.510406: step: 8/77, loss: 0.0165130402892828 2023-01-22 08:04:53.790617: step: 12/77, loss: 0.011227885261178017 2023-01-22 08:04:55.125491: step: 16/77, loss: 0.012780023738741875 2023-01-22 08:04:56.430449: step: 20/77, loss: 0.007339693605899811 2023-01-22 08:04:57.710074: step: 24/77, loss: 0.018071576952934265 2023-01-22 08:04:58.992757: step: 28/77, loss: 0.011834011413156986 2023-01-22 08:05:00.310358: step: 32/77, loss: 0.04743792116641998 2023-01-22 08:05:01.593014: step: 36/77, loss: 0.054762475192546844 2023-01-22 08:05:02.856518: step: 40/77, loss: 
0.025785135105252266 2023-01-22 08:05:04.151140: step: 44/77, loss: 0.004111032467335463 2023-01-22 08:05:05.439977: step: 48/77, loss: 0.04216965287923813 2023-01-22 08:05:06.786648: step: 52/77, loss: 0.00041233195224776864 2023-01-22 08:05:08.094549: step: 56/77, loss: 0.0029579598922282457 2023-01-22 08:05:09.411401: step: 60/77, loss: 0.007977910339832306 2023-01-22 08:05:10.741603: step: 64/77, loss: 0.026709450408816338 2023-01-22 08:05:12.089019: step: 68/77, loss: 0.10443099588155746 2023-01-22 08:05:13.451580: step: 72/77, loss: 0.0038933041505515575 2023-01-22 08:05:14.695808: step: 76/77, loss: 0.009387347847223282 2023-01-22 08:05:16.004160: step: 80/77, loss: 0.0030997178982943296 2023-01-22 08:05:17.287265: step: 84/77, loss: 0.006862640380859375 2023-01-22 08:05:18.594651: step: 88/77, loss: 0.008558643981814384 2023-01-22 08:05:19.897655: step: 92/77, loss: 0.004854598548263311 2023-01-22 08:05:21.199599: step: 96/77, loss: 0.029413817450404167 2023-01-22 08:05:22.495120: step: 100/77, loss: 0.019570060074329376 2023-01-22 08:05:23.820884: step: 104/77, loss: 0.00334225594997406 2023-01-22 08:05:25.092007: step: 108/77, loss: 0.006600016728043556 2023-01-22 08:05:26.394243: step: 112/77, loss: 0.057152628898620605 2023-01-22 08:05:27.713652: step: 116/77, loss: 0.05850166454911232 2023-01-22 08:05:28.970327: step: 120/77, loss: 0.026945604011416435 2023-01-22 08:05:30.272464: step: 124/77, loss: 0.002308004070073366 2023-01-22 08:05:31.581408: step: 128/77, loss: 0.030956268310546875 2023-01-22 08:05:32.915702: step: 132/77, loss: 0.004936728626489639 2023-01-22 08:05:34.221788: step: 136/77, loss: 0.021307937800884247 2023-01-22 08:05:35.506454: step: 140/77, loss: 0.07446351647377014 2023-01-22 08:05:36.821190: step: 144/77, loss: 0.002486872486770153 2023-01-22 08:05:38.103942: step: 148/77, loss: 0.0009867411572486162 2023-01-22 08:05:39.433831: step: 152/77, loss: 0.0032195388339459896 2023-01-22 08:05:40.750934: step: 156/77, loss: 0.03516755253076553 2023-01-22 08:05:42.120586: step: 160/77, loss: 0.1058875247836113 2023-01-22 08:05:43.417513: step: 164/77, loss: 0.034278132021427155 2023-01-22 08:05:44.733391: step: 168/77, loss: 0.008507543243467808 2023-01-22 08:05:46.026625: step: 172/77, loss: 0.0005227087531238794 2023-01-22 08:05:47.341688: step: 176/77, loss: 0.03415210545063019 2023-01-22 08:05:48.647528: step: 180/77, loss: 0.003491913666948676 2023-01-22 08:05:49.921433: step: 184/77, loss: 0.00460367975756526 2023-01-22 08:05:51.220162: step: 188/77, loss: 0.01312391459941864 2023-01-22 08:05:52.519774: step: 192/77, loss: 0.060950737446546555 2023-01-22 08:05:53.817869: step: 196/77, loss: 0.0038686206098645926 2023-01-22 08:05:55.158518: step: 200/77, loss: 0.011776718311011791 2023-01-22 08:05:56.455661: step: 204/77, loss: 0.06949067115783691 2023-01-22 08:05:57.750749: step: 208/77, loss: 0.004097940865904093 2023-01-22 08:05:59.097830: step: 212/77, loss: 0.043237391859292984 2023-01-22 08:06:00.396867: step: 216/77, loss: 0.04034322500228882 2023-01-22 08:06:01.702985: step: 220/77, loss: 0.00045904243597760797 2023-01-22 08:06:02.989998: step: 224/77, loss: 0.01494511030614376 2023-01-22 08:06:04.305404: step: 228/77, loss: 0.0987260490655899 2023-01-22 08:06:05.579947: step: 232/77, loss: 0.0036450622137635946 2023-01-22 08:06:06.898675: step: 236/77, loss: 0.004664699546992779 2023-01-22 08:06:08.230350: step: 240/77, loss: 0.023832963779568672 2023-01-22 08:06:09.542006: step: 244/77, loss: 0.02122790366411209 2023-01-22 08:06:10.889314: step: 
248/77, loss: 0.0848722830414772 2023-01-22 08:06:12.210148: step: 252/77, loss: 0.00020455481717363 2023-01-22 08:06:13.499612: step: 256/77, loss: 0.023084642365574837 2023-01-22 08:06:14.795205: step: 260/77, loss: 0.039016205817461014 2023-01-22 08:06:16.086948: step: 264/77, loss: 0.02520732954144478 2023-01-22 08:06:17.373652: step: 268/77, loss: 0.0230961162596941 2023-01-22 08:06:18.683998: step: 272/77, loss: 0.00813551526516676 2023-01-22 08:06:19.974554: step: 276/77, loss: 0.001504933345131576 2023-01-22 08:06:21.274093: step: 280/77, loss: 0.0649275928735733 2023-01-22 08:06:22.579927: step: 284/77, loss: 0.009410201571881771 2023-01-22 08:06:23.913305: step: 288/77, loss: 0.004082283470779657 2023-01-22 08:06:25.224140: step: 292/77, loss: 0.002070082351565361 2023-01-22 08:06:26.586164: step: 296/77, loss: 0.03177042677998543 2023-01-22 08:06:27.911721: step: 300/77, loss: 0.10870395600795746 2023-01-22 08:06:29.240473: step: 304/77, loss: 0.0013557918136939406 2023-01-22 08:06:30.544357: step: 308/77, loss: 0.019374998286366463 2023-01-22 08:06:31.855189: step: 312/77, loss: 0.009799455292522907 2023-01-22 08:06:33.152870: step: 316/77, loss: 0.026309311389923096 2023-01-22 08:06:34.519047: step: 320/77, loss: 0.010569714941084385 2023-01-22 08:06:35.790321: step: 324/77, loss: 0.018344147130846977 2023-01-22 08:06:37.099593: step: 328/77, loss: 0.022369852289557457 2023-01-22 08:06:38.387670: step: 332/77, loss: 0.03339612856507301 2023-01-22 08:06:39.687235: step: 336/77, loss: 0.0063545554876327515 2023-01-22 08:06:40.946333: step: 340/77, loss: 0.004273498430848122 2023-01-22 08:06:42.196043: step: 344/77, loss: 0.030692612752318382 2023-01-22 08:06:43.491417: step: 348/77, loss: 0.015207581222057343 2023-01-22 08:06:44.791010: step: 352/77, loss: 0.005335003137588501 2023-01-22 08:06:46.064796: step: 356/77, loss: 0.018144914880394936 2023-01-22 08:06:47.375426: step: 360/77, loss: 0.016128433868288994 2023-01-22 08:06:48.654472: step: 364/77, loss: 0.005331959575414658 2023-01-22 08:06:49.995084: step: 368/77, loss: 0.019862277433276176 2023-01-22 08:06:51.310314: step: 372/77, loss: 0.00042862031841650605 2023-01-22 08:06:52.580745: step: 376/77, loss: 0.015438038855791092 2023-01-22 08:06:53.886524: step: 380/77, loss: 0.02398861199617386 2023-01-22 08:06:55.176929: step: 384/77, loss: 0.0012583807110786438 2023-01-22 08:06:56.506177: step: 388/77, loss: 0.04697339981794357 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 1.0, 'r': 0.48507462686567165, 'f1': 0.6532663316582915}, 'slot': {'p': 0.6666666666666666, 'r': 0.016172506738544475, 'f1': 0.03157894736842106}, 'combined': 0.020629463104998684, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 1.0, 'r': 0.47761194029850745, 'f1': 0.6464646464646464}, 'slot': {'p': 0.6896551724137931, 'r': 0.017969451931716084, 'f1': 0.03502626970227671}, 'combined': 0.022643245060057667, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 
'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 1.0, 'r': 0.47761194029850745, 'f1': 0.6464646464646464}, 'slot': {'p': 0.6923076923076923, 'r': 0.016172506738544475, 'f1': 0.03160667251975418}, 'combined': 0.02043259637640674, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:08:36.711122: step: 4/77, loss: 0.004059008788317442 2023-01-22 08:08:38.029798: step: 8/77, loss: 0.008530894294381142 2023-01-22 08:08:39.289595: step: 12/77, loss: 0.08699976652860641 2023-01-22 08:08:40.562175: step: 16/77, loss: 0.004244130104780197 2023-01-22 08:08:41.907522: step: 20/77, loss: 0.01405458152294159 2023-01-22 08:08:43.204957: step: 24/77, loss: 0.06256169825792313 2023-01-22 08:08:44.487893: step: 28/77, loss: 0.003683938877657056 2023-01-22 08:08:45.800892: step: 32/77, loss: 0.00039898944669403136 2023-01-22 
08:08:47.096771: step: 36/77, loss: 0.019498568028211594 2023-01-22 08:08:48.341850: step: 40/77, loss: 0.007669855374842882 2023-01-22 08:08:49.685117: step: 44/77, loss: 0.0028932204004377127 2023-01-22 08:08:50.965987: step: 48/77, loss: 0.005956342443823814 2023-01-22 08:08:52.262151: step: 52/77, loss: 0.003917335532605648 2023-01-22 08:08:53.605594: step: 56/77, loss: 0.0020870023872703314 2023-01-22 08:08:54.918275: step: 60/77, loss: 0.037281736731529236 2023-01-22 08:08:56.222182: step: 64/77, loss: 0.006114881951361895 2023-01-22 08:08:57.506008: step: 68/77, loss: 0.00022297426767181605 2023-01-22 08:08:58.839500: step: 72/77, loss: 0.060727570205926895 2023-01-22 08:09:00.161300: step: 76/77, loss: 0.04985472559928894 2023-01-22 08:09:01.475808: step: 80/77, loss: 0.001316759968176484 2023-01-22 08:09:02.776300: step: 84/77, loss: 0.028501683846116066 2023-01-22 08:09:04.076816: step: 88/77, loss: 0.05243585258722305 2023-01-22 08:09:05.441102: step: 92/77, loss: 0.038167450577020645 2023-01-22 08:09:06.755994: step: 96/77, loss: 0.13844504952430725 2023-01-22 08:09:08.069713: step: 100/77, loss: 0.014699107967317104 2023-01-22 08:09:09.392324: step: 104/77, loss: 0.00032180227572098374 2023-01-22 08:09:10.710304: step: 108/77, loss: 0.011475548148155212 2023-01-22 08:09:11.967500: step: 112/77, loss: 0.012297457084059715 2023-01-22 08:09:13.242390: step: 116/77, loss: 0.021710721775889397 2023-01-22 08:09:14.558062: step: 120/77, loss: 0.001743190223351121 2023-01-22 08:09:15.865935: step: 124/77, loss: 0.007135982625186443 2023-01-22 08:09:17.160335: step: 128/77, loss: 0.011817889288067818 2023-01-22 08:09:18.429745: step: 132/77, loss: 0.001370633952319622 2023-01-22 08:09:19.787535: step: 136/77, loss: 0.00016879255417734385 2023-01-22 08:09:21.047062: step: 140/77, loss: 0.0038989256136119366 2023-01-22 08:09:22.302664: step: 144/77, loss: 0.04668494313955307 2023-01-22 08:09:23.629890: step: 148/77, loss: 0.005964198615401983 2023-01-22 08:09:24.905268: step: 152/77, loss: 0.01362668164074421 2023-01-22 08:09:26.180119: step: 156/77, loss: 0.008104304783046246 2023-01-22 08:09:27.454777: step: 160/77, loss: 0.02379458025097847 2023-01-22 08:09:28.733024: step: 164/77, loss: 0.027938585728406906 2023-01-22 08:09:30.027182: step: 168/77, loss: 0.005503904074430466 2023-01-22 08:09:31.308852: step: 172/77, loss: 0.006314740050584078 2023-01-22 08:09:32.619546: step: 176/77, loss: 0.006072746589779854 2023-01-22 08:09:33.961033: step: 180/77, loss: 0.02708953619003296 2023-01-22 08:09:35.296779: step: 184/77, loss: 0.134894460439682 2023-01-22 08:09:36.615056: step: 188/77, loss: 0.01490715704858303 2023-01-22 08:09:37.900066: step: 192/77, loss: 0.027591338381171227 2023-01-22 08:09:39.178775: step: 196/77, loss: 0.018336333334445953 2023-01-22 08:09:40.459477: step: 200/77, loss: 0.0029937070794403553 2023-01-22 08:09:41.799777: step: 204/77, loss: 0.061283551156520844 2023-01-22 08:09:43.092816: step: 208/77, loss: 0.01410150621086359 2023-01-22 08:09:44.394152: step: 212/77, loss: 0.015505759045481682 2023-01-22 08:09:45.692963: step: 216/77, loss: 0.020994171500205994 2023-01-22 08:09:47.060388: step: 220/77, loss: 0.0008524280274286866 2023-01-22 08:09:48.321315: step: 224/77, loss: 0.024159442633390427 2023-01-22 08:09:49.639246: step: 228/77, loss: 0.04099274054169655 2023-01-22 08:09:50.945100: step: 232/77, loss: 0.013209857046604156 2023-01-22 08:09:52.251488: step: 236/77, loss: 0.01315020676702261 2023-01-22 08:09:53.565757: step: 240/77, loss: 
0.07746905833482742 2023-01-22 08:09:54.846848: step: 244/77, loss: 0.02136453054845333 2023-01-22 08:09:56.117321: step: 248/77, loss: 0.010054754093289375 2023-01-22 08:09:57.414953: step: 252/77, loss: 0.001023219432681799 2023-01-22 08:09:58.673460: step: 256/77, loss: 0.005565670784562826 2023-01-22 08:09:59.962021: step: 260/77, loss: 8.016253559617326e-06 2023-01-22 08:10:01.230794: step: 264/77, loss: 0.015554104000329971 2023-01-22 08:10:02.553232: step: 268/77, loss: 0.0036264844238758087 2023-01-22 08:10:03.907487: step: 272/77, loss: 0.0014844255056232214 2023-01-22 08:10:05.209921: step: 276/77, loss: 0.001290302723646164 2023-01-22 08:10:06.478954: step: 280/77, loss: 0.0001928550045704469 2023-01-22 08:10:07.820955: step: 284/77, loss: 0.008350521326065063 2023-01-22 08:10:09.127251: step: 288/77, loss: 0.05311926081776619 2023-01-22 08:10:10.454044: step: 292/77, loss: 0.013220196589827538 2023-01-22 08:10:11.771724: step: 296/77, loss: 0.4162817895412445 2023-01-22 08:10:13.087134: step: 300/77, loss: 0.038585688918828964 2023-01-22 08:10:14.382797: step: 304/77, loss: 0.0023205061443150043 2023-01-22 08:10:15.678506: step: 308/77, loss: 0.004533206578344107 2023-01-22 08:10:16.973106: step: 312/77, loss: 0.006979439407587051 2023-01-22 08:10:18.282882: step: 316/77, loss: 0.00823113601654768 2023-01-22 08:10:19.591874: step: 320/77, loss: 0.0049030412919819355 2023-01-22 08:10:20.953959: step: 324/77, loss: 0.001947331242263317 2023-01-22 08:10:22.231830: step: 328/77, loss: 0.010946845635771751 2023-01-22 08:10:23.550928: step: 332/77, loss: 0.07835371792316437 2023-01-22 08:10:24.900519: step: 336/77, loss: 0.016691043972969055 2023-01-22 08:10:26.185155: step: 340/77, loss: 0.023241087794303894 2023-01-22 08:10:27.485949: step: 344/77, loss: 0.00034647007123567164 2023-01-22 08:10:28.779958: step: 348/77, loss: 0.0037985360249876976 2023-01-22 08:10:30.086080: step: 352/77, loss: 0.04664488509297371 2023-01-22 08:10:31.412466: step: 356/77, loss: 0.004729550331830978 2023-01-22 08:10:32.721311: step: 360/77, loss: 0.005308020394295454 2023-01-22 08:10:34.044983: step: 364/77, loss: 0.006872281432151794 2023-01-22 08:10:35.359393: step: 368/77, loss: 0.046257421374320984 2023-01-22 08:10:36.654787: step: 372/77, loss: 0.0035345409996807575 2023-01-22 08:10:37.957892: step: 376/77, loss: 0.0029179975390434265 2023-01-22 08:10:39.283683: step: 380/77, loss: 0.003412567311897874 2023-01-22 08:10:40.627242: step: 384/77, loss: 0.001803032704629004 2023-01-22 08:10:41.964118: step: 388/77, loss: 0.0013890385162085295 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9857142857142858, 'r': 0.5149253731343284, 'f1': 0.6764705882352942}, 'slot': {'p': 0.6451612903225806, 'r': 0.017969451931716084, 'f1': 0.03496503496503497}, 'combined': 0.023652817770464834, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.9848484848484849, 'r': 0.48507462686567165, 'f1': 0.65}, 'slot': {'p': 0.6451612903225806, 'r': 0.017969451931716084, 'f1': 0.03496503496503497}, 'combined': 0.02272727272727273, 'epoch': 9} Dev Russian: 
{'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.9850746268656716, 'r': 0.4925373134328358, 'f1': 0.6567164179104477}, 'slot': {'p': 0.6451612903225806, 'r': 0.017969451931716084, 'f1': 0.03496503496503497}, 'combined': 0.02296211251435132, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:12:22.007798: step: 4/77, loss: 0.004019314423203468 2023-01-22 08:12:23.330268: step: 8/77, loss: 0.0018244950333610177 2023-01-22 08:12:24.645529: step: 12/77, loss: 0.013065481558442116 2023-01-22 08:12:25.944157: step: 16/77, loss: 0.030329227447509766 2023-01-22 08:12:27.292524: step: 20/77, loss: 0.008612009696662426 2023-01-22 08:12:28.577297: step: 24/77, loss: 0.00204270682297647 2023-01-22 08:12:29.856893: step: 28/77, 
loss: 0.002712585497647524 2023-01-22 08:12:31.166046: step: 32/77, loss: 0.003777566133067012 2023-01-22 08:12:32.509643: step: 36/77, loss: 0.0516265407204628 2023-01-22 08:12:33.783334: step: 40/77, loss: 0.006463993806391954 2023-01-22 08:12:35.110672: step: 44/77, loss: 0.012646461836993694 2023-01-22 08:12:36.351806: step: 48/77, loss: 0.011762295849621296 2023-01-22 08:12:37.644504: step: 52/77, loss: 0.0002011667238548398 2023-01-22 08:12:38.953062: step: 56/77, loss: 0.00680114608258009 2023-01-22 08:12:40.290842: step: 60/77, loss: 0.029793288558721542 2023-01-22 08:12:41.600324: step: 64/77, loss: 0.0047124773263931274 2023-01-22 08:12:42.892536: step: 68/77, loss: 0.009396566078066826 2023-01-22 08:12:44.223810: step: 72/77, loss: 0.0017562780994921923 2023-01-22 08:12:45.567296: step: 76/77, loss: 0.01609233021736145 2023-01-22 08:12:46.844502: step: 80/77, loss: 0.0014276818837970495 2023-01-22 08:12:48.139553: step: 84/77, loss: 0.00016602581308688968 2023-01-22 08:12:49.438316: step: 88/77, loss: 0.0024550859816372395 2023-01-22 08:12:50.707953: step: 92/77, loss: 0.008442103862762451 2023-01-22 08:12:52.006265: step: 96/77, loss: 0.001850472646765411 2023-01-22 08:12:53.283056: step: 100/77, loss: 0.00015385696315206587 2023-01-22 08:12:54.596447: step: 104/77, loss: 0.0010777512798085809 2023-01-22 08:12:55.867212: step: 108/77, loss: 0.003965794574469328 2023-01-22 08:12:57.167622: step: 112/77, loss: 0.00920006912201643 2023-01-22 08:12:58.465114: step: 116/77, loss: 9.757414227351546e-05 2023-01-22 08:12:59.800022: step: 120/77, loss: 0.004842578433454037 2023-01-22 08:13:01.109123: step: 124/77, loss: 0.0011513237841427326 2023-01-22 08:13:02.463633: step: 128/77, loss: 0.03656904026865959 2023-01-22 08:13:03.769656: step: 132/77, loss: 0.006750887259840965 2023-01-22 08:13:05.059833: step: 136/77, loss: 0.002498416928574443 2023-01-22 08:13:06.371524: step: 140/77, loss: 0.002844251925125718 2023-01-22 08:13:07.650241: step: 144/77, loss: 0.005857444833964109 2023-01-22 08:13:08.935797: step: 148/77, loss: 0.0807381123304367 2023-01-22 08:13:10.280034: step: 152/77, loss: 0.0003607768740039319 2023-01-22 08:13:11.559513: step: 156/77, loss: 0.0107691939920187 2023-01-22 08:13:12.859033: step: 160/77, loss: 0.00013894679432269186 2023-01-22 08:13:14.139002: step: 164/77, loss: 0.00034505908843129873 2023-01-22 08:13:15.445843: step: 168/77, loss: 0.047398604452610016 2023-01-22 08:13:16.697913: step: 172/77, loss: 0.0016510799759998918 2023-01-22 08:13:18.054938: step: 176/77, loss: 0.01577381230890751 2023-01-22 08:13:19.392784: step: 180/77, loss: 0.004901238717138767 2023-01-22 08:13:20.687343: step: 184/77, loss: 0.04541084170341492 2023-01-22 08:13:22.030726: step: 188/77, loss: 0.06697969138622284 2023-01-22 08:13:23.350674: step: 192/77, loss: 0.0028941608034074306 2023-01-22 08:13:24.670386: step: 196/77, loss: 0.00033571728272363544 2023-01-22 08:13:25.965369: step: 200/77, loss: 0.00047282129526138306 2023-01-22 08:13:27.242486: step: 204/77, loss: 0.00794710498303175 2023-01-22 08:13:28.539002: step: 208/77, loss: 0.022247185930609703 2023-01-22 08:13:29.830189: step: 212/77, loss: 0.03522004187107086 2023-01-22 08:13:31.105132: step: 216/77, loss: 0.005268169101327658 2023-01-22 08:13:32.391096: step: 220/77, loss: 0.008936934173107147 2023-01-22 08:13:33.722698: step: 224/77, loss: 0.003135459730401635 2023-01-22 08:13:35.046628: step: 228/77, loss: 0.007926437072455883 2023-01-22 08:13:36.358056: step: 232/77, loss: 0.010014479048550129 2023-01-22 
08:13:37.723125: step: 236/77, loss: 0.0014004079857841134 2023-01-22 08:13:39.008331: step: 240/77, loss: 0.006368785165250301 2023-01-22 08:13:40.292607: step: 244/77, loss: 0.01043130923062563 2023-01-22 08:13:41.600933: step: 248/77, loss: 4.2198873416054994e-05 2023-01-22 08:13:42.924785: step: 252/77, loss: 0.03743477910757065 2023-01-22 08:13:44.228682: step: 256/77, loss: 0.011894501745700836 2023-01-22 08:13:45.512877: step: 260/77, loss: 0.010070500895380974 2023-01-22 08:13:46.839128: step: 264/77, loss: 0.0018026400357484818 2023-01-22 08:13:48.176298: step: 268/77, loss: 0.0027212868444621563 2023-01-22 08:13:49.499072: step: 272/77, loss: 0.0017126111779361963 2023-01-22 08:13:50.745057: step: 276/77, loss: 0.00834929384291172 2023-01-22 08:13:52.034453: step: 280/77, loss: 0.03722648695111275 2023-01-22 08:13:53.357852: step: 284/77, loss: 0.011383737437427044 2023-01-22 08:13:54.693552: step: 288/77, loss: 0.026112789288163185 2023-01-22 08:13:56.006653: step: 292/77, loss: 0.003575179958716035 2023-01-22 08:13:57.307271: step: 296/77, loss: 0.012457441538572311 2023-01-22 08:13:58.652141: step: 300/77, loss: 0.0018797186203300953 2023-01-22 08:13:59.968656: step: 304/77, loss: 0.03331933543086052 2023-01-22 08:14:01.235192: step: 308/77, loss: 0.04128976911306381 2023-01-22 08:14:02.551960: step: 312/77, loss: 0.0009756057406775653 2023-01-22 08:14:03.856742: step: 316/77, loss: 0.006699277553707361 2023-01-22 08:14:05.200456: step: 320/77, loss: 0.0006561750778928399 2023-01-22 08:14:06.499426: step: 324/77, loss: 0.006393713876605034 2023-01-22 08:14:07.851075: step: 328/77, loss: 0.00039606340578757226 2023-01-22 08:14:09.143352: step: 332/77, loss: 0.033246491104364395 2023-01-22 08:14:10.490849: step: 336/77, loss: 0.03028215281665325 2023-01-22 08:14:11.789456: step: 340/77, loss: 0.03625636175274849 2023-01-22 08:14:13.056069: step: 344/77, loss: 7.374807319138199e-05 2023-01-22 08:14:14.325788: step: 348/77, loss: 0.01907702349126339 2023-01-22 08:14:15.621827: step: 352/77, loss: 0.003939451649785042 2023-01-22 08:14:16.915595: step: 356/77, loss: 0.038318932056427 2023-01-22 08:14:18.275643: step: 360/77, loss: 0.016218364238739014 2023-01-22 08:14:19.616738: step: 364/77, loss: 0.01653050258755684 2023-01-22 08:14:20.936328: step: 368/77, loss: 0.0006744784768670797 2023-01-22 08:14:22.252071: step: 372/77, loss: 0.013431889936327934 2023-01-22 08:14:23.560479: step: 376/77, loss: 0.11695510149002075 2023-01-22 08:14:24.843790: step: 380/77, loss: 0.006448196247220039 2023-01-22 08:14:26.177046: step: 384/77, loss: 0.02021513134241104 2023-01-22 08:14:27.486440: step: 388/77, loss: 0.010459277778863907 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 10} Test Chinese: {'template': {'p': 1.0, 'r': 0.5373134328358209, 'f1': 0.6990291262135923}, 'slot': {'p': 0.5945945945945946, 'r': 0.019766397124887692, 'f1': 0.03826086956521739}, 'combined': 0.02674546222034614, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 10} Test Korean: {'template': {'p': 1.0, 'r': 0.5373134328358209, 'f1': 0.6990291262135923}, 'slot': {'p': 0.6, 'r': 0.018867924528301886, 'f1': 
0.036585365853658534}, 'combined': 0.02557423632488752, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 10} Test Russian: {'template': {'p': 1.0, 'r': 0.5298507462686567, 'f1': 0.6926829268292682}, 'slot': {'p': 0.6176470588235294, 'r': 0.018867924528301886, 'f1': 0.036617262423714034}, 'combined': 0.025364152508133623, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:16:07.644344: step: 4/77, loss: 0.004519676323980093 2023-01-22 08:16:08.985534: step: 8/77, loss: 0.010373960249125957 2023-01-22 08:16:10.293083: step: 12/77, loss: 0.006382956635206938 2023-01-22 08:16:11.575934: step: 16/77, loss: 0.02072090096771717 2023-01-22 08:16:12.880211: step: 20/77, loss: 
0.007683451287448406 2023-01-22 08:16:14.167231: step: 24/77, loss: 0.031069735065102577 2023-01-22 08:16:15.434767: step: 28/77, loss: 9.526885696686804e-05 2023-01-22 08:16:16.706751: step: 32/77, loss: 0.0003794231452047825 2023-01-22 08:16:18.037734: step: 36/77, loss: 0.002033552620559931 2023-01-22 08:16:19.296407: step: 40/77, loss: 0.016326041892170906 2023-01-22 08:16:20.595056: step: 44/77, loss: 0.019845878705382347 2023-01-22 08:16:21.926735: step: 48/77, loss: 0.000536456354893744 2023-01-22 08:16:23.258462: step: 52/77, loss: 0.0061861746944487095 2023-01-22 08:16:24.577943: step: 56/77, loss: 0.00497963884845376 2023-01-22 08:16:25.907869: step: 60/77, loss: 0.01730026677250862 2023-01-22 08:16:27.247524: step: 64/77, loss: 0.001555685419589281 2023-01-22 08:16:28.564341: step: 68/77, loss: 0.030430197715759277 2023-01-22 08:16:29.853482: step: 72/77, loss: 0.005852745845913887 2023-01-22 08:16:31.179556: step: 76/77, loss: 3.0581431929022074e-05 2023-01-22 08:16:32.457831: step: 80/77, loss: 0.008818567730486393 2023-01-22 08:16:33.756288: step: 84/77, loss: 0.010246563702821732 2023-01-22 08:16:35.068559: step: 88/77, loss: 0.13901685178279877 2023-01-22 08:16:36.415443: step: 92/77, loss: 0.0018688386771827936 2023-01-22 08:16:37.729613: step: 96/77, loss: 0.03900507837533951 2023-01-22 08:16:39.071244: step: 100/77, loss: 0.005112081300467253 2023-01-22 08:16:40.422601: step: 104/77, loss: 0.016174569725990295 2023-01-22 08:16:41.705673: step: 108/77, loss: 0.0031978185288608074 2023-01-22 08:16:43.076643: step: 112/77, loss: 0.007094390690326691 2023-01-22 08:16:44.453364: step: 116/77, loss: 0.007978803478181362 2023-01-22 08:16:45.740035: step: 120/77, loss: 0.010525790974497795 2023-01-22 08:16:47.030602: step: 124/77, loss: 0.009775063954293728 2023-01-22 08:16:48.334434: step: 128/77, loss: 0.004989398177713156 2023-01-22 08:16:49.605631: step: 132/77, loss: 0.002119546290487051 2023-01-22 08:16:50.924652: step: 136/77, loss: 0.004756305366754532 2023-01-22 08:16:52.248878: step: 140/77, loss: 0.0028865262866020203 2023-01-22 08:16:53.578270: step: 144/77, loss: 0.004879987332969904 2023-01-22 08:16:54.888667: step: 148/77, loss: 0.002608640119433403 2023-01-22 08:16:56.132316: step: 152/77, loss: 0.016980547457933426 2023-01-22 08:16:57.464110: step: 156/77, loss: 0.0318780392408371 2023-01-22 08:16:58.746190: step: 160/77, loss: 0.010919311083853245 2023-01-22 08:17:00.067225: step: 164/77, loss: 0.02614099346101284 2023-01-22 08:17:01.378280: step: 168/77, loss: 0.0016549699939787388 2023-01-22 08:17:02.688480: step: 172/77, loss: 0.00027187608066014946 2023-01-22 08:17:03.971287: step: 176/77, loss: 0.015034875832498074 2023-01-22 08:17:05.273002: step: 180/77, loss: 0.03275790065526962 2023-01-22 08:17:06.568954: step: 184/77, loss: 1.341791357845068e-05 2023-01-22 08:17:07.874003: step: 188/77, loss: 0.0011008772999048233 2023-01-22 08:17:09.197234: step: 192/77, loss: 0.05528085306286812 2023-01-22 08:17:10.525059: step: 196/77, loss: 0.007917086593806744 2023-01-22 08:17:11.827509: step: 200/77, loss: 0.027716726064682007 2023-01-22 08:17:13.126549: step: 204/77, loss: 0.010608302429318428 2023-01-22 08:17:14.430194: step: 208/77, loss: 0.007032268680632114 2023-01-22 08:17:15.732312: step: 212/77, loss: 0.004656591452658176 2023-01-22 08:17:16.993871: step: 216/77, loss: 0.00042243345524184406 2023-01-22 08:17:18.302680: step: 220/77, loss: 0.0030772360041737556 2023-01-22 08:17:19.632524: step: 224/77, loss: 0.0013640880351886153 2023-01-22 
08:17:20.924123: step: 228/77, loss: 0.016188669949769974 2023-01-22 08:17:22.180172: step: 232/77, loss: 0.00018536573043093085 2023-01-22 08:17:23.442371: step: 236/77, loss: 0.026044102385640144 2023-01-22 08:17:24.773570: step: 240/77, loss: 0.001282632234506309 2023-01-22 08:17:26.089036: step: 244/77, loss: 0.013146881945431232 2023-01-22 08:17:27.390756: step: 248/77, loss: 0.005962767638266087 2023-01-22 08:17:28.694443: step: 252/77, loss: 0.001891864463686943 2023-01-22 08:17:30.015654: step: 256/77, loss: 0.03969700634479523 2023-01-22 08:17:31.279692: step: 260/77, loss: 0.03692417964339256 2023-01-22 08:17:32.570976: step: 264/77, loss: 0.00492922542616725 2023-01-22 08:17:33.888579: step: 268/77, loss: 0.012601925060153008 2023-01-22 08:17:35.178283: step: 272/77, loss: 0.018331564962863922 2023-01-22 08:17:36.474304: step: 276/77, loss: 0.001236966927535832 2023-01-22 08:17:37.804037: step: 280/77, loss: 0.009361336007714272 2023-01-22 08:17:39.105332: step: 284/77, loss: 0.029684297740459442 2023-01-22 08:17:40.404740: step: 288/77, loss: 0.0017025243723765016 2023-01-22 08:17:41.716851: step: 292/77, loss: 0.018443405628204346 2023-01-22 08:17:43.038288: step: 296/77, loss: 0.0002197076682932675 2023-01-22 08:17:44.320997: step: 300/77, loss: 0.07794710993766785 2023-01-22 08:17:45.619266: step: 304/77, loss: 0.0007880099583417177 2023-01-22 08:17:46.946172: step: 308/77, loss: 0.02216692827641964 2023-01-22 08:17:48.245984: step: 312/77, loss: 0.05256933346390724 2023-01-22 08:17:49.575685: step: 316/77, loss: 0.046504754573106766 2023-01-22 08:17:50.911511: step: 320/77, loss: 0.016857344657182693 2023-01-22 08:17:52.199772: step: 324/77, loss: 0.013420961797237396 2023-01-22 08:17:53.481305: step: 328/77, loss: 0.0002451570762787014 2023-01-22 08:17:54.782580: step: 332/77, loss: 0.04551130160689354 2023-01-22 08:17:56.076850: step: 336/77, loss: 0.0486028753221035 2023-01-22 08:17:57.376354: step: 340/77, loss: 0.0007528806454502046 2023-01-22 08:17:58.651037: step: 344/77, loss: 0.004098730627447367 2023-01-22 08:17:59.949035: step: 348/77, loss: 0.040485743433237076 2023-01-22 08:18:01.220341: step: 352/77, loss: 0.00328265642747283 2023-01-22 08:18:02.484579: step: 356/77, loss: 0.0029611773788928986 2023-01-22 08:18:03.773140: step: 360/77, loss: 0.009117941372096539 2023-01-22 08:18:05.069407: step: 364/77, loss: 0.024599190801382065 2023-01-22 08:18:06.350069: step: 368/77, loss: 0.002736852038651705 2023-01-22 08:18:07.643022: step: 372/77, loss: 0.011240647174417973 2023-01-22 08:18:09.025339: step: 376/77, loss: 0.026055969297885895 2023-01-22 08:18:10.323768: step: 380/77, loss: 0.0010039397748187184 2023-01-22 08:18:11.637687: step: 384/77, loss: 0.0022746820468455553 2023-01-22 08:18:12.896015: step: 388/77, loss: 0.031541526317596436 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9863013698630136, 'r': 0.5373134328358209, 'f1': 0.6956521739130435}, 'slot': {'p': 0.5405405405405406, 'r': 0.017969451931716084, 'f1': 0.034782608695652174}, 'combined': 0.024196597353497166, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 
11} Test Korean: {'template': {'p': 0.9861111111111112, 'r': 0.5298507462686567, 'f1': 0.6893203883495146}, 'slot': {'p': 0.5405405405405406, 'r': 0.017969451931716084, 'f1': 0.034782608695652174}, 'combined': 0.02397636133389616, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9861111111111112, 'r': 0.5298507462686567, 'f1': 0.6893203883495146}, 'slot': {'p': 0.5555555555555556, 'r': 0.017969451931716084, 'f1': 0.03481288076588338}, 'combined': 0.02399722848910408, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:19:53.642432: step: 4/77, loss: 0.0033823195844888687 2023-01-22 08:19:54.964949: step: 8/77, loss: 0.006067562848329544 2023-01-22 08:19:56.250225: step: 12/77, loss: 
0.004152682609856129 2023-01-22 08:19:57.497568: step: 16/77, loss: 0.011536704376339912 2023-01-22 08:19:58.799592: step: 20/77, loss: 0.03795253485441208 2023-01-22 08:20:00.099247: step: 24/77, loss: 0.0029349522665143013 2023-01-22 08:20:01.392224: step: 28/77, loss: 0.04994878172874451 2023-01-22 08:20:02.695564: step: 32/77, loss: 0.00848055724054575 2023-01-22 08:20:04.042237: step: 36/77, loss: 0.00016331578081008047 2023-01-22 08:20:05.321275: step: 40/77, loss: 0.006215088535100222 2023-01-22 08:20:06.655665: step: 44/77, loss: 0.0010558516951277852 2023-01-22 08:20:07.979944: step: 48/77, loss: 0.032813701778650284 2023-01-22 08:20:09.292255: step: 52/77, loss: 0.0005068772588856518 2023-01-22 08:20:10.564457: step: 56/77, loss: 0.02557031437754631 2023-01-22 08:20:11.876334: step: 60/77, loss: 0.0047400458715856075 2023-01-22 08:20:13.222631: step: 64/77, loss: 0.009990248829126358 2023-01-22 08:20:14.449089: step: 68/77, loss: 0.03629093989729881 2023-01-22 08:20:15.770164: step: 72/77, loss: 0.04313212260603905 2023-01-22 08:20:17.123153: step: 76/77, loss: 0.008982475847005844 2023-01-22 08:20:18.392935: step: 80/77, loss: 0.0032843714579939842 2023-01-22 08:20:19.754104: step: 84/77, loss: 3.893310349667445e-05 2023-01-22 08:20:21.130395: step: 88/77, loss: 0.003442638088017702 2023-01-22 08:20:22.448597: step: 92/77, loss: 0.0004835306026507169 2023-01-22 08:20:23.744697: step: 96/77, loss: 0.0011237069265916944 2023-01-22 08:20:25.036671: step: 100/77, loss: 0.002386020962148905 2023-01-22 08:20:26.337356: step: 104/77, loss: 0.006551727186888456 2023-01-22 08:20:27.633087: step: 108/77, loss: 0.01057566050440073 2023-01-22 08:20:28.928251: step: 112/77, loss: 0.013844773173332214 2023-01-22 08:20:30.239273: step: 116/77, loss: 0.001763289445079863 2023-01-22 08:20:31.525597: step: 120/77, loss: 0.050311699509620667 2023-01-22 08:20:32.832735: step: 124/77, loss: 0.010518746450543404 2023-01-22 08:20:34.118015: step: 128/77, loss: 0.12409986555576324 2023-01-22 08:20:35.459433: step: 132/77, loss: 0.0004061115032527596 2023-01-22 08:20:36.766477: step: 136/77, loss: 0.001003948738798499 2023-01-22 08:20:38.093376: step: 140/77, loss: 0.0005174941616132855 2023-01-22 08:20:39.399181: step: 144/77, loss: 0.006630543153733015 2023-01-22 08:20:40.745418: step: 148/77, loss: 0.029026571661233902 2023-01-22 08:20:42.082062: step: 152/77, loss: 0.04973183572292328 2023-01-22 08:20:43.363642: step: 156/77, loss: 0.0009474740945734084 2023-01-22 08:20:44.666477: step: 160/77, loss: 0.0030602377373725176 2023-01-22 08:20:46.008255: step: 164/77, loss: 0.059144243597984314 2023-01-22 08:20:47.354592: step: 168/77, loss: 0.009352781809866428 2023-01-22 08:20:48.637491: step: 172/77, loss: 0.004000375512987375 2023-01-22 08:20:49.949707: step: 176/77, loss: 0.03193020820617676 2023-01-22 08:20:51.271323: step: 180/77, loss: 0.0019103833474218845 2023-01-22 08:20:52.551836: step: 184/77, loss: 0.0012183418730273843 2023-01-22 08:20:53.863960: step: 188/77, loss: 0.005255671218037605 2023-01-22 08:20:55.201946: step: 192/77, loss: 0.04504666104912758 2023-01-22 08:20:56.554603: step: 196/77, loss: 0.022459331899881363 2023-01-22 08:20:57.903662: step: 200/77, loss: 0.0003197941405232996 2023-01-22 08:20:59.272362: step: 204/77, loss: 0.0009358513634651899 2023-01-22 08:21:00.591086: step: 208/77, loss: 0.010235392488539219 2023-01-22 08:21:01.920513: step: 212/77, loss: 0.0063028484582901 2023-01-22 08:21:03.264309: step: 216/77, loss: 0.0019760727882385254 2023-01-22 08:21:04.583377: 
step: 220/77, loss: 0.0003104644129052758 2023-01-22 08:21:05.969136: step: 224/77, loss: 0.000779816647991538 2023-01-22 08:21:07.354541: step: 228/77, loss: 0.001094711828045547 2023-01-22 08:21:08.698290: step: 232/77, loss: 0.0022505666129291058 2023-01-22 08:21:09.978741: step: 236/77, loss: 0.004705403000116348 2023-01-22 08:21:11.349378: step: 240/77, loss: 6.31944349152036e-05 2023-01-22 08:21:12.668876: step: 244/77, loss: 0.007898539304733276 2023-01-22 08:21:13.968703: step: 248/77, loss: 0.0002214372652815655 2023-01-22 08:21:15.256048: step: 252/77, loss: 0.0009268993744626641 2023-01-22 08:21:16.596024: step: 256/77, loss: 0.0010784551268443465 2023-01-22 08:21:17.888395: step: 260/77, loss: 0.001904657343402505 2023-01-22 08:21:19.114673: step: 264/77, loss: 0.05391979590058327 2023-01-22 08:21:20.418194: step: 268/77, loss: 0.016363475471735 2023-01-22 08:21:21.727983: step: 272/77, loss: 2.981504849230987e-06 2023-01-22 08:21:23.053333: step: 276/77, loss: 4.402042395668104e-05 2023-01-22 08:21:24.349079: step: 280/77, loss: 0.0005392417078837752 2023-01-22 08:21:25.626886: step: 284/77, loss: 0.0011079860851168633 2023-01-22 08:21:26.931297: step: 288/77, loss: 0.21042890846729279 2023-01-22 08:21:28.205797: step: 292/77, loss: 0.06970375776290894 2023-01-22 08:21:29.532982: step: 296/77, loss: 0.014009656384587288 2023-01-22 08:21:30.895857: step: 300/77, loss: 0.00030892904032953084 2023-01-22 08:21:32.192593: step: 304/77, loss: 0.017677268013358116 2023-01-22 08:21:33.461633: step: 308/77, loss: 0.09594322741031647 2023-01-22 08:21:34.774513: step: 312/77, loss: 0.0032438847701996565 2023-01-22 08:21:36.013114: step: 316/77, loss: 0.004886825103312731 2023-01-22 08:21:37.335603: step: 320/77, loss: 0.0006108014495112002 2023-01-22 08:21:38.607950: step: 324/77, loss: 0.06285342574119568 2023-01-22 08:21:39.909058: step: 328/77, loss: 0.00013249233597889543 2023-01-22 08:21:41.181745: step: 332/77, loss: 0.0010816589929163456 2023-01-22 08:21:42.523110: step: 336/77, loss: 0.006212495267391205 2023-01-22 08:21:43.818193: step: 340/77, loss: 0.0132598252967 2023-01-22 08:21:45.103782: step: 344/77, loss: 0.012078355997800827 2023-01-22 08:21:46.372789: step: 348/77, loss: 0.006435449235141277 2023-01-22 08:21:47.646565: step: 352/77, loss: 0.017631251364946365 2023-01-22 08:21:48.982921: step: 356/77, loss: 0.0931030884385109 2023-01-22 08:21:50.302929: step: 360/77, loss: 0.04081280156970024 2023-01-22 08:21:51.556319: step: 364/77, loss: 7.617353548994288e-05 2023-01-22 08:21:52.878857: step: 368/77, loss: 0.0042196400463581085 2023-01-22 08:21:54.201843: step: 372/77, loss: 0.0030183957424014807 2023-01-22 08:21:55.480329: step: 376/77, loss: 0.00011321428610244766 2023-01-22 08:21:56.750175: step: 380/77, loss: 0.011876048520207405 2023-01-22 08:21:58.036633: step: 384/77, loss: 0.00406003650277853 2023-01-22 08:21:59.363423: step: 388/77, loss: 0.08133828639984131 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.5238095238095238, 'r': 0.019766397124887692, 'f1': 0.0380952380952381}, 'combined': 0.026235399820305483, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Korean: {'template': {'p': 0.9230769230769231, 'r': 0.5373134328358209, 'f1': 0.6792452830188679}, 'slot': {'p': 0.5365853658536586, 'r': 0.019766397124887692, 'f1': 0.038128249566724434}, 'combined': 0.025898433667963766, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5238095238095238, 'r': 0.019766397124887692, 'f1': 0.0380952380952381}, 'combined': 0.026122448979591834, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 
--learning_rate 2e-4 2023-01-22 08:23:39.843627: step: 4/77, loss: 0.0003183061198797077 2023-01-22 08:23:41.131382: step: 8/77, loss: 0.040560282766819 2023-01-22 08:23:42.465260: step: 12/77, loss: 0.04178724065423012 2023-01-22 08:23:43.737307: step: 16/77, loss: 0.01956675946712494 2023-01-22 08:23:44.996145: step: 20/77, loss: 0.06916426122188568 2023-01-22 08:23:46.320383: step: 24/77, loss: 6.770234449504642e-06 2023-01-22 08:23:47.602509: step: 28/77, loss: 0.00825644563883543 2023-01-22 08:23:48.949489: step: 32/77, loss: 0.00011275661381660029 2023-01-22 08:23:50.246291: step: 36/77, loss: 0.035464927554130554 2023-01-22 08:23:51.521871: step: 40/77, loss: 0.0007295497925952077 2023-01-22 08:23:52.778482: step: 44/77, loss: 7.648255996173248e-05 2023-01-22 08:23:54.056961: step: 48/77, loss: 0.005698735825717449 2023-01-22 08:23:55.365063: step: 52/77, loss: 0.00267980108037591 2023-01-22 08:23:56.663951: step: 56/77, loss: 0.004135094583034515 2023-01-22 08:23:57.941754: step: 60/77, loss: 0.0002119252021657303 2023-01-22 08:23:59.224064: step: 64/77, loss: 3.185726745869033e-05 2023-01-22 08:24:00.519374: step: 68/77, loss: 0.04375888407230377 2023-01-22 08:24:01.853798: step: 72/77, loss: 3.7466426874743775e-05 2023-01-22 08:24:03.192340: step: 76/77, loss: 1.2116726793465205e-05 2023-01-22 08:24:04.494330: step: 80/77, loss: 0.0002926643064711243 2023-01-22 08:24:05.786821: step: 84/77, loss: 0.005509909242391586 2023-01-22 08:24:07.133054: step: 88/77, loss: 0.02041170559823513 2023-01-22 08:24:08.433840: step: 92/77, loss: 0.0026900265365839005 2023-01-22 08:24:09.705295: step: 96/77, loss: 0.008952000178396702 2023-01-22 08:24:11.023126: step: 100/77, loss: 1.0245260455121752e-05 2023-01-22 08:24:12.320999: step: 104/77, loss: 4.525334134086734e-06 2023-01-22 08:24:13.593039: step: 108/77, loss: 4.0762461139820516e-05 2023-01-22 08:24:14.865947: step: 112/77, loss: 0.00012925347255077213 2023-01-22 08:24:16.136167: step: 116/77, loss: 0.033975325524806976 2023-01-22 08:24:17.427382: step: 120/77, loss: 0.002418851014226675 2023-01-22 08:24:18.741445: step: 124/77, loss: 0.03567567840218544 2023-01-22 08:24:20.017029: step: 128/77, loss: 0.0027435519732534885 2023-01-22 08:24:21.269963: step: 132/77, loss: 0.011629242449998856 2023-01-22 08:24:22.536141: step: 136/77, loss: 0.0009894848335534334 2023-01-22 08:24:23.839379: step: 140/77, loss: 0.007758474443107843 2023-01-22 08:24:25.127330: step: 144/77, loss: 0.012338060885667801 2023-01-22 08:24:26.449419: step: 148/77, loss: 0.008329820819199085 2023-01-22 08:24:27.709916: step: 152/77, loss: 0.0021699117496609688 2023-01-22 08:24:29.012804: step: 156/77, loss: 0.00035712175304070115 2023-01-22 08:24:30.311404: step: 160/77, loss: 0.001182187581434846 2023-01-22 08:24:31.600112: step: 164/77, loss: 0.22069135308265686 2023-01-22 08:24:32.891969: step: 168/77, loss: 0.0002262511698063463 2023-01-22 08:24:34.209785: step: 172/77, loss: 0.03320368751883507 2023-01-22 08:24:35.523378: step: 176/77, loss: 0.00016430506366305053 2023-01-22 08:24:36.837290: step: 180/77, loss: 0.017425213009119034 2023-01-22 08:24:38.169170: step: 184/77, loss: 0.0005045488360337913 2023-01-22 08:24:39.463336: step: 188/77, loss: 0.0009198979823850095 2023-01-22 08:24:40.821661: step: 192/77, loss: 0.004155697301030159 2023-01-22 08:24:42.143731: step: 196/77, loss: 0.009954680688679218 2023-01-22 08:24:43.452235: step: 200/77, loss: 0.035099174827337265 2023-01-22 08:24:44.751455: step: 204/77, loss: 0.010139863938093185 2023-01-22 
08:24:46.060990: step: 208/77, loss: 0.018720045685768127 2023-01-22 08:24:47.409771: step: 212/77, loss: 0.017439253628253937 2023-01-22 08:24:48.732381: step: 216/77, loss: 0.01844675838947296 2023-01-22 08:24:50.070566: step: 220/77, loss: 0.0011854919139295816 2023-01-22 08:24:51.377033: step: 224/77, loss: 0.006082172971218824 2023-01-22 08:24:52.705600: step: 228/77, loss: 0.004856535699218512 2023-01-22 08:24:54.021500: step: 232/77, loss: 0.00015938098658807576 2023-01-22 08:24:55.304887: step: 236/77, loss: 0.0014069630997255445 2023-01-22 08:24:56.612338: step: 240/77, loss: 9.374695946462452e-05 2023-01-22 08:24:57.882285: step: 244/77, loss: 0.01892547309398651 2023-01-22 08:24:59.196987: step: 248/77, loss: 0.0004414403811097145 2023-01-22 08:25:00.566608: step: 252/77, loss: 0.1034957766532898 2023-01-22 08:25:01.874890: step: 256/77, loss: 2.6847968911170028e-05 2023-01-22 08:25:03.210132: step: 260/77, loss: 0.05694052577018738 2023-01-22 08:25:04.549041: step: 264/77, loss: 0.0010568362195044756 2023-01-22 08:25:05.841573: step: 268/77, loss: 0.008948981761932373 2023-01-22 08:25:07.126183: step: 272/77, loss: 0.0004628953174687922 2023-01-22 08:25:08.452220: step: 276/77, loss: 0.0007866702508181334 2023-01-22 08:25:09.783077: step: 280/77, loss: 0.001445968635380268 2023-01-22 08:25:11.078683: step: 284/77, loss: 0.0001449974370189011 2023-01-22 08:25:12.399062: step: 288/77, loss: 0.0007071525906212628 2023-01-22 08:25:13.650113: step: 292/77, loss: 7.404296047752723e-05 2023-01-22 08:25:14.967591: step: 296/77, loss: 0.008954092860221863 2023-01-22 08:25:16.282922: step: 300/77, loss: 0.004929321818053722 2023-01-22 08:25:17.613695: step: 304/77, loss: 0.0034438367001712322 2023-01-22 08:25:18.867813: step: 308/77, loss: 0.0035439524799585342 2023-01-22 08:25:20.180802: step: 312/77, loss: 0.013492235913872719 2023-01-22 08:25:21.518756: step: 316/77, loss: 0.00016831423272378743 2023-01-22 08:25:22.828318: step: 320/77, loss: 0.07191120833158493 2023-01-22 08:25:24.125757: step: 324/77, loss: 0.0024667498655617237 2023-01-22 08:25:25.423995: step: 328/77, loss: 0.003168292809277773 2023-01-22 08:25:26.760794: step: 332/77, loss: 5.0638423999771476e-05 2023-01-22 08:25:28.096854: step: 336/77, loss: 0.005427079740911722 2023-01-22 08:25:29.344594: step: 340/77, loss: 0.12284014374017715 2023-01-22 08:25:30.572248: step: 344/77, loss: 0.012342492118477821 2023-01-22 08:25:31.857287: step: 348/77, loss: 0.0013616869691759348 2023-01-22 08:25:33.164000: step: 352/77, loss: 3.9185226341942325e-05 2023-01-22 08:25:34.486442: step: 356/77, loss: 0.04308632016181946 2023-01-22 08:25:35.802251: step: 360/77, loss: 0.004393482580780983 2023-01-22 08:25:37.162392: step: 364/77, loss: 0.0007518457132391632 2023-01-22 08:25:38.458543: step: 368/77, loss: 0.010810460895299911 2023-01-22 08:25:39.768268: step: 372/77, loss: 0.0010192915797233582 2023-01-22 08:25:41.088014: step: 376/77, loss: 2.2996693587629125e-05 2023-01-22 08:25:42.450668: step: 380/77, loss: 4.207800520816818e-05 2023-01-22 08:25:43.748306: step: 384/77, loss: 0.002860223175957799 2023-01-22 08:25:45.014640: step: 388/77, loss: 0.00014594709500670433 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 
0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5625, 'r': 0.016172506738544475, 'f1': 0.031441048034934506}, 'combined': 0.02155957579538366, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5625, 'r': 0.016172506738544475, 'f1': 0.031441048034934506}, 'combined': 0.02155957579538366, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.59375, 'r': 0.017070979335130278, 'f1': 0.03318777292576419}, 'combined': 0.0227573300062383, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 14 command: python train.py --model_name template 
--xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:27:25.222595: step: 4/77, loss: 0.0025070586707443 2023-01-22 08:27:26.500633: step: 8/77, loss: 0.0009089748491533101 2023-01-22 08:27:27.848751: step: 12/77, loss: 0.002720245625823736 2023-01-22 08:27:29.120237: step: 16/77, loss: 0.00035778438905254006 2023-01-22 08:27:30.433810: step: 20/77, loss: 0.0068440865725278854 2023-01-22 08:27:31.730844: step: 24/77, loss: 0.007065152749419212 2023-01-22 08:27:33.046213: step: 28/77, loss: 0.00010182852565776557 2023-01-22 08:27:34.324217: step: 32/77, loss: 0.0007257150718942285 2023-01-22 08:27:35.646492: step: 36/77, loss: 0.0004841696354560554 2023-01-22 08:27:36.948836: step: 40/77, loss: 0.00044304801849648356 2023-01-22 08:27:38.212640: step: 44/77, loss: 2.9887085474911146e-05 2023-01-22 08:27:39.537829: step: 48/77, loss: 0.04122829809784889 2023-01-22 08:27:40.860407: step: 52/77, loss: 1.3771279554930516e-05 2023-01-22 08:27:42.189343: step: 56/77, loss: 0.027065474539995193 2023-01-22 08:27:43.507181: step: 60/77, loss: 0.02150728926062584 2023-01-22 08:27:44.835403: step: 64/77, loss: 0.0026987313758581877 2023-01-22 08:27:46.159626: step: 68/77, loss: 9.448805940337479e-06 2023-01-22 08:27:47.507058: step: 72/77, loss: 0.031193898990750313 2023-01-22 08:27:48.783796: step: 76/77, loss: 0.0018287431448698044 2023-01-22 08:27:50.095416: step: 80/77, loss: 0.0008180844597518444 2023-01-22 08:27:51.397284: step: 84/77, loss: 0.006824632175266743 2023-01-22 08:27:52.694523: step: 88/77, loss: 0.004198823124170303 2023-01-22 08:27:54.002989: step: 92/77, loss: 0.05608784034848213 2023-01-22 08:27:55.280629: step: 96/77, loss: 0.003223490435630083 2023-01-22 08:27:56.609262: step: 100/77, loss: 0.0006620727945119143 2023-01-22 08:27:57.887874: step: 104/77, loss: 0.0002506496093701571 2023-01-22 08:27:59.144619: step: 108/77, loss: 0.0017600820865482092 2023-01-22 08:28:00.484751: step: 112/77, loss: 0.0040322570130229 2023-01-22 08:28:01.833277: step: 116/77, loss: 9.456691623199731e-05 2023-01-22 08:28:03.156193: step: 120/77, loss: 0.032587483525276184 2023-01-22 08:28:04.438118: step: 124/77, loss: 0.0019689786713570356 2023-01-22 08:28:05.753939: step: 128/77, loss: 0.0011962441494688392 2023-01-22 08:28:06.997711: step: 132/77, loss: 0.0035147222224622965 2023-01-22 08:28:08.285620: step: 136/77, loss: 0.0013859305763617158 2023-01-22 08:28:09.677843: step: 140/77, loss: 1.1076562259404454e-05 2023-01-22 08:28:10.970246: step: 144/77, loss: 0.0012191644636914134 2023-01-22 08:28:12.261323: step: 148/77, loss: 0.0005226008943282068 2023-01-22 08:28:13.541721: step: 152/77, loss: 0.010527782142162323 2023-01-22 08:28:14.871773: step: 156/77, loss: 0.03147884085774422 2023-01-22 08:28:16.167739: step: 160/77, loss: 0.02116863988339901 2023-01-22 08:28:17.526253: step: 164/77, loss: 0.0002335396275157109 2023-01-22 08:28:18.859126: step: 168/77, loss: 1.208573485200759e-05 2023-01-22 08:28:20.175477: step: 172/77, loss: 0.00584996584802866 2023-01-22 08:28:21.504538: step: 176/77, loss: 0.01264292374253273 2023-01-22 08:28:22.833653: step: 180/77, loss: 0.00016159848019015044 2023-01-22 08:28:24.123798: step: 184/77, loss: 4.650797927752137e-05 2023-01-22 08:28:25.375235: step: 188/77, loss: 0.0141700254753232 2023-01-22 08:28:26.653136: step: 192/77, loss: 0.0003191042342223227 2023-01-22 08:28:27.913065: step: 196/77, loss: 
0.0017583910375833511 2023-01-22 08:28:29.190488: step: 200/77, loss: 0.00024490864598192275 2023-01-22 08:28:30.474969: step: 204/77, loss: 0.0005271589034236968 2023-01-22 08:28:31.778161: step: 208/77, loss: 0.004894105717539787 2023-01-22 08:28:33.041868: step: 212/77, loss: 0.04371624067425728 2023-01-22 08:28:34.303573: step: 216/77, loss: 0.011503464542329311 2023-01-22 08:28:35.572792: step: 220/77, loss: 0.037097468972206116 2023-01-22 08:28:36.886576: step: 224/77, loss: 0.07253731042146683 2023-01-22 08:28:38.190604: step: 228/77, loss: 0.032016571611166 2023-01-22 08:28:39.515787: step: 232/77, loss: 0.0006164589431136847 2023-01-22 08:28:40.857107: step: 236/77, loss: 0.002216715831309557 2023-01-22 08:28:42.217912: step: 240/77, loss: 0.024763930588960648 2023-01-22 08:28:43.473366: step: 244/77, loss: 0.003464376088231802 2023-01-22 08:28:44.770494: step: 248/77, loss: 1.4835750334896147e-05 2023-01-22 08:28:46.031051: step: 252/77, loss: 0.0022979378700256348 2023-01-22 08:28:47.358924: step: 256/77, loss: 0.0008279864559881389 2023-01-22 08:28:48.681050: step: 260/77, loss: 0.0003143183421343565 2023-01-22 08:28:49.973519: step: 264/77, loss: 1.83588908839738e-05 2023-01-22 08:28:51.228160: step: 268/77, loss: 0.006365937180817127 2023-01-22 08:28:52.510811: step: 272/77, loss: 0.0013630648609250784 2023-01-22 08:28:53.813812: step: 276/77, loss: 0.00014090965851210058 2023-01-22 08:28:55.162162: step: 280/77, loss: 0.002885822206735611 2023-01-22 08:28:56.480789: step: 284/77, loss: 0.01567690260708332 2023-01-22 08:28:57.805333: step: 288/77, loss: 0.0012530626263469458 2023-01-22 08:28:59.083352: step: 292/77, loss: 0.002671103226020932 2023-01-22 08:29:00.402959: step: 296/77, loss: 0.0026012749876827 2023-01-22 08:29:01.657137: step: 300/77, loss: 0.03614744171500206 2023-01-22 08:29:02.921046: step: 304/77, loss: 1.5982883269316517e-05 2023-01-22 08:29:04.248908: step: 308/77, loss: 0.03939162939786911 2023-01-22 08:29:05.534812: step: 312/77, loss: 0.006850957404822111 2023-01-22 08:29:06.846667: step: 316/77, loss: 7.452460704371333e-05 2023-01-22 08:29:08.163704: step: 320/77, loss: 0.008703973144292831 2023-01-22 08:29:09.509806: step: 324/77, loss: 0.004363438580185175 2023-01-22 08:29:10.845489: step: 328/77, loss: 0.00017505805590189993 2023-01-22 08:29:12.195766: step: 332/77, loss: 0.0018185640219599009 2023-01-22 08:29:13.516135: step: 336/77, loss: 0.01590447686612606 2023-01-22 08:29:14.853417: step: 340/77, loss: 0.02160751074552536 2023-01-22 08:29:16.137249: step: 344/77, loss: 0.03739278391003609 2023-01-22 08:29:17.481052: step: 348/77, loss: 0.004457756876945496 2023-01-22 08:29:18.773743: step: 352/77, loss: 0.03349829837679863 2023-01-22 08:29:20.114689: step: 356/77, loss: 0.0016237336676567793 2023-01-22 08:29:21.384590: step: 360/77, loss: 0.006631201598793268 2023-01-22 08:29:22.666560: step: 364/77, loss: 0.0008796528563834727 2023-01-22 08:29:23.928538: step: 368/77, loss: 0.004901571664959192 2023-01-22 08:29:25.249130: step: 372/77, loss: 0.0009514682460576296 2023-01-22 08:29:26.519972: step: 376/77, loss: 0.001806218409910798 2023-01-22 08:29:27.862057: step: 380/77, loss: 0.004638840444386005 2023-01-22 08:29:29.143913: step: 384/77, loss: 0.0008214544504880905 2023-01-22 08:29:30.414985: step: 388/77, loss: 2.1455241949297488e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Chinese: {'template': {'p': 0.972972972972973, 'r': 0.5373134328358209, 'f1': 0.6923076923076924}, 'slot': {'p': 0.6, 'r': 0.0215633423180593, 'f1': 0.04163052905464007}, 'combined': 0.028821135499366206, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Korean: {'template': {'p': 0.972972972972973, 'r': 0.5373134328358209, 'f1': 0.6923076923076924}, 'slot': {'p': 0.6153846153846154, 'r': 0.0215633423180593, 'f1': 0.04166666666666667}, 'combined': 0.028846153846153855, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Russian: {'template': {'p': 0.972972972972973, 'r': 0.5373134328358209, 'f1': 0.6923076923076924}, 'slot': {'p': 0.6, 'r': 0.0215633423180593, 'f1': 0.04163052905464007}, 'combined': 0.028821135499366206, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:31:11.093449: step: 4/77, loss: 0.0339067205786705 2023-01-22 08:31:12.392172: step: 8/77, loss: 0.000985633465461433 2023-01-22 08:31:13.738656: step: 12/77, loss: 0.005644312594085932 2023-01-22 08:31:15.097069: step: 16/77, loss: 0.02237703837454319 2023-01-22 08:31:16.373184: step: 20/77, loss: 0.00017622807354200631 2023-01-22 08:31:17.666750: step: 24/77, loss: 3.434408790781163e-05 2023-01-22 08:31:18.929770: step: 28/77, loss: 0.0021593275014311075 2023-01-22 08:31:20.278710: step: 32/77, loss: 0.002325713401660323 2023-01-22 08:31:21.559733: step: 36/77, loss: 0.0001473966403864324 2023-01-22 08:31:22.839778: step: 40/77, loss: 0.00011630407243501395 2023-01-22 08:31:24.176336: step: 44/77, loss: 0.0056089069694280624 2023-01-22 08:31:25.509773: step: 48/77, loss: 0.0008513731881976128 2023-01-22 08:31:26.802496: step: 52/77, loss: 0.028125649318099022 2023-01-22 08:31:28.100807: step: 56/77, loss: 0.07658828794956207 2023-01-22 08:31:29.419531: step: 60/77, loss: 0.0002985998580697924 2023-01-22 08:31:30.765428: step: 64/77, loss: 0.01489199511706829 2023-01-22 08:31:32.060268: step: 68/77, loss: 0.02099183388054371 2023-01-22 08:31:33.336177: step: 72/77, loss: 1.1802884728240315e-05 2023-01-22 08:31:34.637775: step: 76/77, loss: 0.029980359598994255 2023-01-22 08:31:35.882060: step: 80/77, loss: 0.00036515307147055864 2023-01-22 08:31:37.185731: step: 84/77, loss: 0.028227414935827255 2023-01-22 08:31:38.509213: step: 88/77, loss: 0.0003867174091283232 2023-01-22 08:31:39.824064: step: 92/77, loss: 0.0001741455344017595 2023-01-22 08:31:41.129731: step: 96/77, loss: 0.019959811121225357 2023-01-22 08:31:42.451684: step: 100/77, loss: 3.03964097838616e-05 2023-01-22 08:31:43.709512: step: 104/77, loss: 0.002722999081015587 2023-01-22 08:31:44.994760: step: 108/77, loss: 0.004611394368112087 2023-01-22 08:31:46.296139: step: 112/77, loss: 0.0010619304375723004 2023-01-22 08:31:47.607424: step: 116/77, loss: 0.004179549403488636 2023-01-22 08:31:48.916888: step: 120/77, loss: 4.156599607085809e-05 2023-01-22 08:31:50.227069: step: 124/77, loss: 8.118282130453736e-05 2023-01-22 08:31:51.492868: step: 128/77, loss: 0.001088459393940866 2023-01-22 08:31:52.811158: step: 132/77, loss: 0.0002421422686893493 2023-01-22 08:31:54.131104: step: 136/77, loss: 6.279639637796208e-05 2023-01-22 08:31:55.416437: step: 140/77, loss: 0.008729882538318634 2023-01-22 08:31:56.690594: step: 144/77, loss: 0.0124210761860013 2023-01-22 08:31:57.965187: step: 148/77, loss: 0.0011566146276891232 2023-01-22 08:31:59.294647: step: 152/77, loss: 3.217463381588459e-05 2023-01-22 08:32:00.597635: step: 156/77, loss: 0.011010373942553997 2023-01-22 08:32:01.921364: step: 160/77, loss: 5.280670848151203e-06 2023-01-22 08:32:03.185392: step: 164/77, loss: 0.001265925238840282 2023-01-22 08:32:04.494520: step: 168/77, loss: 0.004644365515559912 2023-01-22 08:32:05.822256: step: 172/77, loss: 0.0004617736558429897 2023-01-22 08:32:07.063287: step: 176/77, loss: 0.0005241798353381455 2023-01-22 08:32:08.322031: step: 180/77, loss: 0.0013182410039007664 2023-01-22 08:32:09.635558: step: 184/77, loss: 2.7897960535483435e-05 
2023-01-22 08:32:10.948646: step: 188/77, loss: 0.0011652555549517274 2023-01-22 08:32:12.305946: step: 192/77, loss: 0.03303450718522072 2023-01-22 08:32:13.623005: step: 196/77, loss: 0.00035298606962896883 2023-01-22 08:32:14.922369: step: 200/77, loss: 0.0004992393078282475 2023-01-22 08:32:16.180440: step: 204/77, loss: 0.0003858382988255471 2023-01-22 08:32:17.494098: step: 208/77, loss: 0.00010792753892019391 2023-01-22 08:32:18.774976: step: 212/77, loss: 0.007372554857283831 2023-01-22 08:32:20.060988: step: 216/77, loss: 0.03849584981799126 2023-01-22 08:32:21.382500: step: 220/77, loss: 0.023224491626024246 2023-01-22 08:32:22.677859: step: 224/77, loss: 0.02985999919474125 2023-01-22 08:32:24.002155: step: 228/77, loss: 0.019635101780295372 2023-01-22 08:32:25.316900: step: 232/77, loss: 0.040794577449560165 2023-01-22 08:32:26.581500: step: 236/77, loss: 0.00278811389580369 2023-01-22 08:32:27.893831: step: 240/77, loss: 0.0011358339106664062 2023-01-22 08:32:29.191715: step: 244/77, loss: 0.0002098227705573663 2023-01-22 08:32:30.487986: step: 248/77, loss: 0.004070743452757597 2023-01-22 08:32:31.821236: step: 252/77, loss: 0.00046853855019435287 2023-01-22 08:32:33.122594: step: 256/77, loss: 0.004174651578068733 2023-01-22 08:32:34.406555: step: 260/77, loss: 0.04534554481506348 2023-01-22 08:32:35.677357: step: 264/77, loss: 0.0013716259272769094 2023-01-22 08:32:37.001093: step: 268/77, loss: 0.008802559226751328 2023-01-22 08:32:38.300198: step: 272/77, loss: 0.0013205144787207246 2023-01-22 08:32:39.595077: step: 276/77, loss: 0.002434749389067292 2023-01-22 08:32:40.950365: step: 280/77, loss: 0.0026853452436625957 2023-01-22 08:32:42.257373: step: 284/77, loss: 0.01034157257527113 2023-01-22 08:32:43.548871: step: 288/77, loss: 0.0013206511503085494 2023-01-22 08:32:44.823538: step: 292/77, loss: 0.0029643403831869364 2023-01-22 08:32:46.100253: step: 296/77, loss: 0.0017079674871638417 2023-01-22 08:32:47.403104: step: 300/77, loss: 0.002972458256408572 2023-01-22 08:32:48.786119: step: 304/77, loss: 0.00101885583717376 2023-01-22 08:32:50.093232: step: 308/77, loss: 0.005774295423179865 2023-01-22 08:32:51.438942: step: 312/77, loss: 0.05470731854438782 2023-01-22 08:32:52.716524: step: 316/77, loss: 0.0007659116527065635 2023-01-22 08:32:54.038158: step: 320/77, loss: 0.001400307402946055 2023-01-22 08:32:55.354944: step: 324/77, loss: 0.04949542135000229 2023-01-22 08:32:56.656850: step: 328/77, loss: 0.0005832273163832724 2023-01-22 08:32:57.958087: step: 332/77, loss: 0.000931663264054805 2023-01-22 08:32:59.280709: step: 336/77, loss: 8.43062880448997e-05 2023-01-22 08:33:00.571322: step: 340/77, loss: 0.00026131156482733786 2023-01-22 08:33:01.853573: step: 344/77, loss: 1.9478706235531718e-05 2023-01-22 08:33:03.185597: step: 348/77, loss: 8.584909664932638e-05 2023-01-22 08:33:04.432829: step: 352/77, loss: 0.000725393183529377 2023-01-22 08:33:05.705211: step: 356/77, loss: 0.03045252338051796 2023-01-22 08:33:07.000051: step: 360/77, loss: 0.0006746129947714508 2023-01-22 08:33:08.302111: step: 364/77, loss: 1.7055519492714666e-05 2023-01-22 08:33:09.604060: step: 368/77, loss: 0.06524211168289185 2023-01-22 08:33:10.951629: step: 372/77, loss: 0.007068153936415911 2023-01-22 08:33:12.243000: step: 376/77, loss: 0.0007754460093565285 2023-01-22 08:33:13.524818: step: 380/77, loss: 0.005678238812834024 2023-01-22 08:33:14.845228: step: 384/77, loss: 0.007888175547122955 2023-01-22 08:33:16.141256: step: 388/77, loss: 0.03222333639860153 
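The per-step lines above advance the step counter by 4, which is consistent with --accumulate_step 4 from the command (so each optimizer update would cover batch_size 10 x 4 = 40 examples), and each epoch then closes with a single "Loss:" figure after the "=" divider. Below is a minimal sketch of that logging pattern, assuming the epoch figure is simply the mean of the logged step losses; the actual train.py may compute it differently, and the helper names here are illustrative only.

from datetime import datetime

# Illustrative sketch -- not the actual train.py logging code.
step_losses = []

def log_step(step, total, loss):
    # Mimics the log format: "<timestamp>: step: <step>/<total>, loss: <loss>"
    step_losses.append(loss)
    print(f"{datetime.now().isoformat(sep=' ')}: step: {step}/{total}, loss: {loss}")

def epoch_loss_summary():
    # Assumed to be the plain mean of the per-step losses logged this epoch
    return sum(step_losses) / max(len(step_losses), 1)

# First three step losses of this epoch, copied from the log above:
for s, l in [(4, 0.0339067205786705), (8, 0.000985633465461433), (12, 0.005644312594085932)]:
    log_step(s, 77, l)
print(f"Loss: {epoch_loss_summary():.3f}")

In the evaluation summary that follows (and in the earlier epochs' summaries), each Dev/Test/Sample block reports precision, recall, and F1 for 'template' and 'slot', plus a 'combined' score. The logged numbers are consistent with F1 being the usual harmonic mean of p and r, and 'combined' being the product of the two F1 values. A small check of that reading against the recurring dev figures, again illustrative rather than the scorer actually used by train.py:

def f1(p, r):
    # Harmonic mean of precision and recall
    return 2 * p * r / (p + r) if (p + r) else 0.0

# Dev figures as logged: template p=1.0, r=0.5833...; slot p=0.5, r=0.0378...
template_f1 = f1(1.0, 0.5833333333333334)    # ~0.7368421052631579, as in the log
slot_f1 = f1(0.5, 0.03780718336483932)       # ~0.07029876977152899, as in the log
combined = template_f1 * slot_f1             # ~0.05179909351586346, as in the log
print(template_f1, slot_f1, combined)

On that reading, the "Current best result" blocks appear to retain, per language, the epoch whose dev 'combined' score was strictly best so far (epoch 2 for Chinese and Korean, epoch 4 for Russian), which is why they remain unchanged even though later epochs tie the same dev score.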
================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5223880597014925, 'f1': 0.6796116504854368}, 'slot': {'p': 0.5666666666666667, 'r': 0.015274034141958671, 'f1': 0.029746281714785654}, 'combined': 0.02021591961199025, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5223880597014925, 'f1': 0.6796116504854368}, 'slot': {'p': 0.5806451612903226, 'r': 0.016172506738544475, 'f1': 0.03146853146853148}, 'combined': 0.021386380609681586, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5223880597014925, 'f1': 0.6796116504854368}, 'slot': {'p': 0.5806451612903226, 'r': 0.016172506738544475, 'f1': 0.03146853146853148}, 'combined': 0.021386380609681586, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 
0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:34:56.693817: step: 4/77, loss: 0.05094633251428604 2023-01-22 08:34:57.944983: step: 8/77, loss: 0.016016079112887383 2023-01-22 08:34:59.306980: step: 12/77, loss: 9.06312998267822e-05 2023-01-22 08:35:00.575107: step: 16/77, loss: 0.004453817382454872 2023-01-22 08:35:01.840408: step: 20/77, loss: 0.05539938434958458 2023-01-22 08:35:03.152135: step: 24/77, loss: 0.012972598895430565 2023-01-22 08:35:04.466472: step: 28/77, loss: 0.0011476046638563275 2023-01-22 08:35:05.806082: step: 32/77, loss: 0.004908998031169176 2023-01-22 08:35:07.111231: step: 36/77, loss: 0.004188721068203449 2023-01-22 08:35:08.455348: step: 40/77, loss: 0.16545259952545166 2023-01-22 08:35:09.763380: step: 44/77, loss: 0.031235119327902794 2023-01-22 08:35:11.075389: step: 48/77, loss: 0.004235537722706795 2023-01-22 08:35:12.442370: step: 52/77, loss: 0.0013708991464227438 2023-01-22 08:35:13.763030: step: 56/77, loss: 0.0005379368085414171 2023-01-22 08:35:15.035640: step: 60/77, loss: 0.016450677067041397 2023-01-22 08:35:16.328355: step: 64/77, loss: 0.00257531157694757 2023-01-22 08:35:17.604909: step: 68/77, loss: 0.0007961698574945331 2023-01-22 08:35:18.931893: step: 72/77, loss: 0.0009506650967523456 2023-01-22 08:35:20.268923: step: 76/77, loss: 1.0695758646761533e-05 2023-01-22 08:35:21.586288: step: 80/77, loss: 0.02750951610505581 2023-01-22 08:35:22.914456: step: 84/77, loss: 0.00876179151237011 2023-01-22 08:35:24.229968: step: 88/77, loss: 0.004473384935408831 2023-01-22 08:35:25.566386: step: 92/77, loss: 0.018638458102941513 2023-01-22 08:35:26.838940: step: 96/77, loss: 0.020320996642112732 2023-01-22 08:35:28.162042: step: 100/77, loss: 0.00042116676922887564 2023-01-22 08:35:29.554542: step: 104/77, loss: 0.013269875198602676 2023-01-22 08:35:30.889457: step: 108/77, loss: 0.0019854374695569277 2023-01-22 08:35:32.161381: step: 112/77, loss: 0.061921678483486176 2023-01-22 08:35:33.497665: step: 116/77, loss: 0.07140583544969559 2023-01-22 08:35:34.808185: step: 120/77, loss: 0.0031149161513894796 2023-01-22 08:35:36.129656: step: 124/77, loss: 0.0018452100921422243 2023-01-22 08:35:37.444332: step: 128/77, loss: 2.8724532967316918e-05 2023-01-22 08:35:38.739139: step: 132/77, loss: 0.019663723185658455 2023-01-22 08:35:40.061774: step: 136/77, loss: 0.007577202282845974 2023-01-22 08:35:41.304771: step: 140/77, loss: 2.6508414521231316e-06 2023-01-22 08:35:42.600375: step: 144/77, loss: 3.179639679729007e-05 2023-01-22 08:35:43.866239: step: 148/77, loss: 0.0009142406051978469 2023-01-22 08:35:45.214703: step: 152/77, loss: 0.06840498745441437 2023-01-22 08:35:46.520717: step: 156/77, loss: 1.7841080989455804e-05 2023-01-22 08:35:47.827916: step: 160/77, loss: 0.03614654392004013 2023-01-22 08:35:49.102146: step: 164/77, loss: 0.0021394919604063034 2023-01-22 08:35:50.392740: step: 168/77, loss: 8.524881195626222e-06 2023-01-22 08:35:51.678473: step: 172/77, loss: 0.0006625548703595996 2023-01-22 08:35:52.937682: step: 176/77, loss: 
0.000172696789377369 2023-01-22 08:35:54.212676: step: 180/77, loss: 0.00027654992300085723 2023-01-22 08:35:55.486442: step: 184/77, loss: 0.003033407498151064 2023-01-22 08:35:56.824268: step: 188/77, loss: 0.003654744243249297 2023-01-22 08:35:58.134499: step: 192/77, loss: 0.000612216885201633 2023-01-22 08:35:59.399783: step: 196/77, loss: 0.004389288369566202 2023-01-22 08:36:00.649471: step: 200/77, loss: 0.0975179523229599 2023-01-22 08:36:01.943680: step: 204/77, loss: 4.933265518047847e-05 2023-01-22 08:36:03.253916: step: 208/77, loss: 0.0010515564354136586 2023-01-22 08:36:04.519329: step: 212/77, loss: 0.05458948016166687 2023-01-22 08:36:05.843241: step: 216/77, loss: 0.007991497404873371 2023-01-22 08:36:07.098144: step: 220/77, loss: 0.0030231704004108906 2023-01-22 08:36:08.411023: step: 224/77, loss: 0.012408047914505005 2023-01-22 08:36:09.718076: step: 228/77, loss: 0.0007940920768305659 2023-01-22 08:36:11.036362: step: 232/77, loss: 0.0036044989246875048 2023-01-22 08:36:12.350202: step: 236/77, loss: 0.012100227177143097 2023-01-22 08:36:13.659141: step: 240/77, loss: 0.01591656729578972 2023-01-22 08:36:14.986942: step: 244/77, loss: 9.944305929820985e-05 2023-01-22 08:36:16.311893: step: 248/77, loss: 0.03312674164772034 2023-01-22 08:36:17.636607: step: 252/77, loss: 0.00335933780297637 2023-01-22 08:36:18.936502: step: 256/77, loss: 0.002072680974379182 2023-01-22 08:36:20.202995: step: 260/77, loss: 0.005116648506373167 2023-01-22 08:36:21.451143: step: 264/77, loss: 0.0030165542848408222 2023-01-22 08:36:22.738179: step: 268/77, loss: 0.0020216633565723896 2023-01-22 08:36:24.032416: step: 272/77, loss: 0.04804634675383568 2023-01-22 08:36:25.310316: step: 276/77, loss: 0.013111795298755169 2023-01-22 08:36:26.688776: step: 280/77, loss: 0.00015685016114730388 2023-01-22 08:36:28.023374: step: 284/77, loss: 4.8443831474287435e-05 2023-01-22 08:36:29.330248: step: 288/77, loss: 0.0008266958757303655 2023-01-22 08:36:30.622022: step: 292/77, loss: 0.013310312293469906 2023-01-22 08:36:31.918060: step: 296/77, loss: 0.007367967162281275 2023-01-22 08:36:33.233424: step: 300/77, loss: 0.0004364282067399472 2023-01-22 08:36:34.547352: step: 304/77, loss: 6.982243576203473e-06 2023-01-22 08:36:35.829956: step: 308/77, loss: 0.0018932627281174064 2023-01-22 08:36:37.110750: step: 312/77, loss: 0.009672918356955051 2023-01-22 08:36:38.444805: step: 316/77, loss: 0.0009078417788259685 2023-01-22 08:36:39.727608: step: 320/77, loss: 2.581998887762893e-05 2023-01-22 08:36:41.023837: step: 324/77, loss: 3.023641329491511e-05 2023-01-22 08:36:42.308354: step: 328/77, loss: 0.05564001575112343 2023-01-22 08:36:43.625313: step: 332/77, loss: 0.03747791051864624 2023-01-22 08:36:44.907538: step: 336/77, loss: 0.00712593412026763 2023-01-22 08:36:46.243148: step: 340/77, loss: 0.0008041571127250791 2023-01-22 08:36:47.573120: step: 344/77, loss: 0.0017460859380662441 2023-01-22 08:36:48.873796: step: 348/77, loss: 0.022884294390678406 2023-01-22 08:36:50.194257: step: 352/77, loss: 0.0002562448207754642 2023-01-22 08:36:51.484968: step: 356/77, loss: 0.0006773895001970232 2023-01-22 08:36:52.791670: step: 360/77, loss: 0.0025339152198284864 2023-01-22 08:36:54.111021: step: 364/77, loss: 0.00035924420808441937 2023-01-22 08:36:55.431471: step: 368/77, loss: 0.01046315673738718 2023-01-22 08:36:56.766843: step: 372/77, loss: 0.011791563592851162 2023-01-22 08:36:58.070549: step: 376/77, loss: 0.004284712485969067 2023-01-22 08:36:59.375010: step: 380/77, loss: 
0.028272613883018494 2023-01-22 08:37:00.688611: step: 384/77, loss: 0.0030928582418709993 2023-01-22 08:37:01.995303: step: 388/77, loss: 3.754132194444537e-05 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.948051948051948, 'r': 0.5447761194029851, 'f1': 0.6919431279620855}, 'slot': {'p': 0.5714285714285714, 'r': 0.0215633423180593, 'f1': 0.04155844155844156}, 'combined': 0.028756078045177578, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.948051948051948, 'r': 0.5447761194029851, 'f1': 0.6919431279620855}, 'slot': {'p': 0.5714285714285714, 'r': 0.0215633423180593, 'f1': 0.04155844155844156}, 'combined': 0.028756078045177578, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.948051948051948, 'r': 0.5447761194029851, 'f1': 0.6919431279620855}, 'slot': {'p': 0.5641025641025641, 'r': 0.019766397124887692, 'f1': 0.03819444444444445}, 'combined': 0.02642838335966299, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:38:42.533783: step: 4/77, loss: 4.712946247309446e-05 2023-01-22 08:38:43.850506: step: 8/77, loss: 0.002000941429287195 2023-01-22 08:38:45.158218: step: 12/77, loss: 5.53809786651982e-06 2023-01-22 08:38:46.461014: step: 16/77, loss: 0.001969966571778059 2023-01-22 08:38:47.748063: step: 20/77, loss: 9.836013305175584e-06 2023-01-22 08:38:49.022130: step: 24/77, loss: 6.016323368385201e-06 2023-01-22 08:38:50.340343: step: 28/77, loss: 0.0003786985471379012 2023-01-22 08:38:51.646835: step: 32/77, loss: 0.008694401942193508 2023-01-22 08:38:52.941384: step: 36/77, loss: 0.0004617235972546041 2023-01-22 08:38:54.247125: step: 40/77, loss: 0.0008308365941047668 2023-01-22 08:38:55.503849: step: 44/77, loss: 0.0004890845157206059 2023-01-22 08:38:56.847230: step: 48/77, loss: 0.0028582194354385138 2023-01-22 08:38:58.136868: step: 52/77, loss: 5.671056351275183e-05 2023-01-22 08:38:59.415080: step: 56/77, loss: 0.005951540544629097 2023-01-22 08:39:00.697612: step: 60/77, loss: 1.183111976388318e-06 2023-01-22 08:39:02.014799: step: 64/77, loss: 0.0020196991972625256 2023-01-22 08:39:03.339623: step: 68/77, loss: 0.002741363365203142 2023-01-22 08:39:04.652962: step: 72/77, loss: 0.00334263127297163 2023-01-22 08:39:05.916702: step: 76/77, loss: 0.0001288486091652885 2023-01-22 08:39:07.236999: step: 80/77, loss: 0.11691176891326904 2023-01-22 08:39:08.514447: step: 84/77, loss: 0.002223538001999259 2023-01-22 08:39:09.815709: step: 88/77, loss: 0.011185433715581894 2023-01-22 08:39:11.108851: step: 92/77, loss: 6.673255847999826e-05 2023-01-22 08:39:12.469385: step: 96/77, loss: 0.01403880026191473 2023-01-22 08:39:13.769031: step: 100/77, loss: 0.03029460832476616 2023-01-22 08:39:15.025485: step: 104/77, loss: 0.00014710072719026357 2023-01-22 08:39:16.287994: step: 108/77, loss: 0.00015252029697876424 2023-01-22 08:39:17.551333: step: 112/77, loss: 0.01632443442940712 2023-01-22 08:39:18.881942: step: 116/77, loss: 0.00362035003490746 2023-01-22 08:39:20.206500: step: 120/77, loss: 0.00628438638523221 2023-01-22 08:39:21.527811: step: 124/77, loss: 0.00045070049236528575 2023-01-22 08:39:22.818927: step: 128/77, loss: 0.00020078939269296825 2023-01-22 08:39:24.158158: step: 132/77, loss: 0.002593017416074872 2023-01-22 08:39:25.417054: step: 136/77, loss: 1.6041687558754347e-05 2023-01-22 08:39:26.792377: step: 140/77, loss: 0.0005025569698773324 2023-01-22 08:39:28.106708: step: 144/77, loss: 0.0067786527797579765 2023-01-22 08:39:29.428408: step: 148/77, loss: 0.0007784969639033079 2023-01-22 08:39:30.757917: step: 152/77, loss: 0.00020644588221330196 2023-01-22 08:39:32.059431: step: 156/77, loss: 0.0005968852783553302 2023-01-22 08:39:33.370218: step: 160/77, loss: 0.0005378980422392488 2023-01-22 
08:39:34.652788: step: 164/77, loss: 0.008257530629634857 2023-01-22 08:39:36.019611: step: 168/77, loss: 0.006465138401836157 2023-01-22 08:39:37.313363: step: 172/77, loss: 5.838445576955564e-05 2023-01-22 08:39:38.609656: step: 176/77, loss: 0.06327299028635025 2023-01-22 08:39:39.908778: step: 180/77, loss: 0.000856110651511699 2023-01-22 08:39:41.211674: step: 184/77, loss: 0.0012292754836380482 2023-01-22 08:39:42.521165: step: 188/77, loss: 0.0006952299736440182 2023-01-22 08:39:43.833030: step: 192/77, loss: 1.1477966836537234e-05 2023-01-22 08:39:45.181629: step: 196/77, loss: 0.0002503438445273787 2023-01-22 08:39:46.539275: step: 200/77, loss: 0.017838360741734505 2023-01-22 08:39:47.850957: step: 204/77, loss: 0.0012066513299942017 2023-01-22 08:39:49.119481: step: 208/77, loss: 0.0015220101922750473 2023-01-22 08:39:50.437576: step: 212/77, loss: 0.0004304148897062987 2023-01-22 08:39:51.738929: step: 216/77, loss: 0.0018209205009043217 2023-01-22 08:39:53.072072: step: 220/77, loss: 0.03783602640032768 2023-01-22 08:39:54.376941: step: 224/77, loss: 0.0006194835295900702 2023-01-22 08:39:55.690946: step: 228/77, loss: 0.005525450222194195 2023-01-22 08:39:56.940851: step: 232/77, loss: 0.00037290374166332185 2023-01-22 08:39:58.269541: step: 236/77, loss: 0.00038193512591533363 2023-01-22 08:39:59.546545: step: 240/77, loss: 0.00047047025873325765 2023-01-22 08:40:00.850058: step: 244/77, loss: 0.004478194285184145 2023-01-22 08:40:02.144785: step: 248/77, loss: 0.0015100076561793685 2023-01-22 08:40:03.472856: step: 252/77, loss: 0.0014932039193809032 2023-01-22 08:40:04.794342: step: 256/77, loss: 0.00242552999407053 2023-01-22 08:40:06.083842: step: 260/77, loss: 0.0003645646502263844 2023-01-22 08:40:07.474230: step: 264/77, loss: 2.7907240109925624e-06 2023-01-22 08:40:08.769323: step: 268/77, loss: 0.005610452964901924 2023-01-22 08:40:10.104917: step: 272/77, loss: 0.0010953948367387056 2023-01-22 08:40:11.384925: step: 276/77, loss: 0.059296153485774994 2023-01-22 08:40:12.729945: step: 280/77, loss: 0.0027845720760524273 2023-01-22 08:40:14.071614: step: 284/77, loss: 0.02690986916422844 2023-01-22 08:40:15.343138: step: 288/77, loss: 0.007932419888675213 2023-01-22 08:40:16.670400: step: 292/77, loss: 7.73666615714319e-05 2023-01-22 08:40:18.013537: step: 296/77, loss: 0.0003109094104729593 2023-01-22 08:40:19.290326: step: 300/77, loss: 0.16546620428562164 2023-01-22 08:40:20.587327: step: 304/77, loss: 0.0034834558609873056 2023-01-22 08:40:21.850444: step: 308/77, loss: 0.0013043548678979278 2023-01-22 08:40:23.128550: step: 312/77, loss: 0.0002218848094344139 2023-01-22 08:40:24.442422: step: 316/77, loss: 0.3924459218978882 2023-01-22 08:40:25.780901: step: 320/77, loss: 0.0009078291477635503 2023-01-22 08:40:27.087327: step: 324/77, loss: 0.0002599633007775992 2023-01-22 08:40:28.432466: step: 328/77, loss: 0.0053293174132704735 2023-01-22 08:40:29.724790: step: 332/77, loss: 0.00010220670083072037 2023-01-22 08:40:31.031393: step: 336/77, loss: 0.03909461200237274 2023-01-22 08:40:32.307741: step: 340/77, loss: 0.0007392231491394341 2023-01-22 08:40:33.675828: step: 344/77, loss: 0.003251375164836645 2023-01-22 08:40:34.956480: step: 348/77, loss: 6.195087917149067e-05 2023-01-22 08:40:36.245672: step: 352/77, loss: 0.0010424887295812368 2023-01-22 08:40:37.516481: step: 356/77, loss: 0.0004965576226823032 2023-01-22 08:40:38.813137: step: 360/77, loss: 0.005341102834790945 2023-01-22 08:40:40.096786: step: 364/77, loss: 0.0003410697099752724 2023-01-22 
08:40:41.404655: step: 368/77, loss: 0.00011480034299893305 2023-01-22 08:40:42.757648: step: 372/77, loss: 0.0009775557555258274 2023-01-22 08:40:44.104229: step: 376/77, loss: 0.02208123356103897 2023-01-22 08:40:45.399178: step: 380/77, loss: 0.21449889242649078 2023-01-22 08:40:46.706462: step: 384/77, loss: 0.008393362164497375 2023-01-22 08:40:48.007875: step: 388/77, loss: 0.036982741206884384 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03213610586011342, 'f1': 0.06039076376554175}, 'combined': 0.04201096609776818, 'epoch': 17} Test Chinese: {'template': {'p': 0.9014084507042254, 'r': 0.47761194029850745, 'f1': 0.6243902439024391}, 'slot': {'p': 0.4594594594594595, 'r': 0.015274034141958671, 'f1': 0.02956521739130435}, 'combined': 0.01846023329798516, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03213610586011342, 'f1': 0.06039076376554175}, 'combined': 0.04201096609776818, 'epoch': 17} Test Korean: {'template': {'p': 0.9014084507042254, 'r': 0.47761194029850745, 'f1': 0.6243902439024391}, 'slot': {'p': 0.47368421052631576, 'r': 0.016172506738544475, 'f1': 0.03127715030408341}, 'combined': 0.019529147506939886, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03213610586011342, 'f1': 0.06039076376554175}, 'combined': 0.04201096609776818, 'epoch': 17} Test Russian: {'template': {'p': 0.9154929577464789, 'r': 0.48507462686567165, 'f1': 0.6341463414634146}, 'slot': {'p': 0.4722222222222222, 'r': 0.015274034141958671, 'f1': 0.029590948651000874}, 'combined': 0.01876499182746397, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 
'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:42:28.659657: step: 4/77, loss: 0.004367659334093332 2023-01-22 08:42:29.959154: step: 8/77, loss: 0.01038439478725195 2023-01-22 08:42:31.234046: step: 12/77, loss: 2.6343550416640937e-05 2023-01-22 08:42:32.501241: step: 16/77, loss: 0.0012721351813524961 2023-01-22 08:42:33.769334: step: 20/77, loss: 0.03590906783938408 2023-01-22 08:42:35.070233: step: 24/77, loss: 0.001787964254617691 2023-01-22 08:42:36.351128: step: 28/77, loss: 9.45565443544183e-06 2023-01-22 08:42:37.668851: step: 32/77, loss: 0.002850484335795045 2023-01-22 08:42:38.953679: step: 36/77, loss: 0.027393924072384834 2023-01-22 08:42:40.274814: step: 40/77, loss: 0.003990638069808483 2023-01-22 08:42:41.591644: step: 44/77, loss: 2.9969392926432192e-05 2023-01-22 08:42:42.873992: step: 48/77, loss: 0.00023694556148257107 2023-01-22 08:42:44.180996: step: 52/77, loss: 0.0016057910397648811 2023-01-22 08:42:45.469433: step: 56/77, loss: 7.71491540945135e-05 2023-01-22 08:42:46.762879: step: 60/77, loss: 0.0019408424850553274 2023-01-22 08:42:48.070369: step: 64/77, loss: 2.2850603272672743e-05 2023-01-22 08:42:49.366402: step: 68/77, loss: 0.003779355902224779 2023-01-22 08:42:50.699268: step: 72/77, loss: 0.001685172668658197 2023-01-22 08:42:52.030289: step: 76/77, loss: 0.0012687535490840673 2023-01-22 08:42:53.375279: step: 80/77, loss: 0.008859056048095226 2023-01-22 08:42:54.674412: step: 84/77, loss: 0.04166802018880844 2023-01-22 08:42:55.960853: step: 88/77, loss: 0.023681601509451866 2023-01-22 08:42:57.253927: step: 92/77, loss: 0.002396229188889265 2023-01-22 08:42:58.556515: step: 96/77, loss: 0.0005476967780850828 2023-01-22 08:42:59.869394: step: 100/77, loss: 0.010149845853447914 2023-01-22 08:43:01.196256: step: 104/77, loss: 4.5273111027199775e-05 2023-01-22 08:43:02.487011: step: 108/77, loss: 0.0003784815198741853 2023-01-22 08:43:03.726072: step: 112/77, loss: 0.0028103527147322893 2023-01-22 08:43:05.031449: step: 116/77, loss: 0.0007598382653668523 2023-01-22 08:43:06.349269: step: 120/77, loss: 0.0029842997901141644 2023-01-22 08:43:07.616548: step: 124/77, loss: 0.00048490293556824327 2023-01-22 08:43:08.955777: step: 128/77, loss: 0.002160892356187105 2023-01-22 08:43:10.268930: step: 132/77, loss: 0.002475053770467639 2023-01-22 08:43:11.594013: step: 136/77, loss: 0.00031813987880013883 2023-01-22 08:43:12.860112: step: 140/77, loss: 1.4521887351293117e-05 2023-01-22 08:43:14.123069: step: 144/77, loss: 0.0018738629296422005 2023-01-22 08:43:15.439964: step: 148/77, loss: 0.0006999174365773797 2023-01-22 08:43:16.758099: step: 152/77, 
loss: 0.006983298808336258 2023-01-22 08:43:18.062495: step: 156/77, loss: 0.0010001725750043988 2023-01-22 08:43:19.336412: step: 160/77, loss: 0.0005602678284049034 2023-01-22 08:43:20.650337: step: 164/77, loss: 0.04589071869850159 2023-01-22 08:43:21.942912: step: 168/77, loss: 0.0003480600717011839 2023-01-22 08:43:23.245577: step: 172/77, loss: 7.911981811048463e-06 2023-01-22 08:43:24.567615: step: 176/77, loss: 0.004396666772663593 2023-01-22 08:43:25.850024: step: 180/77, loss: 0.03344809636473656 2023-01-22 08:43:27.176545: step: 184/77, loss: 0.0002191819075960666 2023-01-22 08:43:28.513516: step: 188/77, loss: 0.0014253761619329453 2023-01-22 08:43:29.823551: step: 192/77, loss: 0.05388971418142319 2023-01-22 08:43:31.173858: step: 196/77, loss: 0.001033472130075097 2023-01-22 08:43:32.486996: step: 200/77, loss: 0.0002074778894893825 2023-01-22 08:43:33.767466: step: 204/77, loss: 0.001877657719887793 2023-01-22 08:43:35.050249: step: 208/77, loss: 0.001743351574987173 2023-01-22 08:43:36.352137: step: 212/77, loss: 0.00012812843488063663 2023-01-22 08:43:37.640226: step: 216/77, loss: 0.0215502567589283 2023-01-22 08:43:38.934564: step: 220/77, loss: 3.8145283269841457e-06 2023-01-22 08:43:40.263434: step: 224/77, loss: 0.0010601211106404662 2023-01-22 08:43:41.562350: step: 228/77, loss: 0.0022998847998678684 2023-01-22 08:43:42.849404: step: 232/77, loss: 0.007071727886795998 2023-01-22 08:43:44.188386: step: 236/77, loss: 0.00010882413334911689 2023-01-22 08:43:45.532627: step: 240/77, loss: 0.0014115388039499521 2023-01-22 08:43:46.824714: step: 244/77, loss: 0.035735324025154114 2023-01-22 08:43:48.079331: step: 248/77, loss: 1.12286361400038e-05 2023-01-22 08:43:49.396469: step: 252/77, loss: 0.04284053295850754 2023-01-22 08:43:50.779575: step: 256/77, loss: 0.0025267975870519876 2023-01-22 08:43:52.080532: step: 260/77, loss: 0.0003095833817496896 2023-01-22 08:43:53.376029: step: 264/77, loss: 0.004611496813595295 2023-01-22 08:43:54.628278: step: 268/77, loss: 0.0005131922662258148 2023-01-22 08:43:55.960571: step: 272/77, loss: 0.005772388074547052 2023-01-22 08:43:57.293454: step: 276/77, loss: 0.001374881248921156 2023-01-22 08:43:58.589917: step: 280/77, loss: 0.0005107524339109659 2023-01-22 08:43:59.881573: step: 284/77, loss: 0.0014153217198327184 2023-01-22 08:44:01.208472: step: 288/77, loss: 0.00014606077456846833 2023-01-22 08:44:02.504473: step: 292/77, loss: 0.01991288922727108 2023-01-22 08:44:03.806364: step: 296/77, loss: 0.016153793781995773 2023-01-22 08:44:05.141298: step: 300/77, loss: 0.07304572314023972 2023-01-22 08:44:06.447481: step: 304/77, loss: 0.00026726553915068507 2023-01-22 08:44:07.770575: step: 308/77, loss: 0.002561915433034301 2023-01-22 08:44:09.093758: step: 312/77, loss: 0.0008868585573509336 2023-01-22 08:44:10.437962: step: 316/77, loss: 0.0014801579527556896 2023-01-22 08:44:11.734027: step: 320/77, loss: 0.000655533978715539 2023-01-22 08:44:13.021644: step: 324/77, loss: 0.002514325315132737 2023-01-22 08:44:14.321587: step: 328/77, loss: 5.803751719213324e-06 2023-01-22 08:44:15.631014: step: 332/77, loss: 0.00031647857395000756 2023-01-22 08:44:16.926772: step: 336/77, loss: 0.0026181014254689217 2023-01-22 08:44:18.257943: step: 340/77, loss: 0.00027506123296916485 2023-01-22 08:44:19.537528: step: 344/77, loss: 0.003296251641586423 2023-01-22 08:44:20.874459: step: 348/77, loss: 0.0004255310632288456 2023-01-22 08:44:22.161385: step: 352/77, loss: 0.06913476437330246 2023-01-22 08:44:23.440155: step: 356/77, loss: 
0.0009715624619275331 2023-01-22 08:44:24.700544: step: 360/77, loss: 0.000134461690322496 2023-01-22 08:44:26.018319: step: 364/77, loss: 4.3554922740440816e-05 2023-01-22 08:44:27.281792: step: 368/77, loss: 8.07677861303091e-05 2023-01-22 08:44:28.589390: step: 372/77, loss: 0.02088063955307007 2023-01-22 08:44:29.884151: step: 376/77, loss: 0.001944305724464357 2023-01-22 08:44:31.232470: step: 380/77, loss: 8.24704966362333e-06 2023-01-22 08:44:32.532028: step: 384/77, loss: 9.012554073706269e-05 2023-01-22 08:44:33.835551: step: 388/77, loss: 0.01869414746761322 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.9452054794520548, 'r': 0.5149253731343284, 'f1': 0.6666666666666667}, 'slot': {'p': 0.5405405405405406, 'r': 0.017969451931716084, 'f1': 0.034782608695652174}, 'combined': 0.023188405797101453, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.9583333333333334, 'r': 0.5149253731343284, 'f1': 0.6699029126213593}, 'slot': {'p': 0.5277777777777778, 'r': 0.017070979335130278, 'f1': 0.03307223672758921}, 'combined': 0.0221551877107151, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5149253731343284, 'f1': 0.6699029126213593}, 'slot': {'p': 0.5277777777777778, 'r': 0.017070979335130278, 'f1': 0.03307223672758921}, 'combined': 0.0221551877107151, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 
0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:46:14.455745: step: 4/77, loss: 0.00010956196638289839 2023-01-22 08:46:15.784445: step: 8/77, loss: 0.00674779387190938 2023-01-22 08:46:17.141463: step: 12/77, loss: 0.0008990313508547843 2023-01-22 08:46:18.464681: step: 16/77, loss: 0.001597610767930746 2023-01-22 08:46:19.793897: step: 20/77, loss: 0.08414094150066376 2023-01-22 08:46:21.086373: step: 24/77, loss: 0.00018882961012423038 2023-01-22 08:46:22.371222: step: 28/77, loss: 0.052721038460731506 2023-01-22 08:46:23.664206: step: 32/77, loss: 0.0020808421541005373 2023-01-22 08:46:24.963976: step: 36/77, loss: 0.0009288503788411617 2023-01-22 08:46:26.268653: step: 40/77, loss: 1.3552657037507743e-05 2023-01-22 08:46:27.564615: step: 44/77, loss: 0.000139696552651003 2023-01-22 08:46:28.854269: step: 48/77, loss: 0.0003846232430078089 2023-01-22 08:46:30.171141: step: 52/77, loss: 7.299314893316478e-05 2023-01-22 08:46:31.448488: step: 56/77, loss: 0.03577619045972824 2023-01-22 08:46:32.709063: step: 60/77, loss: 1.30085572891403e-06 2023-01-22 08:46:34.047407: step: 64/77, loss: 0.00017460936214774847 2023-01-22 08:46:35.341082: step: 68/77, loss: 0.00021287830895744264 2023-01-22 08:46:36.654449: step: 72/77, loss: 0.02898317575454712 2023-01-22 08:46:37.987366: step: 76/77, loss: 0.10362723469734192 2023-01-22 08:46:39.296434: step: 80/77, loss: 0.00018808335880748928 2023-01-22 08:46:40.632389: step: 84/77, loss: 3.3553280900378013e-06 2023-01-22 08:46:41.965057: step: 88/77, loss: 0.000569433846976608 2023-01-22 08:46:43.273177: step: 92/77, loss: 0.00043255172204226255 2023-01-22 08:46:44.590266: step: 96/77, loss: 0.0009702723473310471 2023-01-22 08:46:45.876258: step: 100/77, loss: 0.0018793041817843914 2023-01-22 08:46:47.171199: step: 104/77, loss: 8.680361497681588e-05 2023-01-22 08:46:48.474005: step: 108/77, loss: 0.0003911007952410728 2023-01-22 08:46:49.719040: step: 112/77, loss: 0.00037496176082640886 2023-01-22 08:46:50.987569: step: 116/77, loss: 0.042494677007198334 2023-01-22 08:46:52.291750: step: 120/77, loss: 0.0002248030068585649 2023-01-22 08:46:53.591430: step: 124/77, loss: 0.0025414375122636557 2023-01-22 08:46:54.874088: step: 128/77, loss: 0.041003961116075516 2023-01-22 08:46:56.210622: step: 132/77, loss: 0.04007624834775925 2023-01-22 08:46:57.507476: step: 136/77, loss: 0.004495857283473015 2023-01-22 08:46:58.823018: 
step: 140/77, loss: 0.0015246759867295623 2023-01-22 08:47:00.116266: step: 144/77, loss: 0.00025638059014454484 2023-01-22 08:47:01.439277: step: 148/77, loss: 0.0013521756045520306 2023-01-22 08:47:02.711421: step: 152/77, loss: 9.346644947072491e-05 2023-01-22 08:47:04.033494: step: 156/77, loss: 0.00020250340458005667 2023-01-22 08:47:05.357604: step: 160/77, loss: 0.033732958137989044 2023-01-22 08:47:06.704035: step: 164/77, loss: 0.0006303130066953599 2023-01-22 08:47:08.003233: step: 168/77, loss: 0.00043823482701554894 2023-01-22 08:47:09.320649: step: 172/77, loss: 4.336075289756991e-05 2023-01-22 08:47:10.599026: step: 176/77, loss: 0.00014787666441407055 2023-01-22 08:47:11.967848: step: 180/77, loss: 4.3609088606899604e-05 2023-01-22 08:47:13.226289: step: 184/77, loss: 0.0050060986541211605 2023-01-22 08:47:14.512191: step: 188/77, loss: 0.0015303846448659897 2023-01-22 08:47:15.820864: step: 192/77, loss: 5.7042638218263164e-05 2023-01-22 08:47:17.154343: step: 196/77, loss: 0.02419361099600792 2023-01-22 08:47:18.485600: step: 200/77, loss: 0.0019897716119885445 2023-01-22 08:47:19.778722: step: 204/77, loss: 0.015149969607591629 2023-01-22 08:47:21.131905: step: 208/77, loss: 0.0032350337132811546 2023-01-22 08:47:22.442626: step: 212/77, loss: 0.0019988366402685642 2023-01-22 08:47:23.783615: step: 216/77, loss: 0.012999859638512135 2023-01-22 08:47:25.082672: step: 220/77, loss: 0.0004743538156617433 2023-01-22 08:47:26.396860: step: 224/77, loss: 0.0030647912062704563 2023-01-22 08:47:27.727296: step: 228/77, loss: 0.0001289759238716215 2023-01-22 08:47:29.031312: step: 232/77, loss: 4.184803401585668e-05 2023-01-22 08:47:30.312452: step: 236/77, loss: 0.006597783882170916 2023-01-22 08:47:31.599121: step: 240/77, loss: 3.632171137724072e-05 2023-01-22 08:47:32.883723: step: 244/77, loss: 0.021282393485307693 2023-01-22 08:47:34.171461: step: 248/77, loss: 1.4173661838867702e-05 2023-01-22 08:47:35.504565: step: 252/77, loss: 0.008453577756881714 2023-01-22 08:47:36.843617: step: 256/77, loss: 0.03167552500963211 2023-01-22 08:47:38.164087: step: 260/77, loss: 5.700542897102423e-05 2023-01-22 08:47:39.475106: step: 264/77, loss: 3.3676496968837455e-07 2023-01-22 08:47:40.807289: step: 268/77, loss: 6.962824045331217e-06 2023-01-22 08:47:42.154246: step: 272/77, loss: 6.902157474542037e-05 2023-01-22 08:47:43.485097: step: 276/77, loss: 0.004239629954099655 2023-01-22 08:47:44.853575: step: 280/77, loss: 5.466717266244814e-05 2023-01-22 08:47:46.163568: step: 284/77, loss: 0.0015965744387358427 2023-01-22 08:47:47.488350: step: 288/77, loss: 0.0020482433028519154 2023-01-22 08:47:48.790494: step: 292/77, loss: 2.950424686787301e-07 2023-01-22 08:47:50.132732: step: 296/77, loss: 0.00022944994270801544 2023-01-22 08:47:51.442067: step: 300/77, loss: 0.0002290060801897198 2023-01-22 08:47:52.716462: step: 304/77, loss: 5.758761290053371e-06 2023-01-22 08:47:54.023561: step: 308/77, loss: 0.0002715633891057223 2023-01-22 08:47:55.262341: step: 312/77, loss: 5.757574399467558e-06 2023-01-22 08:47:56.575441: step: 316/77, loss: 0.007970299571752548 2023-01-22 08:47:57.864686: step: 320/77, loss: 4.443063517101109e-06 2023-01-22 08:47:59.190039: step: 324/77, loss: 0.007511195261031389 2023-01-22 08:48:00.453157: step: 328/77, loss: 1.9043177417188417e-06 2023-01-22 08:48:01.781341: step: 332/77, loss: 0.08256176859140396 2023-01-22 08:48:03.070372: step: 336/77, loss: 4.316621561883949e-05 2023-01-22 08:48:04.400071: step: 340/77, loss: 7.719926361460239e-05 2023-01-22 
08:48:05.754345: step: 344/77, loss: 2.679053068277426e-05 2023-01-22 08:48:07.090468: step: 348/77, loss: 0.0007886448875069618 2023-01-22 08:48:08.384382: step: 352/77, loss: 0.018656939268112183 2023-01-22 08:48:09.698019: step: 356/77, loss: 8.270098987850361e-07 2023-01-22 08:48:11.063389: step: 360/77, loss: 0.0008322189096361399 2023-01-22 08:48:12.402693: step: 364/77, loss: 0.0003038544673472643 2023-01-22 08:48:13.737834: step: 368/77, loss: 0.0002964224258903414 2023-01-22 08:48:15.022583: step: 372/77, loss: 8.963259460870177e-05 2023-01-22 08:48:16.348099: step: 376/77, loss: 0.0005112845101393759 2023-01-22 08:48:17.621260: step: 380/77, loss: 1.5869254639255814e-05 2023-01-22 08:48:18.884015: step: 384/77, loss: 0.0074441516771912575 2023-01-22 08:48:20.192581: step: 388/77, loss: 8.359365892829373e-06 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.96, 'r': 0.5373134328358209, 'f1': 0.6889952153110048}, 'slot': {'p': 0.5526315789473685, 'r': 0.018867924528301886, 'f1': 0.036490008688097306}, 'combined': 0.02514144139275604, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.96, 'r': 0.5373134328358209, 'f1': 0.6889952153110048}, 'slot': {'p': 0.5641025641025641, 'r': 0.019766397124887692, 'f1': 0.03819444444444445}, 'combined': 0.026315789473684216, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.972972972972973, 'r': 0.5373134328358209, 'f1': 0.6923076923076924}, 'slot': {'p': 0.5526315789473685, 'r': 0.018867924528301886, 'f1': 0.036490008688097306}, 'combined': 0.025262313707144293, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 
'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:50:00.618374: step: 4/77, loss: 0.0005232529947534204 2023-01-22 08:50:01.887752: step: 8/77, loss: 0.0008650250383652747 2023-01-22 08:50:03.256608: step: 12/77, loss: 0.00010305993782822043 2023-01-22 08:50:04.544941: step: 16/77, loss: 2.065046464849729e-05 2023-01-22 08:50:05.810770: step: 20/77, loss: 0.0001110277371481061 2023-01-22 08:50:07.095507: step: 24/77, loss: 0.00022450985852628946 2023-01-22 08:50:08.445200: step: 28/77, loss: 0.0008693195995874703 2023-01-22 08:50:09.779955: step: 32/77, loss: 0.009220481850206852 2023-01-22 08:50:11.057288: step: 36/77, loss: 0.000709481246303767 2023-01-22 08:50:12.437052: step: 40/77, loss: 6.52720991638489e-05 2023-01-22 08:50:13.731126: step: 44/77, loss: 0.00014434370677918196 2023-01-22 08:50:15.001879: step: 48/77, loss: 0.0001451051648473367 2023-01-22 08:50:16.307581: step: 52/77, loss: 0.005191781558096409 2023-01-22 08:50:17.604438: step: 56/77, loss: 0.0004353547701612115 2023-01-22 08:50:18.926347: step: 60/77, loss: 0.0043207453563809395 2023-01-22 08:50:20.252826: step: 64/77, loss: 7.72473358665593e-05 2023-01-22 08:50:21.510330: step: 68/77, loss: 0.00032503451802767813 2023-01-22 08:50:22.876385: step: 72/77, loss: 0.006325506139546633 2023-01-22 08:50:24.150438: step: 76/77, loss: 0.0026867722626775503 2023-01-22 08:50:25.464072: step: 80/77, loss: 0.049152620136737823 2023-01-22 08:50:26.746911: step: 84/77, loss: 0.00011464212002465501 2023-01-22 08:50:27.997400: step: 88/77, loss: 0.026460302993655205 2023-01-22 08:50:29.356283: step: 92/77, loss: 0.00032751355320215225 2023-01-22 08:50:30.640785: step: 96/77, loss: 0.11882049590349197 2023-01-22 08:50:31.969178: step: 100/77, loss: 0.0011056429939344525 2023-01-22 08:50:33.263532: step: 104/77, loss: 0.0001578621449880302 2023-01-22 08:50:34.624352: step: 108/77, loss: 9.804853107198142e-07 2023-01-22 08:50:35.870023: step: 112/77, loss: 5.513399514711637e-07 2023-01-22 08:50:37.151669: step: 116/77, loss: 4.7314893890870735e-05 2023-01-22 08:50:38.435051: step: 120/77, loss: 0.011877724900841713 2023-01-22 08:50:39.755583: step: 124/77, loss: 
1.4156088923300558e-07 2023-01-22 08:50:41.095485: step: 128/77, loss: 0.00023786452948115766 2023-01-22 08:50:42.448221: step: 132/77, loss: 0.0008837380446493626 2023-01-22 08:50:43.750719: step: 136/77, loss: 5.608001447399147e-05 2023-01-22 08:50:45.021910: step: 140/77, loss: 2.1426265448099002e-05 2023-01-22 08:50:46.291055: step: 144/77, loss: 0.030281029641628265 2023-01-22 08:50:47.590126: step: 148/77, loss: 0.03355312719941139 2023-01-22 08:50:48.881223: step: 152/77, loss: 0.0007835748256184161 2023-01-22 08:50:50.208023: step: 156/77, loss: 0.00023194306413643062 2023-01-22 08:50:51.491402: step: 160/77, loss: 2.2264994186116382e-05 2023-01-22 08:50:52.832012: step: 164/77, loss: 0.0006950413808226585 2023-01-22 08:50:54.131046: step: 168/77, loss: 0.004247922450304031 2023-01-22 08:50:55.457365: step: 172/77, loss: 0.00030433133360929787 2023-01-22 08:50:56.775636: step: 176/77, loss: 0.0017913315678015351 2023-01-22 08:50:58.098605: step: 180/77, loss: 0.026901597157120705 2023-01-22 08:50:59.429459: step: 184/77, loss: 0.024439463391900063 2023-01-22 08:51:00.700477: step: 188/77, loss: 8.106151767606207e-07 2023-01-22 08:51:02.013607: step: 192/77, loss: 0.0004149790620431304 2023-01-22 08:51:03.308234: step: 196/77, loss: 0.00034722022246569395 2023-01-22 08:51:04.592899: step: 200/77, loss: 1.1565061868168414e-05 2023-01-22 08:51:05.913944: step: 204/77, loss: 0.023462709039449692 2023-01-22 08:51:07.212420: step: 208/77, loss: 0.009737570770084858 2023-01-22 08:51:08.491529: step: 212/77, loss: 3.904085019712511e-07 2023-01-22 08:51:09.795381: step: 216/77, loss: 0.14411501586437225 2023-01-22 08:51:11.143949: step: 220/77, loss: 0.02945737913250923 2023-01-22 08:51:12.434495: step: 224/77, loss: 0.0024472614750266075 2023-01-22 08:51:13.762135: step: 228/77, loss: 0.02036593109369278 2023-01-22 08:51:15.039976: step: 232/77, loss: 4.921863728668541e-05 2023-01-22 08:51:16.318885: step: 236/77, loss: 3.2393309084000066e-06 2023-01-22 08:51:17.621301: step: 240/77, loss: 0.00900073628872633 2023-01-22 08:51:18.949197: step: 244/77, loss: 0.019549444317817688 2023-01-22 08:51:20.253121: step: 248/77, loss: 0.021410422399640083 2023-01-22 08:51:21.574131: step: 252/77, loss: 0.0005116247921250761 2023-01-22 08:51:22.846882: step: 256/77, loss: 0.029065445065498352 2023-01-22 08:51:24.185806: step: 260/77, loss: 2.5479623218416236e-05 2023-01-22 08:51:25.501641: step: 264/77, loss: 0.005156747531145811 2023-01-22 08:51:26.810177: step: 268/77, loss: 0.06347062438726425 2023-01-22 08:51:28.081308: step: 272/77, loss: 0.011640047654509544 2023-01-22 08:51:29.328327: step: 276/77, loss: 0.0010201624827459455 2023-01-22 08:51:30.645625: step: 280/77, loss: 0.0004513526218943298 2023-01-22 08:51:31.935771: step: 284/77, loss: 0.03519264608621597 2023-01-22 08:51:33.260307: step: 288/77, loss: 0.0007706336909905076 2023-01-22 08:51:34.518681: step: 292/77, loss: 0.0003431455115787685 2023-01-22 08:51:35.796279: step: 296/77, loss: 0.0010055579477921128 2023-01-22 08:51:37.084136: step: 300/77, loss: 0.005652496591210365 2023-01-22 08:51:38.402077: step: 304/77, loss: 3.254468902014196e-05 2023-01-22 08:51:39.743155: step: 308/77, loss: 0.0005605981568805873 2023-01-22 08:51:41.075951: step: 312/77, loss: 0.0028895533178001642 2023-01-22 08:51:42.365346: step: 316/77, loss: 0.0020729885436594486 2023-01-22 08:51:43.683436: step: 320/77, loss: 0.0057830954901874065 2023-01-22 08:51:44.979519: step: 324/77, loss: 6.959859456401318e-05 2023-01-22 08:51:46.276718: step: 328/77, loss: 
0.001899764989502728 2023-01-22 08:51:47.631650: step: 332/77, loss: 0.04220954701304436 2023-01-22 08:51:48.943122: step: 336/77, loss: 0.000656688294839114 2023-01-22 08:51:50.280186: step: 340/77, loss: 0.0028479767497628927 2023-01-22 08:51:51.625670: step: 344/77, loss: 0.00893397070467472 2023-01-22 08:51:52.897387: step: 348/77, loss: 0.0005671484395861626 2023-01-22 08:51:54.144068: step: 352/77, loss: 0.00010676322563085705 2023-01-22 08:51:55.461555: step: 356/77, loss: 0.00015144373173825443 2023-01-22 08:51:56.764967: step: 360/77, loss: 0.0004946649423800409 2023-01-22 08:51:58.080779: step: 364/77, loss: 4.4703462265260896e-08 2023-01-22 08:51:59.335248: step: 368/77, loss: 0.025740794837474823 2023-01-22 08:52:00.632224: step: 372/77, loss: 0.011371146887540817 2023-01-22 08:52:01.967525: step: 376/77, loss: 0.00037400436121970415 2023-01-22 08:52:03.271144: step: 380/77, loss: 0.03912031278014183 2023-01-22 08:52:04.610999: step: 384/77, loss: 0.0008152026566676795 2023-01-22 08:52:05.934785: step: 388/77, loss: 2.2828007786301896e-06 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5149253731343284, 'f1': 0.6699029126213593}, 'slot': {'p': 0.6774193548387096, 'r': 0.018867924528301886, 'f1': 0.03671328671328671}, 'combined': 0.02459433770113382, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5074626865671642, 'f1': 0.6666666666666667}, 'slot': {'p': 0.65625, 'r': 0.018867924528301886, 'f1': 0.03668122270742358}, 'combined': 0.02445414847161572, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5, 'f1': 0.6600985221674878}, 'slot': {'p': 0.6875, 'r': 0.019766397124887692, 'f1': 0.03842794759825328}, 'combined': 0.025366231419536648, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:53:46.351984: step: 4/77, loss: 0.004129846580326557 2023-01-22 08:53:47.632712: step: 8/77, loss: 0.0007028129184618592 2023-01-22 08:53:48.945751: step: 12/77, loss: 0.004232016392052174 2023-01-22 08:53:50.271730: step: 16/77, loss: 3.35503245878499e-05 2023-01-22 08:53:51.577589: step: 20/77, loss: 0.00023565557785332203 2023-01-22 08:53:52.870613: step: 24/77, loss: 0.007366999518126249 2023-01-22 08:53:54.167260: step: 28/77, loss: 0.0024342378601431847 2023-01-22 08:53:55.432434: step: 32/77, loss: 0.005616952199488878 2023-01-22 08:53:56.735016: step: 36/77, loss: 0.0001240503042936325 2023-01-22 08:53:58.037301: step: 40/77, loss: 0.0004758648865390569 2023-01-22 08:53:59.324298: step: 44/77, loss: 0.0020777047611773014 2023-01-22 08:54:00.634134: step: 48/77, loss: 0.0003121124464087188 2023-01-22 08:54:01.980942: step: 52/77, loss: 6.964681961107999e-05 2023-01-22 08:54:03.286947: step: 56/77, loss: 0.018555521965026855 2023-01-22 08:54:04.603607: step: 60/77, loss: 0.01895550638437271 2023-01-22 08:54:05.869931: step: 64/77, loss: 0.00019170627638231963 2023-01-22 08:54:07.200066: step: 68/77, loss: 0.0745316594839096 2023-01-22 08:54:08.528552: step: 72/77, loss: 0.000593360688071698 2023-01-22 08:54:09.871239: step: 76/77, loss: 0.00029611896025016904 2023-01-22 08:54:11.154485: step: 80/77, loss: 0.03908059000968933 2023-01-22 08:54:12.524985: step: 84/77, loss: 0.006917833350598812 2023-01-22 08:54:13.777767: step: 88/77, loss: 0.00373476161621511 2023-01-22 08:54:15.031176: step: 92/77, loss: 2.4437852630398993e-07 2023-01-22 08:54:16.320370: step: 96/77, loss: 5.9011843404732645e-05 2023-01-22 08:54:17.623191: step: 100/77, loss: 0.0007140662637539208 2023-01-22 08:54:18.921414: step: 104/77, loss: 4.786644785781391e-05 2023-01-22 08:54:20.231906: step: 108/77, loss: 0.0005600241129286587 2023-01-22 08:54:21.547261: step: 112/77, loss: 
0.00032644468592479825 2023-01-22 08:54:22.865803: step: 116/77, loss: 0.011107344180345535 2023-01-22 08:54:24.172498: step: 120/77, loss: 3.209639316992252e-06 2023-01-22 08:54:25.453423: step: 124/77, loss: 0.0001459840714232996 2023-01-22 08:54:26.753737: step: 128/77, loss: 0.0024846673477441072 2023-01-22 08:54:28.001656: step: 132/77, loss: 1.0132655461347895e-06 2023-01-22 08:54:29.312364: step: 136/77, loss: 2.9131737392162904e-05 2023-01-22 08:54:30.566591: step: 140/77, loss: 0.00012506675557233393 2023-01-22 08:54:31.909686: step: 144/77, loss: 0.0036581424064934254 2023-01-22 08:54:33.191220: step: 148/77, loss: 0.0006087854853831232 2023-01-22 08:54:34.447884: step: 152/77, loss: 9.502156171947718e-05 2023-01-22 08:54:35.703950: step: 156/77, loss: 0.00010151314927497879 2023-01-22 08:54:37.002004: step: 160/77, loss: 0.00010394515265943483 2023-01-22 08:54:38.322174: step: 164/77, loss: 3.256982745369896e-05 2023-01-22 08:54:39.622875: step: 168/77, loss: 0.0004069388669449836 2023-01-22 08:54:40.908778: step: 172/77, loss: 0.0004313273529987782 2023-01-22 08:54:42.197747: step: 176/77, loss: 0.07065204530954361 2023-01-22 08:54:43.502686: step: 180/77, loss: 0.003883373225107789 2023-01-22 08:54:44.775008: step: 184/77, loss: 0.021618608385324478 2023-01-22 08:54:46.094204: step: 188/77, loss: 0.009264899417757988 2023-01-22 08:54:47.406933: step: 192/77, loss: 0.021695788949728012 2023-01-22 08:54:48.733172: step: 196/77, loss: 0.07280417531728745 2023-01-22 08:54:50.078545: step: 200/77, loss: 5.32822850800585e-05 2023-01-22 08:54:51.379913: step: 204/77, loss: 0.004343980457633734 2023-01-22 08:54:52.714976: step: 208/77, loss: 1.6370775483665057e-05 2023-01-22 08:54:54.047838: step: 212/77, loss: 0.01640847697854042 2023-01-22 08:54:55.362239: step: 216/77, loss: 1.6044676158344373e-05 2023-01-22 08:54:56.634370: step: 220/77, loss: 0.0017321545165032148 2023-01-22 08:54:58.015807: step: 224/77, loss: 0.0006852812948636711 2023-01-22 08:54:59.332060: step: 228/77, loss: 0.004491701722145081 2023-01-22 08:55:00.643144: step: 232/77, loss: 1.5705261375842383e-06 2023-01-22 08:55:01.941511: step: 236/77, loss: 5.962876002740813e-06 2023-01-22 08:55:03.253559: step: 240/77, loss: 0.02237839251756668 2023-01-22 08:55:04.599685: step: 244/77, loss: 0.0005027443403378129 2023-01-22 08:55:05.943364: step: 248/77, loss: 7.748595010070858e-08 2023-01-22 08:55:07.253973: step: 252/77, loss: 0.020615598186850548 2023-01-22 08:55:08.543250: step: 256/77, loss: 0.06497868150472641 2023-01-22 08:55:09.870991: step: 260/77, loss: 1.5785826690262184e-05 2023-01-22 08:55:11.187740: step: 264/77, loss: 0.0005293970461934805 2023-01-22 08:55:12.470189: step: 268/77, loss: 0.015436092391610146 2023-01-22 08:55:13.817643: step: 272/77, loss: 0.008673092350363731 2023-01-22 08:55:15.152473: step: 276/77, loss: 0.0017115159425884485 2023-01-22 08:55:16.460514: step: 280/77, loss: 0.01140614878386259 2023-01-22 08:55:17.781110: step: 284/77, loss: 0.009037042036652565 2023-01-22 08:55:19.119201: step: 288/77, loss: 0.003911172971129417 2023-01-22 08:55:20.403686: step: 292/77, loss: 0.0076984260231256485 2023-01-22 08:55:21.736083: step: 296/77, loss: 1.9967444586654892e-07 2023-01-22 08:55:23.036146: step: 300/77, loss: 0.0007964045507833362 2023-01-22 08:55:24.367186: step: 304/77, loss: 0.0035107415169477463 2023-01-22 08:55:25.676468: step: 308/77, loss: 2.278218516948982e-06 2023-01-22 08:55:26.987228: step: 312/77, loss: 0.0024887826293706894 2023-01-22 08:55:28.281019: step: 316/77, 
loss: 0.04732801765203476 2023-01-22 08:55:29.543364: step: 320/77, loss: 0.0021155159920454025 2023-01-22 08:55:30.837173: step: 324/77, loss: 2.9067188734188676e-05 2023-01-22 08:55:32.137147: step: 328/77, loss: 0.0012147235684096813 2023-01-22 08:55:33.415274: step: 332/77, loss: 7.692816325288732e-06 2023-01-22 08:55:34.732827: step: 336/77, loss: 0.015529229305684566 2023-01-22 08:55:36.093059: step: 340/77, loss: 8.99490260053426e-06 2023-01-22 08:55:37.358658: step: 344/77, loss: 0.0006836125976406038 2023-01-22 08:55:38.670635: step: 348/77, loss: 0.005854771938174963 2023-01-22 08:55:39.959684: step: 352/77, loss: 0.0006406373577192426 2023-01-22 08:55:41.244392: step: 356/77, loss: 0.009297902695834637 2023-01-22 08:55:42.517511: step: 360/77, loss: 0.015907153487205505 2023-01-22 08:55:43.852648: step: 364/77, loss: 1.087783516595664e-07 2023-01-22 08:55:45.161373: step: 368/77, loss: 0.09170454740524292 2023-01-22 08:55:46.461693: step: 372/77, loss: 0.0004183561832178384 2023-01-22 08:55:47.741372: step: 376/77, loss: 0.000553747231606394 2023-01-22 08:55:49.049789: step: 380/77, loss: 0.002163755940273404 2023-01-22 08:55:50.337189: step: 384/77, loss: 5.881304605281912e-05 2023-01-22 08:55:51.669262: step: 388/77, loss: 0.0024923363234847784 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 1.0, 'r': 0.5298507462686567, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5333333333333333, 'r': 0.014375561545372867, 'f1': 0.02799650043744532}, 'combined': 0.019392697863986515, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 1.0, 'r': 0.5149253731343284, 'f1': 0.6798029556650247}, 'slot': {'p': 0.5517241379310345, 'r': 0.014375561545372867, 'f1': 0.028021015761821366}, 'combined': 0.01904876933562241, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 1.0, 'r': 0.5149253731343284, 'f1': 0.6798029556650247}, 'slot': {'p': 0.5666666666666667, 'r': 0.015274034141958671, 'f1': 0.029746281714785654}, 'combined': 0.020221610229755767, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 
0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:57:32.167975: step: 4/77, loss: 0.0003015650436282158 2023-01-22 08:57:33.462798: step: 8/77, loss: 0.0005008360603824258 2023-01-22 08:57:34.784961: step: 12/77, loss: 0.04127756133675575 2023-01-22 08:57:36.078218: step: 16/77, loss: 0.00023727216466795653 2023-01-22 08:57:37.341019: step: 20/77, loss: 5.645280907629058e-06 2023-01-22 08:57:38.600065: step: 24/77, loss: 0.0038216097746044397 2023-01-22 08:57:39.943130: step: 28/77, loss: 0.0019458006136119366 2023-01-22 08:57:41.279563: step: 32/77, loss: 0.0010453497525304556 2023-01-22 08:57:42.568756: step: 36/77, loss: 3.918975437500194e-07 2023-01-22 08:57:43.829175: step: 40/77, loss: 1.2546540801849915e-06 2023-01-22 08:57:45.135599: step: 44/77, loss: 0.00035715149715542793 2023-01-22 08:57:46.464713: step: 48/77, loss: 0.04893004894256592 2023-01-22 08:57:47.765641: step: 52/77, loss: 2.16505691241764e-06 2023-01-22 08:57:49.049577: step: 56/77, loss: 0.00048624962801113725 2023-01-22 08:57:50.299360: step: 60/77, loss: 0.05392635613679886 2023-01-22 08:57:51.598171: step: 64/77, loss: 0.0016569013241678476 2023-01-22 08:57:52.917530: step: 68/77, loss: 4.06145081797149e-05 2023-01-22 08:57:54.180132: step: 72/77, loss: 0.02751801535487175 2023-01-22 08:57:55.461066: step: 76/77, loss: 0.00032470986479893327 2023-01-22 08:57:56.709909: step: 80/77, loss: 0.04265865311026573 2023-01-22 08:57:58.008827: step: 84/77, loss: 0.02686918154358864 2023-01-22 08:57:59.312983: step: 88/77, loss: 5.758063343819231e-05 2023-01-22 08:58:00.596170: step: 92/77, loss: 0.005514861550182104 2023-01-22 08:58:01.974001: step: 96/77, loss: 0.023688755929470062 2023-01-22 08:58:03.297251: step: 100/77, loss: 
0.005752614699304104 2023-01-22 08:58:04.639099: step: 104/77, loss: 0.00012414446973707527 2023-01-22 08:58:05.942899: step: 108/77, loss: 0.006811483763158321 2023-01-22 08:58:07.205566: step: 112/77, loss: 4.321331914525217e-08 2023-01-22 08:58:08.461204: step: 116/77, loss: 0.006708010099828243 2023-01-22 08:58:09.796530: step: 120/77, loss: 0.002154033165425062 2023-01-22 08:58:11.115002: step: 124/77, loss: 0.002488675992935896 2023-01-22 08:58:12.429594: step: 128/77, loss: 2.1963771814625943e-06 2023-01-22 08:58:13.755994: step: 132/77, loss: 0.0014622567687183619 2023-01-22 08:58:15.016873: step: 136/77, loss: 0.000186900157132186 2023-01-22 08:58:16.292100: step: 140/77, loss: 0.008909492753446102 2023-01-22 08:58:17.595411: step: 144/77, loss: 0.0005386116681620479 2023-01-22 08:58:18.886948: step: 148/77, loss: 2.1678852135664783e-05 2023-01-22 08:58:20.180111: step: 152/77, loss: 0.0004342886677477509 2023-01-22 08:58:21.480208: step: 156/77, loss: 2.448557279421948e-05 2023-01-22 08:58:22.780129: step: 160/77, loss: 5.50402000953909e-05 2023-01-22 08:58:24.079495: step: 164/77, loss: 0.00042298168409615755 2023-01-22 08:58:25.334278: step: 168/77, loss: 0.00025966332759708166 2023-01-22 08:58:26.705779: step: 172/77, loss: 2.4172995836124755e-05 2023-01-22 08:58:27.994640: step: 176/77, loss: 0.05907929316163063 2023-01-22 08:58:29.246495: step: 180/77, loss: 0.0020319067407399416 2023-01-22 08:58:30.561844: step: 184/77, loss: 0.0001989850279642269 2023-01-22 08:58:31.898012: step: 188/77, loss: 3.735092468559742e-05 2023-01-22 08:58:33.205046: step: 192/77, loss: 0.02960916794836521 2023-01-22 08:58:34.529469: step: 196/77, loss: 5.357793634175323e-05 2023-01-22 08:58:35.805266: step: 200/77, loss: 0.0369393453001976 2023-01-22 08:58:37.128670: step: 204/77, loss: 0.0030806665308773518 2023-01-22 08:58:38.428459: step: 208/77, loss: 0.0004855840525124222 2023-01-22 08:58:39.721169: step: 212/77, loss: 0.012386923655867577 2023-01-22 08:58:41.050376: step: 216/77, loss: 0.0008802711381576955 2023-01-22 08:58:42.321485: step: 220/77, loss: 0.013728022575378418 2023-01-22 08:58:43.623190: step: 224/77, loss: 0.0034249231684952974 2023-01-22 08:58:44.931815: step: 228/77, loss: 2.330183269805275e-05 2023-01-22 08:58:46.228605: step: 232/77, loss: 0.005138559266924858 2023-01-22 08:58:47.515591: step: 236/77, loss: 0.0002731800777837634 2023-01-22 08:58:48.810581: step: 240/77, loss: 0.14706991612911224 2023-01-22 08:58:50.121070: step: 244/77, loss: 0.0003597013419494033 2023-01-22 08:58:51.383002: step: 248/77, loss: 2.2830377929494716e-05 2023-01-22 08:58:52.719038: step: 252/77, loss: 3.3556109428900527e-06 2023-01-22 08:58:54.009880: step: 256/77, loss: 6.024046342645306e-06 2023-01-22 08:58:55.303968: step: 260/77, loss: 0.0003622551157604903 2023-01-22 08:58:56.637545: step: 264/77, loss: 1.505012647839976e-07 2023-01-22 08:58:57.949722: step: 268/77, loss: 1.9880082618328743e-05 2023-01-22 08:58:59.272425: step: 272/77, loss: 0.000368515495210886 2023-01-22 08:59:00.566658: step: 276/77, loss: 0.000189960454008542 2023-01-22 08:59:01.855716: step: 280/77, loss: 0.05055554211139679 2023-01-22 08:59:03.180553: step: 284/77, loss: 1.7881333747027384e-07 2023-01-22 08:59:04.460882: step: 288/77, loss: 0.0001934354950208217 2023-01-22 08:59:05.807170: step: 292/77, loss: 0.00022537648328579962 2023-01-22 08:59:07.103378: step: 296/77, loss: 1.8508553694118746e-05 2023-01-22 08:59:08.435921: step: 300/77, loss: 0.00031571733416058123 2023-01-22 08:59:09.760962: step: 304/77, 
loss: 0.0006485034828074276 2023-01-22 08:59:11.101541: step: 308/77, loss: 0.05046778544783592 2023-01-22 08:59:12.421780: step: 312/77, loss: 0.006176185794174671 2023-01-22 08:59:13.742374: step: 316/77, loss: 0.02317003905773163 2023-01-22 08:59:15.032054: step: 320/77, loss: 0.00012699222133960575 2023-01-22 08:59:16.347337: step: 324/77, loss: 4.8745387175586075e-05 2023-01-22 08:59:17.682851: step: 328/77, loss: 0.01755366101861 2023-01-22 08:59:18.994519: step: 332/77, loss: 0.029475772753357887 2023-01-22 08:59:20.312365: step: 336/77, loss: 0.002560092369094491 2023-01-22 08:59:21.588119: step: 340/77, loss: 1.1003810868714936e-05 2023-01-22 08:59:22.931052: step: 344/77, loss: 1.8088969682139577e-06 2023-01-22 08:59:24.250290: step: 348/77, loss: 0.0017633186653256416 2023-01-22 08:59:25.591744: step: 352/77, loss: 2.831220058396866e-08 2023-01-22 08:59:26.858682: step: 356/77, loss: 6.697871867800131e-05 2023-01-22 08:59:28.164633: step: 360/77, loss: 1.5294324839487672e-05 2023-01-22 08:59:29.520295: step: 364/77, loss: 0.02993430197238922 2023-01-22 08:59:30.814980: step: 368/77, loss: 0.00010248806211166084 2023-01-22 08:59:32.142027: step: 372/77, loss: 0.00020304956706240773 2023-01-22 08:59:33.431697: step: 376/77, loss: 0.049604982137680054 2023-01-22 08:59:34.759851: step: 380/77, loss: 7.629324727531639e-07 2023-01-22 08:59:36.034807: step: 384/77, loss: 0.0034173850435763597 2023-01-22 08:59:37.346297: step: 388/77, loss: 0.022433260455727577 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5298507462686567, 'f1': 0.6761904761904761}, 'slot': {'p': 0.5384615384615384, 'r': 0.018867924528301886, 'f1': 0.036458333333333336}, 'combined': 0.024652777777777777, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: {'template': {'p': 0.9230769230769231, 'r': 0.5373134328358209, 'f1': 0.6792452830188679}, 'slot': {'p': 0.4883720930232558, 'r': 0.018867924528301886, 'f1': 0.03633217993079585}, 'combined': 0.024678461839785858, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.935064935064935, 'r': 0.5373134328358209, 'f1': 0.6824644549763033}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.02485993678144383, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 
'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:01:18.088281: step: 4/77, loss: 4.329281728132628e-05 2023-01-22 09:01:19.357548: step: 8/77, loss: 0.00022017282026354223 2023-01-22 09:01:20.645072: step: 12/77, loss: 0.026746530085802078 2023-01-22 09:01:21.972744: step: 16/77, loss: 0.005114070139825344 2023-01-22 09:01:23.270552: step: 20/77, loss: 0.0007694660453125834 2023-01-22 09:01:24.552202: step: 24/77, loss: 0.0004395963333081454 2023-01-22 09:01:25.854806: step: 28/77, loss: 1.1439164154580794e-05 2023-01-22 09:01:27.203421: step: 32/77, loss: 0.03675878047943115 2023-01-22 09:01:28.433061: step: 36/77, loss: 0.05024024471640587 2023-01-22 09:01:29.743892: step: 40/77, loss: 0.006016051862388849 2023-01-22 09:01:31.061704: step: 44/77, loss: 0.003913963213562965 2023-01-22 09:01:32.354836: step: 48/77, loss: 1.0461508281878196e-05 2023-01-22 09:01:33.641651: step: 52/77, loss: 0.0005447589792311192 2023-01-22 09:01:34.923800: step: 56/77, loss: 0.00017727080557961017 2023-01-22 09:01:36.203466: step: 60/77, loss: 7.68654717830941e-06 2023-01-22 09:01:37.520354: step: 64/77, loss: 0.003572908928617835 2023-01-22 09:01:38.846509: step: 68/77, loss: 0.0016043151263147593 2023-01-22 09:01:40.145170: step: 72/77, loss: 0.0016125947004184127 2023-01-22 09:01:41.475415: step: 76/77, loss: 0.004037613980472088 2023-01-22 09:01:42.826722: step: 80/77, loss: 0.0036225919611752033 2023-01-22 09:01:44.116106: step: 84/77, loss: 0.0006270355661399662 2023-01-22 09:01:45.419933: step: 88/77, loss: 9.059033800440375e-06 2023-01-22 
09:01:46.707524: step: 92/77, loss: 0.0002212631079601124 2023-01-22 09:01:48.011325: step: 96/77, loss: 1.0681213097996078e-05 2023-01-22 09:01:49.301393: step: 100/77, loss: 0.004466784652322531 2023-01-22 09:01:50.592919: step: 104/77, loss: 0.0010201940312981606 2023-01-22 09:01:51.848919: step: 108/77, loss: 0.0004146641003899276 2023-01-22 09:01:53.180523: step: 112/77, loss: 0.00012005192547803745 2023-01-22 09:01:54.486502: step: 116/77, loss: 5.523490472114645e-06 2023-01-22 09:01:55.803107: step: 120/77, loss: 0.0006090780370868742 2023-01-22 09:01:57.060802: step: 124/77, loss: 0.017412561923265457 2023-01-22 09:01:58.373430: step: 128/77, loss: 0.0031904433853924274 2023-01-22 09:01:59.678837: step: 132/77, loss: 0.0004236635286360979 2023-01-22 09:02:01.016175: step: 136/77, loss: 2.662978477019351e-05 2023-01-22 09:02:02.319398: step: 140/77, loss: 0.006381732877343893 2023-01-22 09:02:03.609435: step: 144/77, loss: 0.0012269781436771154 2023-01-22 09:02:04.924442: step: 148/77, loss: 0.0001025956153171137 2023-01-22 09:02:06.254968: step: 152/77, loss: 6.080747880332638e-06 2023-01-22 09:02:07.541748: step: 156/77, loss: 0.000544823007658124 2023-01-22 09:02:08.840173: step: 160/77, loss: 0.00039470160845667124 2023-01-22 09:02:10.124501: step: 164/77, loss: 0.0035439336206763983 2023-01-22 09:02:11.446511: step: 168/77, loss: 0.001191399642266333 2023-01-22 09:02:12.757121: step: 172/77, loss: 6.601181894438923e-07 2023-01-22 09:02:14.109442: step: 176/77, loss: 1.5660202734579798e-06 2023-01-22 09:02:15.426467: step: 180/77, loss: 0.00018104282207787037 2023-01-22 09:02:16.768982: step: 184/77, loss: 0.0001800114259822294 2023-01-22 09:02:18.098203: step: 188/77, loss: 5.2132389100734144e-05 2023-01-22 09:02:19.386070: step: 192/77, loss: 0.0001612513151485473 2023-01-22 09:02:20.722882: step: 196/77, loss: 0.000883870234247297 2023-01-22 09:02:22.016166: step: 200/77, loss: 0.00014345537056215107 2023-01-22 09:02:23.312991: step: 204/77, loss: 5.223417247179896e-06 2023-01-22 09:02:24.600244: step: 208/77, loss: 0.0039792186580598354 2023-01-22 09:02:25.945958: step: 212/77, loss: 0.00014408319839276373 2023-01-22 09:02:27.259271: step: 216/77, loss: 6.049819057807326e-05 2023-01-22 09:02:28.559193: step: 220/77, loss: 4.052621079608798e-06 2023-01-22 09:02:29.840811: step: 224/77, loss: 0.0001223248546011746 2023-01-22 09:02:31.153624: step: 228/77, loss: 0.0027216023299843073 2023-01-22 09:02:32.446546: step: 232/77, loss: 0.033498615026474 2023-01-22 09:02:33.720428: step: 236/77, loss: 6.296906212810427e-05 2023-01-22 09:02:35.029498: step: 240/77, loss: 8.04692263045581e-06 2023-01-22 09:02:36.332168: step: 244/77, loss: 0.0003942615003325045 2023-01-22 09:02:37.652564: step: 248/77, loss: 0.0004979265504516661 2023-01-22 09:02:38.920714: step: 252/77, loss: 0.00012309358862694353 2023-01-22 09:02:40.292532: step: 256/77, loss: 0.08111999183893204 2023-01-22 09:02:41.599063: step: 260/77, loss: 2.6105428787559504e-06 2023-01-22 09:02:42.889441: step: 264/77, loss: 2.9434100724756718e-05 2023-01-22 09:02:44.169196: step: 268/77, loss: 0.002525086048990488 2023-01-22 09:02:45.466583: step: 272/77, loss: 2.8563626983668655e-06 2023-01-22 09:02:46.771399: step: 276/77, loss: 2.3602433429914527e-06 2023-01-22 09:02:48.070804: step: 280/77, loss: 1.1607835403992794e-06 2023-01-22 09:02:49.353122: step: 284/77, loss: 0.0001664245210122317 2023-01-22 09:02:50.655188: step: 288/77, loss: 7.075824396451935e-05 2023-01-22 09:02:51.936773: step: 292/77, loss: 
7.216061931103468e-06 2023-01-22 09:02:53.259001: step: 296/77, loss: 0.0018026909092441201 2023-01-22 09:02:54.589359: step: 300/77, loss: 0.008671221323311329 2023-01-22 09:02:55.885122: step: 304/77, loss: 9.186310489894822e-05 2023-01-22 09:02:57.190376: step: 308/77, loss: 9.05151591723552e-06 2023-01-22 09:02:58.469321: step: 312/77, loss: 0.037061866372823715 2023-01-22 09:02:59.757063: step: 316/77, loss: 0.03523270785808563 2023-01-22 09:03:01.089913: step: 320/77, loss: 0.00010251326602883637 2023-01-22 09:03:02.402558: step: 324/77, loss: 0.00019953730225097388 2023-01-22 09:03:03.649593: step: 328/77, loss: 4.9880240112543106e-05 2023-01-22 09:03:04.965581: step: 332/77, loss: 0.0003752804477699101 2023-01-22 09:03:06.248199: step: 336/77, loss: 7.744783943053335e-05 2023-01-22 09:03:07.559856: step: 340/77, loss: 0.0009344042628072202 2023-01-22 09:03:08.867936: step: 344/77, loss: 0.011867690831422806 2023-01-22 09:03:10.177977: step: 348/77, loss: 0.058403074741363525 2023-01-22 09:03:11.465840: step: 352/77, loss: 0.00012268997670616955 2023-01-22 09:03:12.823762: step: 356/77, loss: 0.0006498557631857693 2023-01-22 09:03:14.137973: step: 360/77, loss: 0.008733597584068775 2023-01-22 09:03:15.468668: step: 364/77, loss: 6.70292429276742e-05 2023-01-22 09:03:16.786406: step: 368/77, loss: 7.543620085925795e-06 2023-01-22 09:03:18.098909: step: 372/77, loss: 0.00015152778360061347 2023-01-22 09:03:19.385368: step: 376/77, loss: 0.00035861300420947373 2023-01-22 09:03:20.716657: step: 380/77, loss: 0.002343467902392149 2023-01-22 09:03:22.042472: step: 384/77, loss: 6.884265530970879e-07 2023-01-22 09:03:23.326558: step: 388/77, loss: 0.018556831404566765 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5405405405405406, 'r': 0.017969451931716084, 'f1': 0.034782608695652174}, 'combined': 0.02385093167701863, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Korean: {'template': {'p': 0.9466666666666667, 'r': 0.5298507462686567, 'f1': 0.6794258373205742}, 'slot': {'p': 0.5277777777777778, 'r': 0.017070979335130278, 'f1': 0.03307223672758921}, 'combined': 0.022470132130706543, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5526315789473685, 'r': 0.018867924528301886, 'f1': 0.036490008688097306}, 'combined': 0.02502172024326672, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 
0.0}, 'combined': 0.0, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:05:03.643636: step: 4/77, loss: 0.01800728775560856 2023-01-22 09:05:05.004711: step: 8/77, loss: 0.00031166861299425364 2023-01-22 09:05:06.376508: step: 12/77, loss: 3.561168705346063e-05 2023-01-22 09:05:07.687240: step: 16/77, loss: 9.953811286322889e-07 2023-01-22 09:05:09.027417: step: 20/77, loss: 3.455784099060111e-05 2023-01-22 09:05:10.328446: step: 24/77, loss: 0.05289270728826523 2023-01-22 09:05:11.665451: step: 28/77, loss: 0.05813051760196686 2023-01-22 09:05:12.971463: step: 32/77, loss: 0.0003971444966737181 2023-01-22 09:05:14.284552: step: 36/77, loss: 0.000515043968334794 2023-01-22 09:05:15.561597: step: 40/77, loss: 0.0005164144677110016 2023-01-22 09:05:16.841534: step: 44/77, loss: 0.00014868365542497486 2023-01-22 09:05:18.121961: step: 48/77, loss: 9.044520993484184e-06 2023-01-22 09:05:19.397570: step: 52/77, loss: 0.0019840134773403406 2023-01-22 09:05:20.735113: step: 56/77, loss: 0.0015603199135512114 2023-01-22 09:05:22.036975: step: 60/77, loss: 3.0100272851996124e-07 2023-01-22 09:05:23.411100: step: 64/77, loss: 0.004665685817599297 2023-01-22 09:05:24.719927: step: 68/77, loss: 0.0009288692381232977 2023-01-22 09:05:26.022568: step: 72/77, loss: 2.654560375958681e-05 2023-01-22 09:05:27.299224: step: 76/77, loss: 0.00770430825650692 
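Note on the evaluation summaries printed at the end of each epoch: the 'f1' fields are consistent with the usual harmonic mean of the logged 'p' and 'r', and every 'combined' value in this log matches the product of the template F1 and the slot F1 (e.g. for the Dev splits, 0.7368421052631579 × 0.07029876977152899 ≈ 0.05179909351586346). The snippet below is a minimal sketch of that relationship, inferred from the logged numbers rather than taken from train.py's scorer.

```python
# Sketch only (assumption inferred from the log, not the actual train.py code):
# each 'f1' is the harmonic mean of 'p' and 'r', and 'combined' reproduces
# template_f1 * slot_f1 for every evaluation dict printed above.

def f1(p: float, r: float) -> float:
    """Standard F1; returns 0.0 when precision and recall are both 0."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    """Recomputes the 'combined' field as the product of the two F1 scores."""
    return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

# Check against the Dev numbers logged above:
template = {'p': 1.0, 'r': 0.5833333333333334}   # template f1 ~= 0.7368421052631579
slot = {'p': 0.5, 'r': 0.03780718336483932}      # slot f1 ~= 0.07029876977152899
print(combined_score(template, slot))            # ~0.05179909351586346
```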
2023-01-22 09:05:28.536150: step: 80/77, loss: 0.0023559420369565487 2023-01-22 09:05:29.791880: step: 84/77, loss: 0.1431427150964737 2023-01-22 09:05:31.092944: step: 88/77, loss: 5.917061571381055e-05 2023-01-22 09:05:32.390233: step: 92/77, loss: 0.0010026609525084496 2023-01-22 09:05:33.690583: step: 96/77, loss: 0.0008562597795389593 2023-01-22 09:05:34.951563: step: 100/77, loss: 1.6152119997059344e-06 2023-01-22 09:05:36.277257: step: 104/77, loss: 2.2409134544432163e-05 2023-01-22 09:05:37.605004: step: 108/77, loss: 0.0005694585852324963 2023-01-22 09:05:38.915734: step: 112/77, loss: 4.734771209768951e-05 2023-01-22 09:05:40.240312: step: 116/77, loss: 0.00010205221769865602 2023-01-22 09:05:41.537672: step: 120/77, loss: 0.021125314757227898 2023-01-22 09:05:42.854511: step: 124/77, loss: 0.0005711138946935534 2023-01-22 09:05:44.143232: step: 128/77, loss: 3.6221754271537066e-05 2023-01-22 09:05:45.482898: step: 132/77, loss: 4.6742330596316606e-05 2023-01-22 09:05:46.769442: step: 136/77, loss: 1.1689064194797538e-05 2023-01-22 09:05:48.069529: step: 140/77, loss: 0.0037418147549033165 2023-01-22 09:05:49.396012: step: 144/77, loss: 0.02389315329492092 2023-01-22 09:05:50.734448: step: 148/77, loss: 1.0398875019745901e-05 2023-01-22 09:05:52.036099: step: 152/77, loss: 0.0007807943620719016 2023-01-22 09:05:53.361932: step: 156/77, loss: 0.027726897969841957 2023-01-22 09:05:54.637342: step: 160/77, loss: 0.004366937559098005 2023-01-22 09:05:55.979194: step: 164/77, loss: 0.033863894641399384 2023-01-22 09:05:57.222046: step: 168/77, loss: 0.0015756358625367284 2023-01-22 09:05:58.568713: step: 172/77, loss: 0.0041992259211838245 2023-01-22 09:05:59.823118: step: 176/77, loss: 0.012798842042684555 2023-01-22 09:06:01.097640: step: 180/77, loss: 0.0006098680896684527 2023-01-22 09:06:02.395911: step: 184/77, loss: 0.09311774373054504 2023-01-22 09:06:03.750526: step: 188/77, loss: 0.002651342423632741 2023-01-22 09:06:05.055592: step: 192/77, loss: 4.315126716392115e-05 2023-01-22 09:06:06.384538: step: 196/77, loss: 0.0016577377682551742 2023-01-22 09:06:07.670501: step: 200/77, loss: 4.362039180705324e-05 2023-01-22 09:06:08.969962: step: 204/77, loss: 0.0006896284176036716 2023-01-22 09:06:10.253651: step: 208/77, loss: 0.00015805228031240404 2023-01-22 09:06:11.539364: step: 212/77, loss: 0.0008956255624070764 2023-01-22 09:06:12.772945: step: 216/77, loss: 2.9472585083567537e-06 2023-01-22 09:06:14.064803: step: 220/77, loss: 0.008746463805437088 2023-01-22 09:06:15.375299: step: 224/77, loss: 2.413982542748272e-07 2023-01-22 09:06:16.697368: step: 228/77, loss: 2.847241921699606e-05 2023-01-22 09:06:18.015500: step: 232/77, loss: 0.00012537377187982202 2023-01-22 09:06:19.354049: step: 236/77, loss: 1.7284813793594367e-06 2023-01-22 09:06:20.693118: step: 240/77, loss: 0.005231163930147886 2023-01-22 09:06:22.012072: step: 244/77, loss: 7.504272070946172e-05 2023-01-22 09:06:23.296650: step: 248/77, loss: 0.0022762068547308445 2023-01-22 09:06:24.517158: step: 252/77, loss: 6.481864147644956e-07 2023-01-22 09:06:25.817050: step: 256/77, loss: 6.56557085676468e-06 2023-01-22 09:06:27.093498: step: 260/77, loss: 8.091215590866341e-07 2023-01-22 09:06:28.423543: step: 264/77, loss: 0.0018453343072906137 2023-01-22 09:06:29.723481: step: 268/77, loss: 0.0010375462006777525 2023-01-22 09:06:31.040614: step: 272/77, loss: 0.0003331643238198012 2023-01-22 09:06:32.297342: step: 276/77, loss: 0.01020114403218031 2023-01-22 09:06:33.582364: step: 280/77, loss: 
2.0280707758502103e-05 2023-01-22 09:06:34.877569: step: 284/77, loss: 2.369290086789988e-05 2023-01-22 09:06:36.190922: step: 288/77, loss: 5.972215149085969e-05 2023-01-22 09:06:37.476322: step: 292/77, loss: 0.0002550583449192345 2023-01-22 09:06:38.767915: step: 296/77, loss: 0.00032209386699832976 2023-01-22 09:06:40.058090: step: 300/77, loss: 0.008743447251617908 2023-01-22 09:06:41.389174: step: 304/77, loss: 0.004930737894028425 2023-01-22 09:06:42.686232: step: 308/77, loss: 0.0005963391740806401 2023-01-22 09:06:43.963008: step: 312/77, loss: 0.0011195436818525195 2023-01-22 09:06:45.236297: step: 316/77, loss: 0.0007906003156676888 2023-01-22 09:06:46.559765: step: 320/77, loss: 0.000907824607565999 2023-01-22 09:06:47.869141: step: 324/77, loss: 0.00027822371339425445 2023-01-22 09:06:49.183597: step: 328/77, loss: 7.88813122198917e-06 2023-01-22 09:06:50.464059: step: 332/77, loss: 0.0008845935808494687 2023-01-22 09:06:51.719159: step: 336/77, loss: 0.003997663501650095 2023-01-22 09:06:53.011675: step: 340/77, loss: 0.009393981657922268 2023-01-22 09:06:54.329104: step: 344/77, loss: 1.8462516891304404e-05 2023-01-22 09:06:55.617066: step: 348/77, loss: 0.002342846244573593 2023-01-22 09:06:56.915435: step: 352/77, loss: 0.00029463417013175786 2023-01-22 09:06:58.224263: step: 356/77, loss: 6.728203516104259e-06 2023-01-22 09:06:59.499937: step: 360/77, loss: 7.0464707278006244e-06 2023-01-22 09:07:00.758833: step: 364/77, loss: 6.384744665410835e-06 2023-01-22 09:07:02.084513: step: 368/77, loss: 0.00016121372755151242 2023-01-22 09:07:03.357586: step: 372/77, loss: 0.0008221327443607152 2023-01-22 09:07:04.642253: step: 376/77, loss: 1.3854460121365264e-05 2023-01-22 09:07:05.970284: step: 380/77, loss: 0.0015989854000508785 2023-01-22 09:07:07.272021: step: 384/77, loss: 3.710380269694724e-07 2023-01-22 09:07:08.567153: step: 388/77, loss: 0.0377693772315979 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Chinese: {'template': {'p': 0.9726027397260274, 'r': 0.5298507462686567, 'f1': 0.6859903381642511}, 'slot': {'p': 0.5384615384615384, 'r': 0.018867924528301886, 'f1': 0.036458333333333336}, 'combined': 0.025010064412238325, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Korean: {'template': {'p': 0.9583333333333334, 'r': 0.5149253731343284, 'f1': 0.6699029126213593}, 'slot': {'p': 0.5384615384615384, 'r': 0.018867924528301886, 'f1': 0.036458333333333336}, 'combined': 0.02442354368932039, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Russian: {'template': {'p': 0.958904109589041, 'r': 0.5223880597014925, 'f1': 0.6763285024154589}, 'slot': {'p': 0.5384615384615384, 'r': 0.018867924528301886, 'f1': 0.036458333333333336}, 'combined': 0.024657809983896942, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: 
{'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:08:48.934247: step: 4/77, loss: 0.00019989734573755413 2023-01-22 09:08:50.252070: step: 8/77, loss: 0.00016956939361989498 2023-01-22 09:08:51.515420: step: 12/77, loss: 0.00010101838415721431 2023-01-22 09:08:52.796597: step: 16/77, loss: 2.011552851399756e-06 2023-01-22 09:08:54.102446: step: 20/77, loss: 2.4170629330910742e-05 2023-01-22 09:08:55.415492: step: 24/77, loss: 1.9858913219650276e-05 2023-01-22 09:08:56.670351: step: 28/77, loss: 2.0115439838264138e-06 2023-01-22 09:08:57.978085: step: 32/77, loss: 9.859095371211879e-06 2023-01-22 09:08:59.297105: step: 36/77, loss: 6.314497295534238e-05 2023-01-22 09:09:00.611327: step: 40/77, loss: 0.0018583748023957014 2023-01-22 09:09:01.884354: step: 44/77, loss: 5.1027829613303766e-05 2023-01-22 09:09:03.175397: step: 48/77, loss: 2.4957269033620832e-06 2023-01-22 09:09:04.498739: step: 52/77, loss: 3.938041118090041e-06 2023-01-22 09:09:05.834403: step: 56/77, loss: 1.0922369710897328e-06 2023-01-22 09:09:07.193501: step: 60/77, loss: 
0.01869729720056057 2023-01-22 09:09:08.483822: step: 64/77, loss: 1.926662207551999e-06 2023-01-22 09:09:09.778332: step: 68/77, loss: 7.571773312520236e-05 2023-01-22 09:09:11.113370: step: 72/77, loss: 0.0001227992179337889 2023-01-22 09:09:12.423533: step: 76/77, loss: 0.00013048169785179198 2023-01-22 09:09:13.752679: step: 80/77, loss: 4.857740236730024e-07 2023-01-22 09:09:15.067293: step: 84/77, loss: 0.00011595044634304941 2023-01-22 09:09:16.332460: step: 88/77, loss: 5.274962404655525e-07 2023-01-22 09:09:17.627965: step: 92/77, loss: 0.0015729828737676144 2023-01-22 09:09:18.918463: step: 96/77, loss: 0.0002714064030442387 2023-01-22 09:09:20.251670: step: 100/77, loss: 2.099433231705916e-06 2023-01-22 09:09:21.562115: step: 104/77, loss: 3.167681825289037e-06 2023-01-22 09:09:22.893407: step: 108/77, loss: 6.566444426425733e-06 2023-01-22 09:09:24.167369: step: 112/77, loss: 0.00035041532828472555 2023-01-22 09:09:25.451059: step: 116/77, loss: 4.5660151954507455e-05 2023-01-22 09:09:26.775641: step: 120/77, loss: 7.048171255519264e-07 2023-01-22 09:09:28.042940: step: 124/77, loss: 2.510757440177258e-06 2023-01-22 09:09:29.338110: step: 128/77, loss: 2.2947726563415927e-07 2023-01-22 09:09:30.645927: step: 132/77, loss: 0.00018129732052329928 2023-01-22 09:09:31.970026: step: 136/77, loss: 0.01591060496866703 2023-01-22 09:09:33.283709: step: 140/77, loss: 1.1553951480891556e-05 2023-01-22 09:09:34.550546: step: 144/77, loss: 0.03363659605383873 2023-01-22 09:09:35.844917: step: 148/77, loss: 0.0003402983129490167 2023-01-22 09:09:37.162828: step: 152/77, loss: 2.4763119654380716e-05 2023-01-22 09:09:38.438168: step: 156/77, loss: 7.888831532909535e-06 2023-01-22 09:09:39.771758: step: 160/77, loss: 1.748649447108619e-05 2023-01-22 09:09:41.046263: step: 164/77, loss: 0.00025645800633355975 2023-01-22 09:09:42.421095: step: 168/77, loss: 6.515035056509078e-05 2023-01-22 09:09:43.732558: step: 172/77, loss: 3.0088311177678406e-05 2023-01-22 09:09:45.047863: step: 176/77, loss: 0.0021862066350877285 2023-01-22 09:09:46.303393: step: 180/77, loss: 0.021061912178993225 2023-01-22 09:09:47.647475: step: 184/77, loss: 9.166287782136351e-06 2023-01-22 09:09:48.945721: step: 188/77, loss: 8.791878644842654e-05 2023-01-22 09:09:50.278900: step: 192/77, loss: 0.0008699322934262455 2023-01-22 09:09:51.648262: step: 196/77, loss: 1.5573326891171746e-05 2023-01-22 09:09:52.971460: step: 200/77, loss: 0.0001874137669801712 2023-01-22 09:09:54.254861: step: 204/77, loss: 0.0031759031116962433 2023-01-22 09:09:55.567950: step: 208/77, loss: 2.25968560698675e-05 2023-01-22 09:09:56.819629: step: 212/77, loss: 3.1723657230031677e-06 2023-01-22 09:09:58.097072: step: 216/77, loss: 0.0007224121945910156 2023-01-22 09:09:59.372053: step: 220/77, loss: 4.0004897527978756e-06 2023-01-22 09:10:00.708284: step: 224/77, loss: 8.537290705135092e-05 2023-01-22 09:10:02.046645: step: 228/77, loss: 0.016224876046180725 2023-01-22 09:10:03.320033: step: 232/77, loss: 8.661108950036578e-06 2023-01-22 09:10:04.580928: step: 236/77, loss: 0.0001592586631886661 2023-01-22 09:10:05.908432: step: 240/77, loss: 7.482444198103622e-05 2023-01-22 09:10:07.149474: step: 244/77, loss: 3.312555418233387e-05 2023-01-22 09:10:08.441985: step: 248/77, loss: 0.048145100474357605 2023-01-22 09:10:09.724661: step: 252/77, loss: 0.005470104981213808 2023-01-22 09:10:11.047851: step: 256/77, loss: 0.00039273095899261534 2023-01-22 09:10:12.414665: step: 260/77, loss: 0.001016460475511849 2023-01-22 09:10:13.704035: step: 
264/77, loss: 2.205238843089319e-06 2023-01-22 09:10:15.017079: step: 268/77, loss: 5.840338417328894e-06 2023-01-22 09:10:16.361723: step: 272/77, loss: 5.447593593999045e-06 2023-01-22 09:10:17.717912: step: 276/77, loss: 1.457311100239167e-05 2023-01-22 09:10:18.995979: step: 280/77, loss: 3.234842370147817e-05 2023-01-22 09:10:20.323314: step: 284/77, loss: 5.5307500588241965e-06 2023-01-22 09:10:21.650271: step: 288/77, loss: 0.011254596523940563 2023-01-22 09:10:22.972468: step: 292/77, loss: 1.9097829863312654e-05 2023-01-22 09:10:24.320510: step: 296/77, loss: 0.002273230580613017 2023-01-22 09:10:25.597627: step: 300/77, loss: 0.0008224258781410754 2023-01-22 09:10:26.862112: step: 304/77, loss: 5.630550003843382e-05 2023-01-22 09:10:28.141271: step: 308/77, loss: 0.0006396897952072322 2023-01-22 09:10:29.351100: step: 312/77, loss: 0.029791001230478287 2023-01-22 09:10:30.675381: step: 316/77, loss: 1.3331845366337802e-05 2023-01-22 09:10:31.969576: step: 320/77, loss: 0.0009355137008242309 2023-01-22 09:10:33.270440: step: 324/77, loss: 0.0004965240368619561 2023-01-22 09:10:34.603670: step: 328/77, loss: 0.0001969095173990354 2023-01-22 09:10:35.883133: step: 332/77, loss: 3.2388783438364044e-05 2023-01-22 09:10:37.216824: step: 336/77, loss: 0.031088722869753838 2023-01-22 09:10:38.590489: step: 340/77, loss: 0.07551313936710358 2023-01-22 09:10:39.917272: step: 344/77, loss: 0.00021745124831795692 2023-01-22 09:10:41.214224: step: 348/77, loss: 6.191056854731869e-06 2023-01-22 09:10:42.564956: step: 352/77, loss: 0.0001629363396205008 2023-01-22 09:10:43.919530: step: 356/77, loss: 2.5176310373353772e-05 2023-01-22 09:10:45.247582: step: 360/77, loss: 5.35976723767817e-05 2023-01-22 09:10:46.558995: step: 364/77, loss: 3.601461139624007e-05 2023-01-22 09:10:47.923480: step: 368/77, loss: 0.005792475305497646 2023-01-22 09:10:49.210462: step: 372/77, loss: 0.0003093659470323473 2023-01-22 09:10:50.514037: step: 376/77, loss: 0.014475381933152676 2023-01-22 09:10:51.825847: step: 380/77, loss: 0.018493305891752243 2023-01-22 09:10:53.150418: step: 384/77, loss: 6.823409057687968e-05 2023-01-22 09:10:54.465086: step: 388/77, loss: 2.8444370400393382e-05 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5116279069767442, 'r': 0.019766397124887692, 'f1': 0.03806228373702422}, 'combined': 0.026099851705388033, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.4888888888888889, 'r': 0.019766397124887692, 'f1': 0.0379965457685665}, 'combined': 0.02605477424130274, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.4888888888888889, 'r': 0.019766397124887692, 
'f1': 0.0379965457685665}, 'combined': 0.02605477424130274, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:12:34.660627: step: 4/77, loss: 0.007430397905409336 2023-01-22 09:12:36.005148: step: 8/77, loss: 3.7073266412335215e-06 2023-01-22 09:12:37.319321: step: 12/77, loss: 0.00017145395395345986 2023-01-22 09:12:38.610310: step: 16/77, loss: 2.0266714273020625e-05 2023-01-22 09:12:39.925158: step: 20/77, loss: 9.06505920283962e-06 2023-01-22 09:12:41.232998: step: 24/77, loss: 0.0007317511481232941 2023-01-22 09:12:42.553428: step: 28/77, loss: 0.0009495350532233715 2023-01-22 09:12:43.895676: step: 32/77, loss: 0.0004258228000253439 2023-01-22 09:12:45.208009: step: 36/77, loss: 0.0002683318161871284 2023-01-22 09:12:46.530154: step: 40/77, loss: 1.2264461474842392e-05 2023-01-22 09:12:47.777693: step: 44/77, loss: 
8.771388093009591e-05 2023-01-22 09:12:49.037500: step: 48/77, loss: 0.002332504605874419 2023-01-22 09:12:50.362556: step: 52/77, loss: 0.010463829152286053 2023-01-22 09:12:51.695060: step: 56/77, loss: 7.15622809366323e-05 2023-01-22 09:12:52.990616: step: 60/77, loss: 0.0021188717801123857 2023-01-22 09:12:54.295541: step: 64/77, loss: 0.0003772116033360362 2023-01-22 09:12:55.548517: step: 68/77, loss: 0.0013899505138397217 2023-01-22 09:12:56.864043: step: 72/77, loss: 0.010037485510110855 2023-01-22 09:12:58.194973: step: 76/77, loss: 0.0036808447912335396 2023-01-22 09:12:59.451507: step: 80/77, loss: 0.00031674507772549987 2023-01-22 09:13:00.779495: step: 84/77, loss: 0.0002259061875520274 2023-01-22 09:13:02.044612: step: 88/77, loss: 0.005058180540800095 2023-01-22 09:13:03.300753: step: 92/77, loss: 0.0035927272401750088 2023-01-22 09:13:04.625776: step: 96/77, loss: 0.0003780303231906146 2023-01-22 09:13:05.900136: step: 100/77, loss: 0.056739721447229385 2023-01-22 09:13:07.206764: step: 104/77, loss: 0.027822664007544518 2023-01-22 09:13:08.508627: step: 108/77, loss: 0.009071031585335732 2023-01-22 09:13:09.818507: step: 112/77, loss: 0.0011930856853723526 2023-01-22 09:13:11.138380: step: 116/77, loss: 1.883709046524018e-05 2023-01-22 09:13:12.501549: step: 120/77, loss: 0.0004183150304015726 2023-01-22 09:13:13.844924: step: 124/77, loss: 0.0018396122613921762 2023-01-22 09:13:15.130265: step: 128/77, loss: 2.9145189728296828e-06 2023-01-22 09:13:16.382500: step: 132/77, loss: 0.0014383037341758609 2023-01-22 09:13:17.614857: step: 136/77, loss: 4.455409339243488e-07 2023-01-22 09:13:18.953849: step: 140/77, loss: 0.0012948352377861738 2023-01-22 09:13:20.260003: step: 144/77, loss: 0.034051429480314255 2023-01-22 09:13:21.551603: step: 148/77, loss: 0.00012335414066910744 2023-01-22 09:13:22.894020: step: 152/77, loss: 5.0850761908805e-05 2023-01-22 09:13:24.167378: step: 156/77, loss: 9.871408110484481e-05 2023-01-22 09:13:25.469814: step: 160/77, loss: 8.625948794360738e-06 2023-01-22 09:13:26.789058: step: 164/77, loss: 5.438213975139661e-06 2023-01-22 09:13:28.069561: step: 168/77, loss: 2.664577550604008e-05 2023-01-22 09:13:29.329018: step: 172/77, loss: 0.02862611413002014 2023-01-22 09:13:30.646952: step: 176/77, loss: 0.0001641131821088493 2023-01-22 09:13:31.937470: step: 180/77, loss: 0.0002279715408803895 2023-01-22 09:13:33.257395: step: 184/77, loss: 0.003732960671186447 2023-01-22 09:13:34.549971: step: 188/77, loss: 4.4081476517021656e-05 2023-01-22 09:13:35.873114: step: 192/77, loss: 0.00012288712605368346 2023-01-22 09:13:37.223404: step: 196/77, loss: 0.04758511483669281 2023-01-22 09:13:38.508578: step: 200/77, loss: 0.00033601955510675907 2023-01-22 09:13:39.798488: step: 204/77, loss: 2.661273356352467e-06 2023-01-22 09:13:41.109357: step: 208/77, loss: 0.00040764236473478377 2023-01-22 09:13:42.417690: step: 212/77, loss: 0.0009218723280355334 2023-01-22 09:13:43.685245: step: 216/77, loss: 0.0031653456389904022 2023-01-22 09:13:45.062342: step: 220/77, loss: 0.0006369269103743136 2023-01-22 09:13:46.326445: step: 224/77, loss: 0.0004652494681067765 2023-01-22 09:13:47.668843: step: 228/77, loss: 1.1316476957290433e-05 2023-01-22 09:13:48.994469: step: 232/77, loss: 0.000484424497699365 2023-01-22 09:13:50.314360: step: 236/77, loss: 0.00010217508679488674 2023-01-22 09:13:51.608235: step: 240/77, loss: 8.024393900996074e-05 2023-01-22 09:13:52.902145: step: 244/77, loss: 0.00034927070373669267 2023-01-22 09:13:54.168146: step: 248/77, loss: 
0.0011456592474132776 2023-01-22 09:13:55.472494: step: 252/77, loss: 0.00012689371942542493 2023-01-22 09:13:56.784371: step: 256/77, loss: 5.80046144023072e-05 2023-01-22 09:13:58.112116: step: 260/77, loss: 0.00023313306155614555 2023-01-22 09:13:59.448583: step: 264/77, loss: 1.313894972554408e-05 2023-01-22 09:14:00.764974: step: 268/77, loss: 0.08034791797399521 2023-01-22 09:14:02.045468: step: 272/77, loss: 7.919698691694066e-05 2023-01-22 09:14:03.357836: step: 276/77, loss: 0.00041778976446948946 2023-01-22 09:14:04.649961: step: 280/77, loss: 0.00893727969378233 2023-01-22 09:14:05.956465: step: 284/77, loss: 0.011848215013742447 2023-01-22 09:14:07.238814: step: 288/77, loss: 0.004302650224417448 2023-01-22 09:14:08.557853: step: 292/77, loss: 0.00017654645489528775 2023-01-22 09:14:09.862324: step: 296/77, loss: 1.5690379768784624e-06 2023-01-22 09:14:11.160263: step: 300/77, loss: 0.006918806582689285 2023-01-22 09:14:12.469874: step: 304/77, loss: 9.462075922783697e-07 2023-01-22 09:14:13.750894: step: 308/77, loss: 0.0036865954753011465 2023-01-22 09:14:15.071720: step: 312/77, loss: 0.000789219920989126 2023-01-22 09:14:16.405905: step: 316/77, loss: 0.003209709422662854 2023-01-22 09:14:17.643251: step: 320/77, loss: 0.03584851697087288 2023-01-22 09:14:18.908051: step: 324/77, loss: 1.1696663932525553e-05 2023-01-22 09:14:20.220537: step: 328/77, loss: 3.522487304508104e-06 2023-01-22 09:14:21.488268: step: 332/77, loss: 8.103526488412172e-05 2023-01-22 09:14:22.825618: step: 336/77, loss: 0.034114085137844086 2023-01-22 09:14:24.165211: step: 340/77, loss: 0.00014881583047099411 2023-01-22 09:14:25.437331: step: 344/77, loss: 4.456402621144662e-06 2023-01-22 09:14:26.746531: step: 348/77, loss: 3.7652584978786763e-06 2023-01-22 09:14:28.073166: step: 352/77, loss: 9.143738134298474e-05 2023-01-22 09:14:29.387475: step: 356/77, loss: 2.854807462426834e-06 2023-01-22 09:14:30.728487: step: 360/77, loss: 0.016171233728528023 2023-01-22 09:14:32.044512: step: 364/77, loss: 0.00020946790755260736 2023-01-22 09:14:33.379999: step: 368/77, loss: 2.257371079394943e-06 2023-01-22 09:14:34.680133: step: 372/77, loss: 1.5824274441911257e-06 2023-01-22 09:14:36.001348: step: 376/77, loss: 0.01277543418109417 2023-01-22 09:14:37.310995: step: 380/77, loss: 1.7895966948344721e-06 2023-01-22 09:14:38.629617: step: 384/77, loss: 0.000522006128448993 2023-01-22 09:14:39.909986: step: 388/77, loss: 0.0005052937776781619 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Chinese: {'template': {'p': 0.935064935064935, 'r': 0.5373134328358209, 'f1': 0.6824644549763033}, 'slot': {'p': 0.5227272727272727, 'r': 0.020664869721473494, 'f1': 0.03975799481417459}, 'combined': 0.02713341826180635, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5111111111111111, 'r': 0.020664869721473494, 'f1': 0.039723661485319514}, 'combined': 0.02723908216136195, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 
'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.5373134328358209, 'f1': 0.6857142857142856}, 'slot': {'p': 0.5454545454545454, 'r': 0.0215633423180593, 'f1': 0.04148660328435609}, 'combined': 0.028447956537844172, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:16:21.145572: step: 4/77, loss: 2.9802315282267955e-08 2023-01-22 09:16:22.463105: step: 8/77, loss: 4.729399370262399e-05 2023-01-22 09:16:23.739347: step: 12/77, loss: 2.3319753381656483e-05 2023-01-22 09:16:25.056517: step: 16/77, loss: 6.732360361638712e-06 2023-01-22 09:16:26.453612: step: 20/77, loss: 0.00017802367801778018 2023-01-22 09:16:27.807903: step: 24/77, loss: 0.0005213890108279884 2023-01-22 09:16:29.109733: step: 28/77, loss: 
1.830662222346291e-05 2023-01-22 09:16:30.396290: step: 32/77, loss: 1.3812922361466917e-06 2023-01-22 09:16:31.722599: step: 36/77, loss: 2.0610837964341044e-05 2023-01-22 09:16:33.003097: step: 40/77, loss: 6.030067379469983e-05 2023-01-22 09:16:34.326906: step: 44/77, loss: 0.0003737257211469114 2023-01-22 09:16:35.666283: step: 48/77, loss: 0.013727515004575253 2023-01-22 09:16:36.982388: step: 52/77, loss: 0.00031524596852250397 2023-01-22 09:16:38.287822: step: 56/77, loss: 4.54884275313816e-06 2023-01-22 09:16:39.589966: step: 60/77, loss: 1.8272516172146425e-05 2023-01-22 09:16:40.921919: step: 64/77, loss: 2.406430576229468e-05 2023-01-22 09:16:42.244220: step: 68/77, loss: 0.0020606357138603926 2023-01-22 09:16:43.558073: step: 72/77, loss: 0.0001100394147215411 2023-01-22 09:16:44.887702: step: 76/77, loss: 0.0023476046044379473 2023-01-22 09:16:46.148075: step: 80/77, loss: 0.012934298254549503 2023-01-22 09:16:47.410412: step: 84/77, loss: 0.0006640170468017459 2023-01-22 09:16:48.725377: step: 88/77, loss: 0.055560242384672165 2023-01-22 09:16:50.030127: step: 92/77, loss: 0.0002472563646733761 2023-01-22 09:16:51.388999: step: 96/77, loss: 0.0034907220397144556 2023-01-22 09:16:52.689359: step: 100/77, loss: 5.403320392360911e-05 2023-01-22 09:16:53.997910: step: 104/77, loss: 0.3002181947231293 2023-01-22 09:16:55.357943: step: 108/77, loss: 3.9009173633530736e-05 2023-01-22 09:16:56.697449: step: 112/77, loss: 0.00019515615713316947 2023-01-22 09:16:57.978278: step: 116/77, loss: 6.758770905435085e-05 2023-01-22 09:16:59.232381: step: 120/77, loss: 1.9229602912673727e-05 2023-01-22 09:17:00.556736: step: 124/77, loss: 3.519226083881222e-05 2023-01-22 09:17:01.847951: step: 128/77, loss: 0.03842562437057495 2023-01-22 09:17:03.139997: step: 132/77, loss: 1.1213931429665536e-05 2023-01-22 09:17:04.431961: step: 136/77, loss: 0.0033438641112297773 2023-01-22 09:17:05.727950: step: 140/77, loss: 0.0003317440568935126 2023-01-22 09:17:07.030536: step: 144/77, loss: 0.018152881413698196 2023-01-22 09:17:08.367986: step: 148/77, loss: 0.007869703695178032 2023-01-22 09:17:09.670742: step: 152/77, loss: 1.4549676052411087e-05 2023-01-22 09:17:10.988665: step: 156/77, loss: 0.0007096330518834293 2023-01-22 09:17:12.319625: step: 160/77, loss: 4.861999332206324e-05 2023-01-22 09:17:13.646512: step: 164/77, loss: 8.46480397740379e-05 2023-01-22 09:17:14.936185: step: 168/77, loss: 1.8626415965172782e-07 2023-01-22 09:17:16.244854: step: 172/77, loss: 0.004078193102031946 2023-01-22 09:17:17.541148: step: 176/77, loss: 0.003917471971362829 2023-01-22 09:17:18.822158: step: 180/77, loss: 1.5645575786038535e-06 2023-01-22 09:17:20.130563: step: 184/77, loss: 1.8433138393447734e-05 2023-01-22 09:17:21.430703: step: 188/77, loss: 0.028852352872490883 2023-01-22 09:17:22.764036: step: 192/77, loss: 2.281202569065499e-06 2023-01-22 09:17:24.043753: step: 196/77, loss: 3.7961595808155835e-05 2023-01-22 09:17:25.375390: step: 200/77, loss: 5.125956477058935e-07 2023-01-22 09:17:26.692695: step: 204/77, loss: 1.3097493365421542e-06 2023-01-22 09:17:27.992278: step: 208/77, loss: 6.903317989781499e-05 2023-01-22 09:17:29.288464: step: 212/77, loss: 0.02180004119873047 2023-01-22 09:17:30.596702: step: 216/77, loss: 0.21051611006259918 2023-01-22 09:17:31.895105: step: 220/77, loss: 4.495525354286656e-05 2023-01-22 09:17:33.164072: step: 224/77, loss: 2.0334904547780752e-05 2023-01-22 09:17:34.521504: step: 228/77, loss: 2.8787917472072877e-06 2023-01-22 09:17:35.790614: step: 232/77, loss: 
1.4238229596230667e-05 2023-01-22 09:17:37.108776: step: 236/77, loss: 0.0013085382524877787 2023-01-22 09:17:38.431559: step: 240/77, loss: 9.823291475186124e-05 2023-01-22 09:17:39.779248: step: 244/77, loss: 2.773040978354402e-05 2023-01-22 09:17:41.080880: step: 248/77, loss: 4.009727490483783e-06 2023-01-22 09:17:42.405243: step: 252/77, loss: 0.02277224510908127 2023-01-22 09:17:43.700710: step: 256/77, loss: 4.7612797061447054e-05 2023-01-22 09:17:44.982041: step: 260/77, loss: 0.0007515024044550955 2023-01-22 09:17:46.306634: step: 264/77, loss: 0.000233790124184452 2023-01-22 09:17:47.632745: step: 268/77, loss: 0.0009438807610422373 2023-01-22 09:17:48.987931: step: 272/77, loss: 0.00012066077761119232 2023-01-22 09:17:50.303640: step: 276/77, loss: 0.00019588488794397563 2023-01-22 09:17:51.584297: step: 280/77, loss: 1.4081181234359974e-06 2023-01-22 09:17:52.836003: step: 284/77, loss: 9.208631013279955e-07 2023-01-22 09:17:54.122907: step: 288/77, loss: 0.0034223550464957952 2023-01-22 09:17:55.476399: step: 292/77, loss: 0.00241264165379107 2023-01-22 09:17:56.752983: step: 296/77, loss: 0.00011776632163673639 2023-01-22 09:17:58.080768: step: 300/77, loss: 1.5139357856241986e-06 2023-01-22 09:17:59.354141: step: 304/77, loss: 1.3082610621495405e-06 2023-01-22 09:18:00.660949: step: 308/77, loss: 4.947163461110904e-07 2023-01-22 09:18:02.013891: step: 312/77, loss: 0.0002765147655736655 2023-01-22 09:18:03.286000: step: 316/77, loss: 8.433842140220804e-07 2023-01-22 09:18:04.588837: step: 320/77, loss: 0.00043568917317315936 2023-01-22 09:18:05.909465: step: 324/77, loss: 2.890814698730537e-07 2023-01-22 09:18:07.219747: step: 328/77, loss: 2.4800025130389258e-05 2023-01-22 09:18:08.519039: step: 332/77, loss: 1.0922199180640746e-06 2023-01-22 09:18:09.814906: step: 336/77, loss: 0.0032466454431414604 2023-01-22 09:18:11.122188: step: 340/77, loss: 2.011512378885527e-06 2023-01-22 09:18:12.487784: step: 344/77, loss: 0.0068920995108783245 2023-01-22 09:18:13.805996: step: 348/77, loss: 0.00033975904807448387 2023-01-22 09:18:15.135256: step: 352/77, loss: 0.0018391618505120277 2023-01-22 09:18:16.432110: step: 356/77, loss: 0.00025979187921620905 2023-01-22 09:18:17.735191: step: 360/77, loss: 1.081810523828608e-06 2023-01-22 09:18:19.035808: step: 364/77, loss: 9.730581041367259e-06 2023-01-22 09:18:20.349449: step: 368/77, loss: 8.958841499406844e-05 2023-01-22 09:18:21.644606: step: 372/77, loss: 2.4988178211060585e-06 2023-01-22 09:18:22.912206: step: 376/77, loss: 9.418761692359112e-06 2023-01-22 09:18:24.206707: step: 380/77, loss: 3.4123237924177374e-07 2023-01-22 09:18:25.500658: step: 384/77, loss: 0.0009906215127557516 2023-01-22 09:18:26.804277: step: 388/77, loss: 0.00025988329434767365 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.5454545454545454, 'r': 0.0215633423180593, 'f1': 0.04148660328435609}, 'combined': 0.02857096263922636, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Korean: {'template': {'p': 0.9358974358974359, 
'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.5454545454545454, 'r': 0.0215633423180593, 'f1': 0.04148660328435609}, 'combined': 0.02857096263922636, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5447761194029851, 'f1': 0.6886792452830188}, 'slot': {'p': 0.5454545454545454, 'r': 0.0215633423180593, 'f1': 0.04148660328435609}, 'combined': 0.02857096263922636, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:20:07.444367: step: 4/77, loss: 0.02698381617665291 2023-01-22 09:20:08.718861: step: 8/77, loss: 0.014960349537432194 2023-01-22 09:20:10.025703: step: 12/77, loss: 
0.00016170661547221243 2023-01-22 09:20:11.346849: step: 16/77, loss: 2.743231789281708e-06 2023-01-22 09:20:12.679834: step: 20/77, loss: 2.041266816377174e-05 2023-01-22 09:20:13.963083: step: 24/77, loss: 1.0182542609982193e-05 2023-01-22 09:20:15.295320: step: 28/77, loss: 0.013845100067555904 2023-01-22 09:20:16.572557: step: 32/77, loss: 1.3410337942332262e-06 2023-01-22 09:20:17.902742: step: 36/77, loss: 0.0002716188901104033 2023-01-22 09:20:19.209859: step: 40/77, loss: 7.084755634423345e-05 2023-01-22 09:20:20.504220: step: 44/77, loss: 7.2348607318417635e-06 2023-01-22 09:20:21.836603: step: 48/77, loss: 3.7784000141982688e-06 2023-01-22 09:20:23.115488: step: 52/77, loss: 4.1414394218008965e-05 2023-01-22 09:20:24.494222: step: 56/77, loss: 0.0008531028870493174 2023-01-22 09:20:25.772030: step: 60/77, loss: 5.092239007353783e-06 2023-01-22 09:20:27.074546: step: 64/77, loss: 1.0877824507815603e-07 2023-01-22 09:20:28.364505: step: 68/77, loss: 2.8638578442041762e-06 2023-01-22 09:20:29.674236: step: 72/77, loss: 1.7592465155757964e-05 2023-01-22 09:20:30.930506: step: 76/77, loss: 0.02608170546591282 2023-01-22 09:20:32.239644: step: 80/77, loss: 0.0017640494043007493 2023-01-22 09:20:33.479258: step: 84/77, loss: 3.150631164317019e-05 2023-01-22 09:20:34.761622: step: 88/77, loss: 0.01277581974864006 2023-01-22 09:20:36.095689: step: 92/77, loss: 0.0038522332906723022 2023-01-22 09:20:37.420948: step: 96/77, loss: 0.005919828079640865 2023-01-22 09:20:38.736043: step: 100/77, loss: 6.7724749897024594e-06 2023-01-22 09:20:40.037975: step: 104/77, loss: 4.903834997094236e-05 2023-01-22 09:20:41.342341: step: 108/77, loss: 0.0008131438517011702 2023-01-22 09:20:42.660051: step: 112/77, loss: 2.5047547751455568e-06 2023-01-22 09:20:43.985040: step: 116/77, loss: 1.7881383485018887e-08 2023-01-22 09:20:45.259580: step: 120/77, loss: 7.390854648292589e-07 2023-01-22 09:20:46.551127: step: 124/77, loss: 1.3411039390121005e-08 2023-01-22 09:20:47.846530: step: 128/77, loss: 0.001617640140466392 2023-01-22 09:20:49.179877: step: 132/77, loss: 2.515138248782023e-06 2023-01-22 09:20:50.462636: step: 136/77, loss: 2.3407696971844416e-06 2023-01-22 09:20:51.800655: step: 140/77, loss: 0.00797793548554182 2023-01-22 09:20:53.100195: step: 144/77, loss: 1.1081776392529719e-05 2023-01-22 09:20:54.390818: step: 148/77, loss: 8.45935646793805e-05 2023-01-22 09:20:55.686761: step: 152/77, loss: 8.520575647708029e-06 2023-01-22 09:20:56.965683: step: 156/77, loss: 0.0008155218092724681 2023-01-22 09:20:58.230153: step: 160/77, loss: 6.945910172362346e-06 2023-01-22 09:20:59.535061: step: 164/77, loss: 1.0877838008127583e-07 2023-01-22 09:21:00.862941: step: 168/77, loss: 0.00039749397546984255 2023-01-22 09:21:02.158856: step: 172/77, loss: 1.5221035027934704e-05 2023-01-22 09:21:03.523548: step: 176/77, loss: 0.00025134222232736647 2023-01-22 09:21:04.822045: step: 180/77, loss: 2.8312189925827624e-08 2023-01-22 09:21:06.143000: step: 184/77, loss: 0.0001161669279099442 2023-01-22 09:21:07.467200: step: 188/77, loss: 0.001666489290073514 2023-01-22 09:21:08.773913: step: 192/77, loss: 0.0012751700123772025 2023-01-22 09:21:10.077707: step: 196/77, loss: 0.007269307505339384 2023-01-22 09:21:11.406759: step: 200/77, loss: 2.5331956976515357e-08 2023-01-22 09:21:12.709769: step: 204/77, loss: 0.0032305726781487465 2023-01-22 09:21:14.023784: step: 208/77, loss: 8.356692887900863e-06 2023-01-22 09:21:15.303595: step: 212/77, loss: 1.1679512681439519e-05 2023-01-22 09:21:16.597414: step: 216/77, 
loss: 9.30438909563236e-06 2023-01-22 09:21:17.904949: step: 220/77, loss: 1.3792471690976527e-05 2023-01-22 09:21:19.181578: step: 224/77, loss: 6.201413634698838e-05 2023-01-22 09:21:20.459014: step: 228/77, loss: 0.05839015915989876 2023-01-22 09:21:21.800517: step: 232/77, loss: 0.0013058074982836843 2023-01-22 09:21:23.107949: step: 236/77, loss: 0.00023657410929445177 2023-01-22 09:21:24.443506: step: 240/77, loss: 0.00731617258861661 2023-01-22 09:21:25.741873: step: 244/77, loss: 1.2996385521546472e-05 2023-01-22 09:21:27.041084: step: 248/77, loss: 9.02002375369193e-06 2023-01-22 09:21:28.339556: step: 252/77, loss: 1.4901160305669237e-09 2023-01-22 09:21:29.612226: step: 256/77, loss: 0.028248654678463936 2023-01-22 09:21:30.909521: step: 260/77, loss: 0.0002220904134446755 2023-01-22 09:21:32.229826: step: 264/77, loss: 3.0514261197822634e-06 2023-01-22 09:21:33.563214: step: 268/77, loss: 0.0024806009605526924 2023-01-22 09:21:34.874420: step: 272/77, loss: 6.70551330017588e-08 2023-01-22 09:21:36.162162: step: 276/77, loss: 0.018847720697522163 2023-01-22 09:21:37.437588: step: 280/77, loss: 7.949468272272497e-05 2023-01-22 09:21:38.680156: step: 284/77, loss: 3.2037277719609847e-07 2023-01-22 09:21:40.012569: step: 288/77, loss: 3.218242682123673e-06 2023-01-22 09:21:41.290213: step: 292/77, loss: 7.455462309735594e-06 2023-01-22 09:21:42.548757: step: 296/77, loss: 0.002329503884539008 2023-01-22 09:21:43.833330: step: 300/77, loss: 0.002198019064962864 2023-01-22 09:21:45.140063: step: 304/77, loss: 7.818923040758818e-05 2023-01-22 09:21:46.463406: step: 308/77, loss: 7.755040314805228e-06 2023-01-22 09:21:47.751144: step: 312/77, loss: 0.05237256735563278 2023-01-22 09:21:49.010356: step: 316/77, loss: 2.682207700388517e-08 2023-01-22 09:21:50.354205: step: 320/77, loss: 0.0048786792904138565 2023-01-22 09:21:51.635190: step: 324/77, loss: 7.092779696904472e-07 2023-01-22 09:21:52.904741: step: 328/77, loss: 1.3091619621263817e-05 2023-01-22 09:21:54.222722: step: 332/77, loss: 1.5555854133708635e-06 2023-01-22 09:21:55.478137: step: 336/77, loss: 2.3005568436929025e-06 2023-01-22 09:21:56.771386: step: 340/77, loss: 2.3130747649702244e-05 2023-01-22 09:21:58.116018: step: 344/77, loss: 4.930124123347923e-05 2023-01-22 09:21:59.411741: step: 348/77, loss: 0.01467626728117466 2023-01-22 09:22:00.722197: step: 352/77, loss: 0.0 2023-01-22 09:22:01.982976: step: 356/77, loss: 0.000560337386559695 2023-01-22 09:22:03.295682: step: 360/77, loss: 9.936784408637322e-06 2023-01-22 09:22:04.578960: step: 364/77, loss: 3.043984543182887e-05 2023-01-22 09:22:05.878644: step: 368/77, loss: 4.470347647611561e-09 2023-01-22 09:22:07.212128: step: 372/77, loss: 2.0830659650528105e-06 2023-01-22 09:22:08.507455: step: 376/77, loss: 0.000701559241861105 2023-01-22 09:22:09.848316: step: 380/77, loss: 0.00021446785831358284 2023-01-22 09:22:11.167891: step: 384/77, loss: 4.815194188267924e-06 2023-01-22 09:22:12.491095: step: 388/77, loss: 3.844836464850232e-05 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Chinese: {'template': {'p': 0.9605263157894737, 'r': 0.5447761194029851, 'f1': 0.6952380952380952}, 'slot': {'p': 0.4864864864864865, 'r': 0.016172506738544475, 'f1': 0.03130434782608696}, 'combined': 0.021763975155279502, 'epoch': 28} Dev 
Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Korean: {'template': {'p': 0.96, 'r': 0.5373134328358209, 'f1': 0.6889952153110048}, 'slot': {'p': 0.5, 'r': 0.016172506738544475, 'f1': 0.03133159268929504}, 'combined': 0.021587317450997543, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Russian: {'template': {'p': 0.96, 'r': 0.5373134328358209, 'f1': 0.6889952153110048}, 'slot': {'p': 0.5, 'r': 0.016172506738544475, 'f1': 0.03133159268929504}, 'combined': 0.021587317450997543, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 
--learning_rate 2e-4 2023-01-22 09:23:52.930634: step: 4/77, loss: 7.4505797087454084e-09 2023-01-22 09:23:54.270635: step: 8/77, loss: 0.0012389952316880226 2023-01-22 09:23:55.576948: step: 12/77, loss: 0.0008553470834158361 2023-01-22 09:23:56.867775: step: 16/77, loss: 0.001485483255237341 2023-01-22 09:23:58.150084: step: 20/77, loss: 1.9099739802186377e-05 2023-01-22 09:23:59.410518: step: 24/77, loss: 0.00016891444101929665 2023-01-22 09:24:00.746665: step: 28/77, loss: 0.008944258093833923 2023-01-22 09:24:02.018413: step: 32/77, loss: 5.243182386038825e-06 2023-01-22 09:24:03.386706: step: 36/77, loss: 4.7396704758284613e-05 2023-01-22 09:24:04.697636: step: 40/77, loss: 1.192092824453539e-08 2023-01-22 09:24:05.970236: step: 44/77, loss: 5.602820237982087e-05 2023-01-22 09:24:07.308519: step: 48/77, loss: 7.584470154142764e-07 2023-01-22 09:24:08.629240: step: 52/77, loss: 1.2671500371652655e-05 2023-01-22 09:24:09.948790: step: 56/77, loss: 0.013027241453528404 2023-01-22 09:24:11.224493: step: 60/77, loss: 2.7997486995445797e-06 2023-01-22 09:24:12.486990: step: 64/77, loss: 5.140071880305186e-05 2023-01-22 09:24:13.837389: step: 68/77, loss: 2.4520515580661595e-05 2023-01-22 09:24:15.190386: step: 72/77, loss: 5.601465090876445e-05 2023-01-22 09:24:16.455628: step: 76/77, loss: 0.000764780561439693 2023-01-22 09:24:17.748783: step: 80/77, loss: 0.00017838591884355992 2023-01-22 09:24:19.055350: step: 84/77, loss: 0.002022041007876396 2023-01-22 09:24:20.406594: step: 88/77, loss: 1.7858295905170962e-05 2023-01-22 09:24:21.690775: step: 92/77, loss: 3.2931208693298686e-07 2023-01-22 09:24:23.007029: step: 96/77, loss: 0.00017654162365943193 2023-01-22 09:24:24.346527: step: 100/77, loss: 1.0698643109208206e-06 2023-01-22 09:24:25.735874: step: 104/77, loss: 0.0002982413861900568 2023-01-22 09:24:27.062655: step: 108/77, loss: 1.660316229390446e-05 2023-01-22 09:24:28.377485: step: 112/77, loss: 0.05332023650407791 2023-01-22 09:24:29.662940: step: 116/77, loss: 1.0651420780050103e-05 2023-01-22 09:24:30.908714: step: 120/77, loss: 8.344643021018783e-08 2023-01-22 09:24:32.210828: step: 124/77, loss: 0.0026168618351221085 2023-01-22 09:24:33.511624: step: 128/77, loss: 6.367082096403465e-05 2023-01-22 09:24:34.778257: step: 132/77, loss: 0.0002877833612728864 2023-01-22 09:24:36.119830: step: 136/77, loss: 1.1413845868446515e-06 2023-01-22 09:24:37.416330: step: 140/77, loss: 0.00026432372396811843 2023-01-22 09:24:38.702491: step: 144/77, loss: 1.5199152869627142e-07 2023-01-22 09:24:39.999545: step: 148/77, loss: 3.5932185710407794e-05 2023-01-22 09:24:41.293019: step: 152/77, loss: 0.01380218006670475 2023-01-22 09:24:42.568989: step: 156/77, loss: 6.514821961900452e-06 2023-01-22 09:24:43.866520: step: 160/77, loss: 0.0014075781218707561 2023-01-22 09:24:45.155956: step: 164/77, loss: 1.0381198080722243e-05 2023-01-22 09:24:46.477812: step: 168/77, loss: 3.829587740256102e-07 2023-01-22 09:24:47.751487: step: 172/77, loss: 9.25333893064817e-07 2023-01-22 09:24:49.044944: step: 176/77, loss: 8.659628656459972e-05 2023-01-22 09:24:50.335653: step: 180/77, loss: 1.9891298506991006e-05 2023-01-22 09:24:51.592107: step: 184/77, loss: 9.536709200119731e-08 2023-01-22 09:24:52.884608: step: 188/77, loss: 0.0009981810580939054 2023-01-22 09:24:54.181461: step: 192/77, loss: 0.007413438055664301 2023-01-22 09:24:55.425126: step: 196/77, loss: 0.00016956772014964372 2023-01-22 09:24:56.699023: step: 200/77, loss: 0.011380542069673538 2023-01-22 09:24:57.903346: step: 204/77, loss: 
3.677731365314685e-05 2023-01-22 09:24:59.210865: step: 208/77, loss: 3.720476797752781e-06 2023-01-22 09:25:00.536404: step: 212/77, loss: 7.52019423089223e-06 2023-01-22 09:25:01.840805: step: 216/77, loss: 4.250639449310256e-06 2023-01-22 09:25:03.141971: step: 220/77, loss: 0.002503826282918453 2023-01-22 09:25:04.476234: step: 224/77, loss: 8.589692697569262e-06 2023-01-22 09:25:05.806192: step: 228/77, loss: 0.000257056177360937 2023-01-22 09:25:07.089738: step: 232/77, loss: 0.011861293576657772 2023-01-22 09:25:08.435992: step: 236/77, loss: 8.238661393988878e-05 2023-01-22 09:25:09.763627: step: 240/77, loss: 3.692115114972694e-06 2023-01-22 09:25:11.116572: step: 244/77, loss: 0.0013775610132142901 2023-01-22 09:25:12.515332: step: 248/77, loss: 5.708014214178547e-06 2023-01-22 09:25:13.824367: step: 252/77, loss: 9.803019929677248e-05 2023-01-22 09:25:15.084194: step: 256/77, loss: 1.1860923905260279e-06 2023-01-22 09:25:16.398115: step: 260/77, loss: 3.4864085591834737e-06 2023-01-22 09:25:17.678849: step: 264/77, loss: 0.0 2023-01-22 09:25:18.964846: step: 268/77, loss: 4.917311002827773e-07 2023-01-22 09:25:20.283112: step: 272/77, loss: 2.5791382540774066e-06 2023-01-22 09:25:21.580752: step: 276/77, loss: 0.00025995992473326623 2023-01-22 09:25:22.891706: step: 280/77, loss: 0.0004886506358161569 2023-01-22 09:25:24.197878: step: 284/77, loss: 1.5720268038421636e-06 2023-01-22 09:25:25.510049: step: 288/77, loss: 0.0006751567125320435 2023-01-22 09:25:26.786394: step: 292/77, loss: 6.645806251981412e-07 2023-01-22 09:25:28.097190: step: 296/77, loss: 1.4095946880843258e-06 2023-01-22 09:25:29.388636: step: 300/77, loss: 0.00038762306212447584 2023-01-22 09:25:30.714562: step: 304/77, loss: 6.694863259326667e-05 2023-01-22 09:25:31.997038: step: 308/77, loss: 9.282422251999378e-05 2023-01-22 09:25:33.353900: step: 312/77, loss: 1.0120409569935873e-05 2023-01-22 09:25:34.679786: step: 316/77, loss: 7.882480304033379e-07 2023-01-22 09:25:35.979946: step: 320/77, loss: 0.0034625986590981483 2023-01-22 09:25:37.336347: step: 324/77, loss: 1.8059590729535557e-06 2023-01-22 09:25:38.616437: step: 328/77, loss: 0.015585715882480145 2023-01-22 09:25:39.886209: step: 332/77, loss: 0.0009471587836742401 2023-01-22 09:25:41.169938: step: 336/77, loss: 0.017104748636484146 2023-01-22 09:25:42.525658: step: 340/77, loss: 3.7698914638895076e-06 2023-01-22 09:25:43.861170: step: 344/77, loss: 1.548868021927774e-05 2023-01-22 09:25:45.134224: step: 348/77, loss: 5.443015652417671e-06 2023-01-22 09:25:46.416865: step: 352/77, loss: 9.432288265998068e-07 2023-01-22 09:25:47.742668: step: 356/77, loss: 1.0172500878979918e-05 2023-01-22 09:25:49.068149: step: 360/77, loss: 5.2717285143444315e-05 2023-01-22 09:25:50.386259: step: 364/77, loss: 3.0212870115065016e-05 2023-01-22 09:25:51.687648: step: 368/77, loss: 0.0006890245713293552 2023-01-22 09:25:52.984379: step: 372/77, loss: 0.00010110129369422793 2023-01-22 09:25:54.304463: step: 376/77, loss: 2.0903256881865673e-05 2023-01-22 09:25:55.635460: step: 380/77, loss: 3.038154318346642e-06 2023-01-22 09:25:56.923468: step: 384/77, loss: 5.572960048993991e-07 2023-01-22 09:25:58.213913: step: 388/77, loss: 1.3288265108712949e-05 ================================================== Loss: 0.002 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29} Test Chinese: {'template': 
==================================================
Loss: 0.002
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Chinese: {'template': {'p': 0.972972972972973, 'r': 0.5373134328358209, 'f1': 0.6923076923076924}, 'slot': {'p': 0.5365853658536586, 'r': 0.019766397124887692, 'f1': 0.038128249566724434}, 'combined': 0.026396480469270765, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Korean: {'template': {'p': 0.9733333333333334, 'r': 0.5447761194029851, 'f1': 0.6985645933014354}, 'slot': {'p': 0.5348837209302325, 'r': 0.020664869721473494, 'f1': 0.03979238754325259}, 'combined': 0.027797553020645346, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Russian: {'template': {'p': 0.9733333333333334, 'r': 0.5447761194029851, 'f1': 0.6985645933014354}, 'slot': {'p': 0.5348837209302325, 'r': 0.020664869721473494, 'f1': 0.03979238754325259}, 'combined': 0.027797553020645346, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.5121951219512195, 'r': 0.018867924528301886, 'f1': 0.036395147313691506}, 'combined': 0.022789484766517112, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.525, 'r': 0.018867924528301886, 'f1': 0.03642671292281006}, 'combined': 0.022574300966248486, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4}
Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.6111111111111112, 'r': 0.009883198562443846, 'f1': 0.019451812555260833}, 'combined': 0.012903677635668078, 'epoch': 4}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4}
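[Editor's note] Reading the score dictionaries: in every entry above, 'f1' is the standard harmonic mean of the listed 'p' and 'r', and 'combined' is the product of the template F1 and the slot F1; both relationships can be checked directly against the logged numbers. A short verification in Python, using the epoch-29 Dev Chinese figures:

    def f1(p, r):
        # Harmonic mean of precision and recall; defined as 0.0 when both are zero.
        return 0.0 if p + r == 0 else 2 * p * r / (p + r)

    template_f1 = f1(1.0, 0.5833333333333334)    # 0.7368421052631579, as logged
    slot_f1 = f1(0.5, 0.03780718336483932)       # 0.07029876977152899, as logged
    combined = template_f1 * slot_f1             # 0.05179909351586346, as logged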
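[Editor's note] The "Current best result" block appears to track, per language, the epoch with the highest dev 'combined' score (epoch 2 for Chinese and Korean, epoch 4 for Russian) together with the test and sample scores from that epoch; the epoch-29 dev scores tie the stored best but do not replace it, which is consistent with a strict greater-than comparison. A hypothetical sketch of that bookkeeping, with illustrative names not taken from train.py:

    # Hypothetical per-language best-result bookkeeping; all names are assumptions.
    best_results = {}  # language -> {'dev': ..., 'test': ..., 'sample': ..., 'epoch': ...}

    def update_best(language, epoch, dev, test, sample):
        prev = best_results.get(language)
        # Strict '>' keeps the earlier epoch on ties, which would explain why
        # epochs 2 and 4 are still reported even though epoch 29 matches them on dev.
        if prev is None or dev['combined'] > prev['dev']['combined']:
            best_results[language] = {'dev': dev, 'test': test, 'sample': sample, 'epoch': epoch}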