Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024])
>>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024])
>>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024])
>>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024])
>>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.{0..23}: all 24 encoder layers carry identically shaped parameters:
    attention.self.query.weight: torch.Size([1024, 1024]), attention.self.query.bias: torch.Size([1024])
    attention.self.key.weight: torch.Size([1024, 1024]), attention.self.key.bias: torch.Size([1024])
    attention.self.value.weight: torch.Size([1024, 1024]), attention.self.value.bias: torch.Size([1024])
    attention.output.dense.weight: torch.Size([1024, 1024]), attention.output.dense.bias: torch.Size([1024])
    attention.output.LayerNorm.weight: torch.Size([1024]), attention.output.LayerNorm.bias: torch.Size([1024])
    intermediate.dense.weight: torch.Size([4096, 1024]), intermediate.dense.bias: torch.Size([4096])
    output.dense.weight: torch.Size([1024, 4096]), output.dense.bias: torch.Size([1024])
    output.LayerNorm.weight: torch.Size([1024]), output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.pooler.dense.weight: torch.Size([1024, 1024])
>>> xlmr.pooler.dense.bias: torch.Size([1024])
>>> trans_rep.weight: torch.Size([1024, 2048])
>>> trans_rep.bias: torch.Size([1024])
>>> per-template heads, repeated with identical shapes for each template T in {Corruplate, Cybercrimeplate, Disasterplate, Displacementplate, Epidemiplate, Etiplate, Protestplate, Terrorplate}:
    hidden_ffns.T.layers.0.weight: torch.Size([768, 1024]), hidden_ffns.T.layers.0.bias: torch.Size([768])
    template_classifiers.T.layers.0.weight: torch.Size([450, 768]), template_classifiers.T.layers.0.bias: torch.Size([450])
    template_classifiers.T.layers.1.weight: torch.Size([2, 450]), template_classifiers.T.layers.1.bias: torch.Size([2])
    type_classifiers.T.layers.0.weight: torch.Size([450, 768]), type_classifiers.T.layers.0.bias: torch.Size([450])
    type_classifiers.T.layers.1.weight: torch.Size([6, 450]), type_classifiers.T.layers.1.bias: torch.Size([6])
    completion_classifiers.T.layers.0.weight: torch.Size([450, 768]), completion_classifiers.T.layers.0.bias: torch.Size([450])
    completion_classifiers.T.layers.1.weight: torch.Size([4, 450]), completion_classifiers.T.layers.1.bias: torch.Size([4])
    overtime_classifiers.T.layers.0.weight: torch.Size([450, 768]), overtime_classifiers.T.layers.0.bias: torch.Size([450])
    overtime_classifiers.T.layers.1.weight: torch.Size([2, 450]), overtime_classifiers.T.layers.1.bias: torch.Size([2])
    coordinated_classifiers.T.layers.0.weight: torch.Size([450, 768]), coordinated_classifiers.T.layers.0.bias: torch.Size([450])
    coordinated_classifiers.T.layers.1.weight: torch.Size([2, 450]), coordinated_classifiers.T.layers.1.bias: torch.Size([2])
n_trainable_params: 582185936, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
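The shapes above match the published xlm-roberta-large checkpoint (24 encoder layers, hidden size 1024, vocabulary 250002) plus task-specific template heads, and the counts sum to the 582,185,936 trainable parameters reported. A minimal sketch of how a dump in this format can be produced; the backbone load is shown for illustration and the summary printer is an assumption, not the actual train.py code:

```python
import torch
from transformers import AutoModel

# Backbone implied by the dump: xlm-roberta-large (24 layers, hidden 1024).
# The task heads (hidden_ffns, *_classifiers) belong to the full model in
# train.py; here we only sketch the summary printer itself.
model = AutoModel.from_pretrained("xlm-roberta-large")

def print_trainable(model: torch.nn.Module) -> None:
    n_train = n_fixed = 0
    print("> trainable params:")
    for name, p in model.named_parameters():
        if p.requires_grad:
            print(f">>> {name}: {p.shape}")  # prints e.g. torch.Size([1024, 1024])
            n_train += p.numel()
        else:
            n_fixed += p.numel()
    print(f"n_trainable_params: {n_train}, n_nontrainable_params: {n_fixed}")

print_trainable(model)
```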
******************************
Epoch: 0
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 08:04:12.299096: step: 4/77, loss: 1.0521974563598633
2023-01-22 08:04:13.629240: step: 8/77, loss: 1.0530740022659302
2023-01-22 08:04:14.961628: step: 12/77, loss: 1.048304796218872
2023-01-22 08:04:16.276768: step: 16/77, loss: 1.0575125217437744
2023-01-22 08:04:17.594492: step: 20/77, loss: 1.0364112854003906
2023-01-22 08:04:18.883718: step: 24/77, loss: 1.0486927032470703
2023-01-22 08:04:20.241749: step: 28/77, loss: 1.0608787536621094
2023-01-22 08:04:21.558710: step: 32/77, loss: 1.0362920761108398
2023-01-22 08:04:22.854877: step: 36/77, loss: 1.0434200763702393
2023-01-22 08:04:24.160050: step: 40/77, loss: 1.0297248363494873
2023-01-22 08:04:25.471238: step: 44/77, loss: 1.0186331272125244
2023-01-22 08:04:26.777378: step: 48/77, loss: 1.0153753757476807
2023-01-22 08:04:28.074842: step: 52/77, loss: 1.0195648670196533
2023-01-22 08:04:29.373675: step: 56/77, loss: 1.0007667541503906
2023-01-22 08:04:30.668165: step: 60/77, loss: 0.9879387617111206
2023-01-22 08:04:31.961869: step: 64/77, loss: 0.9750910997390747
2023-01-22 08:04:33.274401: step: 68/77, loss: 0.9763144254684448
2023-01-22 08:04:34.611856: step: 72/77, loss: 0.9537844657897949
2023-01-22 08:04:35.922621: step: 76/77, loss: 0.9456419348716736
2023-01-22 08:04:37.260930: step: 80/77, loss: 0.9266098737716675
2023-01-22 08:04:38.537828: step: 84/77, loss: 0.9407334327697754
2023-01-22 08:04:39.843971: step: 88/77, loss: 0.9128175973892212
2023-01-22 08:04:41.213226: step: 92/77, loss: 0.8884884119033813
2023-01-22 08:04:42.549629: step: 96/77, loss: 0.8754992485046387
2023-01-22 08:04:43.809553: step: 100/77, loss: 0.8558071851730347
2023-01-22 08:04:45.140829: step: 104/77, loss: 0.8275822997093201
2023-01-22 08:04:46.465337: step: 108/77, loss: 0.8083636164665222
2023-01-22 08:04:47.782048: step: 112/77, loss: 0.7995848059654236
2023-01-22 08:04:49.087391: step: 116/77, loss: 0.7754606604576111
2023-01-22 08:04:50.440054: step: 120/77, loss: 0.7789733409881592
2023-01-22 08:04:51.747763: step: 124/77, loss: 0.7553587555885315
2023-01-22 08:04:53.067439: step: 128/77, loss: 0.75539231300354
2023-01-22 08:04:54.383731: step: 132/77, loss: 0.7256723642349243
2023-01-22 08:04:55.708923: step: 136/77, loss: 0.6737915277481079
2023-01-22 08:04:56.985845: step: 140/77, loss: 0.6848892569541931
2023-01-22 08:04:58.269304: step: 144/77, loss: 0.6683143377304077
2023-01-22 08:04:59.605061: step: 148/77, loss: 0.6761552095413208
2023-01-22 08:05:00.920670: step: 152/77, loss: 0.6221305131912231
2023-01-22 08:05:02.233087: step: 156/77, loss: 0.6018381118774414
2023-01-22 08:05:03.569589: step: 160/77, loss: 0.6005375981330872
2023-01-22 08:05:04.906060: step: 164/77, loss: 0.5284562110900879
2023-01-22 08:05:06.274346: step: 168/77, loss: 0.5597098469734192
2023-01-22 08:05:07.572703: step: 172/77, loss: 0.43947166204452515
2023-01-22 08:05:08.855437: step: 176/77, loss: 0.4276687204837799
2023-01-22 08:05:10.197046: step: 180/77, loss: 0.4454874098300934
2023-01-22 08:05:11.498130: step: 184/77, loss: 0.5441113710403442
2023-01-22 08:05:12.855644: step: 188/77, loss: 0.35866567492485046
2023-01-22 08:05:14.187026: step: 192/77, loss: 0.4040197730064392
2023-01-22 08:05:15.524160: step: 196/77, loss: 0.3254753053188324
2023-01-22 08:05:16.852751: step: 200/77, loss: 0.3546521067619324
2023-01-22 08:05:18.211991: step: 204/77, loss: 0.3514381945133209
2023-01-22 08:05:19.492558: step: 208/77, loss: 0.3122267425060272
2023-01-22 08:05:20.822866: step: 212/77, loss: 0.2432302087545395
2023-01-22 08:05:22.140947: step: 216/77, loss: 0.1995767056941986
2023-01-22 08:05:23.431288: step: 220/77, loss: 0.33912259340286255
2023-01-22 08:05:24.725780: step: 224/77, loss: 0.2183302938938141
2023-01-22 08:05:26.033157: step: 228/77, loss: 0.33341485261917114
2023-01-22 08:05:27.360480: step: 232/77, loss: 0.1881929337978363
2023-01-22 08:05:28.679499: step: 236/77, loss: 0.24369250237941742
2023-01-22 08:05:30.022579: step: 240/77, loss: 0.21067927777767181
2023-01-22 08:05:31.337026: step: 244/77, loss: 0.16403478384017944
2023-01-22 08:05:32.648698: step: 248/77, loss: 0.30606308579444885
2023-01-22 08:05:33.977222: step: 252/77, loss: 0.1937190592288971
2023-01-22 08:05:35.310104: step: 256/77, loss: 0.32572799921035767
2023-01-22 08:05:36.605155: step: 260/77, loss: 0.13746631145477295
2023-01-22 08:05:37.901994: step: 264/77, loss: 0.1280936598777771
2023-01-22 08:05:39.198256: step: 268/77, loss: 0.10665931552648544
2023-01-22 08:05:40.493861: step: 272/77, loss: 0.10630533844232559
2023-01-22 08:05:41.844394: step: 276/77, loss: 0.12797018885612488
2023-01-22 08:05:43.195351: step: 280/77, loss: 0.2352885901927948
2023-01-22 08:05:44.565161: step: 284/77, loss: 0.06179904192686081
2023-01-22 08:05:45.921782: step: 288/77, loss: 0.1331750750541687
2023-01-22 08:05:47.214403: step: 292/77, loss: 0.06892704963684082
2023-01-22 08:05:48.549893: step: 296/77, loss: 0.1391991227865219
2023-01-22 08:05:49.875151: step: 300/77, loss: 0.07123759388923645
2023-01-22 08:05:51.218246: step: 304/77, loss: 0.11932633817195892
2023-01-22 08:05:52.595849: step: 308/77, loss: 0.1587085872888565
2023-01-22 08:05:53.912875: step: 312/77, loss: 0.034506428986787796
2023-01-22 08:05:55.223067: step: 316/77, loss: 0.05934888869524002
2023-01-22 08:05:56.540128: step: 320/77, loss: 0.05228797718882561
2023-01-22 08:05:57.847858: step: 324/77, loss: 0.05070921778678894
2023-01-22 08:05:59.158166: step: 328/77, loss: 0.25672921538352966
2023-01-22 08:06:00.467222: step: 332/77, loss: 0.07970165461301804
2023-01-22 08:06:01.785614: step: 336/77, loss: 0.11222337186336517
2023-01-22 08:06:03.059487: step: 340/77, loss: 0.09403430670499802
2023-01-22 08:06:04.389743: step: 344/77, loss: 0.05787937343120575
2023-01-22 08:06:05.696811: step: 348/77, loss: 0.04168379306793213
2023-01-22 08:06:07.047714: step: 352/77, loss: 0.09637323021888733
2023-01-22 08:06:08.383083: step: 356/77, loss: 0.05214162915945053
2023-01-22 08:06:09.661348: step: 360/77, loss: 0.13377325236797333
2023-01-22 08:06:10.924226: step: 364/77, loss: 0.09389052540063858
2023-01-22 08:06:12.228725: step: 368/77, loss: 0.18593792617321014
2023-01-22 08:06:13.569089: step: 372/77, loss: 0.13386821746826172
2023-01-22 08:06:14.904734: step: 376/77, loss: 0.10935309529304504
2023-01-22 08:06:16.229101: step: 380/77, loss: 0.0837806761264801
2023-01-22 08:06:17.516778: step: 384/77, loss: 0.07167628407478333
2023-01-22 08:06:18.875883: step: 388/77, loss: 0.19204410910606384
==================================================
Loss: 0.486
--------------------
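With --batch_size 10 and --accumulate_step 4, the logged step counter advances by 4: one optimizer update per four mini-batches, with the most recent batch loss printed at each update. Note that the counter runs past the printed total (up to 388/77), a bookkeeping quirk of the original script that the sketch below does not try to reproduce. A minimal sketch of that cadence; model, dataloader, and optimizer are placeholders, not the actual train.py objects:

```python
import datetime

ACCUMULATE_STEP = 4  # from --accumulate_step 4

def train_epoch(model, dataloader, optimizer):
    """One epoch with gradient accumulation and step logging as in the log."""
    optimizer.zero_grad()
    for i, batch in enumerate(dataloader, start=1):
        loss = model(batch)                  # assumed: forward returns a scalar loss
        (loss / ACCUMULATE_STEP).backward()  # scale so gradients average over the group
        if i % ACCUMULATE_STEP == 0:
            optimizer.step()
            optimizer.zero_grad()
            print(f"{datetime.datetime.now()}: step: {i}/{len(dataloader)}, "
                  f"loss: {loss.item()}")
```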
Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
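Each evaluation line is a per-language record holding precision/recall/F1 for template detection and for slot filling, plus a 'combined' score and the epoch index. A sketch of how such a record can be built from raw match counts; the counts are hypothetical and how 'combined' mixes the two F1 values is an assumption, since every score at this point is still 0.0:

```python
def prf(n_pred: int, n_gold: int, n_correct: int) -> dict:
    """Precision, recall, F1 from raw match counts."""
    p = n_correct / n_pred if n_pred else 0.0
    r = n_correct / n_gold if n_gold else 0.0
    f1 = 2 * p * r / (p + r) if p + r else 0.0
    return {"p": p, "r": r, "f1": f1}

# Hypothetical counts; with no correct predictions everything collapses to
# 0.0, which is exactly what the epoch-0 lines above show.
record = {
    "template": prf(n_pred=0, n_gold=12, n_correct=0),
    "slot": prf(n_pred=0, n_gold=60, n_correct=0),
    "combined": 0.0,  # assumed to combine template and slot F1; exact formula unknown
    "epoch": 0,
}
```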
New best chinese model...
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
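The "New best ... model..." lines and the "Current best result" block suggest a per-language checkpoint tracker keyed on the dev 'combined' score; the identical 0.0 score at epoch 1 does not trigger an update, so the comparison is presumably strict. A minimal sketch under those assumptions; the names and the comparison key are guesses, not the actual train.py logic:

```python
best = {}  # language -> {"dev": ..., "test": ..., "state": ...}

def maybe_update(lang: str, dev: dict, test: dict, state: dict) -> None:
    """Keep the checkpoint with the strictly best dev 'combined' per language."""
    prev = best.get(lang)
    if prev is None or dev["combined"] > prev["dev"]["combined"]:
        best[lang] = {"dev": dev, "test": test, "state": state}
        print(f"New best {lang} model...")
```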
******************************
Epoch: 1
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 08:08:34.459532: step: 4/77, loss: 0.16772520542144775
2023-01-22 08:08:35.812169: step: 8/77, loss: 0.09386086463928223
2023-01-22 08:08:37.064614: step: 12/77, loss: 0.07116525620222092
2023-01-22 08:08:38.344733: step: 16/77, loss: 0.08739854395389557
2023-01-22 08:08:39.652812: step: 20/77, loss: 0.09066418558359146
2023-01-22 08:08:40.926944: step: 24/77, loss: 0.09214691072702408
2023-01-22 08:08:42.224666: step: 28/77, loss: 0.07001110166311264
2023-01-22 08:08:43.541928: step: 32/77, loss: 0.24295248091220856
2023-01-22 08:08:44.866152: step: 36/77, loss: 0.2596581280231476
2023-01-22 08:08:46.155254: step: 40/77, loss: 0.10341985523700714
2023-01-22 08:08:47.450764: step: 44/77, loss: 0.06520126760005951
2023-01-22 08:08:48.752704: step: 48/77, loss: 0.08200832456350327
2023-01-22 08:08:50.028104: step: 52/77, loss: 0.07770586013793945
2023-01-22 08:08:51.304526: step: 56/77, loss: 0.3515990078449249
2023-01-22 08:08:52.604050: step: 60/77, loss: 0.061179328709840775
2023-01-22 08:08:53.945443: step: 64/77, loss: 0.10807536542415619
2023-01-22 08:08:55.255045: step: 68/77, loss: 0.0880051702260971
2023-01-22 08:08:56.608888: step: 72/77, loss: 0.09584487974643707
2023-01-22 08:08:57.931582: step: 76/77, loss: 0.08828597515821457
2023-01-22 08:08:59.249178: step: 80/77, loss: 0.2787051498889923
2023-01-22 08:09:00.612623: step: 84/77, loss: 0.07855068892240524
2023-01-22 08:09:01.927054: step: 88/77, loss: 0.33545881509780884
2023-01-22 08:09:03.290459: step: 92/77, loss: 0.22064723074436188
2023-01-22 08:09:04.606197: step: 96/77, loss: 0.07103629410266876
2023-01-22 08:09:05.936538: step: 100/77, loss: 0.05532563850283623
2023-01-22 08:09:07.256983: step: 104/77, loss: 0.14419548213481903
2023-01-22 08:09:08.518900: step: 108/77, loss: 0.1311594545841217
2023-01-22 08:09:09.791130: step: 112/77, loss: 0.03030647709965706
2023-01-22 08:09:11.075582: step: 116/77, loss: 0.14231641590595245
2023-01-22 08:09:12.373124: step: 120/77, loss: 0.049726489931344986
2023-01-22 08:09:13.723957: step: 124/77, loss: 0.14961367845535278
2023-01-22 08:09:15.074550: step: 128/77, loss: 0.10820247232913971
2023-01-22 08:09:16.402272: step: 132/77, loss: 0.05236474797129631
2023-01-22 08:09:17.701974: step: 136/77, loss: 0.24166175723075867
2023-01-22 08:09:18.974572: step: 140/77, loss: 0.08192186802625656
2023-01-22 08:09:20.234948: step: 144/77, loss: 0.10944778472185135
2023-01-22 08:09:21.523459: step: 148/77, loss: 0.10985735058784485
2023-01-22 08:09:22.867203: step: 152/77, loss: 0.1409648358821869
2023-01-22 08:09:24.216120: step: 156/77, loss: 0.07578420639038086
2023-01-22 08:09:25.530345: step: 160/77, loss: 0.10720621794462204
2023-01-22 08:09:26.836123: step: 164/77, loss: 0.13833269476890564
2023-01-22 08:09:28.171081: step: 168/77, loss: 0.06345750391483307
2023-01-22 08:09:29.515041: step: 172/77, loss: 0.053549427539110184
2023-01-22 08:09:30.811091: step: 176/77, loss: 0.050545014441013336
2023-01-22 08:09:32.175187: step: 180/77, loss: 0.060750193893909454
2023-01-22 08:09:33.505677: step: 184/77, loss: 0.1301671266555786
2023-01-22 08:09:34.782367: step: 188/77, loss: 0.07948384433984756
2023-01-22 08:09:36.090769: step: 192/77, loss: 0.07410618662834167
2023-01-22 08:09:37.423681: step: 196/77, loss: 0.1077779084444046
2023-01-22 08:09:38.729427: step: 200/77, loss: 0.06981280446052551
2023-01-22 08:09:40.027276: step: 204/77, loss: 0.06006040796637535
2023-01-22 08:09:41.346749: step: 208/77, loss: 0.11348582059144974
2023-01-22 08:09:42.626334: step: 212/77, loss: 0.11600235104560852
2023-01-22 08:09:43.898586: step: 216/77, loss: 0.09750326722860336
2023-01-22 08:09:45.206352: step: 220/77, loss: 0.04756221920251846
2023-01-22 08:09:46.508662: step: 224/77, loss: 0.18947850167751312
2023-01-22 08:09:47.762738: step: 228/77, loss: 0.02307966724038124
2023-01-22 08:09:49.030448: step: 232/77, loss: 0.12566381692886353
2023-01-22 08:09:50.347064: step: 236/77, loss: 0.05205581337213516
2023-01-22 08:09:51.699639: step: 240/77, loss: 0.20388080179691315
2023-01-22 08:09:53.012695: step: 244/77, loss: 0.15827858448028564
2023-01-22 08:09:54.314128: step: 248/77, loss: 0.08616123348474503
2023-01-22 08:09:55.655891: step: 252/77, loss: 0.08386712521314621
2023-01-22 08:09:56.948602: step: 256/77, loss: 0.15348979830741882
2023-01-22 08:09:58.261174: step: 260/77, loss: 0.11783164739608765
2023-01-22 08:09:59.538985: step: 264/77, loss: 0.0749620795249939
2023-01-22 08:10:00.825293: step: 268/77, loss: 0.1427043080329895
2023-01-22 08:10:02.146655: step: 272/77, loss: 0.1360216736793518
2023-01-22 08:10:03.493985: step: 276/77, loss: 0.04299698770046234
2023-01-22 08:10:04.797063: step: 280/77, loss: 0.032745327800512314
2023-01-22 08:10:06.151014: step: 284/77, loss: 0.06415650248527527
2023-01-22 08:10:07.485803: step: 288/77, loss: 0.02924133650958538
2023-01-22 08:10:08.836374: step: 292/77, loss: 0.1073332279920578
2023-01-22 08:10:10.120018: step: 296/77, loss: 0.035075489431619644
2023-01-22 08:10:11.390267: step: 300/77, loss: 0.14287783205509186
2023-01-22 08:10:12.679330: step: 304/77, loss: 0.289726197719574
2023-01-22 08:10:14.026210: step: 308/77, loss: 0.0739937275648117
2023-01-22 08:10:15.384852: step: 312/77, loss: 0.08734475821256638
2023-01-22 08:10:16.703023: step: 316/77, loss: 0.30002105236053467
2023-01-22 08:10:18.018827: step: 320/77, loss: 0.09770867228507996
2023-01-22 08:10:19.345846: step: 324/77, loss: 0.1629657745361328
2023-01-22 08:10:20.640408: step: 328/77, loss: 0.14150488376617432
2023-01-22 08:10:21.930744: step: 332/77, loss: 0.0872669443488121
2023-01-22 08:10:23.248743: step: 336/77, loss: 0.069187693297863
2023-01-22 08:10:24.536302: step: 340/77, loss: 0.06144079566001892
2023-01-22 08:10:25.843135: step: 344/77, loss: 0.21710869669914246
2023-01-22 08:10:27.135435: step: 348/77, loss: 0.10837674140930176
2023-01-22 08:10:28.448003: step: 352/77, loss: 0.050491150468587875
2023-01-22 08:10:29.828746: step: 356/77, loss: 0.08301954716444016
2023-01-22 08:10:31.128506: step: 360/77, loss: 0.08111888915300369
2023-01-22 08:10:32.454424: step: 364/77, loss: 0.18378156423568726
2023-01-22 08:10:33.762314: step: 368/77, loss: 0.08763030171394348
2023-01-22 08:10:35.090263: step: 372/77, loss: 0.12362384051084518
2023-01-22 08:10:36.371888: step: 376/77, loss: 0.16146932542324066
2023-01-22 08:10:37.667779: step: 380/77, loss: 0.11777414381504059
2023-01-22 08:10:38.965611: step: 384/77, loss: 0.11122996360063553
2023-01-22 08:10:40.273224: step: 388/77, loss: 0.056869350373744965
==================================================
Loss: 0.114
--------------------
Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
******************************
Epoch: 2
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 08:12:33.604355: step: 4/77, loss: 0.08978293836116791
2023-01-22 08:12:34.882105: step: 8/77, loss: 0.0897291973233223
2023-01-22 08:12:36.205564: step: 12/77, loss: 0.09467065334320068
2023-01-22 08:12:37.499795: step: 16/77, loss: 0.05945632606744766
2023-01-22 08:12:38.809017: step: 20/77, loss: 0.08201459795236588
2023-01-22 08:12:40.145541: step: 24/77, loss: 0.10762184858322144
2023-01-22 08:12:41.458058: step: 28/77, loss: 0.10683409869670868
2023-01-22 08:12:42.785825: step: 32/77, loss: 0.15592831373214722
2023-01-22 08:12:44.106952: step: 36/77, loss: 0.15853601694107056
2023-01-22 08:12:45.418285: step: 40/77, loss: 0.1696840524673462
2023-01-22 08:12:46.746328: step: 44/77, loss: 0.07610610127449036
2023-01-22 08:12:48.032027: step: 48/77, loss: 0.04936008155345917
2023-01-22 08:12:49.319421: step: 52/77, loss: 0.06662851572036743
2023-01-22 08:12:50.521534: step: 56/77, loss: 0.11901339143514633
2023-01-22 08:12:51.884772: step: 60/77, loss: 0.06335229426622391
2023-01-22 08:12:53.200192: step: 64/77, loss: 0.15157847106456757
2023-01-22 08:12:54.476236: step: 68/77, loss: 0.028425315394997597
2023-01-22 08:12:55.798841: step: 72/77, loss: 0.07254324853420258
2023-01-22 08:12:57.039802: step: 76/77, loss: 0.026222839951515198
2023-01-22 08:12:58.411017: step: 80/77, loss: 0.25892165303230286
2023-01-22 08:12:59.738837: step: 84/77, loss: 0.1859690099954605
2023-01-22 08:13:01.068114: step: 88/77, loss: 0.19821254909038544
2023-01-22 08:13:02.366152: step: 92/77, loss: 0.05598260462284088
2023-01-22 08:13:03.665999: step: 96/77, loss: 0.026238400489091873
2023-01-22 08:13:04.970513: step: 100/77, loss: 0.14643670618534088
2023-01-22 08:13:06.281284: step: 104/77, loss: 0.025064485147595406
2023-01-22 08:13:07.605376: step: 108/77, loss: 0.04936981201171875
2023-01-22 08:13:08.919157: step: 112/77, loss: 0.031216200441122055
2023-01-22 08:13:10.265136: step: 116/77, loss: 0.07248411327600479
2023-01-22 08:13:11.593621: step: 120/77, loss: 0.0912466049194336
2023-01-22 08:13:12.937002: step: 124/77, loss: 0.053715743124485016
2023-01-22 08:13:14.205337: step: 128/77, loss: 0.026041915640234947
2023-01-22 08:13:15.485409: step: 132/77, loss: 0.033120620995759964
2023-01-22 08:13:16.832403: step: 136/77, loss: 0.06037403270602226
2023-01-22 08:13:18.091236: step: 140/77, loss: 0.0261702761054039
2023-01-22 08:13:19.410474: step:
144/77, loss: 0.07391418516635895 2023-01-22 08:13:20.732982: step: 148/77, loss: 0.04960927367210388 2023-01-22 08:13:22.023328: step: 152/77, loss: 0.045701827853918076 2023-01-22 08:13:23.373682: step: 156/77, loss: 0.10179883986711502 2023-01-22 08:13:24.669257: step: 160/77, loss: 0.020967040210962296 2023-01-22 08:13:25.962518: step: 164/77, loss: 0.09591569006443024 2023-01-22 08:13:27.269131: step: 168/77, loss: 0.03472476080060005 2023-01-22 08:13:28.555792: step: 172/77, loss: 0.03430356830358505 2023-01-22 08:13:29.867551: step: 176/77, loss: 0.08539354801177979 2023-01-22 08:13:31.162994: step: 180/77, loss: 0.012025153264403343 2023-01-22 08:13:32.445141: step: 184/77, loss: 0.12098461389541626 2023-01-22 08:13:33.722029: step: 188/77, loss: 0.08685184270143509 2023-01-22 08:13:35.050392: step: 192/77, loss: 0.008195833303034306 2023-01-22 08:13:36.348234: step: 196/77, loss: 0.06708987802267075 2023-01-22 08:13:37.640502: step: 200/77, loss: 0.03092000260949135 2023-01-22 08:13:38.933400: step: 204/77, loss: 0.028394218534231186 2023-01-22 08:13:40.295529: step: 208/77, loss: 0.06200258433818817 2023-01-22 08:13:41.580507: step: 212/77, loss: 0.028375042602419853 2023-01-22 08:13:42.922066: step: 216/77, loss: 0.03608822450041771 2023-01-22 08:13:44.271431: step: 220/77, loss: 0.13196060061454773 2023-01-22 08:13:45.588089: step: 224/77, loss: 0.04849873483181 2023-01-22 08:13:46.941811: step: 228/77, loss: 0.024536605924367905 2023-01-22 08:13:48.220852: step: 232/77, loss: 0.02170020341873169 2023-01-22 08:13:49.559275: step: 236/77, loss: 0.13406196236610413 2023-01-22 08:13:50.864253: step: 240/77, loss: 0.04485291987657547 2023-01-22 08:13:52.170794: step: 244/77, loss: 0.015026187524199486 2023-01-22 08:13:53.448077: step: 248/77, loss: 0.1131889820098877 2023-01-22 08:13:54.775090: step: 252/77, loss: 0.11656603217124939 2023-01-22 08:13:56.064765: step: 256/77, loss: 0.07655934244394302 2023-01-22 08:13:57.376066: step: 260/77, loss: 0.005910799838602543 2023-01-22 08:13:58.686549: step: 264/77, loss: 0.02708623930811882 2023-01-22 08:13:59.990461: step: 268/77, loss: 0.07830090820789337 2023-01-22 08:14:01.357262: step: 272/77, loss: 0.005537528544664383 2023-01-22 08:14:02.678524: step: 276/77, loss: 0.030812840908765793 2023-01-22 08:14:03.972692: step: 280/77, loss: 0.05393827706575394 2023-01-22 08:14:05.326115: step: 284/77, loss: 0.011534723453223705 2023-01-22 08:14:06.699226: step: 288/77, loss: 0.03256256505846977 2023-01-22 08:14:08.035781: step: 292/77, loss: 0.05714274197816849 2023-01-22 08:14:09.369227: step: 296/77, loss: 0.052277807146310806 2023-01-22 08:14:10.694840: step: 300/77, loss: 0.04839061200618744 2023-01-22 08:14:11.999382: step: 304/77, loss: 0.02827790565788746 2023-01-22 08:14:13.309708: step: 308/77, loss: 0.012759722769260406 2023-01-22 08:14:14.641781: step: 312/77, loss: 0.004941337741911411 2023-01-22 08:14:15.938323: step: 316/77, loss: 0.011740414425730705 2023-01-22 08:14:17.254190: step: 320/77, loss: 0.03277549147605896 2023-01-22 08:14:18.594719: step: 324/77, loss: 0.3591340184211731 2023-01-22 08:14:19.857610: step: 328/77, loss: 0.04585202783346176 2023-01-22 08:14:21.139473: step: 332/77, loss: 0.0037703411653637886 2023-01-22 08:14:22.437071: step: 336/77, loss: 0.0018533715046942234 2023-01-22 08:14:23.779117: step: 340/77, loss: 0.010116029530763626 2023-01-22 08:14:25.115409: step: 344/77, loss: 0.016683772206306458 2023-01-22 08:14:26.424267: step: 348/77, loss: 0.09151042997837067 2023-01-22 08:14:27.718637: step: 
352/77, loss: 0.13456642627716064 2023-01-22 08:14:29.021800: step: 356/77, loss: 0.05715903639793396 2023-01-22 08:14:30.308261: step: 360/77, loss: 0.02043786831200123 2023-01-22 08:14:31.646362: step: 364/77, loss: 0.026198705658316612 2023-01-22 08:14:32.917034: step: 368/77, loss: 0.0319807305932045 2023-01-22 08:14:34.291440: step: 372/77, loss: 0.017337413504719734 2023-01-22 08:14:35.645052: step: 376/77, loss: 0.05395495891571045 2023-01-22 08:14:36.989274: step: 380/77, loss: 0.08604675531387329 2023-01-22 08:14:38.300055: step: 384/77, loss: 0.03814491257071495 2023-01-22 08:14:39.649802: step: 388/77, loss: 0.039043229073286057 ================================================== Loss: 0.066 -------------------- Dev Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test Chinese: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test Korean: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test Russian: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
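--------------------
The evaluation dicts above follow one fixed arithmetic, worth spelling out at this first epoch with non-zero scores: each 'f1' is the standard harmonic mean of the logged 'p' and 'r' (reported as 0.0 when both are zero, as in the epoch-1 dicts), and 'combined' is the product of the template F1 and the slot F1 (0.7291666666666666 * 0.07029876977152899 = 0.05125951962507322 for Dev Chinese above). The sketch below is a minimal reconstruction of that logic from the logged numbers; every name in it (f1, update_best, best) is an illustrative assumption, not an identifier taken from train.py.

def f1(p: float, r: float) -> float:
    """Standard F1 (harmonic mean of precision and recall);
    defined as 0.0 when p + r == 0, matching the all-zero epoch-1 dicts."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# 'combined' = template F1 * slot F1, checked against Dev Chinese above.
template_f1 = f1(0.9722222222222222, 0.5833333333333334)  # -> 0.7291666666666666
slot_f1 = f1(0.5, 0.03780718336483932)                    # -> 0.07029876977152899
assert abs(template_f1 * slot_f1 - 0.05125951962507322) < 1e-12

# Per-language best-model tracking consistent with the "New best ... model"
# messages above: a new checkpoint is announced only on strict improvement
# of the dev 'combined' score. (Later in this log, epoch 6 exactly ties
# epoch 3 at 0.05179909351586346 and prints no "New best" message.)
best = {lang: 0.0 for lang in ("chinese", "korean", "russian")}

def update_best(lang: str, dev_combined: float) -> bool:
    if dev_combined > best[lang]:
        best[lang] = dev_combined
        print(f"New best {lang} model...")
        return True
    return False

Under this reading, the "Current best result" blocks printed after each epoch simply replay the stored best dev/test/sample dicts, which is why a stale epoch keeps reappearing there until the dev 'combined' score strictly improves again.
--------------------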
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test for Chinese: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test for Korean: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 2} Test for Russian: {'template': {'p': 0.7125, 'r': 0.4351145038167939, 'f1': 0.5402843601895734}, 'slot': {'p': 0.475, 'r': 0.017288444040036398, 'f1': 0.033362598770851626}, 'combined': 0.018025290331171017, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:16:54.280374: step: 4/77, loss: 0.008569440804421902 2023-01-22 08:16:55.593650: step: 8/77, loss: 0.06104029715061188 2023-01-22 08:16:56.938689: step: 12/77, loss: 0.03824828565120697 2023-01-22 08:16:58.293894: step: 16/77, loss: 0.008139440789818764 2023-01-22 08:16:59.625236: step: 20/77, loss: 0.011608504690229893 2023-01-22 08:17:00.926260: step: 24/77, loss: 0.07698570191860199 2023-01-22 08:17:02.210666: step: 28/77, loss: 0.02622179500758648 2023-01-22 08:17:03.539240: step: 32/77, loss: 0.016003597527742386 2023-01-22 08:17:04.833767: step: 36/77, loss: 0.058047618716955185 2023-01-22 08:17:06.172813: step: 40/77, loss: 0.05063202604651451 2023-01-22 08:17:07.463910: step: 44/77, loss: 0.026681777089834213 2023-01-22 08:17:08.799098: step: 48/77, loss: 0.019537555053830147 2023-01-22 08:17:10.123769: step: 52/77, loss: 0.03459141403436661 2023-01-22 08:17:11.448193: step: 56/77, loss: 0.004173712804913521 2023-01-22 08:17:12.728999: step: 60/77, loss: 0.0021231891587376595 2023-01-22 08:17:14.077596: step: 64/77, loss: 0.05070459842681885 2023-01-22 08:17:15.368694: step: 68/77, loss: 0.019765764474868774 2023-01-22 08:17:16.631034: step: 72/77, loss: 0.011237685568630695 2023-01-22 08:17:17.933702: step: 76/77, loss: 0.01307184062898159 2023-01-22 08:17:19.223491: step: 
80/77, loss: 0.12346996366977692 2023-01-22 08:17:20.535258: step: 84/77, loss: 0.08720341324806213 2023-01-22 08:17:21.852311: step: 88/77, loss: 0.04888058453798294 2023-01-22 08:17:23.132179: step: 92/77, loss: 0.013795820064842701 2023-01-22 08:17:24.447029: step: 96/77, loss: 0.013095969334244728 2023-01-22 08:17:25.757627: step: 100/77, loss: 0.013364549726247787 2023-01-22 08:17:27.082638: step: 104/77, loss: 0.041961900889873505 2023-01-22 08:17:28.410318: step: 108/77, loss: 0.0419904962182045 2023-01-22 08:17:29.720024: step: 112/77, loss: 0.0536038838326931 2023-01-22 08:17:30.995851: step: 116/77, loss: 0.03844211995601654 2023-01-22 08:17:32.271687: step: 120/77, loss: 0.07768292725086212 2023-01-22 08:17:33.576786: step: 124/77, loss: 0.015956323593854904 2023-01-22 08:17:34.852284: step: 128/77, loss: 0.03323855251073837 2023-01-22 08:17:36.192805: step: 132/77, loss: 0.14282429218292236 2023-01-22 08:17:37.531112: step: 136/77, loss: 0.03105779178440571 2023-01-22 08:17:38.787161: step: 140/77, loss: 0.04877779632806778 2023-01-22 08:17:40.099523: step: 144/77, loss: 0.012325471267104149 2023-01-22 08:17:41.414743: step: 148/77, loss: 0.0034946876112371683 2023-01-22 08:17:42.740711: step: 152/77, loss: 0.05085130035877228 2023-01-22 08:17:44.116970: step: 156/77, loss: 0.04637058824300766 2023-01-22 08:17:45.437910: step: 160/77, loss: 0.01782352849841118 2023-01-22 08:17:46.768160: step: 164/77, loss: 0.00708090839907527 2023-01-22 08:17:48.141838: step: 168/77, loss: 0.03184691071510315 2023-01-22 08:17:49.437582: step: 172/77, loss: 0.10453486442565918 2023-01-22 08:17:50.735458: step: 176/77, loss: 0.0255681574344635 2023-01-22 08:17:52.030262: step: 180/77, loss: 0.026515550911426544 2023-01-22 08:17:53.340036: step: 184/77, loss: 0.042595818638801575 2023-01-22 08:17:54.640344: step: 188/77, loss: 0.01371677964925766 2023-01-22 08:17:55.936812: step: 192/77, loss: 0.0416012778878212 2023-01-22 08:17:57.240150: step: 196/77, loss: 0.08964186161756516 2023-01-22 08:17:58.551463: step: 200/77, loss: 0.006344792433083057 2023-01-22 08:17:59.840258: step: 204/77, loss: 0.11798113584518433 2023-01-22 08:18:01.158318: step: 208/77, loss: 0.028100017458200455 2023-01-22 08:18:02.444746: step: 212/77, loss: 0.0631520226597786 2023-01-22 08:18:03.777113: step: 216/77, loss: 0.03390933945775032 2023-01-22 08:18:05.081892: step: 220/77, loss: 0.16270163655281067 2023-01-22 08:18:06.357002: step: 224/77, loss: 0.03585375100374222 2023-01-22 08:18:07.696088: step: 228/77, loss: 0.05041792616248131 2023-01-22 08:18:09.018796: step: 232/77, loss: 0.11344284564256668 2023-01-22 08:18:10.349988: step: 236/77, loss: 0.04633091390132904 2023-01-22 08:18:11.612314: step: 240/77, loss: 0.005465330556035042 2023-01-22 08:18:12.943318: step: 244/77, loss: 0.052321139723062515 2023-01-22 08:18:14.217905: step: 248/77, loss: 0.004656112752854824 2023-01-22 08:18:15.546039: step: 252/77, loss: 0.0010259983828291297 2023-01-22 08:18:16.814397: step: 256/77, loss: 0.0053174905478954315 2023-01-22 08:18:18.120819: step: 260/77, loss: 0.017421012744307518 2023-01-22 08:18:19.413710: step: 264/77, loss: 0.0311984121799469 2023-01-22 08:18:20.705226: step: 268/77, loss: 0.042203038930892944 2023-01-22 08:18:21.978352: step: 272/77, loss: 0.06962239742279053 2023-01-22 08:18:23.292005: step: 276/77, loss: 0.10471386462450027 2023-01-22 08:18:24.642433: step: 280/77, loss: 0.0454871691763401 2023-01-22 08:18:25.961847: step: 284/77, loss: 0.08277568221092224 2023-01-22 08:18:27.263602: step: 288/77, 
loss: 0.022217385470867157 2023-01-22 08:18:28.532920: step: 292/77, loss: 0.022832242771983147 2023-01-22 08:18:29.884831: step: 296/77, loss: 0.0851108729839325 2023-01-22 08:18:31.219221: step: 300/77, loss: 0.04043712839484215 2023-01-22 08:18:32.546431: step: 304/77, loss: 0.034135933965444565 2023-01-22 08:18:33.917298: step: 308/77, loss: 0.019902069121599197 2023-01-22 08:18:35.241099: step: 312/77, loss: 0.028999945148825645 2023-01-22 08:18:36.591402: step: 316/77, loss: 0.011801834218204021 2023-01-22 08:18:37.947886: step: 320/77, loss: 0.003783810418099165 2023-01-22 08:18:39.232683: step: 324/77, loss: 0.08780589699745178 2023-01-22 08:18:40.549830: step: 328/77, loss: 0.034666549414396286 2023-01-22 08:18:41.867767: step: 332/77, loss: 0.017708729952573776 2023-01-22 08:18:43.178499: step: 336/77, loss: 0.006578100845217705 2023-01-22 08:18:44.529645: step: 340/77, loss: 0.0038012894801795483 2023-01-22 08:18:45.856867: step: 344/77, loss: 0.016202326864004135 2023-01-22 08:18:47.211638: step: 348/77, loss: 0.07350071519613266 2023-01-22 08:18:48.548168: step: 352/77, loss: 0.031798187643289566 2023-01-22 08:18:49.879320: step: 356/77, loss: 0.04961419478058815 2023-01-22 08:18:51.160260: step: 360/77, loss: 0.03064483404159546 2023-01-22 08:18:52.420744: step: 364/77, loss: 0.01778862625360489 2023-01-22 08:18:53.737804: step: 368/77, loss: 0.05175955593585968 2023-01-22 08:18:55.044620: step: 372/77, loss: 0.08155137300491333 2023-01-22 08:18:56.365038: step: 376/77, loss: 0.20118819177150726 2023-01-22 08:18:57.704957: step: 380/77, loss: 0.04779066890478134 2023-01-22 08:18:59.028468: step: 384/77, loss: 0.043782129883766174 2023-01-22 08:19:00.330772: step: 388/77, loss: 0.021426646038889885 ================================================== Loss: 0.042 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 
'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:21:14.239548: step: 4/77, loss: 0.04137624800205231 2023-01-22 08:21:15.537361: step: 8/77, loss: 0.038542598485946655 2023-01-22 08:21:16.827067: step: 12/77, loss: 0.009367510676383972 2023-01-22 08:21:18.117801: step: 16/77, loss: 0.022862209007143974 2023-01-22 08:21:19.479758: step: 20/77, loss: 0.08002949506044388 2023-01-22 08:21:20.833117: step: 24/77, loss: 0.0319170281291008 2023-01-22 08:21:22.178231: step: 28/77, loss: 0.0006610316340811551 2023-01-22 08:21:23.454149: step: 32/77, loss: 0.02024516463279724 2023-01-22 08:21:24.710249: step: 36/77, loss: 0.01656205765902996 2023-01-22 08:21:25.990182: step: 40/77, loss: 0.013603459112346172 2023-01-22 08:21:27.363185: step: 44/77, loss: 0.002709874650463462 2023-01-22 08:21:28.686483: step: 48/77, loss: 0.06883476674556732 2023-01-22 08:21:29.994289: step: 52/77, loss: 0.01029855664819479 2023-01-22 08:21:31.346569: step: 56/77, loss: 0.07245709747076035 2023-01-22 08:21:32.646596: step: 60/77, loss: 0.012648254632949829 2023-01-22 08:21:33.933954: step: 64/77, loss: 0.02669510245323181 2023-01-22 08:21:35.175233: 
step: 68/77, loss: 0.028342055156826973 2023-01-22 08:21:36.466673: step: 72/77, loss: 0.07287586480379105 2023-01-22 08:21:37.810154: step: 76/77, loss: 0.011641661636531353 2023-01-22 08:21:39.111176: step: 80/77, loss: 0.03511600196361542 2023-01-22 08:21:40.390900: step: 84/77, loss: 0.009830279275774956 2023-01-22 08:21:41.668281: step: 88/77, loss: 0.008189433254301548 2023-01-22 08:21:43.032626: step: 92/77, loss: 0.01783733069896698 2023-01-22 08:21:44.353005: step: 96/77, loss: 0.00765819801017642 2023-01-22 08:21:45.693305: step: 100/77, loss: 0.023452438414096832 2023-01-22 08:21:47.028939: step: 104/77, loss: 0.019945833832025528 2023-01-22 08:21:48.349872: step: 108/77, loss: 0.02258324809372425 2023-01-22 08:21:49.602308: step: 112/77, loss: 0.020979253575205803 2023-01-22 08:21:50.906453: step: 116/77, loss: 0.015185080468654633 2023-01-22 08:21:52.203172: step: 120/77, loss: 0.1529376208782196 2023-01-22 08:21:53.530521: step: 124/77, loss: 0.07699036598205566 2023-01-22 08:21:54.915882: step: 128/77, loss: 0.0060681188479065895 2023-01-22 08:21:56.240381: step: 132/77, loss: 0.02876776084303856 2023-01-22 08:21:57.551918: step: 136/77, loss: 0.007397600449621677 2023-01-22 08:21:58.815573: step: 140/77, loss: 0.04085937887430191 2023-01-22 08:22:00.130837: step: 144/77, loss: 0.01059473305940628 2023-01-22 08:22:01.438133: step: 148/77, loss: 0.028688378632068634 2023-01-22 08:22:02.745723: step: 152/77, loss: 0.007237972691655159 2023-01-22 08:22:04.078184: step: 156/77, loss: 0.020491447299718857 2023-01-22 08:22:05.385431: step: 160/77, loss: 0.02271532267332077 2023-01-22 08:22:06.706119: step: 164/77, loss: 0.05541416257619858 2023-01-22 08:22:07.987656: step: 168/77, loss: 0.016528375446796417 2023-01-22 08:22:09.315444: step: 172/77, loss: 0.06430349498987198 2023-01-22 08:22:10.655028: step: 176/77, loss: 0.04448110982775688 2023-01-22 08:22:11.967232: step: 180/77, loss: 0.008215603418648243 2023-01-22 08:22:13.272082: step: 184/77, loss: 0.0498255118727684 2023-01-22 08:22:14.596494: step: 188/77, loss: 0.013322685845196247 2023-01-22 08:22:15.948738: step: 192/77, loss: 0.03279253840446472 2023-01-22 08:22:17.281337: step: 196/77, loss: 0.008535334840416908 2023-01-22 08:22:18.656641: step: 200/77, loss: 0.020143844187259674 2023-01-22 08:22:19.960669: step: 204/77, loss: 0.015944061800837517 2023-01-22 08:22:21.234620: step: 208/77, loss: 0.023468907922506332 2023-01-22 08:22:22.563386: step: 212/77, loss: 0.017244024202227592 2023-01-22 08:22:23.892294: step: 216/77, loss: 0.039418622851371765 2023-01-22 08:22:25.237323: step: 220/77, loss: 0.019093813374638557 2023-01-22 08:22:26.554448: step: 224/77, loss: 0.014170338399708271 2023-01-22 08:22:27.873687: step: 228/77, loss: 0.010956652462482452 2023-01-22 08:22:29.219625: step: 232/77, loss: 0.06017700955271721 2023-01-22 08:22:30.478207: step: 236/77, loss: 0.012261530384421349 2023-01-22 08:22:31.767448: step: 240/77, loss: 0.006446256302297115 2023-01-22 08:22:33.067121: step: 244/77, loss: 0.0848538726568222 2023-01-22 08:22:34.351883: step: 248/77, loss: 0.012097777798771858 2023-01-22 08:22:35.652310: step: 252/77, loss: 0.04693004861474037 2023-01-22 08:22:36.954461: step: 256/77, loss: 0.056142307817935944 2023-01-22 08:22:38.249759: step: 260/77, loss: 0.042162343859672546 2023-01-22 08:22:39.591271: step: 264/77, loss: 0.06296426802873611 2023-01-22 08:22:40.941570: step: 268/77, loss: 0.005121381487697363 2023-01-22 08:22:42.237205: step: 272/77, loss: 0.0776234120130539 2023-01-22 
08:22:43.595052: step: 276/77, loss: 0.1735001653432846 2023-01-22 08:22:44.907942: step: 280/77, loss: 0.042865149676799774 2023-01-22 08:22:46.210921: step: 284/77, loss: 0.004002364352345467 2023-01-22 08:22:47.505560: step: 288/77, loss: 0.020344601944088936 2023-01-22 08:22:48.831326: step: 292/77, loss: 0.06679122149944305 2023-01-22 08:22:50.134249: step: 296/77, loss: 0.003160992171615362 2023-01-22 08:22:51.452218: step: 300/77, loss: 0.001810312969610095 2023-01-22 08:22:52.748270: step: 304/77, loss: 0.031477395445108414 2023-01-22 08:22:54.030734: step: 308/77, loss: 0.09544059634208679 2023-01-22 08:22:55.329925: step: 312/77, loss: 0.038570526987314224 2023-01-22 08:22:56.625300: step: 316/77, loss: 0.012956952676177025 2023-01-22 08:22:57.905086: step: 320/77, loss: 0.13954588770866394 2023-01-22 08:22:59.268315: step: 324/77, loss: 0.07902668416500092 2023-01-22 08:23:00.580323: step: 328/77, loss: 0.018463322892785072 2023-01-22 08:23:01.890940: step: 332/77, loss: 0.019219795241951942 2023-01-22 08:23:03.195807: step: 336/77, loss: 0.05267266929149628 2023-01-22 08:23:04.518621: step: 340/77, loss: 0.020226020365953445 2023-01-22 08:23:05.853307: step: 344/77, loss: 0.017935145646333694 2023-01-22 08:23:07.143936: step: 348/77, loss: 0.12345309555530548 2023-01-22 08:23:08.434999: step: 352/77, loss: 0.07177512347698212 2023-01-22 08:23:09.791260: step: 356/77, loss: 0.05010971054434776 2023-01-22 08:23:11.129050: step: 360/77, loss: 0.05248473584651947 2023-01-22 08:23:12.448896: step: 364/77, loss: 0.1049971655011177 2023-01-22 08:23:13.839884: step: 368/77, loss: 0.02946937084197998 2023-01-22 08:23:15.157867: step: 372/77, loss: 0.023977501317858696 2023-01-22 08:23:16.471003: step: 376/77, loss: 0.05141597241163254 2023-01-22 08:23:17.761664: step: 380/77, loss: 0.02865361049771309 2023-01-22 08:23:19.045189: step: 384/77, loss: 0.08176169544458389 2023-01-22 08:23:20.346745: step: 388/77, loss: 0.0032761467155069113 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 4} Test Chinese: {'template': {'p': 0.9245283018867925, 'r': 0.37404580152671757, 'f1': 0.5326086956521738}, 'slot': {'p': 0.5625, 'r': 0.00818926296633303, 'f1': 0.016143497757847534}, 'combined': 0.008598167284070968, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 4} Test Korean: {'template': {'p': 0.9245283018867925, 'r': 0.37404580152671757, 'f1': 0.5326086956521738}, 'slot': {'p': 0.5625, 'r': 0.00818926296633303, 'f1': 0.016143497757847534}, 'combined': 0.008598167284070968, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 4} Test Russian: {'template': {'p': 0.9245283018867925, 'r': 0.37404580152671757, 'f1': 0.5326086956521738}, 'slot': {'p': 0.5625, 'r': 0.00818926296633303, 'f1': 0.016143497757847534}, 'combined': 0.008598167284070968, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 
'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:25:13.745257: step: 4/77, loss: 0.02670639380812645 2023-01-22 08:25:15.034639: step: 8/77, loss: 0.011545292101800442 2023-01-22 08:25:16.343582: step: 12/77, loss: 0.02020694874227047 2023-01-22 08:25:17.697380: step: 16/77, loss: 0.027690250426530838 2023-01-22 08:25:18.970912: step: 20/77, loss: 0.02767796441912651 2023-01-22 08:25:20.300449: step: 24/77, loss: 0.009519163519144058 2023-01-22 08:25:21.584555: step: 28/77, loss: 0.025326617062091827 2023-01-22 08:25:22.912917: step: 32/77, loss: 0.04947042465209961 2023-01-22 08:25:24.263190: step: 36/77, loss: 0.0008288826793432236 2023-01-22 08:25:25.578199: step: 40/77, loss: 0.02415475994348526 2023-01-22 08:25:26.853749: step: 44/77, loss: 0.03522857651114464 2023-01-22 08:25:28.171818: step: 48/77, loss: 0.0046325488947331905 2023-01-22 08:25:29.490185: step: 52/77, loss: 0.04867120087146759 2023-01-22 08:25:30.753361: step: 56/77, loss: 0.004921335726976395 2023-01-22 
08:25:32.059848: step: 60/77, loss: 0.027160894125699997 2023-01-22 08:25:33.392943: step: 64/77, loss: 0.01739886961877346 2023-01-22 08:25:34.691146: step: 68/77, loss: 0.0029781265184283257 2023-01-22 08:25:35.970274: step: 72/77, loss: 0.011755847372114658 2023-01-22 08:25:37.253908: step: 76/77, loss: 0.002122030593454838 2023-01-22 08:25:38.545151: step: 80/77, loss: 0.0113228689879179 2023-01-22 08:25:39.831511: step: 84/77, loss: 0.038615792989730835 2023-01-22 08:25:41.160227: step: 88/77, loss: 0.04461674764752388 2023-01-22 08:25:42.482004: step: 92/77, loss: 0.010664014145731926 2023-01-22 08:25:43.777570: step: 96/77, loss: 0.03429734334349632 2023-01-22 08:25:45.086138: step: 100/77, loss: 0.005036661867052317 2023-01-22 08:25:46.344303: step: 104/77, loss: 0.026485158130526543 2023-01-22 08:25:47.692492: step: 108/77, loss: 0.019079767167568207 2023-01-22 08:25:48.980060: step: 112/77, loss: 0.015616081655025482 2023-01-22 08:25:50.301235: step: 116/77, loss: 0.042128726840019226 2023-01-22 08:25:51.590409: step: 120/77, loss: 0.009291108697652817 2023-01-22 08:25:52.871359: step: 124/77, loss: 0.02643146552145481 2023-01-22 08:25:54.168377: step: 128/77, loss: 9.87911771517247e-05 2023-01-22 08:25:55.490697: step: 132/77, loss: 0.0171172134578228 2023-01-22 08:25:56.802976: step: 136/77, loss: 0.07349997013807297 2023-01-22 08:25:58.110861: step: 140/77, loss: 0.01097109168767929 2023-01-22 08:25:59.425417: step: 144/77, loss: 0.020437665283679962 2023-01-22 08:26:00.675770: step: 148/77, loss: 0.011600209400057793 2023-01-22 08:26:02.006009: step: 152/77, loss: 0.0519060380756855 2023-01-22 08:26:03.351555: step: 156/77, loss: 0.013362875208258629 2023-01-22 08:26:04.572265: step: 160/77, loss: 0.025527773424983025 2023-01-22 08:26:05.897299: step: 164/77, loss: 0.004494365304708481 2023-01-22 08:26:07.196823: step: 168/77, loss: 0.03141060471534729 2023-01-22 08:26:08.457710: step: 172/77, loss: 0.02037169598042965 2023-01-22 08:26:09.772828: step: 176/77, loss: 0.024241285398602486 2023-01-22 08:26:11.105835: step: 180/77, loss: 0.012550034560263157 2023-01-22 08:26:12.441935: step: 184/77, loss: 0.06026304140686989 2023-01-22 08:26:13.809019: step: 188/77, loss: 0.012313876301050186 2023-01-22 08:26:15.107316: step: 192/77, loss: 0.016531143337488174 2023-01-22 08:26:16.445828: step: 196/77, loss: 0.043189384043216705 2023-01-22 08:26:17.773975: step: 200/77, loss: 0.013896309770643711 2023-01-22 08:26:19.101381: step: 204/77, loss: 0.0076545728370547295 2023-01-22 08:26:20.409925: step: 208/77, loss: 0.043936245143413544 2023-01-22 08:26:21.684242: step: 212/77, loss: 0.0009481979068368673 2023-01-22 08:26:22.957110: step: 216/77, loss: 0.040946513414382935 2023-01-22 08:26:24.235310: step: 220/77, loss: 0.021142808720469475 2023-01-22 08:26:25.543257: step: 224/77, loss: 0.039410270750522614 2023-01-22 08:26:26.847550: step: 228/77, loss: 0.004747491329908371 2023-01-22 08:26:28.157321: step: 232/77, loss: 0.025292804464697838 2023-01-22 08:26:29.478553: step: 236/77, loss: 0.05518924817442894 2023-01-22 08:26:30.825761: step: 240/77, loss: 0.005559179000556469 2023-01-22 08:26:32.147272: step: 244/77, loss: 0.018109621480107307 2023-01-22 08:26:33.421875: step: 248/77, loss: 0.1298125833272934 2023-01-22 08:26:34.732787: step: 252/77, loss: 0.022552266716957092 2023-01-22 08:26:36.030131: step: 256/77, loss: 0.17460715770721436 2023-01-22 08:26:37.379569: step: 260/77, loss: 0.02985825017094612 2023-01-22 08:26:38.680303: step: 264/77, loss: 0.019722308963537216 
2023-01-22 08:26:39.977278: step: 268/77, loss: 0.009370415471494198 2023-01-22 08:26:41.268502: step: 272/77, loss: 0.05955535173416138 2023-01-22 08:26:42.618319: step: 276/77, loss: 0.11409030854701996 2023-01-22 08:26:43.953408: step: 280/77, loss: 0.025609876960515976 2023-01-22 08:26:45.248835: step: 284/77, loss: 0.0021118037402629852 2023-01-22 08:26:46.491183: step: 288/77, loss: 0.018015822395682335 2023-01-22 08:26:47.768182: step: 292/77, loss: 0.026650303974747658 2023-01-22 08:26:49.082345: step: 296/77, loss: 0.044930506497621536 2023-01-22 08:26:50.382173: step: 300/77, loss: 0.1032029390335083 2023-01-22 08:26:51.693876: step: 304/77, loss: 0.005074769724160433 2023-01-22 08:26:52.967510: step: 308/77, loss: 0.036885812878608704 2023-01-22 08:26:54.331040: step: 312/77, loss: 0.006821052171289921 2023-01-22 08:26:55.639927: step: 316/77, loss: 0.036779746413230896 2023-01-22 08:26:56.968244: step: 320/77, loss: 0.009868866764008999 2023-01-22 08:26:58.286315: step: 324/77, loss: 0.026132123544812202 2023-01-22 08:26:59.611398: step: 328/77, loss: 0.0059155626222491264 2023-01-22 08:27:00.924797: step: 332/77, loss: 0.022484319284558296 2023-01-22 08:27:02.283008: step: 336/77, loss: 0.04049109295010567 2023-01-22 08:27:03.582197: step: 340/77, loss: 0.039941079914569855 2023-01-22 08:27:04.932351: step: 344/77, loss: 0.028673361986875534 2023-01-22 08:27:06.282078: step: 348/77, loss: 0.05313975736498833 2023-01-22 08:27:07.601211: step: 352/77, loss: 0.03900352865457535 2023-01-22 08:27:08.904617: step: 356/77, loss: 0.04854501783847809 2023-01-22 08:27:10.161489: step: 360/77, loss: 0.05382800102233887 2023-01-22 08:27:11.490807: step: 364/77, loss: 0.001742333872243762 2023-01-22 08:27:12.792884: step: 368/77, loss: 0.024771321564912796 2023-01-22 08:27:14.070626: step: 372/77, loss: 0.03442703187465668 2023-01-22 08:27:15.421002: step: 376/77, loss: 0.03055429458618164 2023-01-22 08:27:16.769303: step: 380/77, loss: 0.0014426014386117458 2023-01-22 08:27:18.056380: step: 384/77, loss: 0.035622939467430115 2023-01-22 08:27:19.341639: step: 388/77, loss: 0.0279096569865942 ================================================== Loss: 0.029 -------------------- Dev Chinese: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05014287031062771, 'epoch': 5} Test Chinese: {'template': {'p': 0.8888888888888888, 'r': 0.42748091603053434, 'f1': 0.5773195876288659}, 'slot': {'p': 0.5862068965517241, 'r': 0.015468607825295723, 'f1': 0.030141843971631208}, 'combined': 0.017401476932075746, 'epoch': 5} Dev Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05014287031062771, 'epoch': 5} Test Korean: {'template': {'p': 0.8870967741935484, 'r': 0.4198473282442748, 'f1': 0.5699481865284974}, 'slot': {'p': 0.5714285714285714, 'r': 0.014558689717925387, 'f1': 0.028393966282165038}, 'combined': 0.016183089590871266, 'epoch': 5} Dev Russian: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05014287031062771, 'epoch': 5} Test Russian: {'template': {'p': 0.8888888888888888, 'r': 0.42748091603053434, 'f1': 0.5773195876288659}, 'slot': {'p': 0.5862068965517241, 'r': 
0.015468607825295723, 'f1': 0.030141843971631208}, 'combined': 0.017401476932075746, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:29:12.794450: step: 4/77, loss: 0.0005450226599350572 2023-01-22 08:29:14.084953: step: 8/77, loss: 0.005259404890239239 2023-01-22 08:29:15.387082: step: 12/77, loss: 0.019649960100650787 2023-01-22 08:29:16.661713: step: 16/77, loss: 0.0025430868845432997 2023-01-22 08:29:18.001175: step: 20/77, loss: 0.004815719556063414 2023-01-22 08:29:19.273890: step: 24/77, loss: 0.00034457709989510477 2023-01-22 08:29:20.633872: step: 28/77, loss: 0.01655828207731247 2023-01-22 08:29:21.938711: step: 32/77, loss: 0.016402754932641983 2023-01-22 08:29:23.267257: step: 36/77, loss: 0.006076469086110592 2023-01-22 08:29:24.573657: step: 40/77, loss: 0.019712939858436584 
2023-01-22 08:29:25.909241: step: 44/77, loss: 0.057978931814432144 2023-01-22 08:29:27.194554: step: 48/77, loss: 0.010279682464897633 2023-01-22 08:29:28.545243: step: 52/77, loss: 0.028372686356306076 2023-01-22 08:29:29.854521: step: 56/77, loss: 0.030109090730547905 2023-01-22 08:29:31.141094: step: 60/77, loss: 0.0027865376323461533 2023-01-22 08:29:32.431387: step: 64/77, loss: 0.03058498352766037 2023-01-22 08:29:33.735569: step: 68/77, loss: 0.05856921151280403 2023-01-22 08:29:35.083195: step: 72/77, loss: 0.02341497875750065 2023-01-22 08:29:36.381127: step: 76/77, loss: 0.07214264571666718 2023-01-22 08:29:37.664627: step: 80/77, loss: 0.0024079028517007828 2023-01-22 08:29:38.993273: step: 84/77, loss: 0.02960231341421604 2023-01-22 08:29:40.245952: step: 88/77, loss: 0.041948989033699036 2023-01-22 08:29:41.560887: step: 92/77, loss: 0.0790402740240097 2023-01-22 08:29:42.875849: step: 96/77, loss: 0.025003444403409958 2023-01-22 08:29:44.187539: step: 100/77, loss: 0.03993772715330124 2023-01-22 08:29:45.481227: step: 104/77, loss: 0.0716061219573021 2023-01-22 08:29:46.766460: step: 108/77, loss: 0.010484387166798115 2023-01-22 08:29:48.103167: step: 112/77, loss: 0.004315068945288658 2023-01-22 08:29:49.401392: step: 116/77, loss: 0.0012634468730539083 2023-01-22 08:29:50.753650: step: 120/77, loss: 0.005074080545455217 2023-01-22 08:29:52.056019: step: 124/77, loss: 0.04028032720088959 2023-01-22 08:29:53.385547: step: 128/77, loss: 0.0018830453045666218 2023-01-22 08:29:54.683664: step: 132/77, loss: 0.0014695585705339909 2023-01-22 08:29:55.976002: step: 136/77, loss: 0.023692918941378593 2023-01-22 08:29:57.326773: step: 140/77, loss: 0.0029780231416225433 2023-01-22 08:29:58.637815: step: 144/77, loss: 0.016965823248028755 2023-01-22 08:29:59.949323: step: 148/77, loss: 0.06457682698965073 2023-01-22 08:30:01.221268: step: 152/77, loss: 0.007344848942011595 2023-01-22 08:30:02.507818: step: 156/77, loss: 0.012553246691823006 2023-01-22 08:30:03.805482: step: 160/77, loss: 0.03706691041588783 2023-01-22 08:30:05.108491: step: 164/77, loss: 0.03157980740070343 2023-01-22 08:30:06.388459: step: 168/77, loss: 0.009069087915122509 2023-01-22 08:30:07.706002: step: 172/77, loss: 0.15711651742458344 2023-01-22 08:30:09.076273: step: 176/77, loss: 0.01679708994925022 2023-01-22 08:30:10.375560: step: 180/77, loss: 0.043935131281614304 2023-01-22 08:30:11.710758: step: 184/77, loss: 0.00011045370774809271 2023-01-22 08:30:13.005829: step: 188/77, loss: 0.06068781763315201 2023-01-22 08:30:14.293538: step: 192/77, loss: 0.020912062376737595 2023-01-22 08:30:15.579134: step: 196/77, loss: 0.050123170018196106 2023-01-22 08:30:16.870980: step: 200/77, loss: 0.032013922929763794 2023-01-22 08:30:18.199929: step: 204/77, loss: 0.0369759202003479 2023-01-22 08:30:19.553139: step: 208/77, loss: 0.016368011012673378 2023-01-22 08:30:20.870588: step: 212/77, loss: 0.007947578094899654 2023-01-22 08:30:22.221643: step: 216/77, loss: 0.015159336850047112 2023-01-22 08:30:23.557347: step: 220/77, loss: 0.04020078480243683 2023-01-22 08:30:24.880177: step: 224/77, loss: 0.007312280125916004 2023-01-22 08:30:26.209932: step: 228/77, loss: 0.02025928534567356 2023-01-22 08:30:27.537862: step: 232/77, loss: 0.004918534308671951 2023-01-22 08:30:28.890264: step: 236/77, loss: 0.025332044810056686 2023-01-22 08:30:30.241826: step: 240/77, loss: 0.054991841316223145 2023-01-22 08:30:31.545772: step: 244/77, loss: 0.0054244897328317165 2023-01-22 08:30:32.850664: step: 248/77, loss: 
0.04276290535926819 2023-01-22 08:30:34.110628: step: 252/77, loss: 0.004564788192510605 2023-01-22 08:30:35.464927: step: 256/77, loss: 0.010132751427590847 2023-01-22 08:30:36.801985: step: 260/77, loss: 0.05739155039191246 2023-01-22 08:30:38.061849: step: 264/77, loss: 0.055207569152116776 2023-01-22 08:30:39.402045: step: 268/77, loss: 0.01842823065817356 2023-01-22 08:30:40.712318: step: 272/77, loss: 0.004211059771478176 2023-01-22 08:30:42.028936: step: 276/77, loss: 0.0071431114338338375 2023-01-22 08:30:43.382724: step: 280/77, loss: 0.016630031168460846 2023-01-22 08:30:44.728543: step: 284/77, loss: 0.0799979418516159 2023-01-22 08:30:46.036033: step: 288/77, loss: 0.04651796817779541 2023-01-22 08:30:47.354725: step: 292/77, loss: 0.0067449212074279785 2023-01-22 08:30:48.725999: step: 296/77, loss: 0.0007620899705216289 2023-01-22 08:30:50.081969: step: 300/77, loss: 0.05327056720852852 2023-01-22 08:30:51.435146: step: 304/77, loss: 0.000819915032479912 2023-01-22 08:30:52.738929: step: 308/77, loss: 0.06647086143493652 2023-01-22 08:30:54.056072: step: 312/77, loss: 0.04102443531155586 2023-01-22 08:30:55.397672: step: 316/77, loss: 0.009362781420350075 2023-01-22 08:30:56.726213: step: 320/77, loss: 0.006439851596951485 2023-01-22 08:30:58.039200: step: 324/77, loss: 0.0069610318168997765 2023-01-22 08:30:59.364861: step: 328/77, loss: 0.02070482261478901 2023-01-22 08:31:00.681896: step: 332/77, loss: 0.004380959086120129 2023-01-22 08:31:02.003080: step: 336/77, loss: 0.02864585630595684 2023-01-22 08:31:03.317266: step: 340/77, loss: 0.018239619210362434 2023-01-22 08:31:04.655498: step: 344/77, loss: 0.0008536122040823102 2023-01-22 08:31:05.992892: step: 348/77, loss: 0.015499288216233253 2023-01-22 08:31:07.317535: step: 352/77, loss: 0.08047214150428772 2023-01-22 08:31:08.641761: step: 356/77, loss: 0.06944243609905243 2023-01-22 08:31:10.027821: step: 360/77, loss: 0.0006930158706381917 2023-01-22 08:31:11.310134: step: 364/77, loss: 0.01632937416434288 2023-01-22 08:31:12.617078: step: 368/77, loss: 0.10148210823535919 2023-01-22 08:31:13.987400: step: 372/77, loss: 0.024788234382867813 2023-01-22 08:31:15.329019: step: 376/77, loss: 0.024317339062690735 2023-01-22 08:31:16.582602: step: 380/77, loss: 0.0365629717707634 2023-01-22 08:31:17.913800: step: 384/77, loss: 0.010954131372272968 2023-01-22 08:31:19.231565: step: 388/77, loss: 0.002385278232395649 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Chinese: {'template': {'p': 0.9016393442622951, 'r': 0.4198473282442748, 'f1': 0.5729166666666666}, 'slot': {'p': 0.5, 'r': 0.006369426751592357, 'f1': 0.012578616352201259}, 'combined': 0.0072064989517819705, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Korean: {'template': {'p': 0.9016393442622951, 'r': 0.4198473282442748, 'f1': 0.5729166666666666}, 'slot': {'p': 0.5, 'r': 0.006369426751592357, 'f1': 0.012578616352201259}, 'combined': 0.0072064989517819705, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 6} Test Russian: {'template': {'p': 0.9016393442622951, 'r': 0.4198473282442748, 'f1': 0.5729166666666666}, 'slot': {'p': 0.5, 'r': 0.006369426751592357, 'f1': 0.012578616352201259}, 'combined': 0.0072064989517819705, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:33:12.405034: step: 4/77, loss: 0.005366637371480465 2023-01-22 08:33:13.750870: step: 8/77, loss: 0.12268777936697006 2023-01-22 08:33:15.010898: step: 12/77, loss: 0.09095238894224167 2023-01-22 08:33:16.298852: step: 16/77, loss: 0.029042690992355347 2023-01-22 08:33:17.602550: step: 20/77, loss: 0.03922734782099724 2023-01-22 08:33:18.827636: step: 24/77, loss: 0.046124693006277084 2023-01-22 08:33:20.142907: step: 28/77, loss: 0.013856401666998863 2023-01-22 08:33:21.448429: step: 32/77, loss: 
0.008207427337765694 2023-01-22 08:33:22.740546: step: 36/77, loss: 0.0077070193365216255 2023-01-22 08:33:24.000350: step: 40/77, loss: 0.017933424562215805 2023-01-22 08:33:25.281598: step: 44/77, loss: 0.05957060679793358 2023-01-22 08:33:26.577009: step: 48/77, loss: 0.006128543522208929 2023-01-22 08:33:27.894539: step: 52/77, loss: 0.018483951687812805 2023-01-22 08:33:29.221767: step: 56/77, loss: 0.0024321037344634533 2023-01-22 08:33:30.518781: step: 60/77, loss: 0.015778936445713043 2023-01-22 08:33:31.836669: step: 64/77, loss: 0.024857226759195328 2023-01-22 08:33:33.159249: step: 68/77, loss: 0.052271053194999695 2023-01-22 08:33:34.478689: step: 72/77, loss: 0.006552603095769882 2023-01-22 08:33:35.809283: step: 76/77, loss: 0.008574525825679302 2023-01-22 08:33:37.143945: step: 80/77, loss: 0.078099325299263 2023-01-22 08:33:38.462352: step: 84/77, loss: 0.04347037151455879 2023-01-22 08:33:39.785103: step: 88/77, loss: 0.014808414503932 2023-01-22 08:33:41.095517: step: 92/77, loss: 0.004002019762992859 2023-01-22 08:33:42.410589: step: 96/77, loss: 0.038667820394039154 2023-01-22 08:33:43.800850: step: 100/77, loss: 0.0060221110470592976 2023-01-22 08:33:45.118155: step: 104/77, loss: 0.027203550562262535 2023-01-22 08:33:46.382516: step: 108/77, loss: 0.053258925676345825 2023-01-22 08:33:47.734454: step: 112/77, loss: 0.00490473210811615 2023-01-22 08:33:49.076629: step: 116/77, loss: 0.0033097327686846256 2023-01-22 08:33:50.383178: step: 120/77, loss: 0.0017886572750285268 2023-01-22 08:33:51.712716: step: 124/77, loss: 0.03101409412920475 2023-01-22 08:33:53.001810: step: 128/77, loss: 0.028752855956554413 2023-01-22 08:33:54.274965: step: 132/77, loss: 0.0011797421611845493 2023-01-22 08:33:55.552949: step: 136/77, loss: 0.027044154703617096 2023-01-22 08:33:56.840999: step: 140/77, loss: 0.0021191895939409733 2023-01-22 08:33:58.168108: step: 144/77, loss: 0.004732126835733652 2023-01-22 08:33:59.453348: step: 148/77, loss: 0.15178532898426056 2023-01-22 08:34:00.755244: step: 152/77, loss: 0.04195141792297363 2023-01-22 08:34:02.014128: step: 156/77, loss: 0.001734606921672821 2023-01-22 08:34:03.344290: step: 160/77, loss: 0.004672032780945301 2023-01-22 08:34:04.662433: step: 164/77, loss: 0.04151839017868042 2023-01-22 08:34:05.980642: step: 168/77, loss: 0.03094092383980751 2023-01-22 08:34:07.264933: step: 172/77, loss: 0.012236851267516613 2023-01-22 08:34:08.577688: step: 176/77, loss: 0.012080186977982521 2023-01-22 08:34:09.895803: step: 180/77, loss: 0.0012469030916690826 2023-01-22 08:34:11.223410: step: 184/77, loss: 0.009040589444339275 2023-01-22 08:34:12.492984: step: 188/77, loss: 0.003566809929907322 2023-01-22 08:34:13.786179: step: 192/77, loss: 0.01853303611278534 2023-01-22 08:34:15.090731: step: 196/77, loss: 0.024924924597144127 2023-01-22 08:34:16.382698: step: 200/77, loss: 0.009820442646741867 2023-01-22 08:34:17.666519: step: 204/77, loss: 0.005277830176055431 2023-01-22 08:34:19.005458: step: 208/77, loss: 0.0072280727326869965 2023-01-22 08:34:20.288912: step: 212/77, loss: 5.547390173887834e-05 2023-01-22 08:34:21.588079: step: 216/77, loss: 0.09233056008815765 2023-01-22 08:34:22.940900: step: 220/77, loss: 0.0022794553078711033 2023-01-22 08:34:24.299643: step: 224/77, loss: 0.06972534954547882 2023-01-22 08:34:25.578267: step: 228/77, loss: 0.007869555614888668 2023-01-22 08:34:26.872829: step: 232/77, loss: 0.00014271988766267896 2023-01-22 08:34:28.160360: step: 236/77, loss: 0.03628724440932274 2023-01-22 08:34:29.453443: step: 
240/77, loss: 0.03799661621451378 2023-01-22 08:34:30.733177: step: 244/77, loss: 0.007743775844573975 2023-01-22 08:34:32.057240: step: 248/77, loss: 0.0032844683155417442 2023-01-22 08:34:33.353541: step: 252/77, loss: 0.08080201596021652 2023-01-22 08:34:34.687508: step: 256/77, loss: 0.030658353120088577 2023-01-22 08:34:35.992314: step: 260/77, loss: 0.030249860137701035 2023-01-22 08:34:37.288593: step: 264/77, loss: 0.01483116950839758 2023-01-22 08:34:38.620522: step: 268/77, loss: 0.01604858599603176 2023-01-22 08:34:39.919472: step: 272/77, loss: 0.010052401572465897 2023-01-22 08:34:41.277384: step: 276/77, loss: 0.0016898037865757942 2023-01-22 08:34:42.598133: step: 280/77, loss: 0.0067345574498176575 2023-01-22 08:34:43.932646: step: 284/77, loss: 3.294476482551545e-05 2023-01-22 08:34:45.250080: step: 288/77, loss: 0.016307225450873375 2023-01-22 08:34:46.627384: step: 292/77, loss: 0.014899727888405323 2023-01-22 08:34:47.948065: step: 296/77, loss: 0.045760296285152435 2023-01-22 08:34:49.257005: step: 300/77, loss: 0.03345262631773949 2023-01-22 08:34:50.555491: step: 304/77, loss: 0.008122695609927177 2023-01-22 08:34:51.845343: step: 308/77, loss: 0.01835659332573414 2023-01-22 08:34:53.186399: step: 312/77, loss: 0.01669490337371826 2023-01-22 08:34:54.537405: step: 316/77, loss: 0.008289380930364132 2023-01-22 08:34:55.819256: step: 320/77, loss: 0.011867504566907883 2023-01-22 08:34:57.188673: step: 324/77, loss: 0.03273586556315422 2023-01-22 08:34:58.552976: step: 328/77, loss: 0.012369364500045776 2023-01-22 08:34:59.877077: step: 332/77, loss: 0.03042646124958992 2023-01-22 08:35:01.146584: step: 336/77, loss: 0.03948419541120529 2023-01-22 08:35:02.415294: step: 340/77, loss: 0.07989335060119629 2023-01-22 08:35:03.713939: step: 344/77, loss: 0.020313072949647903 2023-01-22 08:35:05.039290: step: 348/77, loss: 0.013975740410387516 2023-01-22 08:35:06.361586: step: 352/77, loss: 0.05108807608485222 2023-01-22 08:35:07.659952: step: 356/77, loss: 0.014707177877426147 2023-01-22 08:35:08.969448: step: 360/77, loss: 0.04847146198153496 2023-01-22 08:35:10.269474: step: 364/77, loss: 0.004290463402867317 2023-01-22 08:35:11.506547: step: 368/77, loss: 0.015383971855044365 2023-01-22 08:35:12.853175: step: 372/77, loss: 0.002747519174590707 2023-01-22 08:35:14.146049: step: 376/77, loss: 0.008910756558179855 2023-01-22 08:35:15.418800: step: 380/77, loss: 0.009819927625358105 2023-01-22 08:35:16.753478: step: 384/77, loss: 0.0014792424626648426 2023-01-22 08:35:18.015756: step: 388/77, loss: 0.027898721396923065 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9166666666666666, 'r': 0.4198473282442748, 'f1': 0.5759162303664921}, 'slot': {'p': 0.5238095238095238, 'r': 0.010009099181073703, 'f1': 0.019642857142857142}, 'combined': 0.011312640239341809, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9166666666666666, 'r': 0.4198473282442748, 'f1': 0.5759162303664921}, 'slot': {'p': 0.5238095238095238, 'r': 0.010009099181073703, 'f1': 0.019642857142857142}, 'combined': 0.011312640239341809, 'epoch': 
7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.9166666666666666, 'r': 0.4198473282442748, 'f1': 0.5759162303664921}, 'slot': {'p': 0.5238095238095238, 'r': 0.010009099181073703, 'f1': 0.019642857142857142}, 'combined': 0.011312640239341809, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:37:11.518932: step: 4/77, loss: 0.0008204178884625435 2023-01-22 08:37:12.776892: step: 8/77, loss: 0.011769868433475494 2023-01-22 08:37:14.045963: step: 12/77, loss: 0.0043268040753901005 2023-01-22 08:37:15.374623: step: 16/77, loss: 0.0022208222653716803 2023-01-22 08:37:16.729579: step: 20/77, loss: 
0.0023637483827769756 2023-01-22 08:37:18.023970: step: 24/77, loss: 0.002883343491703272 2023-01-22 08:37:19.375595: step: 28/77, loss: 0.003954778891056776 2023-01-22 08:37:20.719369: step: 32/77, loss: 0.02303282544016838 2023-01-22 08:37:22.050120: step: 36/77, loss: 0.01751818135380745 2023-01-22 08:37:23.345798: step: 40/77, loss: 0.04591015726327896 2023-01-22 08:37:24.652173: step: 44/77, loss: 7.463184010703117e-05 2023-01-22 08:37:25.987209: step: 48/77, loss: 0.009197239764034748 2023-01-22 08:37:27.329034: step: 52/77, loss: 0.028276560828089714 2023-01-22 08:37:28.659974: step: 56/77, loss: 0.008027189411222935 2023-01-22 08:37:29.956342: step: 60/77, loss: 0.012188486754894257 2023-01-22 08:37:31.293627: step: 64/77, loss: 0.06895316392183304 2023-01-22 08:37:32.625847: step: 68/77, loss: 0.0003524815256241709 2023-01-22 08:37:33.953033: step: 72/77, loss: 0.004892442375421524 2023-01-22 08:37:35.282811: step: 76/77, loss: 0.007600066717714071 2023-01-22 08:37:36.611178: step: 80/77, loss: 0.00019381933088880032 2023-01-22 08:37:37.933220: step: 84/77, loss: 0.003237048164010048 2023-01-22 08:37:39.266136: step: 88/77, loss: 0.009226077236235142 2023-01-22 08:37:40.627300: step: 92/77, loss: 0.007336574140936136 2023-01-22 08:37:41.981251: step: 96/77, loss: 0.003843441605567932 2023-01-22 08:37:43.301788: step: 100/77, loss: 0.026638394221663475 2023-01-22 08:37:44.642636: step: 104/77, loss: 0.01824391447007656 2023-01-22 08:37:45.974646: step: 108/77, loss: 0.013902065344154835 2023-01-22 08:37:47.294818: step: 112/77, loss: 0.06014961376786232 2023-01-22 08:37:48.638598: step: 116/77, loss: 0.049358613789081573 2023-01-22 08:37:49.938436: step: 120/77, loss: 0.0051002344116568565 2023-01-22 08:37:51.252811: step: 124/77, loss: 0.12966176867485046 2023-01-22 08:37:52.525483: step: 128/77, loss: 0.03141997009515762 2023-01-22 08:37:53.825318: step: 132/77, loss: 0.012090899981558323 2023-01-22 08:37:55.151763: step: 136/77, loss: 0.08095104247331619 2023-01-22 08:37:56.443224: step: 140/77, loss: 0.019286353141069412 2023-01-22 08:37:57.737740: step: 144/77, loss: 0.012866070494055748 2023-01-22 08:37:59.026859: step: 148/77, loss: 0.03086090460419655 2023-01-22 08:38:00.319458: step: 152/77, loss: 0.11356306821107864 2023-01-22 08:38:01.669732: step: 156/77, loss: 0.05788556486368179 2023-01-22 08:38:02.941371: step: 160/77, loss: 0.014221753925085068 2023-01-22 08:38:04.270350: step: 164/77, loss: 0.0002134163660230115 2023-01-22 08:38:05.567391: step: 168/77, loss: 0.0071030305698513985 2023-01-22 08:38:06.875114: step: 172/77, loss: 0.036305420100688934 2023-01-22 08:38:08.211290: step: 176/77, loss: 0.024705661460757256 2023-01-22 08:38:09.506032: step: 180/77, loss: 0.019936632364988327 2023-01-22 08:38:10.796111: step: 184/77, loss: 0.00013617813237942755 2023-01-22 08:38:12.094386: step: 188/77, loss: 0.051699113100767136 2023-01-22 08:38:13.404174: step: 192/77, loss: 0.04250229150056839 2023-01-22 08:38:14.693062: step: 196/77, loss: 0.027253786101937294 2023-01-22 08:38:16.030209: step: 200/77, loss: 0.024770382791757584 2023-01-22 08:38:17.377024: step: 204/77, loss: 0.025686733424663544 2023-01-22 08:38:18.684914: step: 208/77, loss: 0.009457824751734734 2023-01-22 08:38:20.011278: step: 212/77, loss: 0.09252268075942993 2023-01-22 08:38:21.311589: step: 216/77, loss: 0.03673015534877777 2023-01-22 08:38:22.656911: step: 220/77, loss: 0.0023359288461506367 2023-01-22 08:38:23.936912: step: 224/77, loss: 0.02088875323534012 2023-01-22 08:38:25.237362: step: 
228/77, loss: 0.02472914569079876 2023-01-22 08:38:26.525002: step: 232/77, loss: 0.008620699867606163 2023-01-22 08:38:27.834121: step: 236/77, loss: 0.07274501025676727 2023-01-22 08:38:29.129659: step: 240/77, loss: 0.014064528979361057 2023-01-22 08:38:30.481214: step: 244/77, loss: 0.05628375709056854 2023-01-22 08:38:31.793643: step: 248/77, loss: 0.017337357625365257 2023-01-22 08:38:33.134914: step: 252/77, loss: 2.4178843887057155e-05 2023-01-22 08:38:34.426826: step: 256/77, loss: 0.004507299512624741 2023-01-22 08:38:35.756023: step: 260/77, loss: 0.015918653458356857 2023-01-22 08:38:37.026564: step: 264/77, loss: 0.029176900163292885 2023-01-22 08:38:38.291117: step: 268/77, loss: 0.02472537010908127 2023-01-22 08:38:39.564661: step: 272/77, loss: 0.020769568160176277 2023-01-22 08:38:40.891869: step: 276/77, loss: 0.033161960542201996 2023-01-22 08:38:42.218440: step: 280/77, loss: 0.010773420333862305 2023-01-22 08:38:43.522107: step: 284/77, loss: 0.006724311038851738 2023-01-22 08:38:44.847816: step: 288/77, loss: 0.0062055690214037895 2023-01-22 08:38:46.169209: step: 292/77, loss: 0.01167929358780384 2023-01-22 08:38:47.485792: step: 296/77, loss: 0.03370339423418045 2023-01-22 08:38:48.800792: step: 300/77, loss: 0.006366947665810585 2023-01-22 08:38:50.097780: step: 304/77, loss: 0.01827992871403694 2023-01-22 08:38:51.379291: step: 308/77, loss: 0.005884686019271612 2023-01-22 08:38:52.684013: step: 312/77, loss: 0.010750525631010532 2023-01-22 08:38:54.013786: step: 316/77, loss: 0.005311709363013506 2023-01-22 08:38:55.307391: step: 320/77, loss: 0.0023131745401769876 2023-01-22 08:38:56.689645: step: 324/77, loss: 0.007912065833806992 2023-01-22 08:38:58.076620: step: 328/77, loss: 0.0042124297469854355 2023-01-22 08:38:59.392649: step: 332/77, loss: 0.024382663890719414 2023-01-22 08:39:00.718308: step: 336/77, loss: 0.00031083874637261033 2023-01-22 08:39:01.984316: step: 340/77, loss: 0.017951298505067825 2023-01-22 08:39:03.271250: step: 344/77, loss: 0.023033462464809418 2023-01-22 08:39:04.607419: step: 348/77, loss: 0.027386359870433807 2023-01-22 08:39:05.899066: step: 352/77, loss: 0.05732313543558121 2023-01-22 08:39:07.227585: step: 356/77, loss: 0.0039208317175507545 2023-01-22 08:39:08.566217: step: 360/77, loss: 0.049601756036281586 2023-01-22 08:39:09.874338: step: 364/77, loss: 0.06384100764989853 2023-01-22 08:39:11.165169: step: 368/77, loss: 0.027912341058254242 2023-01-22 08:39:12.474577: step: 372/77, loss: 0.0020587339531630278 2023-01-22 08:39:13.809325: step: 376/77, loss: 0.003806506050750613 2023-01-22 08:39:15.111857: step: 380/77, loss: 0.002064730739220977 2023-01-22 08:39:16.435375: step: 384/77, loss: 0.0003069471858907491 2023-01-22 08:39:17.721891: step: 388/77, loss: 0.002804083051159978 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 8} Test Chinese: {'template': {'p': 0.9047619047619048, 'r': 0.4351145038167939, 'f1': 0.5876288659793814}, 'slot': {'p': 0.5, 'r': 0.009099181073703366, 'f1': 0.017873100983020553}, 'combined': 0.010502750062187333, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 8} Test Korean: {'template': {'p': 
0.9047619047619048, 'r': 0.4351145038167939, 'f1': 0.5876288659793814}, 'slot': {'p': 0.5, 'r': 0.009099181073703366, 'f1': 0.017873100983020553}, 'combined': 0.010502750062187333, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 8} Test Russian: {'template': {'p': 0.9047619047619048, 'r': 0.4351145038167939, 'f1': 0.5876288659793814}, 'slot': {'p': 0.5, 'r': 0.009099181073703366, 'f1': 0.017873100983020553}, 'combined': 0.010502750062187333, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:41:11.278831: step: 4/77, loss: 0.02468053065240383 2023-01-22 08:41:12.585731: step: 8/77, loss: 0.027540545910596848 2023-01-22 
08:41:13.905682: step: 12/77, loss: 0.030903879553079605 2023-01-22 08:41:15.187640: step: 16/77, loss: 0.010813569650053978 2023-01-22 08:41:16.505701: step: 20/77, loss: 0.003732758341357112 2023-01-22 08:41:17.752546: step: 24/77, loss: 0.03615504503250122 2023-01-22 08:41:19.089389: step: 28/77, loss: 0.009996151551604271 2023-01-22 08:41:20.412058: step: 32/77, loss: 0.0005582488956861198 2023-01-22 08:41:21.712656: step: 36/77, loss: 0.040143415331840515 2023-01-22 08:41:23.015954: step: 40/77, loss: 0.048482924699783325 2023-01-22 08:41:24.341919: step: 44/77, loss: 0.002379587385803461 2023-01-22 08:41:25.633677: step: 48/77, loss: 0.009470704942941666 2023-01-22 08:41:26.974145: step: 52/77, loss: 0.015861298888921738 2023-01-22 08:41:28.271070: step: 56/77, loss: 0.0008028687443584204 2023-01-22 08:41:29.565167: step: 60/77, loss: 0.00591190904378891 2023-01-22 08:41:30.877884: step: 64/77, loss: 0.023707589134573936 2023-01-22 08:41:32.137780: step: 68/77, loss: 0.00041971640894189477 2023-01-22 08:41:33.466100: step: 72/77, loss: 0.02400209940969944 2023-01-22 08:41:34.750448: step: 76/77, loss: 0.010313374921679497 2023-01-22 08:41:36.039363: step: 80/77, loss: 0.017669744789600372 2023-01-22 08:41:37.395797: step: 84/77, loss: 0.00019596553465817124 2023-01-22 08:41:38.703286: step: 88/77, loss: 0.030334951356053352 2023-01-22 08:41:40.035526: step: 92/77, loss: 0.00572825875133276 2023-01-22 08:41:41.361948: step: 96/77, loss: 0.029115553945302963 2023-01-22 08:41:42.669373: step: 100/77, loss: 0.006687483750283718 2023-01-22 08:41:43.988793: step: 104/77, loss: 0.006783424410969019 2023-01-22 08:41:45.283266: step: 108/77, loss: 0.0012595838634297252 2023-01-22 08:41:46.599459: step: 112/77, loss: 0.01739436574280262 2023-01-22 08:41:47.862224: step: 116/77, loss: 0.014384800568223 2023-01-22 08:41:49.176432: step: 120/77, loss: 0.06246951222419739 2023-01-22 08:41:50.501637: step: 124/77, loss: 0.010280019603669643 2023-01-22 08:41:51.797687: step: 128/77, loss: 0.028107155114412308 2023-01-22 08:41:53.094811: step: 132/77, loss: 0.00990170519798994 2023-01-22 08:41:54.389650: step: 136/77, loss: 0.0022096431348472834 2023-01-22 08:41:55.689187: step: 140/77, loss: 0.008368104696273804 2023-01-22 08:41:56.955710: step: 144/77, loss: 0.02378884330391884 2023-01-22 08:41:58.293743: step: 148/77, loss: 0.07476845383644104 2023-01-22 08:41:59.654895: step: 152/77, loss: 0.009604285471141338 2023-01-22 08:42:00.969653: step: 156/77, loss: 0.1093854010105133 2023-01-22 08:42:02.283158: step: 160/77, loss: 0.011621728539466858 2023-01-22 08:42:03.622797: step: 164/77, loss: 0.056823261082172394 2023-01-22 08:42:04.929128: step: 168/77, loss: 0.037123072892427444 2023-01-22 08:42:06.253037: step: 172/77, loss: 0.07171599566936493 2023-01-22 08:42:07.557527: step: 176/77, loss: 0.025615889579057693 2023-01-22 08:42:08.863994: step: 180/77, loss: 0.022923659533262253 2023-01-22 08:42:10.169551: step: 184/77, loss: 0.02796180173754692 2023-01-22 08:42:11.429249: step: 188/77, loss: 0.004988554865121841 2023-01-22 08:42:12.737264: step: 192/77, loss: 0.00012879565474577248 2023-01-22 08:42:14.052327: step: 196/77, loss: 0.02959856204688549 2023-01-22 08:42:15.360781: step: 200/77, loss: 0.03901619464159012 2023-01-22 08:42:16.662947: step: 204/77, loss: 0.04571767896413803 2023-01-22 08:42:17.976634: step: 208/77, loss: 0.014097994193434715 2023-01-22 08:42:19.315572: step: 212/77, loss: 0.010788314044475555 2023-01-22 08:42:20.632409: step: 216/77, loss: 0.05809639394283295 
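--------------------
Note on the logging cadence: the step counter in these loss lines advances by 4 between records, matching --accumulate_step 4, so each printed loss plausibly corresponds to one optimizer update taken after 4 micro-batches of size 10 (an effective batch of 40). train.py itself is not reproduced in this log, so the following is only a minimal sketch of that gradient-accumulation pattern under those assumptions; the model, data, and helper names here are stand-ins, not the actual script.

import torch
from torch import nn

accumulate_step = 4                      # --accumulate_step
model = nn.Linear(1024, 2)               # stand-in for the real model
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)   # --learning_rate
# dummy micro-batches of size 10 (--batch_size)
loader = [(torch.randn(10, 1024), torch.randint(0, 2, (10,))) for _ in range(16)]

optimizer.zero_grad()
for step, (x, y) in enumerate(loader, start=1):
    loss = nn.functional.cross_entropy(model(x), y)
    (loss / accumulate_step).backward()  # scale so summed grads average over the window
    if step % accumulate_step == 0:      # update and log every 4 micro-batches,
        optimizer.step()                 # which is where lines like
        optimizer.zero_grad()            # 'step: N/..., loss: ...' would be emitted
        print(f"step: {step}, loss: {loss.item():.6f}")
--------------------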
2023-01-22 08:42:21.942129: step: 220/77, loss: 0.0019819687586277723 2023-01-22 08:42:23.304287: step: 224/77, loss: 0.004335891455411911 2023-01-22 08:42:24.576794: step: 228/77, loss: 0.0004905299283564091 2023-01-22 08:42:25.890492: step: 232/77, loss: 0.06842028349637985 2023-01-22 08:42:27.213475: step: 236/77, loss: 0.007669140584766865 2023-01-22 08:42:28.507648: step: 240/77, loss: 0.01616811752319336 2023-01-22 08:42:29.810857: step: 244/77, loss: 0.03694730997085571 2023-01-22 08:42:31.102406: step: 248/77, loss: 0.015833966434001923 2023-01-22 08:42:32.373459: step: 252/77, loss: 0.11062652617692947 2023-01-22 08:42:33.658987: step: 256/77, loss: 0.028664622455835342 2023-01-22 08:42:35.011668: step: 260/77, loss: 0.003139983396977186 2023-01-22 08:42:36.276368: step: 264/77, loss: 0.04194479063153267 2023-01-22 08:42:37.595899: step: 268/77, loss: 0.007503472734242678 2023-01-22 08:42:38.887171: step: 272/77, loss: 0.02690306305885315 2023-01-22 08:42:40.253710: step: 276/77, loss: 0.01763150654733181 2023-01-22 08:42:41.542426: step: 280/77, loss: 0.001771294279024005 2023-01-22 08:42:42.869795: step: 284/77, loss: 0.000397772149881348 2023-01-22 08:42:44.200349: step: 288/77, loss: 0.006749084684997797 2023-01-22 08:42:45.516447: step: 292/77, loss: 0.04002555459737778 2023-01-22 08:42:46.770931: step: 296/77, loss: 0.009476988576352596 2023-01-22 08:42:48.106332: step: 300/77, loss: 0.010371961630880833 2023-01-22 08:42:49.386789: step: 304/77, loss: 0.0004451674467418343 2023-01-22 08:42:50.647840: step: 308/77, loss: 0.05008751153945923 2023-01-22 08:42:51.928372: step: 312/77, loss: 0.013565540313720703 2023-01-22 08:42:53.224896: step: 316/77, loss: 0.00715932622551918 2023-01-22 08:42:54.528990: step: 320/77, loss: 0.00995059683918953 2023-01-22 08:42:55.822320: step: 324/77, loss: 0.049439121037721634 2023-01-22 08:42:57.143060: step: 328/77, loss: 0.007938910275697708 2023-01-22 08:42:58.503021: step: 332/77, loss: 0.025580620393157005 2023-01-22 08:42:59.785274: step: 336/77, loss: 0.05015741288661957 2023-01-22 08:43:01.072682: step: 340/77, loss: 0.014777681790292263 2023-01-22 08:43:02.394428: step: 344/77, loss: 0.011329288594424725 2023-01-22 08:43:03.745739: step: 348/77, loss: 9.849500929703936e-05 2023-01-22 08:43:05.092644: step: 352/77, loss: 0.0007578809163533151 2023-01-22 08:43:06.456054: step: 356/77, loss: 0.0006742964615114033 2023-01-22 08:43:07.764815: step: 360/77, loss: 0.03660057112574577 2023-01-22 08:43:09.123985: step: 364/77, loss: 0.011663177981972694 2023-01-22 08:43:10.445129: step: 368/77, loss: 0.00030943931778892875 2023-01-22 08:43:11.693501: step: 372/77, loss: 0.00023238870198838413 2023-01-22 08:43:13.023092: step: 376/77, loss: 0.011897479183971882 2023-01-22 08:43:14.308442: step: 380/77, loss: 0.06606089323759079 2023-01-22 08:43:15.644244: step: 384/77, loss: 0.000210434605833143 2023-01-22 08:43:16.948184: step: 388/77, loss: 0.02883332222700119 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.8405797101449275, 'r': 0.44274809160305345, 'f1': 0.5800000000000001}, 'slot': {'p': 0.47058823529411764, 'r': 0.014558689717925387, 'f1': 0.02824360105913504}, 'combined': 0.016381288614298325, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.8382352941176471, 'r': 0.4351145038167939, 'f1': 0.5728643216080401}, 'slot': {'p': 0.47058823529411764, 'r': 0.014558689717925387, 'f1': 0.02824360105913504}, 'combined': 0.016179751360509517, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.8405797101449275, 'r': 0.44274809160305345, 'f1': 0.5800000000000001}, 'slot': {'p': 0.47058823529411764, 'r': 0.014558689717925387, 'f1': 0.02824360105913504}, 'combined': 0.016381288614298325, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 
--accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:45:10.321407: step: 4/77, loss: 0.007230168208479881 2023-01-22 08:45:11.667429: step: 8/77, loss: 0.028481965884566307 2023-01-22 08:45:13.000795: step: 12/77, loss: 0.004072078038007021 2023-01-22 08:45:14.273236: step: 16/77, loss: 9.96085800579749e-05 2023-01-22 08:45:15.587317: step: 20/77, loss: 0.013363640755414963 2023-01-22 08:45:16.931745: step: 24/77, loss: 0.021185237914323807 2023-01-22 08:45:18.265945: step: 28/77, loss: 0.0012835885863751173 2023-01-22 08:45:19.552183: step: 32/77, loss: 0.02527708373963833 2023-01-22 08:45:20.852362: step: 36/77, loss: 0.0063269371166825294 2023-01-22 08:45:22.113887: step: 40/77, loss: 0.0035296031273901463 2023-01-22 08:45:23.427040: step: 44/77, loss: 0.017808442935347557 2023-01-22 08:45:24.706359: step: 48/77, loss: 0.06293917447328568 2023-01-22 08:45:26.001936: step: 52/77, loss: 0.00010214370558969676 2023-01-22 08:45:27.321655: step: 56/77, loss: 0.0526653528213501 2023-01-22 08:45:28.633256: step: 60/77, loss: 0.021576354280114174 2023-01-22 08:45:29.915487: step: 64/77, loss: 0.0004523025418166071 2023-01-22 08:45:31.200584: step: 68/77, loss: 0.036175526678562164 2023-01-22 08:45:32.488593: step: 72/77, loss: 0.016709093004465103 2023-01-22 08:45:33.785132: step: 76/77, loss: 0.006656886078417301 2023-01-22 08:45:35.072320: step: 80/77, loss: 0.012240472249686718 2023-01-22 08:45:36.353828: step: 84/77, loss: 0.020623495802283287 2023-01-22 08:45:37.675172: step: 88/77, loss: 0.026960119605064392 2023-01-22 08:45:38.962439: step: 92/77, loss: 0.019822752103209496 2023-01-22 08:45:40.291580: step: 96/77, loss: 0.0030144131742417812 2023-01-22 08:45:41.584067: step: 100/77, loss: 0.019956866279244423 2023-01-22 08:45:42.928409: step: 104/77, loss: 0.012890620157122612 2023-01-22 08:45:44.306249: step: 108/77, loss: 0.019601669162511826 2023-01-22 08:45:45.598050: step: 112/77, loss: 0.009381377138197422 2023-01-22 08:45:46.859060: step: 116/77, loss: 0.005626625847071409 2023-01-22 08:45:48.170423: step: 120/77, loss: 0.017440086230635643 2023-01-22 08:45:49.502882: step: 124/77, loss: 0.02458116225898266 2023-01-22 08:45:50.831866: step: 128/77, loss: 0.001182088628411293 2023-01-22 08:45:52.116250: step: 132/77, loss: 0.015194879844784737 2023-01-22 08:45:53.493231: step: 136/77, loss: 0.058345600962638855 2023-01-22 08:45:54.888785: step: 140/77, loss: 0.0004595243954099715 2023-01-22 08:45:56.218444: step: 144/77, loss: 0.00038895985926501453 2023-01-22 08:45:57.533092: step: 148/77, loss: 0.002521295566111803 2023-01-22 08:45:58.848006: step: 152/77, loss: 0.004439116455614567 2023-01-22 08:46:00.192359: step: 156/77, loss: 0.021072743460536003 2023-01-22 08:46:01.533553: step: 160/77, loss: 0.0015568241942673922 2023-01-22 08:46:02.846541: step: 164/77, loss: 0.00011058291420340538 2023-01-22 08:46:04.139290: step: 168/77, loss: 0.008715417236089706 2023-01-22 08:46:05.482787: step: 172/77, loss: 0.0002488511090632528 2023-01-22 08:46:06.779141: step: 176/77, loss: 0.0044037941843271255 2023-01-22 08:46:08.085110: step: 180/77, loss: 2.851781937351916e-05 2023-01-22 08:46:09.469028: step: 184/77, loss: 0.025874529033899307 2023-01-22 08:46:10.771192: step: 188/77, loss: 6.360773113556206e-05 2023-01-22 08:46:12.092585: step: 192/77, loss: 0.020662926137447357 2023-01-22 08:46:13.472987: step: 196/77, loss: 6.761529948562384e-05 2023-01-22 08:46:14.778399: step: 200/77, loss: 2.6647994673112407e-05 2023-01-22 08:46:16.092580: step: 
204/77, loss: 0.0935317799448967 2023-01-22 08:46:17.417610: step: 208/77, loss: 0.0009675032342784107 2023-01-22 08:46:18.739511: step: 212/77, loss: 0.006623557303100824 2023-01-22 08:46:20.071315: step: 216/77, loss: 0.01613423600792885 2023-01-22 08:46:21.390543: step: 220/77, loss: 0.005601785145699978 2023-01-22 08:46:22.722594: step: 224/77, loss: 0.043269336223602295 2023-01-22 08:46:24.082964: step: 228/77, loss: 0.058971501886844635 2023-01-22 08:46:25.411995: step: 232/77, loss: 0.04367101565003395 2023-01-22 08:46:26.727685: step: 236/77, loss: 0.04910704120993614 2023-01-22 08:46:28.024911: step: 240/77, loss: 0.012337159365415573 2023-01-22 08:46:29.331019: step: 244/77, loss: 0.005926759447902441 2023-01-22 08:46:30.621497: step: 248/77, loss: 0.007234785705804825 2023-01-22 08:46:31.934586: step: 252/77, loss: 0.05337817594408989 2023-01-22 08:46:33.236991: step: 256/77, loss: 0.0013377940049394965 2023-01-22 08:46:34.496712: step: 260/77, loss: 0.029988370835781097 2023-01-22 08:46:35.819499: step: 264/77, loss: 0.02918725088238716 2023-01-22 08:46:37.129420: step: 268/77, loss: 0.017394710332155228 2023-01-22 08:46:38.382127: step: 272/77, loss: 0.007960842922329903 2023-01-22 08:46:39.649362: step: 276/77, loss: 0.015655148774385452 2023-01-22 08:46:41.012416: step: 280/77, loss: 0.07385125756263733 2023-01-22 08:46:42.372121: step: 284/77, loss: 0.01918143779039383 2023-01-22 08:46:43.677339: step: 288/77, loss: 0.027690613642334938 2023-01-22 08:46:44.987349: step: 292/77, loss: 0.004097505938261747 2023-01-22 08:46:46.279249: step: 296/77, loss: 0.014483789913356304 2023-01-22 08:46:47.649494: step: 300/77, loss: 0.006000719498842955 2023-01-22 08:46:48.981382: step: 304/77, loss: 0.004739431664347649 2023-01-22 08:46:50.316561: step: 308/77, loss: 0.03346220403909683 2023-01-22 08:46:51.619353: step: 312/77, loss: 0.009710513986647129 2023-01-22 08:46:52.982072: step: 316/77, loss: 0.016323618590831757 2023-01-22 08:46:54.315748: step: 320/77, loss: 0.0007509322604164481 2023-01-22 08:46:55.626711: step: 324/77, loss: 0.01381689589470625 2023-01-22 08:46:56.945411: step: 328/77, loss: 0.0016642776317894459 2023-01-22 08:46:58.269808: step: 332/77, loss: 0.005551490001380444 2023-01-22 08:46:59.599236: step: 336/77, loss: 0.004239788744598627 2023-01-22 08:47:00.918352: step: 340/77, loss: 0.006677926052361727 2023-01-22 08:47:02.251177: step: 344/77, loss: 0.00010080473293783143 2023-01-22 08:47:03.531702: step: 348/77, loss: 0.011997531168162823 2023-01-22 08:47:04.855414: step: 352/77, loss: 0.014606297016143799 2023-01-22 08:47:06.168225: step: 356/77, loss: 0.00018841848941519856 2023-01-22 08:47:07.479274: step: 360/77, loss: 0.01075925026088953 2023-01-22 08:47:08.823085: step: 364/77, loss: 0.046107031404972076 2023-01-22 08:47:10.205074: step: 368/77, loss: 0.0002987585321534425 2023-01-22 08:47:11.550195: step: 372/77, loss: 0.014896288514137268 2023-01-22 08:47:12.875737: step: 376/77, loss: 0.033940427005290985 2023-01-22 08:47:14.241430: step: 380/77, loss: 0.0470426082611084 2023-01-22 08:47:15.551746: step: 384/77, loss: 0.003909729886800051 2023-01-22 08:47:16.931029: step: 388/77, loss: 0.01414463110268116 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9076923076923077, 'r': 
0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.4782608695652174, 'r': 0.010009099181073703, 'f1': 0.0196078431372549}, 'combined': 0.011804721888755502, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.4782608695652174, 'r': 0.010009099181073703, 'f1': 0.0196078431372549}, 'combined': 0.011804721888755502, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.4782608695652174, 'r': 0.010009099181073703, 'f1': 0.0196078431372549}, 'combined': 0.011804721888755502, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 
'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:49:10.522784: step: 4/77, loss: 0.001212775707244873 2023-01-22 08:49:11.808897: step: 8/77, loss: 0.0010248932521790266 2023-01-22 08:49:13.159338: step: 12/77, loss: 0.003597670467570424 2023-01-22 08:49:14.470105: step: 16/77, loss: 0.00012636728934012353 2023-01-22 08:49:15.815958: step: 20/77, loss: 0.01793883740901947 2023-01-22 08:49:17.100872: step: 24/77, loss: 0.004484567791223526 2023-01-22 08:49:18.366462: step: 28/77, loss: 0.013867422007024288 2023-01-22 08:49:19.712102: step: 32/77, loss: 0.00443370221182704 2023-01-22 08:49:21.008320: step: 36/77, loss: 0.03909948095679283 2023-01-22 08:49:22.352939: step: 40/77, loss: 0.04470532760024071 2023-01-22 08:49:23.674337: step: 44/77, loss: 0.004239007830619812 2023-01-22 08:49:24.986998: step: 48/77, loss: 0.008453510701656342 2023-01-22 08:49:26.322860: step: 52/77, loss: 0.017034407705068588 2023-01-22 08:49:27.588167: step: 56/77, loss: 0.04876886308193207 2023-01-22 08:49:28.875764: step: 60/77, loss: 0.0005208022193983197 2023-01-22 08:49:30.179642: step: 64/77, loss: 0.019471313804388046 2023-01-22 08:49:31.502021: step: 68/77, loss: 0.03318634629249573 2023-01-22 08:49:32.828074: step: 72/77, loss: 0.011108007282018661 2023-01-22 08:49:34.130282: step: 76/77, loss: 0.01431676559150219 2023-01-22 08:49:35.405806: step: 80/77, loss: 0.005948154721409082 2023-01-22 08:49:36.726181: step: 84/77, loss: 0.021397370845079422 2023-01-22 08:49:38.053200: step: 88/77, loss: 0.02325313724577427 2023-01-22 08:49:39.388478: step: 92/77, loss: 0.020098941400647163 2023-01-22 08:49:40.717299: step: 96/77, loss: 0.021467300131917 2023-01-22 08:49:42.049186: step: 100/77, loss: 0.00042401679093018174 2023-01-22 08:49:43.362241: step: 104/77, loss: 0.012996762990951538 2023-01-22 08:49:44.658039: step: 108/77, loss: 0.0010982566745951772 2023-01-22 08:49:45.975332: step: 112/77, loss: 0.010368159040808678 2023-01-22 08:49:47.232903: step: 116/77, loss: 0.0032198550179600716 2023-01-22 08:49:48.564670: step: 120/77, loss: 0.012814019806683064 2023-01-22 08:49:49.867223: step: 124/77, loss: 0.004655920900404453 2023-01-22 08:49:51.131291: step: 128/77, loss: 0.024697577580809593 2023-01-22 08:49:52.436691: step: 132/77, loss: 0.02245374023914337 2023-01-22 08:49:53.686735: step: 136/77, loss: 0.004386830143630505 2023-01-22 08:49:55.004276: step: 140/77, loss: 0.0107192387804389 2023-01-22 08:49:56.364223: step: 144/77, loss: 0.0033273466397076845 2023-01-22 08:49:57.716096: step: 148/77, loss: 0.043100181967020035 2023-01-22 08:49:59.040741: step: 152/77, loss: 0.0023186816833913326 2023-01-22 08:50:00.366359: step: 156/77, loss: 0.015858765691518784 2023-01-22 08:50:01.659951: step: 160/77, loss: 0.030266083776950836 2023-01-22 08:50:03.017474: step: 164/77, loss: 0.0035603016149252653 2023-01-22 08:50:04.326184: step: 168/77, loss: 0.01956053264439106 2023-01-22 08:50:05.587782: step: 172/77, loss: 0.003212408162653446 2023-01-22 08:50:06.896751: step: 176/77, loss: 0.0023228703066706657 2023-01-22 08:50:08.170166: step: 180/77, loss: 0.014069067314267159 2023-01-22 08:50:09.450028: step: 184/77, loss: 0.0016356257256120443 2023-01-22 08:50:10.805454: step: 188/77, loss: 0.018511053174734116 2023-01-22 08:50:12.096911: 
step: 192/77, loss: 0.0004446762031875551 2023-01-22 08:50:13.442106: step: 196/77, loss: 0.004548283759504557 2023-01-22 08:50:14.777352: step: 200/77, loss: 0.05551350489258766 2023-01-22 08:50:16.078847: step: 204/77, loss: 0.0006663898238912225 2023-01-22 08:50:17.363376: step: 208/77, loss: 0.02572382427752018 2023-01-22 08:50:18.660883: step: 212/77, loss: 0.034336600452661514 2023-01-22 08:50:19.935135: step: 216/77, loss: 0.003346733283251524 2023-01-22 08:50:21.259466: step: 220/77, loss: 0.00020372634753584862 2023-01-22 08:50:22.595163: step: 224/77, loss: 0.002461690455675125 2023-01-22 08:50:23.868440: step: 228/77, loss: 0.0158668365329504 2023-01-22 08:50:25.174202: step: 232/77, loss: 0.03975234553217888 2023-01-22 08:50:26.460718: step: 236/77, loss: 0.004216344561427832 2023-01-22 08:50:27.765070: step: 240/77, loss: 0.0110662542283535 2023-01-22 08:50:29.060410: step: 244/77, loss: 0.002729236613959074 2023-01-22 08:50:30.377883: step: 248/77, loss: 0.0018763559637591243 2023-01-22 08:50:31.702661: step: 252/77, loss: 0.015068122185766697 2023-01-22 08:50:33.064850: step: 256/77, loss: 0.013370676897466183 2023-01-22 08:50:34.345256: step: 260/77, loss: 0.03214767947793007 2023-01-22 08:50:35.664522: step: 264/77, loss: 0.04682979732751846 2023-01-22 08:50:36.981268: step: 268/77, loss: 0.0191793330013752 2023-01-22 08:50:38.281230: step: 272/77, loss: 0.005866359919309616 2023-01-22 08:50:39.528647: step: 276/77, loss: 0.021344557404518127 2023-01-22 08:50:40.805832: step: 280/77, loss: 0.03047803044319153 2023-01-22 08:50:42.135125: step: 284/77, loss: 0.0007430142723023891 2023-01-22 08:50:43.445010: step: 288/77, loss: 0.04519880190491676 2023-01-22 08:50:44.751660: step: 292/77, loss: 0.005685959476977587 2023-01-22 08:50:46.055242: step: 296/77, loss: 0.003991344012320042 2023-01-22 08:50:47.378961: step: 300/77, loss: 0.00977976992726326 2023-01-22 08:50:48.680377: step: 304/77, loss: 0.014140845276415348 2023-01-22 08:50:49.984691: step: 308/77, loss: 0.03848596289753914 2023-01-22 08:50:51.299899: step: 312/77, loss: 0.001883206656202674 2023-01-22 08:50:52.617344: step: 316/77, loss: 0.013112076558172703 2023-01-22 08:50:53.921296: step: 320/77, loss: 0.006420728750526905 2023-01-22 08:50:55.228588: step: 324/77, loss: 0.029596269130706787 2023-01-22 08:50:56.531318: step: 328/77, loss: 0.0002997311530634761 2023-01-22 08:50:57.793758: step: 332/77, loss: 0.016038116067647934 2023-01-22 08:50:59.137270: step: 336/77, loss: 0.005305239465087652 2023-01-22 08:51:00.416950: step: 340/77, loss: 0.02268035151064396 2023-01-22 08:51:01.706701: step: 344/77, loss: 0.005031340289860964 2023-01-22 08:51:03.049320: step: 348/77, loss: 0.001761500840075314 2023-01-22 08:51:04.384024: step: 352/77, loss: 0.04548073187470436 2023-01-22 08:51:05.736804: step: 356/77, loss: 0.014924336224794388 2023-01-22 08:51:07.055922: step: 360/77, loss: 0.007789536379277706 2023-01-22 08:51:08.403074: step: 364/77, loss: 4.7290675865951926e-05 2023-01-22 08:51:09.709974: step: 368/77, loss: 0.0004604816494975239 2023-01-22 08:51:11.009901: step: 372/77, loss: 0.0022913378197699785 2023-01-22 08:51:12.333476: step: 376/77, loss: 0.0002854047925211489 2023-01-22 08:51:13.607207: step: 380/77, loss: 0.00608748709782958 2023-01-22 08:51:14.893779: step: 384/77, loss: 0.035436298698186874 2023-01-22 08:51:16.185914: step: 388/77, loss: 0.06410303711891174 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.8923076923076924, 'r': 0.44274809160305345, 'f1': 0.5918367346938777}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014717076884039587, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.8923076923076924, 'r': 0.44274809160305345, 'f1': 0.5918367346938777}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014717076884039587, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.8923076923076924, 'r': 0.44274809160305345, 'f1': 0.5918367346938777}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014717076884039587, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 
0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:53:09.881725: step: 4/77, loss: 0.008268819190561771 2023-01-22 08:53:11.183359: step: 8/77, loss: 0.0013845355715602636 2023-01-22 08:53:12.449610: step: 12/77, loss: 0.014348512515425682 2023-01-22 08:53:13.728612: step: 16/77, loss: 0.00025518261827528477 2023-01-22 08:53:14.985823: step: 20/77, loss: 0.00025453310809098184 2023-01-22 08:53:16.281494: step: 24/77, loss: 0.005054094363003969 2023-01-22 08:53:17.562971: step: 28/77, loss: 0.015327653847634792 2023-01-22 08:53:18.898687: step: 32/77, loss: 0.007553777191787958 2023-01-22 08:53:20.187133: step: 36/77, loss: 2.335933459107764e-05 2023-01-22 08:53:21.502485: step: 40/77, loss: 0.008073270320892334 2023-01-22 08:53:22.787644: step: 44/77, loss: 2.5041801563929766e-05 2023-01-22 08:53:24.073747: step: 48/77, loss: 0.013722700998187065 2023-01-22 08:53:25.405618: step: 52/77, loss: 0.0007456339080817997 2023-01-22 08:53:26.718649: step: 56/77, loss: 0.0017448263242840767 2023-01-22 08:53:28.042537: step: 60/77, loss: 0.029238741844892502 2023-01-22 08:53:29.300588: step: 64/77, loss: 0.007461803965270519 2023-01-22 08:53:30.608307: step: 68/77, loss: 0.014584846794605255 2023-01-22 08:53:31.954171: step: 72/77, loss: 0.0127757228910923 2023-01-22 08:53:33.255225: step: 76/77, loss: 0.0109567167237401 2023-01-22 08:53:34.624820: step: 80/77, loss: 0.014077206142246723 2023-01-22 08:53:35.985318: step: 84/77, loss: 4.79845330119133e-05 2023-01-22 08:53:37.275022: step: 88/77, loss: 0.05143750086426735 2023-01-22 08:53:38.622082: step: 92/77, loss: 0.021490877494215965 2023-01-22 08:53:39.936686: step: 96/77, loss: 0.0003919099981430918 2023-01-22 08:53:41.274578: step: 100/77, loss: 0.001257411320693791 2023-01-22 08:53:42.590194: step: 104/77, loss: 0.07764703780412674 2023-01-22 08:53:43.858308: step: 108/77, loss: 0.04847249388694763 2023-01-22 08:53:45.154705: step: 112/77, loss: 0.00677803810685873 2023-01-22 08:53:46.459710: step: 116/77, loss: 0.014240864664316177 2023-01-22 08:53:47.836096: step: 120/77, loss: 0.022181320935487747 2023-01-22 08:53:49.153803: step: 124/77, loss: 0.019869450479745865 2023-01-22 08:53:50.440082: step: 128/77, loss: 0.006858724169433117 2023-01-22 08:53:51.784190: step: 132/77, loss: 0.0060902857221663 2023-01-22 08:53:53.119921: step: 136/77, loss: 0.0004512839368544519 2023-01-22 08:53:54.419205: step: 140/77, loss: 0.0038215219974517822 2023-01-22 08:53:55.718216: step: 144/77, loss: 0.0006012011435814202 2023-01-22 08:53:57.042398: step: 148/77, loss: 0.003443785710260272 2023-01-22 08:53:58.385049: step: 152/77, loss: 0.003140839748084545 2023-01-22 08:53:59.704028: step: 156/77, loss: 0.03932955116033554 2023-01-22 08:54:01.007738: step: 160/77, loss: 0.005046913865953684 2023-01-22 08:54:02.317599: step: 164/77, loss: 0.000966257881373167 2023-01-22 08:54:03.616996: step: 168/77, loss: 0.0014475728385150433 2023-01-22 08:54:04.914043: step: 172/77, loss: 0.003415848594158888 2023-01-22 08:54:06.188760: step: 176/77, loss: 
0.017180226743221283 2023-01-22 08:54:07.512553: step: 180/77, loss: 0.0027672951109707355 2023-01-22 08:54:08.826688: step: 184/77, loss: 0.01768960990011692 2023-01-22 08:54:10.114737: step: 188/77, loss: 0.0016348720528185368 2023-01-22 08:54:11.442973: step: 192/77, loss: 0.0016627004370093346 2023-01-22 08:54:12.774025: step: 196/77, loss: 0.018234293907880783 2023-01-22 08:54:14.065891: step: 200/77, loss: 0.014059138484299183 2023-01-22 08:54:15.423552: step: 204/77, loss: 0.004478732589632273 2023-01-22 08:54:16.742136: step: 208/77, loss: 0.004280396271497011 2023-01-22 08:54:18.064668: step: 212/77, loss: 0.028340879827737808 2023-01-22 08:54:19.383511: step: 216/77, loss: 0.004287842195481062 2023-01-22 08:54:20.719568: step: 220/77, loss: 0.00017998297698795795 2023-01-22 08:54:22.062742: step: 224/77, loss: 0.005983490496873856 2023-01-22 08:54:23.384760: step: 228/77, loss: 0.00011496020306367427 2023-01-22 08:54:24.720486: step: 232/77, loss: 0.05257324501872063 2023-01-22 08:54:26.019969: step: 236/77, loss: 0.003619940485805273 2023-01-22 08:54:27.351148: step: 240/77, loss: 0.0004241685091983527 2023-01-22 08:54:28.672800: step: 244/77, loss: 0.04979011043906212 2023-01-22 08:54:30.009321: step: 248/77, loss: 7.13120925865951e-06 2023-01-22 08:54:31.327764: step: 252/77, loss: 0.0001238850090885535 2023-01-22 08:54:32.690642: step: 256/77, loss: 0.0006984564824961126 2023-01-22 08:54:33.974259: step: 260/77, loss: 0.00970840360969305 2023-01-22 08:54:35.291286: step: 264/77, loss: 0.021301377564668655 2023-01-22 08:54:36.646197: step: 268/77, loss: 0.0007670613704249263 2023-01-22 08:54:37.969118: step: 272/77, loss: 2.0593079170794226e-06 2023-01-22 08:54:39.335144: step: 276/77, loss: 0.018182558938860893 2023-01-22 08:54:40.677106: step: 280/77, loss: 0.0001049708153004758 2023-01-22 08:54:42.015535: step: 284/77, loss: 0.02122705802321434 2023-01-22 08:54:43.245237: step: 288/77, loss: 0.0013556723715737462 2023-01-22 08:54:44.594385: step: 292/77, loss: 0.001371250138618052 2023-01-22 08:54:45.964128: step: 296/77, loss: 0.013504598289728165 2023-01-22 08:54:47.275550: step: 300/77, loss: 0.0032943664118647575 2023-01-22 08:54:48.617932: step: 304/77, loss: 0.00025259374524466693 2023-01-22 08:54:49.919896: step: 308/77, loss: 0.0003105059149675071 2023-01-22 08:54:51.228386: step: 312/77, loss: 6.09213238931261e-05 2023-01-22 08:54:52.553314: step: 316/77, loss: 0.00036613934207707644 2023-01-22 08:54:53.839696: step: 320/77, loss: 0.0021243938244879246 2023-01-22 08:54:55.171902: step: 324/77, loss: 0.022916313260793686 2023-01-22 08:54:56.489125: step: 328/77, loss: 0.002108396030962467 2023-01-22 08:54:57.775105: step: 332/77, loss: 0.00017366012616548687 2023-01-22 08:54:59.069432: step: 336/77, loss: 0.02667391486465931 2023-01-22 08:55:00.375576: step: 340/77, loss: 0.03359730914235115 2023-01-22 08:55:01.745277: step: 344/77, loss: 0.08052822202444077 2023-01-22 08:55:03.037396: step: 348/77, loss: 0.00590179581195116 2023-01-22 08:55:04.329207: step: 352/77, loss: 0.00030119650182314217 2023-01-22 08:55:05.625416: step: 356/77, loss: 0.0023873820900917053 2023-01-22 08:55:06.944502: step: 360/77, loss: 0.0006035061087459326 2023-01-22 08:55:08.263474: step: 364/77, loss: 0.05599663779139519 2023-01-22 08:55:09.571884: step: 368/77, loss: 0.026590893045067787 2023-01-22 08:55:10.915082: step: 372/77, loss: 0.012441445142030716 2023-01-22 08:55:12.231852: step: 376/77, loss: 5.755682286689989e-05 2023-01-22 08:55:13.531921: step: 380/77, loss: 
0.01148482970893383 2023-01-22 08:55:14.846821: step: 384/77, loss: 0.00023403289378620684 2023-01-22 08:55:16.169343: step: 388/77, loss: 0.008566263131797314 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04686584651435266, 'epoch': 12} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 0.4122137404580153, 'f1': 0.574468085106383}, 'slot': {'p': 0.5454545454545454, 'r': 0.01091901728844404, 'f1': 0.021409455842997326}, 'combined': 0.012299049101296337, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04789586511906372, 'epoch': 12} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.4122137404580153, 'f1': 0.574468085106383}, 'slot': {'p': 0.5454545454545454, 'r': 0.01091901728844404, 'f1': 0.021409455842997326}, 'combined': 0.012299049101296337, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04686584651435266, 'epoch': 12} Test Russian: {'template': {'p': 0.9473684210526315, 'r': 0.4122137404580153, 'f1': 0.574468085106383}, 'slot': {'p': 0.5454545454545454, 'r': 0.01091901728844404, 'f1': 0.021409455842997326}, 'combined': 0.012299049101296337, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:57:09.933598: step: 4/77, loss: 0.0029923836700618267 2023-01-22 08:57:11.198512: step: 8/77, loss: 0.0071807983331382275 2023-01-22 08:57:12.498836: step: 12/77, loss: 0.02864694409072399 2023-01-22 08:57:13.852362: step: 16/77, loss: 0.0010537790367379785 2023-01-22 08:57:15.122275: step: 20/77, loss: 0.0022675837390124798 2023-01-22 08:57:16.425167: step: 24/77, loss: 0.008036395534873009 2023-01-22 08:57:17.751124: step: 28/77, loss: 0.0007599538657814264 2023-01-22 08:57:19.129784: step: 32/77, loss: 0.00028435743297450244 2023-01-22 08:57:20.500109: step: 36/77, loss: 0.02268192358314991 2023-01-22 08:57:21.787106: step: 40/77, loss: 0.005586005747318268 2023-01-22 08:57:23.099222: step: 44/77, loss: 0.00229069497436285 2023-01-22 08:57:24.387948: step: 48/77, loss: 0.005013742949813604 2023-01-22 08:57:25.700378: step: 52/77, loss: 0.0004219284455757588 2023-01-22 08:57:26.976708: step: 56/77, loss: 0.04628149792551994 2023-01-22 08:57:28.306080: step: 60/77, loss: 0.00035025522811338305 2023-01-22 08:57:29.602596: step: 64/77, loss: 0.00048070665798150003 2023-01-22 08:57:30.936286: step: 68/77, loss: 0.022792479023337364 2023-01-22 08:57:32.239665: step: 72/77, loss: 0.0006048143841326237 2023-01-22 08:57:33.579436: step: 76/77, loss: 0.00026993860956281424 2023-01-22 08:57:34.875958: step: 80/77, loss: 0.0002230935642728582 2023-01-22 08:57:36.198503: step: 84/77, loss: 0.033240292221307755 2023-01-22 08:57:37.534469: step: 88/77, loss: 0.0315207839012146 2023-01-22 08:57:38.821979: step: 92/77, loss: 0.013940885663032532 2023-01-22 08:57:40.161584: step: 96/77, loss: 0.014073438942432404 2023-01-22 08:57:41.520389: step: 100/77, loss: 0.03296903520822525 2023-01-22 08:57:42.846202: step: 104/77, loss: 0.007324971258640289 2023-01-22 08:57:44.158736: step: 108/77, loss: 0.001288596075028181 2023-01-22 08:57:45.440356: step: 112/77, loss: 5.987027179799043e-06 2023-01-22 08:57:46.754192: step: 116/77, loss: 0.005261044949293137 2023-01-22 08:57:48.019043: step: 120/77, loss: 0.03368813917040825 2023-01-22 08:57:49.313498: step: 124/77, loss: 0.012227150611579418 2023-01-22 08:57:50.622675: step: 128/77, loss: 0.005527400877326727 2023-01-22 08:57:51.917793: step: 132/77, loss: 0.0014371307333931327 2023-01-22 08:57:53.259153: step: 136/77, loss: 0.0013739938149228692 2023-01-22 08:57:54.605185: step: 140/77, loss: 0.005380359478294849 2023-01-22 08:57:55.958666: step: 144/77, loss: 0.006290000397711992 2023-01-22 08:57:57.239446: step: 148/77, loss: 0.008836659602820873 2023-01-22 08:57:58.480375: step: 152/77, loss: 0.005232426803559065 2023-01-22 08:57:59.766587: step: 156/77, loss: 0.00048308397526852787 2023-01-22 08:58:01.044395: step: 160/77, loss: 0.003274317365139723 2023-01-22 
08:58:02.384742: step: 164/77, loss: 0.01170845702290535 2023-01-22 08:58:03.674744: step: 168/77, loss: 0.04738888889551163 2023-01-22 08:58:05.019277: step: 172/77, loss: 0.07354629039764404 2023-01-22 08:58:06.367903: step: 176/77, loss: 0.011457343585789204 2023-01-22 08:58:07.681326: step: 180/77, loss: 0.02155444398522377 2023-01-22 08:58:08.977212: step: 184/77, loss: 0.0016696201637387276 2023-01-22 08:58:10.259823: step: 188/77, loss: 0.0005738473264500499 2023-01-22 08:58:11.603314: step: 192/77, loss: 0.001273950794711709 2023-01-22 08:58:12.868212: step: 196/77, loss: 0.03267135098576546 2023-01-22 08:58:14.150573: step: 200/77, loss: 9.959276212612167e-05 2023-01-22 08:58:15.441141: step: 204/77, loss: 0.0008442182443104684 2023-01-22 08:58:16.703289: step: 208/77, loss: 0.03439297527074814 2023-01-22 08:58:18.018998: step: 212/77, loss: 0.06101587787270546 2023-01-22 08:58:19.289097: step: 216/77, loss: 3.854815804515965e-05 2023-01-22 08:58:20.619981: step: 220/77, loss: 0.0412127859890461 2023-01-22 08:58:21.959952: step: 224/77, loss: 0.005135328974574804 2023-01-22 08:58:23.222855: step: 228/77, loss: 0.0020034238696098328 2023-01-22 08:58:24.520769: step: 232/77, loss: 2.5629915967329e-07 2023-01-22 08:58:25.830445: step: 236/77, loss: 0.07443392276763916 2023-01-22 08:58:27.142802: step: 240/77, loss: 0.0030464939773082733 2023-01-22 08:58:28.473796: step: 244/77, loss: 0.0017192356754094362 2023-01-22 08:58:29.775519: step: 248/77, loss: 2.1758773073088378e-05 2023-01-22 08:58:31.097146: step: 252/77, loss: 0.07711545377969742 2023-01-22 08:58:32.349040: step: 256/77, loss: 0.0010215912479907274 2023-01-22 08:58:33.622376: step: 260/77, loss: 0.00018177239689975977 2023-01-22 08:58:34.972869: step: 264/77, loss: 4.1622581193223596e-05 2023-01-22 08:58:36.246798: step: 268/77, loss: 0.047255661338567734 2023-01-22 08:58:37.606623: step: 272/77, loss: 0.00015721115050837398 2023-01-22 08:58:38.920526: step: 276/77, loss: 0.002577256876975298 2023-01-22 08:58:40.229116: step: 280/77, loss: 0.0019506815588101745 2023-01-22 08:58:41.522120: step: 284/77, loss: 0.00016758375568315387 2023-01-22 08:58:42.860581: step: 288/77, loss: 0.026300586760044098 2023-01-22 08:58:44.189940: step: 292/77, loss: 0.00026921986136585474 2023-01-22 08:58:45.543364: step: 296/77, loss: 0.017224635928869247 2023-01-22 08:58:46.820496: step: 300/77, loss: 0.003580394433811307 2023-01-22 08:58:48.086353: step: 304/77, loss: 0.02821994386613369 2023-01-22 08:58:49.375788: step: 308/77, loss: 0.0003362175193615258 2023-01-22 08:58:50.684725: step: 312/77, loss: 0.0016050392296165228 2023-01-22 08:58:52.003252: step: 316/77, loss: 2.1911566363996826e-05 2023-01-22 08:58:53.351200: step: 320/77, loss: 0.04316862300038338 2023-01-22 08:58:54.638824: step: 324/77, loss: 0.002501176670193672 2023-01-22 08:58:55.908228: step: 328/77, loss: 0.004361604806035757 2023-01-22 08:58:57.250332: step: 332/77, loss: 0.0011792670702561736 2023-01-22 08:58:58.553156: step: 336/77, loss: 0.0005191811360418797 2023-01-22 08:58:59.839740: step: 340/77, loss: 0.01508795004338026 2023-01-22 08:59:01.117694: step: 344/77, loss: 0.00047959492076188326 2023-01-22 08:59:02.432268: step: 348/77, loss: 0.0011582657461985946 2023-01-22 08:59:03.763878: step: 352/77, loss: 0.04251958802342415 2023-01-22 08:59:05.062641: step: 356/77, loss: 0.07447528094053268 2023-01-22 08:59:06.341672: step: 360/77, loss: 0.005645543336868286 2023-01-22 08:59:07.627476: step: 364/77, loss: 0.0001554302725708112 2023-01-22 08:59:08.928593: 
step: 368/77, loss: 0.00981360487639904 2023-01-22 08:59:10.240716: step: 372/77, loss: 0.011518244631588459 2023-01-22 08:59:11.591072: step: 376/77, loss: 0.000307155423797667 2023-01-22 08:59:12.936441: step: 380/77, loss: 0.03827136382460594 2023-01-22 08:59:14.222850: step: 384/77, loss: 0.0002442160330247134 2023-01-22 08:59:15.496711: step: 388/77, loss: 0.01064755767583847 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 13} Test Chinese: {'template': {'p': 0.9, 'r': 0.4122137404580153, 'f1': 0.5654450261780105}, 'slot': {'p': 0.4827586206896552, 'r': 0.012738853503184714, 'f1': 0.024822695035460994}, 'combined': 0.014035869444135013, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 13} Test Korean: {'template': {'p': 0.9, 'r': 0.4122137404580153, 'f1': 0.5654450261780105}, 'slot': {'p': 0.4642857142857143, 'r': 0.011828935395814377, 'f1': 0.023070097604259095}, 'combined': 0.013044871943769541, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 13} Test Russian: {'template': {'p': 0.9, 'r': 0.4122137404580153, 'f1': 0.5654450261780105}, 'slot': {'p': 0.4827586206896552, 'r': 0.012738853503184714, 'f1': 0.024822695035460994}, 'combined': 0.014035869444135013, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} 
-------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:01:09.390093: step: 4/77, loss: 0.0006164819351397455 2023-01-22 09:01:10.651857: step: 8/77, loss: 0.00018814537907019258 2023-01-22 09:01:12.022540: step: 12/77, loss: 0.005015483126044273 2023-01-22 09:01:13.379575: step: 16/77, loss: 0.0001742543827276677 2023-01-22 09:01:14.721941: step: 20/77, loss: 0.0781288668513298 2023-01-22 09:01:16.027695: step: 24/77, loss: 0.008677495643496513 2023-01-22 09:01:17.340075: step: 28/77, loss: 0.10340835154056549 2023-01-22 09:01:18.651659: step: 32/77, loss: 0.0005294712027534842 2023-01-22 09:01:19.983545: step: 36/77, loss: 0.0043997676111757755 2023-01-22 09:01:21.306776: step: 40/77, loss: 0.017868174239993095 2023-01-22 09:01:22.608794: step: 44/77, loss: 0.0005032032495364547 2023-01-22 09:01:23.868576: step: 48/77, loss: 0.02159172110259533 2023-01-22 09:01:25.227098: step: 52/77, loss: 0.00976879708468914 2023-01-22 09:01:26.481691: step: 56/77, loss: 0.07533251494169235 2023-01-22 09:01:27.781930: step: 60/77, loss: 0.001760625746101141 2023-01-22 09:01:29.106614: step: 64/77, loss: 0.0021721036173403263 2023-01-22 09:01:30.369148: step: 68/77, loss: 0.021930739283561707 2023-01-22 09:01:31.652397: step: 72/77, loss: 0.00019939450430683792 2023-01-22 09:01:32.964286: step: 76/77, loss: 0.008485385216772556 2023-01-22 09:01:34.235188: step: 80/77, loss: 0.0008418073994107544 2023-01-22 09:01:35.552252: step: 84/77, loss: 0.026486661285161972 2023-01-22 09:01:36.833182: step: 88/77, loss: 0.05777119845151901 2023-01-22 09:01:38.175605: step: 92/77, loss: 0.06704266369342804 2023-01-22 09:01:39.491387: step: 96/77, loss: 0.017733411863446236 2023-01-22 09:01:40.777133: step: 100/77, loss: 0.0038883714005351067 2023-01-22 09:01:42.031441: step: 104/77, loss: 0.00019030642579309642 2023-01-22 09:01:43.372359: step: 108/77, loss: 0.004776171408593655 2023-01-22 09:01:44.712865: step: 112/77, loss: 0.019326455891132355 2023-01-22 09:01:46.019854: step: 116/77, loss: 6.101990584284067e-05 2023-01-22 09:01:47.297129: step: 120/77, loss: 0.004292602185159922 2023-01-22 09:01:48.596627: step: 124/77, loss: 0.006115391850471497 2023-01-22 09:01:49.912540: step: 128/77, loss: 0.05205165594816208 2023-01-22 09:01:51.179722: step: 132/77, loss: 0.02748934179544449 2023-01-22 09:01:52.482140: step: 136/77, loss: 0.017037197947502136 2023-01-22 09:01:53.837247: step: 140/77, loss: 0.00018027987971436232 2023-01-22 09:01:55.162692: step: 144/77, loss: 0.00023836392210796475 2023-01-22 09:01:56.451677: step: 148/77, loss: 0.04402411729097366 2023-01-22 09:01:57.757872: step: 152/77, loss: 0.02829231135547161 2023-01-22 
09:01:59.031904: step: 156/77, loss: 0.05941590666770935 2023-01-22 09:02:00.364765: step: 160/77, loss: 0.023701639845967293 2023-01-22 09:02:01.667522: step: 164/77, loss: 0.0004671411879826337 2023-01-22 09:02:03.010256: step: 168/77, loss: 0.01830999366939068 2023-01-22 09:02:04.316277: step: 172/77, loss: 0.0014468998415395617 2023-01-22 09:02:05.650283: step: 176/77, loss: 0.0030556246638298035 2023-01-22 09:02:06.989037: step: 180/77, loss: 0.025845110416412354 2023-01-22 09:02:08.301870: step: 184/77, loss: 0.0004510443250183016 2023-01-22 09:02:09.595282: step: 188/77, loss: 0.00019627483561635017 2023-01-22 09:02:10.949664: step: 192/77, loss: 0.006268838420510292 2023-01-22 09:02:12.253856: step: 196/77, loss: 0.003117799060419202 2023-01-22 09:02:13.625854: step: 200/77, loss: 0.00012128011439926922 2023-01-22 09:02:14.934766: step: 204/77, loss: 0.004503394011408091 2023-01-22 09:02:16.211884: step: 208/77, loss: 0.0002909237227868289 2023-01-22 09:02:17.544722: step: 212/77, loss: 0.029473312199115753 2023-01-22 09:02:18.906079: step: 216/77, loss: 0.007408645004034042 2023-01-22 09:02:20.163494: step: 220/77, loss: 0.027147958055138588 2023-01-22 09:02:21.486268: step: 224/77, loss: 0.008658119477331638 2023-01-22 09:02:22.805672: step: 228/77, loss: 8.345770766027272e-06 2023-01-22 09:02:24.122709: step: 232/77, loss: 0.007440881337970495 2023-01-22 09:02:25.446630: step: 236/77, loss: 0.00030215029255487025 2023-01-22 09:02:26.788986: step: 240/77, loss: 0.000730294908862561 2023-01-22 09:02:28.111980: step: 244/77, loss: 0.008102841675281525 2023-01-22 09:02:29.427686: step: 248/77, loss: 0.04456748068332672 2023-01-22 09:02:30.729370: step: 252/77, loss: 0.0004915382014587522 2023-01-22 09:02:32.048689: step: 256/77, loss: 1.1332228496030439e-05 2023-01-22 09:02:33.428255: step: 260/77, loss: 0.000332854688167572 2023-01-22 09:02:34.754710: step: 264/77, loss: 6.480792944785208e-05 2023-01-22 09:02:36.080192: step: 268/77, loss: 0.00437159463763237 2023-01-22 09:02:37.382451: step: 272/77, loss: 0.00953389797359705 2023-01-22 09:02:38.695515: step: 276/77, loss: 0.01486000046133995 2023-01-22 09:02:40.010115: step: 280/77, loss: 0.0023728306405246258 2023-01-22 09:02:41.364304: step: 284/77, loss: 0.0014381734654307365 2023-01-22 09:02:42.670764: step: 288/77, loss: 0.002384813502430916 2023-01-22 09:02:44.031414: step: 292/77, loss: 0.04980777949094772 2023-01-22 09:02:45.367234: step: 296/77, loss: 0.0008250401588156819 2023-01-22 09:02:46.659710: step: 300/77, loss: 0.04165349900722504 2023-01-22 09:02:47.945007: step: 304/77, loss: 0.0007814334239810705 2023-01-22 09:02:49.248345: step: 308/77, loss: 0.014640234410762787 2023-01-22 09:02:50.586448: step: 312/77, loss: 0.006188351195305586 2023-01-22 09:02:51.912175: step: 316/77, loss: 0.002096428768709302 2023-01-22 09:02:53.270057: step: 320/77, loss: 0.012077968567609787 2023-01-22 09:02:54.588040: step: 324/77, loss: 0.006855163723230362 2023-01-22 09:02:55.915090: step: 328/77, loss: 0.011390695348381996 2023-01-22 09:02:57.253534: step: 332/77, loss: 0.0008925123256631196 2023-01-22 09:02:58.583705: step: 336/77, loss: 0.0035996062215417624 2023-01-22 09:02:59.889513: step: 340/77, loss: 0.003936652559787035 2023-01-22 09:03:01.207055: step: 344/77, loss: 0.021909981966018677 2023-01-22 09:03:02.515707: step: 348/77, loss: 0.0045426394790410995 2023-01-22 09:03:03.816054: step: 352/77, loss: 0.009716725908219814 2023-01-22 09:03:05.111374: step: 356/77, loss: 0.0018331398023292422 2023-01-22 09:03:06.448236: 
step: 360/77, loss: 0.003199656493961811 2023-01-22 09:03:07.751949: step: 364/77, loss: 0.004381906241178513 2023-01-22 09:03:09.062653: step: 368/77, loss: 0.0004424109938554466 2023-01-22 09:03:10.341215: step: 372/77, loss: 0.00012694572797045112 2023-01-22 09:03:11.634417: step: 376/77, loss: 0.02966800332069397 2023-01-22 09:03:12.957721: step: 380/77, loss: 0.0018701194785535336 2023-01-22 09:03:14.330743: step: 384/77, loss: 0.09938880801200867 2023-01-22 09:03:15.680039: step: 388/77, loss: 0.0001236020470969379 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 14} Test Chinese: {'template': {'p': 0.8857142857142857, 'r': 0.4732824427480916, 'f1': 0.6169154228855721}, 'slot': {'p': 0.5, 'r': 0.014558689717925387, 'f1': 0.028293545534924844}, 'combined': 0.01745472460861035, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 14} Test Korean: {'template': {'p': 0.8857142857142857, 'r': 0.4732824427480916, 'f1': 0.6169154228855721}, 'slot': {'p': 0.5, 'r': 0.014558689717925387, 'f1': 0.028293545534924844}, 'combined': 0.01745472460861035, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 14} Test Russian: {'template': {'p': 0.8857142857142857, 'r': 0.4732824427480916, 'f1': 0.6169154228855721}, 'slot': {'p': 0.5, 'r': 0.014558689717925387, 'f1': 0.028293545534924844}, 'combined': 0.01745472460861035, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 
0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:05:09.305815: step: 4/77, loss: 0.049520883709192276 2023-01-22 09:05:10.624991: step: 8/77, loss: 2.924517320934683e-05 2023-01-22 09:05:11.965649: step: 12/77, loss: 0.0001714784448267892 2023-01-22 09:05:13.221928: step: 16/77, loss: 0.0006058126455172896 2023-01-22 09:05:14.501956: step: 20/77, loss: 0.00029697720310650766 2023-01-22 09:05:15.830423: step: 24/77, loss: 0.017097534611821175 2023-01-22 09:05:17.149317: step: 28/77, loss: 0.015893323346972466 2023-01-22 09:05:18.430680: step: 32/77, loss: 0.0008558035478927195 2023-01-22 09:05:19.750846: step: 36/77, loss: 0.001351369428448379 2023-01-22 09:05:21.055788: step: 40/77, loss: 0.00041135685751214623 2023-01-22 09:05:22.379490: step: 44/77, loss: 0.009848172776401043 2023-01-22 09:05:23.729254: step: 48/77, loss: 8.295777661260217e-05 2023-01-22 09:05:25.046108: step: 52/77, loss: 0.024789001792669296 2023-01-22 09:05:26.346817: step: 56/77, loss: 0.027326753363013268 2023-01-22 09:05:27.663251: step: 60/77, loss: 0.0001087711425498128 2023-01-22 09:05:28.952830: step: 64/77, loss: 5.2774827054236084e-05 2023-01-22 09:05:30.255380: step: 68/77, loss: 0.00295753194950521 2023-01-22 09:05:31.578363: step: 72/77, loss: 0.00012791951303370297 2023-01-22 09:05:32.924118: step: 76/77, loss: 0.034874122589826584 2023-01-22 09:05:34.226567: step: 80/77, loss: 3.91757239412982e-05 2023-01-22 09:05:35.544632: step: 84/77, loss: 0.008262600749731064 2023-01-22 09:05:36.868184: step: 88/77, loss: 4.044691741000861e-05 2023-01-22 09:05:38.152405: step: 92/77, loss: 0.0029091283213347197 2023-01-22 09:05:39.484347: step: 96/77, loss: 0.026152852922677994 2023-01-22 09:05:40.768096: step: 100/77, loss: 0.00038738909643143415 2023-01-22 09:05:42.069902: step: 104/77, loss: 0.0052565522491931915 2023-01-22 09:05:43.333818: step: 108/77, loss: 0.0004201162955723703 2023-01-22 09:05:44.689686: step: 112/77, loss: 0.0010721203871071339 2023-01-22 09:05:46.000385: step: 116/77, loss: 0.004700690042227507 2023-01-22 09:05:47.303833: step: 120/77, loss: 6.839706475147977e-05 2023-01-22 09:05:48.638291: step: 124/77, loss: 0.0038220605347305536 2023-01-22 09:05:49.906595: step: 128/77, loss: 0.02317044325172901 2023-01-22 09:05:51.234953: step: 132/77, loss: 0.0005767315160483122 2023-01-22 09:05:52.548972: step: 136/77, loss: 0.03289420157670975 2023-01-22 09:05:53.850314: step: 140/77, loss: 0.0020907102152705193 2023-01-22 09:05:55.162289: step: 144/77, loss: 
0.0028302576392889023 2023-01-22 09:05:56.488923: step: 148/77, loss: 0.1757725179195404 2023-01-22 09:05:57.812048: step: 152/77, loss: 4.9558102546143346e-06 2023-01-22 09:05:59.115685: step: 156/77, loss: 0.01258833333849907 2023-01-22 09:06:00.432167: step: 160/77, loss: 0.031340211629867554 2023-01-22 09:06:01.764040: step: 164/77, loss: 0.005098341032862663 2023-01-22 09:06:03.033349: step: 168/77, loss: 0.0032492538448423147 2023-01-22 09:06:04.339351: step: 172/77, loss: 0.0015646882820874453 2023-01-22 09:06:05.540706: step: 176/77, loss: 0.008963462896645069 2023-01-22 09:06:06.826038: step: 180/77, loss: 0.0030308859422802925 2023-01-22 09:06:08.120826: step: 184/77, loss: 0.003062993520870805 2023-01-22 09:06:09.424180: step: 188/77, loss: 0.0017014848999679089 2023-01-22 09:06:10.747069: step: 192/77, loss: 0.009832756593823433 2023-01-22 09:06:12.112740: step: 196/77, loss: 3.9916165405884385e-05 2023-01-22 09:06:13.463561: step: 200/77, loss: 0.0001980918023036793 2023-01-22 09:06:14.789964: step: 204/77, loss: 0.009089184924960136 2023-01-22 09:06:16.056475: step: 208/77, loss: 8.612525562057272e-06 2023-01-22 09:06:17.431168: step: 212/77, loss: 0.039376288652420044 2023-01-22 09:06:18.801936: step: 216/77, loss: 0.10439729690551758 2023-01-22 09:06:20.113412: step: 220/77, loss: 0.0011328778928145766 2023-01-22 09:06:21.466102: step: 224/77, loss: 0.013515803962945938 2023-01-22 09:06:22.764263: step: 228/77, loss: 0.00017722068878356367 2023-01-22 09:06:24.107606: step: 232/77, loss: 0.04382976144552231 2023-01-22 09:06:25.391536: step: 236/77, loss: 0.010091815143823624 2023-01-22 09:06:26.664117: step: 240/77, loss: 7.790833478793502e-05 2023-01-22 09:06:27.998109: step: 244/77, loss: 0.0025701867416501045 2023-01-22 09:06:29.290411: step: 248/77, loss: 0.00022096386237535626 2023-01-22 09:06:30.577063: step: 252/77, loss: 0.02954775094985962 2023-01-22 09:06:31.924682: step: 256/77, loss: 0.0404554083943367 2023-01-22 09:06:33.200900: step: 260/77, loss: 0.048444170504808426 2023-01-22 09:06:34.484732: step: 264/77, loss: 0.016291512176394463 2023-01-22 09:06:35.788673: step: 268/77, loss: 0.00013737943663727492 2023-01-22 09:06:37.137387: step: 272/77, loss: 0.0019735561218112707 2023-01-22 09:06:38.466540: step: 276/77, loss: 4.825846554012969e-05 2023-01-22 09:06:39.813162: step: 280/77, loss: 0.07272603362798691 2023-01-22 09:06:41.156977: step: 284/77, loss: 0.0001095397092285566 2023-01-22 09:06:42.428246: step: 288/77, loss: 5.367924313759431e-05 2023-01-22 09:06:43.751290: step: 292/77, loss: 0.004074377473443747 2023-01-22 09:06:45.043424: step: 296/77, loss: 0.0006497162394225597 2023-01-22 09:06:46.332869: step: 300/77, loss: 0.0055480943992733955 2023-01-22 09:06:47.585874: step: 304/77, loss: 2.211880564573221e-05 2023-01-22 09:06:48.890726: step: 308/77, loss: 7.929770072223619e-05 2023-01-22 09:06:50.202255: step: 312/77, loss: 0.005669772159308195 2023-01-22 09:06:51.492491: step: 316/77, loss: 4.754487599711865e-05 2023-01-22 09:06:52.786546: step: 320/77, loss: 0.03589208796620369 2023-01-22 09:06:54.097993: step: 324/77, loss: 0.01228588167577982 2023-01-22 09:06:55.429598: step: 328/77, loss: 0.002937039127573371 2023-01-22 09:06:56.787329: step: 332/77, loss: 0.09612338989973068 2023-01-22 09:06:58.095786: step: 336/77, loss: 0.04751873016357422 2023-01-22 09:06:59.453928: step: 340/77, loss: 0.0006323698908090591 2023-01-22 09:07:00.776688: step: 344/77, loss: 0.004371923394501209 2023-01-22 09:07:02.063697: step: 348/77, loss: 
8.652117685414851e-05 2023-01-22 09:07:03.362377: step: 352/77, loss: 6.106193904997781e-05 2023-01-22 09:07:04.671930: step: 356/77, loss: 0.015023558400571346 2023-01-22 09:07:05.978196: step: 360/77, loss: 0.24702905118465424 2023-01-22 09:07:07.253039: step: 364/77, loss: 1.2379175424575806 2023-01-22 09:07:08.523821: step: 368/77, loss: 0.08244233578443527 2023-01-22 09:07:09.773260: step: 372/77, loss: 0.057010870426893234 2023-01-22 09:07:11.073518: step: 376/77, loss: 0.035383787006139755 2023-01-22 09:07:12.394800: step: 380/77, loss: 0.02665356732904911 2023-01-22 09:07:13.716532: step: 384/77, loss: 0.00020606214820872992 2023-01-22 09:07:15.006616: step: 388/77, loss: 0.04542049020528793 ================================================== Loss: 0.030 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Chinese: {'template': {'p': 0.9264705882352942, 'r': 0.48091603053435117, 'f1': 0.6331658291457286}, 'slot': {'p': 0.5357142857142857, 'r': 0.01364877161055505, 'f1': 0.026619343389529728}, 'combined': 0.01685445862854646, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Korean: {'template': {'p': 0.9264705882352942, 'r': 0.48091603053435117, 'f1': 0.6331658291457286}, 'slot': {'p': 0.5357142857142857, 'r': 0.01364877161055505, 'f1': 0.026619343389529728}, 'combined': 0.01685445862854646, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Russian: {'template': {'p': 0.9264705882352942, 'r': 0.48091603053435117, 'f1': 0.6331658291457286}, 'slot': {'p': 0.5357142857142857, 'r': 0.01364877161055505, 'f1': 0.026619343389529728}, 'combined': 0.01685445862854646, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:09:08.654565: step: 4/77, loss: 0.004865787457674742 2023-01-22 09:09:09.973354: step: 8/77, loss: 0.002044866792857647 2023-01-22 09:09:11.329137: step: 12/77, loss: 0.004267704673111439 2023-01-22 09:09:12.651523: step: 16/77, loss: 0.0016755261458456516 2023-01-22 09:09:13.959683: step: 20/77, loss: 0.0039411126635968685 2023-01-22 09:09:15.250867: step: 24/77, loss: 0.011254651471972466 2023-01-22 09:09:16.531087: step: 28/77, loss: 0.038321368396282196 2023-01-22 09:09:17.836355: step: 32/77, loss: 4.7605128202121705e-05 2023-01-22 09:09:19.131698: step: 36/77, loss: 0.0011922240955755115 2023-01-22 09:09:20.404030: step: 40/77, loss: 0.0005886238650418818 2023-01-22 09:09:21.713358: step: 44/77, loss: 0.10590315610170364 2023-01-22 09:09:22.969267: step: 48/77, loss: 0.04780968278646469 2023-01-22 09:09:24.269689: step: 52/77, loss: 0.00025455671129748225 2023-01-22 09:09:25.594765: step: 56/77, loss: 0.0006137334275990725 2023-01-22 09:09:26.896105: step: 60/77, loss: 0.006472788285464048 2023-01-22 09:09:28.231903: step: 64/77, loss: 0.000599176564719528 2023-01-22 09:09:29.536214: step: 68/77, loss: 0.03149518370628357 2023-01-22 09:09:30.859185: step: 72/77, loss: 0.03944842144846916 2023-01-22 09:09:32.201352: step: 76/77, loss: 0.0012047612108290195 2023-01-22 09:09:33.469849: step: 80/77, loss: 0.006904952228069305 2023-01-22 09:09:34.749387: step: 84/77, loss: 0.04259157180786133 2023-01-22 09:09:36.056968: step: 88/77, loss: 0.0003069272788707167 2023-01-22 09:09:37.326182: step: 92/77, loss: 0.0002839508524630219 2023-01-22 09:09:38.610607: step: 96/77, loss: 0.03305383026599884 2023-01-22 09:09:39.919787: step: 100/77, loss: 0.0010568131692707539 2023-01-22 09:09:41.219143: step: 104/77, loss: 1.5277404600055888e-05 2023-01-22 09:09:42.516157: step: 108/77, loss: 0.003008649218827486 2023-01-22 09:09:43.850907: step: 112/77, loss: 0.00354586960747838 2023-01-22 09:09:45.211497: step: 116/77, loss: 0.0304990466684103 2023-01-22 09:09:46.561843: step: 120/77, loss: 0.0013609578600153327 2023-01-22 09:09:47.891697: step: 124/77, loss: 8.456044452032074e-05 2023-01-22 09:09:49.165686: step: 128/77, loss: 
0.00015098247968126088 2023-01-22 09:09:50.499927: step: 132/77, loss: 0.0738513171672821 2023-01-22 09:09:51.768011: step: 136/77, loss: 9.456242696614936e-05 2023-01-22 09:09:53.071173: step: 140/77, loss: 0.00028515647863969207 2023-01-22 09:09:54.365853: step: 144/77, loss: 0.08551955223083496 2023-01-22 09:09:55.699246: step: 148/77, loss: 0.005715628154575825 2023-01-22 09:09:57.003331: step: 152/77, loss: 0.00018103979527950287 2023-01-22 09:09:58.375264: step: 156/77, loss: 0.1641266644001007 2023-01-22 09:09:59.706275: step: 160/77, loss: 0.003106701420620084 2023-01-22 09:10:01.006999: step: 164/77, loss: 0.0006164918886497617 2023-01-22 09:10:02.326451: step: 168/77, loss: 1.5115554560907185e-05 2023-01-22 09:10:03.676074: step: 172/77, loss: 0.042311687022447586 2023-01-22 09:10:05.031600: step: 176/77, loss: 0.012022551149129868 2023-01-22 09:10:06.297267: step: 180/77, loss: 0.0002095772506436333 2023-01-22 09:10:07.566630: step: 184/77, loss: 0.005340333096683025 2023-01-22 09:10:08.880916: step: 188/77, loss: 0.00031833286629989743 2023-01-22 09:10:10.244413: step: 192/77, loss: 0.03311312943696976 2023-01-22 09:10:11.559183: step: 196/77, loss: 0.0021633312571793795 2023-01-22 09:10:12.877285: step: 200/77, loss: 6.1553277191706e-05 2023-01-22 09:10:14.209890: step: 204/77, loss: 0.08515505492687225 2023-01-22 09:10:15.482439: step: 208/77, loss: 0.040249742567539215 2023-01-22 09:10:16.819450: step: 212/77, loss: 0.03576679155230522 2023-01-22 09:10:18.148934: step: 216/77, loss: 0.03553896024823189 2023-01-22 09:10:19.488483: step: 220/77, loss: 0.005703304894268513 2023-01-22 09:10:20.802571: step: 224/77, loss: 0.038781605660915375 2023-01-22 09:10:22.086621: step: 228/77, loss: 0.054898396134376526 2023-01-22 09:10:23.412756: step: 232/77, loss: 0.0010724844178184867 2023-01-22 09:10:24.739500: step: 236/77, loss: 0.001274686073884368 2023-01-22 09:10:26.043959: step: 240/77, loss: 0.004894025158137083 2023-01-22 09:10:27.386393: step: 244/77, loss: 8.713423994777258e-06 2023-01-22 09:10:28.706743: step: 248/77, loss: 0.04005669802427292 2023-01-22 09:10:30.025452: step: 252/77, loss: 0.07183679193258286 2023-01-22 09:10:31.340514: step: 256/77, loss: 0.024867655709385872 2023-01-22 09:10:32.648664: step: 260/77, loss: 0.005784600507467985 2023-01-22 09:10:33.941719: step: 264/77, loss: 0.0013034702278673649 2023-01-22 09:10:35.260861: step: 268/77, loss: 1.900579809444025e-05 2023-01-22 09:10:36.623770: step: 272/77, loss: 0.0062468210235238075 2023-01-22 09:10:37.959763: step: 276/77, loss: 0.11800191551446915 2023-01-22 09:10:39.311968: step: 280/77, loss: 9.783930727280676e-05 2023-01-22 09:10:40.638233: step: 284/77, loss: 0.006113114301115274 2023-01-22 09:10:41.968029: step: 288/77, loss: 0.0004273669619578868 2023-01-22 09:10:43.319087: step: 292/77, loss: 5.662437985165525e-08 2023-01-22 09:10:44.620898: step: 296/77, loss: 0.0011079148389399052 2023-01-22 09:10:45.886107: step: 300/77, loss: 0.00418076990172267 2023-01-22 09:10:47.217489: step: 304/77, loss: 0.0006017254781909287 2023-01-22 09:10:48.534982: step: 308/77, loss: 0.008612751960754395 2023-01-22 09:10:49.879941: step: 312/77, loss: 0.0019411951070651412 2023-01-22 09:10:51.180468: step: 316/77, loss: 0.0028733594808727503 2023-01-22 09:10:52.529426: step: 320/77, loss: 0.002464856719598174 2023-01-22 09:10:53.855693: step: 324/77, loss: 0.00017895545170176774 2023-01-22 09:10:55.149053: step: 328/77, loss: 0.0009144581854343414 2023-01-22 09:10:56.501056: step: 332/77, loss: 
0.0010908320546150208 2023-01-22 09:10:57.812629: step: 336/77, loss: 0.000478222849778831 2023-01-22 09:10:59.105383: step: 340/77, loss: 5.4164491302799433e-05 2023-01-22 09:11:00.411798: step: 344/77, loss: 0.00020739153842441738 2023-01-22 09:11:01.727780: step: 348/77, loss: 0.05378265306353569 2023-01-22 09:11:03.044153: step: 352/77, loss: 0.0001235952222486958 2023-01-22 09:11:04.357508: step: 356/77, loss: 1.373855866404483e-06 2023-01-22 09:11:05.707908: step: 360/77, loss: 0.02540205419063568 2023-01-22 09:11:07.043869: step: 364/77, loss: 0.04071442410349846 2023-01-22 09:11:08.392060: step: 368/77, loss: 0.0007655530935153365 2023-01-22 09:11:09.734075: step: 372/77, loss: 0.001699113636277616 2023-01-22 09:11:11.096671: step: 376/77, loss: 0.0020794568117707968 2023-01-22 09:11:12.422442: step: 380/77, loss: 4.321304913901258e-07 2023-01-22 09:11:13.754090: step: 384/77, loss: 5.594743925030343e-05 2023-01-22 09:11:15.106048: step: 388/77, loss: 0.00025005993666127324 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 16} Test Chinese: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.5666666666666667, 'r': 0.015468607825295723, 'f1': 0.03011514614703277}, 'combined': 0.018130547170152382, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 16} Test Korean: {'template': {'p': 0.8923076923076924, 'r': 0.44274809160305345, 'f1': 0.5918367346938777}, 'slot': {'p': 0.5483870967741935, 'r': 0.015468607825295723, 'f1': 0.03008849557522124}, 'combined': 0.017807476973090125, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 16} Test Russian: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.5666666666666667, 'r': 0.015468607825295723, 'f1': 0.03011514614703277}, 'combined': 0.018130547170152382, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:13:08.746004: step: 4/77, loss: 0.00017559500702191144 2023-01-22 09:13:10.037894: step: 8/77, loss: 0.0042427037842571735 2023-01-22 09:13:11.310386: step: 12/77, loss: 0.00010297487460775301 2023-01-22 09:13:12.579527: step: 16/77, loss: 0.0001678880216786638 2023-01-22 09:13:13.871798: step: 20/77, loss: 0.00013916024181526154 2023-01-22 09:13:15.176704: step: 24/77, loss: 1.2970897842023987e-05 2023-01-22 09:13:16.507039: step: 28/77, loss: 0.014945005998015404 2023-01-22 09:13:17.807129: step: 32/77, loss: 0.006989219691604376 2023-01-22 09:13:19.150988: step: 36/77, loss: 0.06558720022439957 2023-01-22 09:13:20.463500: step: 40/77, loss: 0.005165797658264637 2023-01-22 09:13:21.763256: step: 44/77, loss: 0.007396637462079525 2023-01-22 09:13:23.109905: step: 48/77, loss: 0.00520114041864872 2023-01-22 09:13:24.427408: step: 52/77, loss: 0.013953941874206066 2023-01-22 09:13:25.746096: step: 56/77, loss: 0.012274347245693207 2023-01-22 09:13:26.999790: step: 60/77, loss: 0.008953189477324486 2023-01-22 09:13:28.304955: step: 64/77, loss: 0.0005845665000379086 2023-01-22 09:13:29.638654: step: 68/77, loss: 0.0281231626868248 2023-01-22 09:13:30.919117: step: 72/77, loss: 0.00018540435121394694 2023-01-22 09:13:32.223312: step: 76/77, loss: 0.0007554187322966754 2023-01-22 09:13:33.546356: step: 80/77, loss: 0.0003465822373982519 2023-01-22 09:13:34.862576: step: 84/77, loss: 0.0049270521849393845 2023-01-22 09:13:36.178598: step: 88/77, loss: 0.00021336837380658835 2023-01-22 09:13:37.466548: step: 92/77, loss: 2.4633269276819192e-05 2023-01-22 09:13:38.759203: step: 96/77, loss: 1.9529574274201877e-05 2023-01-22 09:13:40.068105: step: 100/77, loss: 0.0019179012160748243 2023-01-22 09:13:41.342693: step: 104/77, loss: 0.11362450569868088 2023-01-22 09:13:42.612967: step: 108/77, loss: 0.008741766214370728 2023-01-22 09:13:43.902645: step: 112/77, loss: 0.014528511092066765 2023-01-22 
09:13:45.205859: step: 116/77, loss: 0.00031644152477383614 2023-01-22 09:13:46.603724: step: 120/77, loss: 0.00115126499440521 2023-01-22 09:13:47.941974: step: 124/77, loss: 0.00016333360690623522 2023-01-22 09:13:49.270380: step: 128/77, loss: 0.0008891576435416937 2023-01-22 09:13:50.630973: step: 132/77, loss: 0.10228507220745087 2023-01-22 09:13:51.945518: step: 136/77, loss: 0.00025623812689445913 2023-01-22 09:13:53.290660: step: 140/77, loss: 1.4330006706586573e-05 2023-01-22 09:13:54.582319: step: 144/77, loss: 0.01142636127769947 2023-01-22 09:13:55.865212: step: 148/77, loss: 0.0019180062226951122 2023-01-22 09:13:57.212018: step: 152/77, loss: 1.33709854708286e-05 2023-01-22 09:13:58.493769: step: 156/77, loss: 0.05863146856427193 2023-01-22 09:13:59.788015: step: 160/77, loss: 0.0013161511160433292 2023-01-22 09:14:01.126807: step: 164/77, loss: 0.00979958102107048 2023-01-22 09:14:02.476219: step: 168/77, loss: 0.008042233996093273 2023-01-22 09:14:03.829242: step: 172/77, loss: 0.0037187940906733274 2023-01-22 09:14:05.112949: step: 176/77, loss: 0.002606304595246911 2023-01-22 09:14:06.398523: step: 180/77, loss: 0.0007418693858198822 2023-01-22 09:14:07.739723: step: 184/77, loss: 1.9120017896057107e-05 2023-01-22 09:14:09.084624: step: 188/77, loss: 0.0017145859310403466 2023-01-22 09:14:10.374400: step: 192/77, loss: 0.002218936337158084 2023-01-22 09:14:11.677894: step: 196/77, loss: 8.69428549776785e-06 2023-01-22 09:14:13.007197: step: 200/77, loss: 0.0025453991256654263 2023-01-22 09:14:14.301033: step: 204/77, loss: 0.00490312185138464 2023-01-22 09:14:15.596465: step: 208/77, loss: 0.00011400566290831193 2023-01-22 09:14:16.941024: step: 212/77, loss: 0.00499136233702302 2023-01-22 09:14:18.228261: step: 216/77, loss: 0.0014865536941215396 2023-01-22 09:14:19.482482: step: 220/77, loss: 0.0018823903519660234 2023-01-22 09:14:20.833497: step: 224/77, loss: 0.0008539587142877281 2023-01-22 09:14:22.131613: step: 228/77, loss: 3.7336732930270955e-05 2023-01-22 09:14:23.445696: step: 232/77, loss: 0.04712314158678055 2023-01-22 09:14:24.770772: step: 236/77, loss: 4.869756230618805e-05 2023-01-22 09:14:26.058654: step: 240/77, loss: 2.1560317691182718e-05 2023-01-22 09:14:27.310644: step: 244/77, loss: 3.2407467642769916e-06 2023-01-22 09:14:28.578652: step: 248/77, loss: 0.007513156160712242 2023-01-22 09:14:29.902136: step: 252/77, loss: 3.0624123610323295e-05 2023-01-22 09:14:31.180680: step: 256/77, loss: 0.0002540667774155736 2023-01-22 09:14:32.435882: step: 260/77, loss: 0.002834441838786006 2023-01-22 09:14:33.703737: step: 264/77, loss: 0.00018282295786775649 2023-01-22 09:14:35.049331: step: 268/77, loss: 0.005002613645046949 2023-01-22 09:14:36.364088: step: 272/77, loss: 0.005725946743041277 2023-01-22 09:14:37.705945: step: 276/77, loss: 0.0191806573420763 2023-01-22 09:14:39.017322: step: 280/77, loss: 0.05966510251164436 2023-01-22 09:14:40.345888: step: 284/77, loss: 0.009301283396780491 2023-01-22 09:14:41.703696: step: 288/77, loss: 0.02285054698586464 2023-01-22 09:14:43.035944: step: 292/77, loss: 0.004749453626573086 2023-01-22 09:14:44.376408: step: 296/77, loss: 0.0002422179386485368 2023-01-22 09:14:45.690922: step: 300/77, loss: 0.0013061568606644869 2023-01-22 09:14:47.000133: step: 304/77, loss: 0.017308104783296585 2023-01-22 09:14:48.357824: step: 308/77, loss: 0.0003579688200261444 2023-01-22 09:14:49.648604: step: 312/77, loss: 0.004894661717116833 2023-01-22 09:14:50.954919: step: 316/77, loss: 0.0006621659849770367 2023-01-22 
09:14:52.263544: step: 320/77, loss: 0.002932305447757244 2023-01-22 09:14:53.556684: step: 324/77, loss: 7.0598043748759665e-06 2023-01-22 09:14:54.840308: step: 328/77, loss: 0.0931524932384491 2023-01-22 09:14:56.131549: step: 332/77, loss: 0.00038095767376944423 2023-01-22 09:14:57.457750: step: 336/77, loss: 0.030057495459914207 2023-01-22 09:14:58.735242: step: 340/77, loss: 0.0006561462068930268 2023-01-22 09:15:00.028937: step: 344/77, loss: 6.38489582343027e-05 2023-01-22 09:15:01.314815: step: 348/77, loss: 0.028494885191321373 2023-01-22 09:15:02.654528: step: 352/77, loss: 0.00018237180483993143 2023-01-22 09:15:03.969325: step: 356/77, loss: 0.009861637838184834 2023-01-22 09:15:05.305896: step: 360/77, loss: 5.5534786952193826e-05 2023-01-22 09:15:06.583243: step: 364/77, loss: 5.960463678178485e-09 2023-01-22 09:15:07.916386: step: 368/77, loss: 8.7584754510317e-05 2023-01-22 09:15:09.233010: step: 372/77, loss: 0.00024132276303134859 2023-01-22 09:15:10.538447: step: 376/77, loss: 0.05859140679240227 2023-01-22 09:15:11.854881: step: 380/77, loss: 0.00022311658540274948 2023-01-22 09:15:13.187474: step: 384/77, loss: 0.006534324958920479 2023-01-22 09:15:14.494982: step: 388/77, loss: 0.00010127961286343634 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.9206349206349206, 'r': 0.44274809160305345, 'f1': 0.5979381443298969}, 'slot': {'p': 0.5769230769230769, 'r': 0.01364877161055505, 'f1': 0.026666666666666672}, 'combined': 0.015945017182130587, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.921875, 'r': 0.45038167938931295, 'f1': 0.6051282051282051}, 'slot': {'p': 0.5769230769230769, 'r': 0.01364877161055505, 'f1': 0.026666666666666672}, 'combined': 0.016136752136752138, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.9206349206349206, 'r': 0.44274809160305345, 'f1': 0.5979381443298969}, 'slot': {'p': 0.5769230769230769, 'r': 0.01364877161055505, 'f1': 0.026666666666666672}, 'combined': 0.015945017182130587, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 
'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:17:10.169202: step: 4/77, loss: 7.629332117176091e-07 2023-01-22 09:17:11.497339: step: 8/77, loss: 0.0009695360204204917 2023-01-22 09:17:12.785044: step: 12/77, loss: 0.02970375493168831 2023-01-22 09:17:14.098885: step: 16/77, loss: 1.8267503037350252e-05 2023-01-22 09:17:15.389156: step: 20/77, loss: 1.6878482711035758e-05 2023-01-22 09:17:16.677848: step: 24/77, loss: 6.711154128424823e-06 2023-01-22 09:17:17.937371: step: 28/77, loss: 0.014280336908996105 2023-01-22 09:17:19.279965: step: 32/77, loss: 0.0032120144460350275 2023-01-22 09:17:20.601053: step: 36/77, loss: 0.0006129553075879812 2023-01-22 09:17:21.875961: step: 40/77, loss: 0.06722747534513474 2023-01-22 09:17:23.179776: step: 44/77, loss: 0.00019596870697569102 2023-01-22 09:17:24.475583: step: 48/77, loss: 0.034420061856508255 2023-01-22 09:17:25.760058: step: 52/77, loss: 0.02980979159474373 2023-01-22 09:17:27.073491: step: 56/77, loss: 7.429834658978507e-05 2023-01-22 09:17:28.404213: step: 60/77, loss: 0.0005003040423616767 2023-01-22 09:17:29.681213: step: 64/77, loss: 0.0010485901730135083 2023-01-22 09:17:30.966295: step: 68/77, loss: 0.0007787790964357555 2023-01-22 09:17:32.314827: step: 72/77, loss: 0.003916820976883173 2023-01-22 09:17:33.610856: step: 76/77, loss: 1.855248774518259e-05 2023-01-22 09:17:34.937527: step: 80/77, loss: 3.254367402405478e-05 2023-01-22 09:17:36.229938: step: 84/77, loss: 0.00047225505113601685 2023-01-22 09:17:37.569373: step: 88/77, loss: 0.002391052432358265 2023-01-22 09:17:38.856209: step: 92/77, loss: 0.0017805419629439712 2023-01-22 09:17:40.121003: step: 96/77, loss: 0.0007184812566265464 2023-01-22 
09:17:41.468483: step: 100/77, loss: 0.016742993146181107 2023-01-22 09:17:42.853079: step: 104/77, loss: 0.02255837246775627 2023-01-22 09:17:44.153968: step: 108/77, loss: 0.00768828671425581 2023-01-22 09:17:45.480232: step: 112/77, loss: 0.0008918981766328216 2023-01-22 09:17:46.789925: step: 116/77, loss: 0.003944310825318098 2023-01-22 09:17:48.061699: step: 120/77, loss: 3.8131292967591435e-05 2023-01-22 09:17:49.363797: step: 124/77, loss: 0.019156042486429214 2023-01-22 09:17:50.664212: step: 128/77, loss: 0.00024989162920974195 2023-01-22 09:17:51.987583: step: 132/77, loss: 4.7405908844666556e-05 2023-01-22 09:17:53.268203: step: 136/77, loss: 0.0008982517756521702 2023-01-22 09:17:54.593020: step: 140/77, loss: 0.00123036396689713 2023-01-22 09:17:55.962915: step: 144/77, loss: 0.0003520891477819532 2023-01-22 09:17:57.238181: step: 148/77, loss: 0.0002538433182053268 2023-01-22 09:17:58.512453: step: 152/77, loss: 0.004786377772688866 2023-01-22 09:17:59.860216: step: 156/77, loss: 6.741286779288203e-05 2023-01-22 09:18:01.193723: step: 160/77, loss: 0.0006734931957907975 2023-01-22 09:18:02.504801: step: 164/77, loss: 0.0005380098591558635 2023-01-22 09:18:03.845934: step: 168/77, loss: 0.001128910225816071 2023-01-22 09:18:05.152995: step: 172/77, loss: 0.0006417105323635042 2023-01-22 09:18:06.463770: step: 176/77, loss: 0.011218901723623276 2023-01-22 09:18:07.790535: step: 180/77, loss: 0.00034760363632813096 2023-01-22 09:18:09.143961: step: 184/77, loss: 0.0246910247951746 2023-01-22 09:18:10.502888: step: 188/77, loss: 9.507672803010792e-05 2023-01-22 09:18:11.830371: step: 192/77, loss: 0.00048033008351922035 2023-01-22 09:18:13.176461: step: 196/77, loss: 0.0012093075783923268 2023-01-22 09:18:14.510769: step: 200/77, loss: 9.417146793566644e-05 2023-01-22 09:18:15.851326: step: 204/77, loss: 0.0002030618634307757 2023-01-22 09:18:17.118391: step: 208/77, loss: 1.4598433153878432e-05 2023-01-22 09:18:18.483635: step: 212/77, loss: 0.00048350432189181447 2023-01-22 09:18:19.809518: step: 216/77, loss: 0.0005155607359483838 2023-01-22 09:18:21.155497: step: 220/77, loss: 1.4626096344727557e-05 2023-01-22 09:18:22.443365: step: 224/77, loss: 0.0366053506731987 2023-01-22 09:18:23.723576: step: 228/77, loss: 0.0017824744572862983 2023-01-22 09:18:25.089789: step: 232/77, loss: 6.202932127052918e-06 2023-01-22 09:18:26.456281: step: 236/77, loss: 0.001113256555981934 2023-01-22 09:18:27.754089: step: 240/77, loss: 0.00209731119684875 2023-01-22 09:18:29.061242: step: 244/77, loss: 0.0005989335477352142 2023-01-22 09:18:30.404101: step: 248/77, loss: 4.602641183737433e-06 2023-01-22 09:18:31.773527: step: 252/77, loss: 0.00042141234735026956 2023-01-22 09:18:33.094202: step: 256/77, loss: 1.1831454003186082e-06 2023-01-22 09:18:34.382238: step: 260/77, loss: 0.0005325632519088686 2023-01-22 09:18:35.730861: step: 264/77, loss: 0.08383160829544067 2023-01-22 09:18:37.032353: step: 268/77, loss: 0.0002262178750243038 2023-01-22 09:18:38.313610: step: 272/77, loss: 0.034328069537878036 2023-01-22 09:18:39.635368: step: 276/77, loss: 0.0007600605022162199 2023-01-22 09:18:40.927062: step: 280/77, loss: 1.2372864148346707e-05 2023-01-22 09:18:42.241645: step: 284/77, loss: 0.028865935280919075 2023-01-22 09:18:43.545565: step: 288/77, loss: 0.008949083276093006 2023-01-22 09:18:44.849190: step: 292/77, loss: 0.024472959339618683 2023-01-22 09:18:46.206639: step: 296/77, loss: 0.1657877266407013 2023-01-22 09:18:47.516475: step: 300/77, loss: 0.0002879844105336815 2023-01-22 
09:18:48.835333: step: 304/77, loss: 0.08509407937526703 2023-01-22 09:18:50.189511: step: 308/77, loss: 0.0004706513718701899 2023-01-22 09:18:51.499889: step: 312/77, loss: 0.029056323692202568 2023-01-22 09:18:52.803661: step: 316/77, loss: 0.0041311513632535934 2023-01-22 09:18:54.189367: step: 320/77, loss: 0.03252348303794861 2023-01-22 09:18:55.479234: step: 324/77, loss: 9.75675993686309e-06 2023-01-22 09:18:56.810228: step: 328/77, loss: 0.013601149432361126 2023-01-22 09:18:58.157224: step: 332/77, loss: 8.16871615825221e-05 2023-01-22 09:18:59.441307: step: 336/77, loss: 0.000691729539539665 2023-01-22 09:19:00.775272: step: 340/77, loss: 0.00044653864460997283 2023-01-22 09:19:02.100680: step: 344/77, loss: 0.0033582733012735844 2023-01-22 09:19:03.458453: step: 348/77, loss: 5.8396275562699884e-05 2023-01-22 09:19:04.790484: step: 352/77, loss: 0.0015448533231392503 2023-01-22 09:19:06.109491: step: 356/77, loss: 0.0003613264416344464 2023-01-22 09:19:07.431832: step: 360/77, loss: 5.536605749512091e-05 2023-01-22 09:19:08.729562: step: 364/77, loss: 0.00011568279296625406 2023-01-22 09:19:10.056411: step: 368/77, loss: 7.776810525683686e-05 2023-01-22 09:19:11.361785: step: 372/77, loss: 0.026629647240042686 2023-01-22 09:19:12.655636: step: 376/77, loss: 7.089160499162972e-05 2023-01-22 09:19:14.020631: step: 380/77, loss: 0.00019869825337082148 2023-01-22 09:19:15.368268: step: 384/77, loss: 0.004738472402095795 2023-01-22 09:19:16.677786: step: 388/77, loss: 0.005280292592942715 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050055207949944794, 'epoch': 18} Test Chinese: {'template': {'p': 0.9322033898305084, 'r': 0.4198473282442748, 'f1': 0.5789473684210527}, 'slot': {'p': 0.52, 'r': 0.011828935395814377, 'f1': 0.023131672597864767}, 'combined': 0.013392020977711182, 'epoch': 18} Dev Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050055207949944794, 'epoch': 18} Test Korean: {'template': {'p': 0.9152542372881356, 'r': 0.4122137404580153, 'f1': 0.568421052631579}, 'slot': {'p': 0.52, 'r': 0.011828935395814377, 'f1': 0.023131672597864767}, 'combined': 0.013148529687207344, 'epoch': 18} Dev Russian: {'template': {'p': 0.9714285714285714, 'r': 0.5666666666666667, 'f1': 0.7157894736842105}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050055207949944794, 'epoch': 18} Test Russian: {'template': {'p': 0.9322033898305084, 'r': 0.4198473282442748, 'f1': 0.5789473684210527}, 'slot': {'p': 0.52, 'r': 0.011828935395814377, 'f1': 0.023131672597864767}, 'combined': 0.013392020977711182, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} 
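Note on the loss lines above: they appear to be printed once per optimizer update. With --accumulate_step 4 the step counter advances by four micro-batches per line (4/77, 8/77, ..., 388/77), and the fixed /77 denominator evidently does not track that counter. A minimal, torch-free sketch of this logging cadence; the loop body and the stub loss are hypothetical stand-ins, not taken from train.py:

    import datetime

    ACCUMULATE_STEP = 4   # --accumulate_step 4 on the command line
    MICRO_BATCHES = 388   # micro-batches per epoch, per the last logged step

    def micro_batch_loss(i):
        # Stub standing in for the model forward/backward in train.py.
        return 1.0 / (i * i)

    for i in range(1, MICRO_BATCHES + 1):
        loss = micro_batch_loss(i)
        # One optimizer update, and one log line, per ACCUMULATE_STEP micro-batches.
        if i % ACCUMULATE_STEP == 0:
            now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
            print(f"{now}: step: {i}/77, loss: {loss}")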
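Note on the metric dicts: judging purely from the logged numbers, every 'f1' is the harmonic mean of its 'p' and 'r', and 'combined' is the product of the template and slot F1 scores. A minimal sketch of that arithmetic, assumed rather than taken from the actual scorer:

    def f1(p, r):
        # Harmonic mean of precision and recall; 0.0 when both are zero.
        return 2 * p * r / (p + r) if p + r > 0 else 0.0

    def combined_score(template_f1, slot_f1):
        # 'combined' in the log matches template_f1 * slot_f1 for every entry.
        return template_f1 * slot_f1

Epoch 18 dev check: f1(0.9714285714285714, 0.5666666666666667) gives 0.7157894736842105, and 0.7157894736842105 * 0.06993006993006994 gives 0.050055207949944794, matching the 'combined' value above.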
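Note on the 'Current best result' blocks that follow: they keep pointing at epoch 3 even though later epochs score higher on test (e.g. epoch 18's test combined of about 0.0134 versus epoch 3's 0.0059), so selection evidently uses only the dev 'combined' score. The later epochs that tie epoch 3 exactly (19-22 below, dev combined 0.05179909351586346) do not displace it, which suggests a strict greater-than comparison. A sketch of that bookkeeping under those assumptions, not taken from train.py:

    def update_best(best, result):
        # Keep the epoch with the strictly higher dev 'combined'; a tie leaves
        # the incumbent in place, as the repeated epoch-3 blocks suggest.
        if best is None or result['dev']['combined'] > best['dev']['combined']:
            return result
        return best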
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:21:10.416641: step: 4/77, loss: 0.017067037522792816 2023-01-22 09:21:11.756085: step: 8/77, loss: 0.009045921266078949 2023-01-22 09:21:13.025633: step: 12/77, loss: 0.0011866106651723385 2023-01-22 09:21:14.321496: step: 16/77, loss: 0.0033251740969717503 2023-01-22 09:21:15.577801: step: 20/77, loss: 0.00018421841377858073 2023-01-22 09:21:16.921059: step: 24/77, loss: 0.00010344362817704678 2023-01-22 09:21:18.234120: step: 28/77, loss: 0.0037648696452379227 2023-01-22 09:21:19.574595: step: 32/77, loss: 0.010512863285839558 2023-01-22 09:21:20.913035: step: 36/77, loss: 0.037238556891679764 2023-01-22 09:21:22.156310: step: 40/77, loss: 4.192236883682199e-05 2023-01-22 09:21:23.430306: step: 44/77, loss: 1.012978009384824e-05 2023-01-22 09:21:24.730169: step: 48/77, loss: 0.00015208007243927568 2023-01-22 09:21:25.973122: step: 52/77, loss: 0.0011588814668357372 2023-01-22 09:21:27.266141: step: 56/77, loss: 0.03443307429552078 2023-01-22 09:21:28.585103: step: 60/77, loss: 0.0003580081684049219 2023-01-22 09:21:29.860084: step: 64/77, loss: 4.402750346343964e-05 2023-01-22 09:21:31.159606: step: 68/77, loss: 0.0008609103970229626 2023-01-22 09:21:32.490843: step: 72/77, loss: 9.442742157261819e-05 2023-01-22 09:21:33.747858: step: 76/77, loss: 
0.013628042303025723 2023-01-22 09:21:35.029712: step: 80/77, loss: 0.00020224417676217854 2023-01-22 09:21:36.293771: step: 84/77, loss: 0.024999741464853287 2023-01-22 09:21:37.593527: step: 88/77, loss: 0.0008359450148418546 2023-01-22 09:21:38.905996: step: 92/77, loss: 0.025218121707439423 2023-01-22 09:21:40.222540: step: 96/77, loss: 0.03694126009941101 2023-01-22 09:21:41.524646: step: 100/77, loss: 0.00014883764379192144 2023-01-22 09:21:42.862584: step: 104/77, loss: 0.0005104810697957873 2023-01-22 09:21:44.176571: step: 108/77, loss: 1.234683168149786e-05 2023-01-22 09:21:45.481302: step: 112/77, loss: 0.001363265560939908 2023-01-22 09:21:46.747139: step: 116/77, loss: 0.0007034118170849979 2023-01-22 09:21:48.064059: step: 120/77, loss: 0.0013016803422942758 2023-01-22 09:21:49.329732: step: 124/77, loss: 3.0888643323123688e-06 2023-01-22 09:21:50.610740: step: 128/77, loss: 0.03757849708199501 2023-01-22 09:21:51.895716: step: 132/77, loss: 0.14182689785957336 2023-01-22 09:21:53.177493: step: 136/77, loss: 0.004270021803677082 2023-01-22 09:21:54.475571: step: 140/77, loss: 9.920346201397479e-05 2023-01-22 09:21:55.775720: step: 144/77, loss: 0.036307238042354584 2023-01-22 09:21:57.109238: step: 148/77, loss: 0.0014702532207593322 2023-01-22 09:21:58.429105: step: 152/77, loss: 1.8030320347861561e-07 2023-01-22 09:21:59.793815: step: 156/77, loss: 2.234895328001585e-05 2023-01-22 09:22:01.088811: step: 160/77, loss: 0.010909469798207283 2023-01-22 09:22:02.387601: step: 164/77, loss: 6.514219421660528e-05 2023-01-22 09:22:03.703708: step: 168/77, loss: 0.07781477272510529 2023-01-22 09:22:04.999226: step: 172/77, loss: 0.0005408531869761646 2023-01-22 09:22:06.309241: step: 176/77, loss: 0.003037202637642622 2023-01-22 09:22:07.643793: step: 180/77, loss: 4.200325929559767e-05 2023-01-22 09:22:09.015410: step: 184/77, loss: 0.00032234640093520284 2023-01-22 09:22:10.325362: step: 188/77, loss: 0.023665133863687515 2023-01-22 09:22:11.591086: step: 192/77, loss: 0.009753705002367496 2023-01-22 09:22:12.921751: step: 196/77, loss: 0.00011679470480885357 2023-01-22 09:22:14.255797: step: 200/77, loss: 0.004883726127445698 2023-01-22 09:22:15.592956: step: 204/77, loss: 0.0939313992857933 2023-01-22 09:22:16.911618: step: 208/77, loss: 0.011193937622010708 2023-01-22 09:22:18.234747: step: 212/77, loss: 0.01989990472793579 2023-01-22 09:22:19.511185: step: 216/77, loss: 0.0007058902410790324 2023-01-22 09:22:20.827101: step: 220/77, loss: 0.009262369014322758 2023-01-22 09:22:22.147104: step: 224/77, loss: 0.02305023930966854 2023-01-22 09:22:23.447211: step: 228/77, loss: 9.288202818424907e-06 2023-01-22 09:22:24.773665: step: 232/77, loss: 1.7909143934957683e-05 2023-01-22 09:22:26.069272: step: 236/77, loss: 0.012563599273562431 2023-01-22 09:22:27.374733: step: 240/77, loss: 9.349367246613838e-06 2023-01-22 09:22:28.665059: step: 244/77, loss: 0.011641522869467735 2023-01-22 09:22:30.010015: step: 248/77, loss: 0.0009031022782437503 2023-01-22 09:22:31.383941: step: 252/77, loss: 1.2669736861425918e-05 2023-01-22 09:22:32.681223: step: 256/77, loss: 0.0005724854418076575 2023-01-22 09:22:33.965444: step: 260/77, loss: 0.0295106191188097 2023-01-22 09:22:35.254541: step: 264/77, loss: 0.025614425539970398 2023-01-22 09:22:36.540006: step: 268/77, loss: 3.429138450883329e-05 2023-01-22 09:22:37.821203: step: 272/77, loss: 0.036679305136203766 2023-01-22 09:22:39.085874: step: 276/77, loss: 0.0019726853352040052 2023-01-22 09:22:40.484185: step: 280/77, loss: 
0.005493839271366596 2023-01-22 09:22:41.790818: step: 284/77, loss: 2.5428231310797855e-05 2023-01-22 09:22:43.146402: step: 288/77, loss: 0.0011763731017708778 2023-01-22 09:22:44.470873: step: 292/77, loss: 7.644234756298829e-07 2023-01-22 09:22:45.804478: step: 296/77, loss: 0.0014449841110035777 2023-01-22 09:22:47.123165: step: 300/77, loss: 0.0003704916452988982 2023-01-22 09:22:48.399538: step: 304/77, loss: 0.00026468545547686517 2023-01-22 09:22:49.715825: step: 308/77, loss: 0.013125604018568993 2023-01-22 09:22:51.052231: step: 312/77, loss: 0.012028532102704048 2023-01-22 09:22:52.377244: step: 316/77, loss: 5.268731001706328e-06 2023-01-22 09:22:53.699829: step: 320/77, loss: 4.079467544215731e-05 2023-01-22 09:22:55.026765: step: 324/77, loss: 0.0522034578025341 2023-01-22 09:22:56.370403: step: 328/77, loss: 6.846023097750731e-06 2023-01-22 09:22:57.720362: step: 332/77, loss: 2.2238946257857606e-05 2023-01-22 09:22:59.051653: step: 336/77, loss: 0.0008007865399122238 2023-01-22 09:23:00.345329: step: 340/77, loss: 0.03849714621901512 2023-01-22 09:23:01.663879: step: 344/77, loss: 0.0012156052980571985 2023-01-22 09:23:03.000152: step: 348/77, loss: 0.0004726042097900063 2023-01-22 09:23:04.305843: step: 352/77, loss: 0.00023819933994673193 2023-01-22 09:23:05.606745: step: 356/77, loss: 1.0118911632162053e-05 2023-01-22 09:23:06.974700: step: 360/77, loss: 0.013653255999088287 2023-01-22 09:23:08.278711: step: 364/77, loss: 2.4288812028316897e-07 2023-01-22 09:23:09.590962: step: 368/77, loss: 2.7067086193710566e-05 2023-01-22 09:23:10.879216: step: 372/77, loss: 2.21501959458692e-05 2023-01-22 09:23:12.215444: step: 376/77, loss: 0.000515318475663662 2023-01-22 09:23:13.549856: step: 380/77, loss: 8.078628525254317e-06 2023-01-22 09:23:14.880756: step: 384/77, loss: 0.009522791020572186 2023-01-22 09:23:16.175587: step: 388/77, loss: 0.00013660037075169384 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.5, 'r': 0.012738853503184714, 'f1': 0.02484472049689441}, 'combined': 0.0149575358093548, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.48148148148148145, 'r': 0.011828935395814377, 'f1': 0.023090586145648313}, 'combined': 0.013901475332584188, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.9076923076923077, 'r': 0.45038167938931295, 'f1': 0.6020408163265306}, 'slot': {'p': 0.5, 'r': 0.012738853503184714, 'f1': 0.02484472049689441}, 'combined': 0.0149575358093548, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 
'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:25:09.900381: step: 4/77, loss: 0.004518816247582436 2023-01-22 09:25:11.208164: step: 8/77, loss: 8.559234629501589e-06 2023-01-22 09:25:12.531339: step: 12/77, loss: 0.000525409122928977 2023-01-22 09:25:13.859758: step: 16/77, loss: 1.2267551937839016e-05 2023-01-22 09:25:15.177670: step: 20/77, loss: 0.0022193510085344315 2023-01-22 09:25:16.422440: step: 24/77, loss: 0.012789689004421234 2023-01-22 09:25:17.677210: step: 28/77, loss: 3.808999827015214e-05 2023-01-22 09:25:18.977026: step: 32/77, loss: 0.03038734570145607 2023-01-22 09:25:20.270458: step: 36/77, loss: 2.490369661245495e-05 2023-01-22 09:25:21.649146: step: 40/77, loss: 0.00401505921036005 2023-01-22 09:25:22.969457: step: 44/77, loss: 0.0009303762344643474 2023-01-22 09:25:24.237131: step: 48/77, loss: 0.0014327040407806635 2023-01-22 09:25:25.541556: step: 52/77, loss: 2.910674811573699e-05 2023-01-22 09:25:26.843883: step: 56/77, loss: 0.0019094038289040327 2023-01-22 09:25:28.217538: step: 60/77, loss: 
0.009724936448037624 2023-01-22 09:25:29.479399: step: 64/77, loss: 6.951568502699956e-05 2023-01-22 09:25:30.823382: step: 68/77, loss: 0.00016013227286748588 2023-01-22 09:25:32.193185: step: 72/77, loss: 0.048396218568086624 2023-01-22 09:25:33.498219: step: 76/77, loss: 1.948331919265911e-05 2023-01-22 09:25:34.826457: step: 80/77, loss: 4.082077066414058e-05 2023-01-22 09:25:36.175211: step: 84/77, loss: 0.0005087255267426372 2023-01-22 09:25:37.483121: step: 88/77, loss: 0.002552201272919774 2023-01-22 09:25:38.781020: step: 92/77, loss: 4.3950120016233996e-05 2023-01-22 09:25:40.085206: step: 96/77, loss: 0.011920797638595104 2023-01-22 09:25:41.405029: step: 100/77, loss: 0.028989894315600395 2023-01-22 09:25:42.740120: step: 104/77, loss: 0.005524639040231705 2023-01-22 09:25:44.110524: step: 108/77, loss: 3.5203196603106335e-05 2023-01-22 09:25:45.398339: step: 112/77, loss: 0.0001979176013264805 2023-01-22 09:25:46.687378: step: 116/77, loss: 0.05707313120365143 2023-01-22 09:25:48.037646: step: 120/77, loss: 0.00033089410862885416 2023-01-22 09:25:49.340833: step: 124/77, loss: 2.6788911782205105e-05 2023-01-22 09:25:50.633883: step: 128/77, loss: 0.00017451458552386612 2023-01-22 09:25:51.949824: step: 132/77, loss: 7.898695002950262e-06 2023-01-22 09:25:53.230793: step: 136/77, loss: 0.0010085459798574448 2023-01-22 09:25:54.554426: step: 140/77, loss: 1.5973162135196617e-06 2023-01-22 09:25:55.830340: step: 144/77, loss: 0.000650427711661905 2023-01-22 09:25:57.141736: step: 148/77, loss: 1.05796516436385e-06 2023-01-22 09:25:58.437547: step: 152/77, loss: 0.0015735579654574394 2023-01-22 09:25:59.794074: step: 156/77, loss: 0.02899871952831745 2023-01-22 09:26:01.115178: step: 160/77, loss: 0.01198390033096075 2023-01-22 09:26:02.466357: step: 164/77, loss: 0.0010800587479025126 2023-01-22 09:26:03.800625: step: 168/77, loss: 0.0012106273788958788 2023-01-22 09:26:05.084395: step: 172/77, loss: 0.0004172230255790055 2023-01-22 09:26:06.433848: step: 176/77, loss: 0.01801799237728119 2023-01-22 09:26:07.756681: step: 180/77, loss: 0.025919422507286072 2023-01-22 09:26:09.124529: step: 184/77, loss: 8.135867801684071e-07 2023-01-22 09:26:10.446750: step: 188/77, loss: 0.001136435312218964 2023-01-22 09:26:11.747105: step: 192/77, loss: 1.6674499420332722e-05 2023-01-22 09:26:13.016579: step: 196/77, loss: 0.016235962510108948 2023-01-22 09:26:14.319277: step: 200/77, loss: 0.017281893640756607 2023-01-22 09:26:15.661557: step: 204/77, loss: 0.0018460007850080729 2023-01-22 09:26:17.006659: step: 208/77, loss: 0.004925033543258905 2023-01-22 09:26:18.369601: step: 212/77, loss: 5.231866816757247e-05 2023-01-22 09:26:19.713830: step: 216/77, loss: 3.397252612558077e-06 2023-01-22 09:26:21.039279: step: 220/77, loss: 0.00013918301556259394 2023-01-22 09:26:22.360176: step: 224/77, loss: 0.00028344333986751735 2023-01-22 09:26:23.643576: step: 228/77, loss: 1.5092291505425237e-05 2023-01-22 09:26:24.968656: step: 232/77, loss: 0.0009095754357986152 2023-01-22 09:26:26.251522: step: 236/77, loss: 7.0954274633550085e-06 2023-01-22 09:26:27.575890: step: 240/77, loss: 0.01879284903407097 2023-01-22 09:26:28.872577: step: 244/77, loss: 0.03150808438658714 2023-01-22 09:26:30.173594: step: 248/77, loss: 0.029587578028440475 2023-01-22 09:26:31.502843: step: 252/77, loss: 0.003229555208235979 2023-01-22 09:26:32.830155: step: 256/77, loss: 0.0002197189605794847 2023-01-22 09:26:34.125702: step: 260/77, loss: 5.885936502636469e-07 2023-01-22 09:26:35.460595: step: 264/77, loss: 
0.0001560926903039217 2023-01-22 09:26:36.809095: step: 268/77, loss: 0.09169185161590576 2023-01-22 09:26:38.131141: step: 272/77, loss: 0.00018706178525462747 2023-01-22 09:26:39.455338: step: 276/77, loss: 1.0784182450152002e-05 2023-01-22 09:26:40.753879: step: 280/77, loss: 0.00021076208213344216 2023-01-22 09:26:42.006995: step: 284/77, loss: 0.000368105829693377 2023-01-22 09:26:43.315417: step: 288/77, loss: 4.470327041872224e-07 2023-01-22 09:26:44.665804: step: 292/77, loss: 0.03098980151116848 2023-01-22 09:26:45.949294: step: 296/77, loss: 0.014952097088098526 2023-01-22 09:26:47.241447: step: 300/77, loss: 0.0014895956264808774 2023-01-22 09:26:48.594977: step: 304/77, loss: 0.0001381791225867346 2023-01-22 09:26:49.929391: step: 308/77, loss: 3.8397406569856685e-06 2023-01-22 09:26:51.247097: step: 312/77, loss: 0.00017704522178974003 2023-01-22 09:26:52.582128: step: 316/77, loss: 0.06191083788871765 2023-01-22 09:26:53.901791: step: 320/77, loss: 5.371192855818663e-06 2023-01-22 09:26:55.198300: step: 324/77, loss: 1.8029529655905208e-06 2023-01-22 09:26:56.454101: step: 328/77, loss: 4.1384537325939164e-05 2023-01-22 09:26:57.753250: step: 332/77, loss: 0.00034231197787448764 2023-01-22 09:26:59.069790: step: 336/77, loss: 2.9323089165700367e-06 2023-01-22 09:27:00.419518: step: 340/77, loss: 0.00047633511712774634 2023-01-22 09:27:01.707639: step: 344/77, loss: 6.231231054698583e-06 2023-01-22 09:27:03.054134: step: 348/77, loss: 7.328856736421585e-05 2023-01-22 09:27:04.353175: step: 352/77, loss: 0.015188097953796387 2023-01-22 09:27:05.660775: step: 356/77, loss: 0.010481033474206924 2023-01-22 09:27:06.937156: step: 360/77, loss: 0.012685518711805344 2023-01-22 09:27:08.293078: step: 364/77, loss: 0.00010546360135776922 2023-01-22 09:27:09.611801: step: 368/77, loss: 0.1046970933675766 2023-01-22 09:27:10.989791: step: 372/77, loss: 0.004260138608515263 2023-01-22 09:27:12.315315: step: 376/77, loss: 6.853960803709924e-05 2023-01-22 09:27:13.609079: step: 380/77, loss: 0.008298509754240513 2023-01-22 09:27:14.952515: step: 384/77, loss: 1.6012103515095077e-05 2023-01-22 09:27:16.252636: step: 388/77, loss: 0.0005622516036964953 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9230769230769231, 'r': 0.4580152671755725, 'f1': 0.6122448979591837}, 'slot': {'p': 0.5172413793103449, 'r': 0.01364877161055505, 'f1': 0.026595744680851068}, 'combined': 0.016283108988276163, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.921875, 'r': 0.45038167938931295, 'f1': 0.6051282051282051}, 'slot': {'p': 0.5172413793103449, 'r': 0.01364877161055505, 'f1': 0.026595744680851068}, 'combined': 0.016093835242771415, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9230769230769231, 'r': 0.4580152671755725, 'f1': 0.6122448979591837}, 'slot': {'p': 0.5172413793103449, 'r': 0.01364877161055505, 'f1': 
0.026595744680851068}, 'combined': 0.016283108988276163, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:29:11.611774: step: 4/77, loss: 0.00268231681548059 2023-01-22 09:29:12.880963: step: 8/77, loss: 0.010495544411242008 2023-01-22 09:29:14.167492: step: 12/77, loss: 0.0004444315272849053 2023-01-22 09:29:15.514075: step: 16/77, loss: 2.9283808544278145e-05 2023-01-22 09:29:16.807848: step: 20/77, loss: 0.0009628410916775465 2023-01-22 09:29:18.042457: step: 24/77, loss: 7.748473080937401e-07 2023-01-22 09:29:19.309667: step: 28/77, loss: 4.9448035497334786e-06 2023-01-22 09:29:20.637737: step: 32/77, loss: 0.008351600728929043 2023-01-22 09:29:21.977241: step: 36/77, loss: 3.924445263692178e-05 2023-01-22 09:29:23.292431: step: 40/77, loss: 0.0009096739813685417 2023-01-22 
09:29:24.624282: step: 44/77, loss: 0.01073978841304779 2023-01-22 09:29:25.905289: step: 48/77, loss: 0.0011748617980629206 2023-01-22 09:29:27.221481: step: 52/77, loss: 1.230300676979823e-05 2023-01-22 09:29:28.498955: step: 56/77, loss: 0.04393262788653374 2023-01-22 09:29:29.815446: step: 60/77, loss: 0.00020851498993579298 2023-01-22 09:29:31.157415: step: 64/77, loss: 0.0004167587321717292 2023-01-22 09:29:32.456201: step: 68/77, loss: 4.6296310756588355e-05 2023-01-22 09:29:33.756318: step: 72/77, loss: 0.011651339009404182 2023-01-22 09:29:35.071158: step: 76/77, loss: 6.170851702336222e-05 2023-01-22 09:29:36.367230: step: 80/77, loss: 0.00012924282054882497 2023-01-22 09:29:37.651718: step: 84/77, loss: 4.0912304939411115e-06 2023-01-22 09:29:38.953110: step: 88/77, loss: 1.4483674704024452e-06 2023-01-22 09:29:40.285740: step: 92/77, loss: 6.428196502383798e-05 2023-01-22 09:29:41.562813: step: 96/77, loss: 1.910252649395261e-06 2023-01-22 09:29:42.930455: step: 100/77, loss: 0.0006389992777258158 2023-01-22 09:29:44.210212: step: 104/77, loss: 2.9110062314430252e-05 2023-01-22 09:29:45.487509: step: 108/77, loss: 0.02464928850531578 2023-01-22 09:29:46.782337: step: 112/77, loss: 2.1680718873540172e-06 2023-01-22 09:29:48.106893: step: 116/77, loss: 0.0006864761235192418 2023-01-22 09:29:49.425943: step: 120/77, loss: 3.829562444934709e-07 2023-01-22 09:29:50.721396: step: 124/77, loss: 7.570409798063338e-05 2023-01-22 09:29:52.033334: step: 128/77, loss: 0.001121019246056676 2023-01-22 09:29:53.349708: step: 132/77, loss: 1.2084574336768128e-06 2023-01-22 09:29:54.631109: step: 136/77, loss: 1.795521939129685e-06 2023-01-22 09:29:55.940417: step: 140/77, loss: 0.0019349417416378856 2023-01-22 09:29:57.268236: step: 144/77, loss: 0.0006557057495228946 2023-01-22 09:29:58.600552: step: 148/77, loss: 0.0007242461433634162 2023-01-22 09:29:59.906864: step: 152/77, loss: 2.154836147383321e-05 2023-01-22 09:30:01.235261: step: 156/77, loss: 1.4111105883785058e-06 2023-01-22 09:30:02.523045: step: 160/77, loss: 1.416931627318263e-05 2023-01-22 09:30:03.808698: step: 164/77, loss: 0.0006565083749592304 2023-01-22 09:30:05.147706: step: 168/77, loss: 1.5333089322666638e-06 2023-01-22 09:30:06.441583: step: 172/77, loss: 1.4919568457116839e-05 2023-01-22 09:30:07.775883: step: 176/77, loss: 0.02308845706284046 2023-01-22 09:30:09.073422: step: 180/77, loss: 0.0008895195205695927 2023-01-22 09:30:10.350635: step: 184/77, loss: 0.00043443485628813505 2023-01-22 09:30:11.703923: step: 188/77, loss: 0.051694635301828384 2023-01-22 09:30:13.049556: step: 192/77, loss: 0.00227850372903049 2023-01-22 09:30:14.412325: step: 196/77, loss: 0.021429726853966713 2023-01-22 09:30:15.727017: step: 200/77, loss: 0.00030971429077908397 2023-01-22 09:30:17.081134: step: 204/77, loss: 0.0001510155270807445 2023-01-22 09:30:18.374319: step: 208/77, loss: 0.002237622393295169 2023-01-22 09:30:19.662649: step: 212/77, loss: 2.0001190932816826e-05 2023-01-22 09:30:20.978459: step: 216/77, loss: 0.05403360351920128 2023-01-22 09:30:22.258639: step: 220/77, loss: 7.650639599887654e-05 2023-01-22 09:30:23.598138: step: 224/77, loss: 0.0007797401631250978 2023-01-22 09:30:24.898628: step: 228/77, loss: 0.00021848056348972023 2023-01-22 09:30:26.182512: step: 232/77, loss: 0.00012967440125066787 2023-01-22 09:30:27.505065: step: 236/77, loss: 0.002810570877045393 2023-01-22 09:30:28.784225: step: 240/77, loss: 0.002720152959227562 2023-01-22 09:30:30.099929: step: 244/77, loss: 4.8737554607214406e-05 2023-01-22 
09:30:31.404382: step: 248/77, loss: 0.0044198185205459595 2023-01-22 09:30:32.747614: step: 252/77, loss: 1.1010704838554375e-05 2023-01-22 09:30:34.033092: step: 256/77, loss: 0.00017942961130756885 2023-01-22 09:30:35.337804: step: 260/77, loss: 0.02820507250726223 2023-01-22 09:30:36.692220: step: 264/77, loss: 2.5000796085805632e-05 2023-01-22 09:30:38.011127: step: 268/77, loss: 5.47738045497681e-06 2023-01-22 09:30:39.305977: step: 272/77, loss: 0.057624347507953644 2023-01-22 09:30:40.606840: step: 276/77, loss: 0.0057439375668764114 2023-01-22 09:30:41.921808: step: 280/77, loss: 2.388442680967273e-06 2023-01-22 09:30:43.261749: step: 284/77, loss: 0.00021082670718897134 2023-01-22 09:30:44.602225: step: 288/77, loss: 0.00012094304111087695 2023-01-22 09:30:45.939041: step: 292/77, loss: 0.026059003546833992 2023-01-22 09:30:47.238448: step: 296/77, loss: 0.03081570379436016 2023-01-22 09:30:48.507578: step: 300/77, loss: 0.00011321669444441795 2023-01-22 09:30:49.786777: step: 304/77, loss: 0.005827941931784153 2023-01-22 09:30:51.097458: step: 308/77, loss: 0.058896828442811966 2023-01-22 09:30:52.398224: step: 312/77, loss: 0.00017250858945772052 2023-01-22 09:30:53.724591: step: 316/77, loss: 0.002004768932238221 2023-01-22 09:30:55.062380: step: 320/77, loss: 2.2721074856235646e-05 2023-01-22 09:30:56.322014: step: 324/77, loss: 0.0005667175282724202 2023-01-22 09:30:57.622324: step: 328/77, loss: 0.03825288265943527 2023-01-22 09:30:58.938572: step: 332/77, loss: 0.00019563539535738528 2023-01-22 09:31:00.209052: step: 336/77, loss: 5.884109214093769e-06 2023-01-22 09:31:01.542978: step: 340/77, loss: 0.018313970416784286 2023-01-22 09:31:02.832739: step: 344/77, loss: 0.00017678536823950708 2023-01-22 09:31:04.111946: step: 348/77, loss: 1.6817306459415704e-05 2023-01-22 09:31:05.397541: step: 352/77, loss: 0.0017116623930633068 2023-01-22 09:31:06.692798: step: 356/77, loss: 0.00025027766241692007 2023-01-22 09:31:07.989137: step: 360/77, loss: 0.07846976816654205 2023-01-22 09:31:09.330464: step: 364/77, loss: 1.776100589268026e-06 2023-01-22 09:31:10.658638: step: 368/77, loss: 0.0022615143097937107 2023-01-22 09:31:11.947879: step: 372/77, loss: 5.692157856174163e-07 2023-01-22 09:31:13.307308: step: 376/77, loss: 8.089242328424007e-06 2023-01-22 09:31:14.625353: step: 380/77, loss: 6.233382737264037e-05 2023-01-22 09:31:15.908452: step: 384/77, loss: 9.111697727348655e-05 2023-01-22 09:31:17.228352: step: 388/77, loss: 0.00011282552441116422 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 0.8695652173913043, 'r': 0.4580152671755725, 'f1': 0.6}, 'slot': {'p': 0.4838709677419355, 'r': 0.01364877161055505, 'f1': 0.02654867256637168}, 'combined': 0.01592920353982301, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 0.8955223880597015, 'r': 0.4580152671755725, 'f1': 0.6060606060606061}, 'slot': {'p': 0.4827586206896552, 'r': 0.012738853503184714, 'f1': 0.024822695035460994}, 'combined': 0.015044057597249088, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 0.8695652173913043, 'r': 0.4580152671755725, 'f1': 0.6}, 'slot': {'p': 0.4838709677419355, 'r': 0.01364877161055505, 'f1': 0.02654867256637168}, 'combined': 0.01592920353982301, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:33:10.979522: step: 4/77, loss: 0.006674325093626976 2023-01-22 09:33:12.279828: step: 8/77, loss: 0.00012326599971856922 2023-01-22 09:33:13.574798: step: 12/77, loss: 7.927337151159008e-07 2023-01-22 09:33:14.887536: step: 16/77, loss: 1.073012754204683e-05 2023-01-22 09:33:16.219791: step: 20/77, loss: 0.04632338508963585 2023-01-22 09:33:17.507165: step: 24/77, loss: 0.0004419655306264758 
2023-01-22 09:33:18.781446: step: 28/77, loss: 0.0003266305720899254 2023-01-22 09:33:20.072032: step: 32/77, loss: 0.046977244317531586 2023-01-22 09:33:21.379387: step: 36/77, loss: 0.00014411300071515143 2023-01-22 09:33:22.644860: step: 40/77, loss: 0.004513848572969437 2023-01-22 09:33:23.980471: step: 44/77, loss: 0.0028656127396970987 2023-01-22 09:33:25.284476: step: 48/77, loss: 7.888904656283557e-05 2023-01-22 09:33:26.557651: step: 52/77, loss: 0.0019767654594033957 2023-01-22 09:33:27.864118: step: 56/77, loss: 2.720719066928723e-06 2023-01-22 09:33:29.191057: step: 60/77, loss: 9.81954713097366e-07 2023-01-22 09:33:30.505642: step: 64/77, loss: 0.00030149612575769424 2023-01-22 09:33:31.825891: step: 68/77, loss: 1.1771680874517187e-06 2023-01-22 09:33:33.113440: step: 72/77, loss: 5.9117232012795284e-05 2023-01-22 09:33:34.447595: step: 76/77, loss: 0.04461955651640892 2023-01-22 09:33:35.757979: step: 80/77, loss: 0.014770245179533958 2023-01-22 09:33:37.077293: step: 84/77, loss: 7.152552683464819e-08 2023-01-22 09:33:38.418146: step: 88/77, loss: 0.001319216564297676 2023-01-22 09:33:39.725110: step: 92/77, loss: 0.00010210266918875277 2023-01-22 09:33:41.023846: step: 96/77, loss: 4.071623698109761e-05 2023-01-22 09:33:42.383634: step: 100/77, loss: 8.443147089565173e-05 2023-01-22 09:33:43.696489: step: 104/77, loss: 0.009545549750328064 2023-01-22 09:33:45.016017: step: 108/77, loss: 1.9398636140977032e-05 2023-01-22 09:33:46.305127: step: 112/77, loss: 9.834516276896466e-07 2023-01-22 09:33:47.630492: step: 116/77, loss: 0.0006247479468584061 2023-01-22 09:33:48.948139: step: 120/77, loss: 0.0005076077650301158 2023-01-22 09:33:50.303401: step: 124/77, loss: 5.5134229626219167e-08 2023-01-22 09:33:51.581047: step: 128/77, loss: 1.0052502148027997e-05 2023-01-22 09:33:52.923446: step: 132/77, loss: 0.000475154141895473 2023-01-22 09:33:54.249228: step: 136/77, loss: 1.0516861038922798e-05 2023-01-22 09:33:55.524984: step: 140/77, loss: 0.008938970044255257 2023-01-22 09:33:56.870721: step: 144/77, loss: 1.1819629435194656e-05 2023-01-22 09:33:58.182144: step: 148/77, loss: 8.478687050228473e-07 2023-01-22 09:33:59.551428: step: 152/77, loss: 0.004407108761370182 2023-01-22 09:34:00.895253: step: 156/77, loss: 1.6391214785471675e-07 2023-01-22 09:34:02.224284: step: 160/77, loss: 0.05659640207886696 2023-01-22 09:34:03.548760: step: 164/77, loss: 1.0966845138682402e-06 2023-01-22 09:34:04.878873: step: 168/77, loss: 0.0012224003439769149 2023-01-22 09:34:06.168701: step: 172/77, loss: 0.03606301173567772 2023-01-22 09:34:07.544142: step: 176/77, loss: 0.0002228868834208697 2023-01-22 09:34:08.847681: step: 180/77, loss: 0.003944731783121824 2023-01-22 09:34:10.157146: step: 184/77, loss: 0.0011084693251177669 2023-01-22 09:34:11.482009: step: 188/77, loss: 0.00027038625557906926 2023-01-22 09:34:12.828579: step: 192/77, loss: 0.0035924986004829407 2023-01-22 09:34:14.147265: step: 196/77, loss: 2.804172254400328e-06 2023-01-22 09:34:15.508401: step: 200/77, loss: 5.185574991628528e-07 2023-01-22 09:34:16.832874: step: 204/77, loss: 7.971924560479238e-07 2023-01-22 09:34:18.162342: step: 208/77, loss: 9.861079888651147e-06 2023-01-22 09:34:19.484186: step: 212/77, loss: 1.7955117073142901e-06 2023-01-22 09:34:20.811687: step: 216/77, loss: 5.492773561854847e-05 2023-01-22 09:34:22.074364: step: 220/77, loss: 9.138667337538209e-06 2023-01-22 09:34:23.386529: step: 224/77, loss: 0.0010349903022870421 2023-01-22 09:34:24.720232: step: 228/77, loss: 0.00012837017129641026 
2023-01-22 09:34:26.020433: step: 232/77, loss: 0.05692150816321373 2023-01-22 09:34:27.342608: step: 236/77, loss: 0.0003018905990757048 2023-01-22 09:34:28.654600: step: 240/77, loss: 5.213461918174289e-05 2023-01-22 09:34:29.991927: step: 244/77, loss: 0.005417892709374428 2023-01-22 09:34:31.280611: step: 248/77, loss: 2.4722794478293508e-05 2023-01-22 09:34:32.530276: step: 252/77, loss: 9.783964924281463e-05 2023-01-22 09:34:33.836544: step: 256/77, loss: 0.00041096023051068187 2023-01-22 09:34:35.176486: step: 260/77, loss: 0.0002943962754216045 2023-01-22 09:34:36.476891: step: 264/77, loss: 1.968929973372724e-05 2023-01-22 09:34:37.832450: step: 268/77, loss: 3.314549394417554e-05 2023-01-22 09:34:39.174328: step: 272/77, loss: 0.0036741732619702816 2023-01-22 09:34:40.521692: step: 276/77, loss: 0.0003650987346190959 2023-01-22 09:34:41.839412: step: 280/77, loss: 0.007480195723474026 2023-01-22 09:34:43.180303: step: 284/77, loss: 0.005127861630171537 2023-01-22 09:34:44.502587: step: 288/77, loss: 5.181756932870485e-05 2023-01-22 09:34:45.844949: step: 292/77, loss: 0.009548337198793888 2023-01-22 09:34:47.133979: step: 296/77, loss: 1.3648948424815899e-06 2023-01-22 09:34:48.453117: step: 300/77, loss: 0.013185513205826283 2023-01-22 09:34:49.778904: step: 304/77, loss: 6.6154902924608905e-06 2023-01-22 09:34:51.125481: step: 308/77, loss: 0.0001683257578406483 2023-01-22 09:34:52.441946: step: 312/77, loss: 0.006210809573531151 2023-01-22 09:34:53.737879: step: 316/77, loss: 2.7647402021102607e-05 2023-01-22 09:34:55.047027: step: 320/77, loss: 0.00011505853763082996 2023-01-22 09:34:56.372171: step: 324/77, loss: 0.00043252320028841496 2023-01-22 09:34:57.697967: step: 328/77, loss: 8.42785811983049e-06 2023-01-22 09:34:58.988699: step: 332/77, loss: 0.004213067702949047 2023-01-22 09:35:00.330598: step: 336/77, loss: 0.13022495806217194 2023-01-22 09:35:01.655242: step: 340/77, loss: 1.6599018408669508e-06 2023-01-22 09:35:03.006102: step: 344/77, loss: 0.022105010226368904 2023-01-22 09:35:04.318751: step: 348/77, loss: 0.0004306129994802177 2023-01-22 09:35:05.653705: step: 352/77, loss: 7.364667544607073e-05 2023-01-22 09:35:06.933634: step: 356/77, loss: 5.502285057445988e-05 2023-01-22 09:35:08.264378: step: 360/77, loss: 0.004900304600596428 2023-01-22 09:35:09.579200: step: 364/77, loss: 0.0001213712603203021 2023-01-22 09:35:10.870328: step: 368/77, loss: 0.005602479446679354 2023-01-22 09:35:12.213956: step: 372/77, loss: 5.916656391491415e-06 2023-01-22 09:35:13.524961: step: 376/77, loss: 0.0001372866245219484 2023-01-22 09:35:14.751795: step: 380/77, loss: 6.366540037561208e-05 2023-01-22 09:35:16.062738: step: 384/77, loss: 0.0013172589242458344 2023-01-22 09:35:17.433806: step: 388/77, loss: 0.00026950225583277643 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Chinese: {'template': {'p': 0.9333333333333333, 'r': 0.42748091603053434, 'f1': 0.5863874345549738}, 'slot': {'p': 0.5384615384615384, 'r': 0.012738853503184714, 'f1': 0.02488888888888889}, 'combined': 0.014594531704479349, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: 
{'template': {'p': 0.9333333333333333, 'r': 0.42748091603053434, 'f1': 0.5863874345549738}, 'slot': {'p': 0.5384615384615384, 'r': 0.012738853503184714, 'f1': 0.02488888888888889}, 'combined': 0.014594531704479349, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.9333333333333333, 'r': 0.42748091603053434, 'f1': 0.5863874345549738}, 'slot': {'p': 0.5384615384615384, 'r': 0.012738853503184714, 'f1': 0.02488888888888889}, 'combined': 0.014594531704479349, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:37:10.991775: step: 4/77, loss: 1.3477228094416205e-05 2023-01-22 09:37:12.316000: 
step: 8/77, loss: 4.9881804443430156e-05 2023-01-22 09:37:13.578055: step: 12/77, loss: 8.783635712461546e-05 2023-01-22 09:37:14.837931: step: 16/77, loss: 0.0007455704035237432 2023-01-22 09:37:16.135475: step: 20/77, loss: 0.00010077015758724883 2023-01-22 09:37:17.416408: step: 24/77, loss: 2.8788408599211834e-06 2023-01-22 09:37:18.757791: step: 28/77, loss: 4.572925718093757e-06 2023-01-22 09:37:20.038141: step: 32/77, loss: 0.00016793067334219813 2023-01-22 09:37:21.293526: step: 36/77, loss: 0.0005516072269529104 2023-01-22 09:37:22.574685: step: 40/77, loss: 2.97271108138375e-05 2023-01-22 09:37:23.885213: step: 44/77, loss: 4.699544660979882e-06 2023-01-22 09:37:25.198125: step: 48/77, loss: 4.090108632226475e-06 2023-01-22 09:37:26.500934: step: 52/77, loss: 0.000500613241456449 2023-01-22 09:37:27.795958: step: 56/77, loss: 1.2382681688904995e-06 2023-01-22 09:37:29.074698: step: 60/77, loss: 0.009421252645552158 2023-01-22 09:37:30.388473: step: 64/77, loss: 4.246797402629454e-07 2023-01-22 09:37:31.737337: step: 68/77, loss: 3.3110889489762485e-05 2023-01-22 09:37:33.068224: step: 72/77, loss: 0.02121484838426113 2023-01-22 09:37:34.390076: step: 76/77, loss: 3.910715167876333e-05 2023-01-22 09:37:35.638084: step: 80/77, loss: 1.24184143714956e-05 2023-01-22 09:37:36.970100: step: 84/77, loss: 0.000224571893340908 2023-01-22 09:37:38.242132: step: 88/77, loss: 2.570334117990569e-06 2023-01-22 09:37:39.557514: step: 92/77, loss: 0.0002045604633167386 2023-01-22 09:37:40.888971: step: 96/77, loss: 3.816035587078659e-06 2023-01-22 09:37:42.188069: step: 100/77, loss: 1.0916721294051968e-05 2023-01-22 09:37:43.512429: step: 104/77, loss: 0.0005943027208559215 2023-01-22 09:37:44.793625: step: 108/77, loss: 0.044727031141519547 2023-01-22 09:37:46.070448: step: 112/77, loss: 2.0712560910851607e-07 2023-01-22 09:37:47.428456: step: 116/77, loss: 0.023349588736891747 2023-01-22 09:37:48.706596: step: 120/77, loss: 0.000780319853220135 2023-01-22 09:37:49.978947: step: 124/77, loss: 3.182269574608654e-05 2023-01-22 09:37:51.249172: step: 128/77, loss: 1.594416119132802e-07 2023-01-22 09:37:52.586805: step: 132/77, loss: 0.01661229506134987 2023-01-22 09:37:53.862036: step: 136/77, loss: 1.9326394067320507e-06 2023-01-22 09:37:55.196579: step: 140/77, loss: 0.00011061552504543215 2023-01-22 09:37:56.456521: step: 144/77, loss: 2.890803079935722e-05 2023-01-22 09:37:57.784512: step: 148/77, loss: 0.0003261259407736361 2023-01-22 09:37:59.098929: step: 152/77, loss: 0.02550693042576313 2023-01-22 09:38:00.437374: step: 156/77, loss: 2.756704873263516e-07 2023-01-22 09:38:01.756987: step: 160/77, loss: 2.3945110569911776e-06 2023-01-22 09:38:03.108218: step: 164/77, loss: 7.703841902184649e-07 2023-01-22 09:38:04.372928: step: 168/77, loss: 0.00010813689004862681 2023-01-22 09:38:05.700224: step: 172/77, loss: 1.8811510017258115e-05 2023-01-22 09:38:07.025181: step: 176/77, loss: 0.027787035331130028 2023-01-22 09:38:08.314079: step: 180/77, loss: 0.0001837980526033789 2023-01-22 09:38:09.649577: step: 184/77, loss: 0.00012887391494587064 2023-01-22 09:38:10.950279: step: 188/77, loss: 9.238715392712038e-08 2023-01-22 09:38:12.215897: step: 192/77, loss: 0.0004598087689373642 2023-01-22 09:38:13.560101: step: 196/77, loss: 3.9932165236677974e-06 2023-01-22 09:38:14.902625: step: 200/77, loss: 0.00033994330442510545 2023-01-22 09:38:16.177626: step: 204/77, loss: 0.0010090291034430265 2023-01-22 09:38:17.505604: step: 208/77, loss: 8.787318802205846e-05 2023-01-22 09:38:18.846098: step: 
212/77, loss: 7.56958229430893e-07 2023-01-22 09:38:20.198510: step: 216/77, loss: 1.0773379699458019e-06 2023-01-22 09:38:21.502213: step: 220/77, loss: 2.8703432690235786e-05 2023-01-22 09:38:22.831686: step: 224/77, loss: 8.135804137054947e-07 2023-01-22 09:38:24.094623: step: 228/77, loss: 0.0323166660964489 2023-01-22 09:38:25.400920: step: 232/77, loss: 0.0011196022387593985 2023-01-22 09:38:26.715899: step: 236/77, loss: 0.00794243160635233 2023-01-22 09:38:28.048664: step: 240/77, loss: 0.009800415486097336 2023-01-22 09:38:29.350369: step: 244/77, loss: 4.991802029508108e-07 2023-01-22 09:38:30.689957: step: 248/77, loss: 5.501529449247755e-05 2023-01-22 09:38:32.005101: step: 252/77, loss: 1.1173647180839907e-05 2023-01-22 09:38:33.283779: step: 256/77, loss: 0.0006034831749275327 2023-01-22 09:38:34.638024: step: 260/77, loss: 7.432499114656821e-05 2023-01-22 09:38:35.986948: step: 264/77, loss: 3.3635027648415416e-05 2023-01-22 09:38:37.281690: step: 268/77, loss: 2.950415591840283e-07 2023-01-22 09:38:38.596111: step: 272/77, loss: 1.021041680360213e-05 2023-01-22 09:38:39.911316: step: 276/77, loss: 1.0952142019959865e-06 2023-01-22 09:38:41.288147: step: 280/77, loss: 3.1473733542952687e-05 2023-01-22 09:38:42.615801: step: 284/77, loss: 6.344338544295169e-06 2023-01-22 09:38:43.939908: step: 288/77, loss: 1.9024222638108768e-05 2023-01-22 09:38:45.260054: step: 292/77, loss: 3.579040367185371e-06 2023-01-22 09:38:46.589184: step: 296/77, loss: 9.28251029108651e-06 2023-01-22 09:38:47.948069: step: 300/77, loss: 0.026114514097571373 2023-01-22 09:38:49.239536: step: 304/77, loss: 3.993246536992956e-06 2023-01-22 09:38:50.558754: step: 308/77, loss: 1.3411042054656264e-08 2023-01-22 09:38:51.822350: step: 312/77, loss: 6.215876965143252e-06 2023-01-22 09:38:53.066735: step: 316/77, loss: 3.3527200571370486e-07 2023-01-22 09:38:54.404246: step: 320/77, loss: 4.982641712558689e-06 2023-01-22 09:38:55.765388: step: 324/77, loss: 5.602793180514709e-07 2023-01-22 09:38:57.135191: step: 328/77, loss: 0.021335016936063766 2023-01-22 09:38:58.471220: step: 332/77, loss: 0.0012030237121507525 2023-01-22 09:38:59.812851: step: 336/77, loss: 0.00014075601939111948 2023-01-22 09:39:01.145019: step: 340/77, loss: 8.080643965513445e-06 2023-01-22 09:39:02.520903: step: 344/77, loss: 6.399318863259396e-06 2023-01-22 09:39:03.815072: step: 348/77, loss: 0.0007741707959212363 2023-01-22 09:39:05.122644: step: 352/77, loss: 0.00022454277495853603 2023-01-22 09:39:06.442086: step: 356/77, loss: 3.925973942386918e-06 2023-01-22 09:39:07.767041: step: 360/77, loss: 0.0001450856652809307 2023-01-22 09:39:09.074088: step: 364/77, loss: 5.075151420896873e-05 2023-01-22 09:39:10.358693: step: 368/77, loss: 3.1441189207725984e-07 2023-01-22 09:39:11.731365: step: 372/77, loss: 9.246639820048586e-06 2023-01-22 09:39:13.122690: step: 376/77, loss: 0.08529046177864075 2023-01-22 09:39:14.430871: step: 380/77, loss: 3.843951162707526e-06 2023-01-22 09:39:15.724363: step: 384/77, loss: 4.559170520224143e-06 2023-01-22 09:39:17.078863: step: 388/77, loss: 2.82367477666412e-06 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 23} Test Chinese: {'template': {'p': 0.9242424242424242, 'r': 0.46564885496183206, 'f1': 0.6192893401015228}, 'slot': {'p': 0.5, 'r': 
0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.01431246474901297, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 23} Test Korean: {'template': {'p': 0.9230769230769231, 'r': 0.4580152671755725, 'f1': 0.6122448979591837}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.014149659863945578, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 23} Test Russian: {'template': {'p': 0.9242424242424242, 'r': 0.46564885496183206, 'f1': 0.6192893401015228}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.01431246474901297, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 24 command: python train.py --model_name 
template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:41:11.160215: step: 4/77, loss: 0.0012159548932686448 2023-01-22 09:41:12.454966: step: 8/77, loss: 1.1826316949736793e-05 2023-01-22 09:41:13.753668: step: 12/77, loss: 3.315163849038072e-05 2023-01-22 09:41:15.054149: step: 16/77, loss: 3.07842310576234e-06 2023-01-22 09:41:16.358663: step: 20/77, loss: 4.5274249714566395e-05 2023-01-22 09:41:17.619147: step: 24/77, loss: 0.00027546961791813374 2023-01-22 09:41:18.983540: step: 28/77, loss: 0.00967315025627613 2023-01-22 09:41:20.338966: step: 32/77, loss: 8.344623836364917e-08 2023-01-22 09:41:21.645247: step: 36/77, loss: 5.8957184592145495e-06 2023-01-22 09:41:23.003134: step: 40/77, loss: 0.0320592001080513 2023-01-22 09:41:24.326235: step: 44/77, loss: 1.594416829675538e-07 2023-01-22 09:41:25.597146: step: 48/77, loss: 0.0002586401242297143 2023-01-22 09:41:26.950391: step: 52/77, loss: 0.05762874335050583 2023-01-22 09:41:28.264294: step: 56/77, loss: 0.0005221142200753093 2023-01-22 09:41:29.632351: step: 60/77, loss: 0.0007917734910733998 2023-01-22 09:41:30.937311: step: 64/77, loss: 8.27010881039314e-05 2023-01-22 09:41:32.267231: step: 68/77, loss: 0.005319363437592983 2023-01-22 09:41:33.593868: step: 72/77, loss: 0.00047313497634604573 2023-01-22 09:41:34.885906: step: 76/77, loss: 1.842924211814534e-05 2023-01-22 09:41:36.178961: step: 80/77, loss: 0.004623536020517349 2023-01-22 09:41:37.427574: step: 84/77, loss: 0.00847904197871685 2023-01-22 09:41:38.703474: step: 88/77, loss: 0.00033163363696075976 2023-01-22 09:41:39.928644: step: 92/77, loss: 0.0010802025208249688 2023-01-22 09:41:41.243169: step: 96/77, loss: 1.2591187896759948e-06 2023-01-22 09:41:42.567096: step: 100/77, loss: 9.653921006247401e-05 2023-01-22 09:41:43.865776: step: 104/77, loss: 0.0005702600465156138 2023-01-22 09:41:45.189376: step: 108/77, loss: 0.00010552568710409105 2023-01-22 09:41:46.529781: step: 112/77, loss: 1.29695708892541e-05 2023-01-22 09:41:47.784970: step: 116/77, loss: 2.2545989850186743e-05 2023-01-22 09:41:49.119289: step: 120/77, loss: 0.07159332931041718 2023-01-22 09:41:50.457031: step: 124/77, loss: 6.646419933531433e-05 2023-01-22 09:41:51.778852: step: 128/77, loss: 9.499242878518999e-05 2023-01-22 09:41:53.064051: step: 132/77, loss: 9.36659998842515e-05 2023-01-22 09:41:54.391115: step: 136/77, loss: 0.0003815369273070246 2023-01-22 09:41:55.732121: step: 140/77, loss: 5.266933385428274e-06 2023-01-22 09:41:57.115223: step: 144/77, loss: 0.03661287575960159 2023-01-22 09:41:58.469222: step: 148/77, loss: 5.105370655655861e-05 2023-01-22 09:41:59.782575: step: 152/77, loss: 0.00012779705866705626 2023-01-22 09:42:01.124821: step: 156/77, loss: 3.266726707806811e-05 2023-01-22 09:42:02.433966: step: 160/77, loss: 2.987370316986926e-05 2023-01-22 09:42:03.807136: step: 164/77, loss: 0.06484833359718323 2023-01-22 09:42:05.182848: step: 168/77, loss: 0.0031613532919436693 2023-01-22 09:42:06.516168: step: 172/77, loss: 2.6726342184701934e-05 2023-01-22 09:42:07.884401: step: 176/77, loss: 0.005361021962016821 2023-01-22 09:42:09.241898: step: 180/77, loss: 1.2211272405693308e-05 2023-01-22 09:42:10.606756: step: 184/77, loss: 0.0008916446240618825 2023-01-22 09:42:11.949401: step: 188/77, loss: 0.00016377741121686995 2023-01-22 09:42:13.286585: step: 192/77, loss: 0.001382010756060481 2023-01-22 09:42:14.632497: step: 196/77, 
loss: 4.331546278990572e-06 2023-01-22 09:42:15.926535: step: 200/77, loss: 0.0017925102729350328 2023-01-22 09:42:17.226930: step: 204/77, loss: 0.00042643165215849876 2023-01-22 09:42:18.543223: step: 208/77, loss: 0.0020623996388167143 2023-01-22 09:42:19.821778: step: 212/77, loss: 3.0625400540884584e-05 2023-01-22 09:42:21.131851: step: 216/77, loss: 3.4627344575710595e-05 2023-01-22 09:42:22.455779: step: 220/77, loss: 4.327108399593271e-06 2023-01-22 09:42:23.768588: step: 224/77, loss: 5.36033121534274e-06 2023-01-22 09:42:25.050398: step: 228/77, loss: 0.0003056804707739502 2023-01-22 09:42:26.344046: step: 232/77, loss: 1.1797816114267334e-05 2023-01-22 09:42:27.730530: step: 236/77, loss: 1.0462316822668072e-05 2023-01-22 09:42:29.078074: step: 240/77, loss: 0.0001913983578560874 2023-01-22 09:42:30.396145: step: 244/77, loss: 0.005624077748507261 2023-01-22 09:42:31.713509: step: 248/77, loss: 0.0017002577660605311 2023-01-22 09:42:33.001855: step: 252/77, loss: 0.01608699932694435 2023-01-22 09:42:34.297688: step: 256/77, loss: 0.00011465288116596639 2023-01-22 09:42:35.614676: step: 260/77, loss: 0.0004897843464277685 2023-01-22 09:42:36.878059: step: 264/77, loss: 3.771346382563934e-05 2023-01-22 09:42:38.188369: step: 268/77, loss: 8.024965063668787e-05 2023-01-22 09:42:39.511882: step: 272/77, loss: 1.8743507098406553e-05 2023-01-22 09:42:40.839143: step: 276/77, loss: 0.00038851777208037674 2023-01-22 09:42:42.160395: step: 280/77, loss: 0.0003650106955319643 2023-01-22 09:42:43.503241: step: 284/77, loss: 0.00029866196564398706 2023-01-22 09:42:44.786394: step: 288/77, loss: 0.0002978905104100704 2023-01-22 09:42:46.125775: step: 292/77, loss: 6.114787993283244e-06 2023-01-22 09:42:47.470798: step: 296/77, loss: 0.00012048119970131665 2023-01-22 09:42:48.790887: step: 300/77, loss: 0.11712194979190826 2023-01-22 09:42:50.101680: step: 304/77, loss: 0.0046895164996385574 2023-01-22 09:42:51.429300: step: 308/77, loss: 1.209469701279886e-05 2023-01-22 09:42:52.746401: step: 312/77, loss: 6.371148629114032e-06 2023-01-22 09:42:54.043119: step: 316/77, loss: 0.00048257590970024467 2023-01-22 09:42:55.353748: step: 320/77, loss: 1.3525404938263819e-05 2023-01-22 09:42:56.629085: step: 324/77, loss: 9.887629857985303e-06 2023-01-22 09:42:57.900418: step: 328/77, loss: 5.4313310101861134e-05 2023-01-22 09:42:59.242283: step: 332/77, loss: 0.000798575347289443 2023-01-22 09:43:00.534781: step: 336/77, loss: 9.506832157057943e-07 2023-01-22 09:43:01.858029: step: 340/77, loss: 7.241319053719053e-06 2023-01-22 09:43:03.162338: step: 344/77, loss: 9.029993179865414e-07 2023-01-22 09:43:04.503844: step: 348/77, loss: 3.7921424791420577e-06 2023-01-22 09:43:05.816170: step: 352/77, loss: 0.07554040104150772 2023-01-22 09:43:07.140097: step: 356/77, loss: 7.293753878911957e-05 2023-01-22 09:43:08.466369: step: 360/77, loss: 7.862165512051433e-05 2023-01-22 09:43:09.787443: step: 364/77, loss: 3.064231714233756e-05 2023-01-22 09:43:11.091087: step: 368/77, loss: 1.3740112990490161e-05 2023-01-22 09:43:12.377417: step: 372/77, loss: 7.894382724771276e-05 2023-01-22 09:43:13.766517: step: 376/77, loss: 3.303540506749414e-05 2023-01-22 09:43:15.096048: step: 380/77, loss: 2.9992257623234764e-05 2023-01-22 09:43:16.387776: step: 384/77, loss: 8.209863881347701e-05 2023-01-22 09:43:17.663227: step: 388/77, loss: 0.014107540249824524 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24}
Test Chinese: {'template': {'p': 0.8771929824561403, 'r': 0.3816793893129771, 'f1': 0.5319148936170213}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.012293144208037824, 'epoch': 24}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24}
Test Korean: {'template': {'p': 0.8771929824561403, 'r': 0.3816793893129771, 'f1': 0.5319148936170213}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.012293144208037824, 'epoch': 24}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24}
Test Russian: {'template': {'p': 0.8771929824561403, 'r': 0.3816793893129771, 'f1': 0.5319148936170213}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.012293144208037824, 'epoch': 24}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:45:11.246163: step: 4/77, loss: 1.8078786524711177e-05 2023-01-22 09:45:12.557116: step: 8/77, loss: 0.01451027300208807 2023-01-22 09:45:13.893144: step: 12/77, loss: 0.0024240748025476933 2023-01-22 09:45:15.193472: step: 16/77, loss: 4.846112642553635e-05 2023-01-22 09:45:16.504377: step: 20/77, loss: 4.4428663386497647e-05 2023-01-22 09:45:17.868923: step: 24/77, loss: 0.0004771506355609745 2023-01-22 09:45:19.176049: step: 28/77, loss: 0.00028463450144045055 2023-01-22 09:45:20.498333: step: 32/77, loss: 2.9871722290408798e-05 2023-01-22 09:45:21.861816: step: 36/77, loss: 0.00035358016612008214 2023-01-22 09:45:23.181337: step: 40/77, loss: 0.011880475096404552 2023-01-22 09:45:24.498187: step: 44/77, loss: 9.635615424485877e-05 2023-01-22 09:45:25.794286: step: 48/77, loss: 0.015608715824782848 2023-01-22 09:45:27.074392: step: 52/77, loss: 7.743830792605877e-06 2023-01-22 09:45:28.361330: step: 56/77, loss: 0.013100337237119675 2023-01-22 09:45:29.606139: step: 60/77, loss: 0.02021319791674614 2023-01-22 09:45:30.915526: step: 64/77, loss: 0.0001840683980844915 2023-01-22 09:45:32.225094: step: 68/77, loss: 0.00018551107496023178 2023-01-22 09:45:33.524388: step: 72/77, loss: 3.874457615893334e-05 2023-01-22 09:45:34.766909: step: 76/77, loss: 0.003859947668388486 2023-01-22 09:45:36.057184: step: 80/77, loss: 6.0701422626152635e-05 2023-01-22 09:45:37.346167: step: 84/77, loss: 0.00010562671377556399 2023-01-22 09:45:38.664982: step: 88/77, loss: 0.0003795281518250704 2023-01-22 09:45:39.994327: step: 92/77, loss: 4.508575784711866e-06 2023-01-22 09:45:41.319611: step: 96/77, loss: 7.796460704412311e-05 2023-01-22 09:45:42.680032: step: 100/77, loss: 1.855391019489616e-05 2023-01-22 09:45:44.029224: step: 104/77, loss: 0.0023824882227927446 2023-01-22 09:45:45.322177: step: 108/77, loss: 0.00010295546235283837 2023-01-22 09:45:46.603964: step: 112/77, loss: 0.02050900273025036 2023-01-22 09:45:47.933401: step: 116/77, loss: 3.378879773663357e-05 2023-01-22 09:45:49.274578: step: 120/77, loss: 5.485976726049557e-05 2023-01-22 09:45:50.579561: step: 124/77, loss: 1.6510293789906427e-06 2023-01-22 09:45:51.894840: step: 128/77, loss: 3.83666338166222e-05 2023-01-22 09:45:53.170046: step: 132/77, loss: 4.299523061490618e-05 2023-01-22 09:45:54.463827: step: 136/77, loss: 1.727883682178799e-05 2023-01-22 09:45:55.742963: step: 140/77, loss: 0.0006323217530734837 2023-01-22 09:45:57.057158: step: 144/77, loss: 0.03129790350794792 2023-01-22 09:45:58.303024: step: 148/77, loss: 0.003158903680741787 2023-01-22 09:45:59.641753: step: 152/77, loss: 0.0005364782409742475 2023-01-22 09:46:00.957936: step: 156/77, loss: 0.00014099475811235607 2023-01-22 09:46:02.252516: step: 160/77, loss: 0.01949172280728817 2023-01-22 09:46:03.546958: step: 164/77, loss: 0.007668100763112307 2023-01-22 09:46:04.852353: step: 168/77, loss: 2.051947740255855e-05 2023-01-22 09:46:06.162683: step: 172/77, loss: 0.0004227279277984053 2023-01-22 09:46:07.456275: step: 176/77, loss: 0.008719212375581264 2023-01-22 09:46:08.729974: step: 180/77, loss: 
0.003100724657997489 2023-01-22 09:46:10.041300: step: 184/77, loss: 0.01714274100959301 2023-01-22 09:46:11.371398: step: 188/77, loss: 0.00026967335725203156 2023-01-22 09:46:12.712051: step: 192/77, loss: 2.4471361030009575e-05 2023-01-22 09:46:14.076051: step: 196/77, loss: 0.0002496147935744375 2023-01-22 09:46:15.355784: step: 200/77, loss: 0.0005400096997618675 2023-01-22 09:46:16.648738: step: 204/77, loss: 0.020806198939681053 2023-01-22 09:46:17.954969: step: 208/77, loss: 0.002908297348767519 2023-01-22 09:46:19.255195: step: 212/77, loss: 1.0736946023826022e-05 2023-01-22 09:46:20.584307: step: 216/77, loss: 0.036727242171764374 2023-01-22 09:46:21.863430: step: 220/77, loss: 0.010679392144083977 2023-01-22 09:46:23.260991: step: 224/77, loss: 0.0247371606528759 2023-01-22 09:46:24.574566: step: 228/77, loss: 0.013214082457125187 2023-01-22 09:46:25.899521: step: 232/77, loss: 3.2482425012858585e-05 2023-01-22 09:46:27.244626: step: 236/77, loss: 0.0776718333363533 2023-01-22 09:46:28.607002: step: 240/77, loss: 0.0001316557900281623 2023-01-22 09:46:29.968063: step: 244/77, loss: 1.5604582586092874e-05 2023-01-22 09:46:31.235653: step: 248/77, loss: 3.693843609653413e-05 2023-01-22 09:46:32.596367: step: 252/77, loss: 0.033623889088630676 2023-01-22 09:46:33.908891: step: 256/77, loss: 0.010058862157166004 2023-01-22 09:46:35.228393: step: 260/77, loss: 0.0002979889395646751 2023-01-22 09:46:36.605371: step: 264/77, loss: 5.346520629245788e-05 2023-01-22 09:46:37.901264: step: 268/77, loss: 0.0005596758564934134 2023-01-22 09:46:39.220236: step: 272/77, loss: 9.120593858824577e-06 2023-01-22 09:46:40.491471: step: 276/77, loss: 0.00045637143193744123 2023-01-22 09:46:41.844027: step: 280/77, loss: 0.0001845014630816877 2023-01-22 09:46:43.134809: step: 284/77, loss: 0.00040560748311690986 2023-01-22 09:46:44.440020: step: 288/77, loss: 0.0025985874235630035 2023-01-22 09:46:45.727942: step: 292/77, loss: 0.0003584503720048815 2023-01-22 09:46:47.088301: step: 296/77, loss: 7.811022805981338e-05 2023-01-22 09:46:48.378782: step: 300/77, loss: 7.227884634630755e-05 2023-01-22 09:46:49.664905: step: 304/77, loss: 1.4953435311326757e-05 2023-01-22 09:46:50.997422: step: 308/77, loss: 3.5179625683667837e-06 2023-01-22 09:46:52.288843: step: 312/77, loss: 0.00016858424351084977 2023-01-22 09:46:53.585376: step: 316/77, loss: 0.0001187587040476501 2023-01-22 09:46:54.943131: step: 320/77, loss: 0.01913926936686039 2023-01-22 09:46:56.277370: step: 324/77, loss: 0.00039371493039652705 2023-01-22 09:46:57.624546: step: 328/77, loss: 0.0018512567039579153 2023-01-22 09:46:58.949776: step: 332/77, loss: 2.1880205167690292e-05 2023-01-22 09:47:00.312622: step: 336/77, loss: 0.0002811326121445745 2023-01-22 09:47:01.582406: step: 340/77, loss: 5.7129363995045424e-05 2023-01-22 09:47:02.922588: step: 344/77, loss: 0.0019272958161309361 2023-01-22 09:47:04.285749: step: 348/77, loss: 0.00403275853022933 2023-01-22 09:47:05.583136: step: 352/77, loss: 0.002493165200576186 2023-01-22 09:47:06.906294: step: 356/77, loss: 1.8539773009251803e-05 2023-01-22 09:47:08.292040: step: 360/77, loss: 0.06888003647327423 2023-01-22 09:47:09.601629: step: 364/77, loss: 0.001463544089347124 2023-01-22 09:47:10.900984: step: 368/77, loss: 0.00014727277448400855 2023-01-22 09:47:12.218002: step: 372/77, loss: 0.0008448277367278934 2023-01-22 09:47:13.591790: step: 376/77, loss: 0.00031047200900502503 2023-01-22 09:47:14.943069: step: 380/77, loss: 6.899063009768724e-07 2023-01-22 09:47:16.275201: step: 
384/77, loss: 8.046570769693062e-07
2023-01-22 09:47:17.569546: step: 388/77, loss: 0.001087652170099318
==================================================
Loss: 0.006
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25}
Test Chinese: {'template': {'p': 0.9253731343283582, 'r': 0.4732824427480916, 'f1': 0.6262626262626263}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.014473625140291806, 'epoch': 25}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25}
Test Korean: {'template': {'p': 0.9242424242424242, 'r': 0.46564885496183206, 'f1': 0.6192893401015228}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.01431246474901297, 'epoch': 25}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25}
Test Russian: {'template': {'p': 0.9253731343283582, 'r': 0.4732824427480916, 'f1': 0.6262626262626263}, 'slot': {'p': 0.5, 'r': 0.011828935395814377, 'f1': 0.02311111111111111}, 'combined': 0.014473625140291806, 'epoch': 25}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346,
'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:49:10.904510: step: 4/77, loss: 0.0005845409468747675 2023-01-22 09:49:12.232094: step: 8/77, loss: 7.716177606198471e-06 2023-01-22 09:49:13.571647: step: 12/77, loss: 0.018682831898331642 2023-01-22 09:49:14.896165: step: 16/77, loss: 7.593029295094311e-06 2023-01-22 09:49:16.154458: step: 20/77, loss: 1.414090093021514e-06 2023-01-22 09:49:17.478352: step: 24/77, loss: 1.3693821756532998e-06 2023-01-22 09:49:18.767307: step: 28/77, loss: 0.03636294603347778 2023-01-22 09:49:20.095161: step: 32/77, loss: 1.1353353329468518e-05 2023-01-22 09:49:21.381252: step: 36/77, loss: 1.8178689060732722e-06 2023-01-22 09:49:22.726786: step: 40/77, loss: 3.197551222910988e-06 2023-01-22 09:49:23.991643: step: 44/77, loss: 0.0011801602086052299 2023-01-22 09:49:25.310758: step: 48/77, loss: 0.00013486703392118216 2023-01-22 09:49:26.596587: step: 52/77, loss: 0.00027940733707509935 2023-01-22 09:49:27.927283: step: 56/77, loss: 7.922427903395146e-05 2023-01-22 09:49:29.188204: step: 60/77, loss: 0.005133678670972586 2023-01-22 09:49:30.483674: step: 64/77, loss: 0.0019212663173675537 2023-01-22 09:49:31.785543: step: 68/77, loss: 0.0002738984767347574 2023-01-22 09:49:33.108932: step: 72/77, loss: 0.002075678901746869 2023-01-22 09:49:34.381698: step: 76/77, loss: 0.00015379862452391535 2023-01-22 09:49:35.695600: step: 80/77, loss: 7.955257024150342e-05 2023-01-22 09:49:37.041848: step: 84/77, loss: 8.302839705720544e-05 2023-01-22 09:49:38.377045: step: 88/77, loss: 2.8817096335842507e-06 2023-01-22 09:49:39.665893: step: 92/77, loss: 0.0004095395270269364 2023-01-22 09:49:40.963065: step: 96/77, loss: 8.702026548235153e-07 2023-01-22 09:49:42.288237: step: 100/77, loss: 0.030109496787190437 2023-01-22 09:49:43.607920: step: 104/77, loss: 8.553329280402977e-06 2023-01-22 09:49:44.887226: step: 108/77, loss: 0.013677610084414482 2023-01-22 09:49:46.185779: step: 112/77, loss: 0.0006004376336932182 2023-01-22 09:49:47.508094: step: 116/77, loss: 8.59678584674839e-06 2023-01-22 09:49:48.817829: step: 120/77, loss: 0.00037552695721387863 2023-01-22 09:49:50.179845: step: 124/77, loss: 9.507484355708584e-05 2023-01-22 09:49:51.505407: step: 128/77, loss: 0.00028156503685750067 2023-01-22 09:49:52.819407: step: 132/77, loss: 0.00040554223232902586 2023-01-22 09:49:54.096252: step: 136/77, loss: 7.318492862395942e-05 2023-01-22 09:49:55.412759: step: 140/77, loss: 9.517766739008948e-05 2023-01-22 09:49:56.755327: step: 144/77, loss: 0.003778319340199232 2023-01-22 09:49:58.086888: step: 148/77, loss: 0.006978815887123346 2023-01-22 09:49:59.427837: step: 152/77, loss: 3.5166615930393164e-07 2023-01-22 09:50:00.748435: step: 156/77, loss: 5.376308399718255e-05 2023-01-22 09:50:02.101151: step: 160/77, loss: 1.1846309462271165e-06 2023-01-22 09:50:03.449421: step: 164/77, loss: 
3.244573963456787e-05 2023-01-22 09:50:04.780824: step: 168/77, loss: 1.4926770745660178e-05 2023-01-22 09:50:06.116615: step: 172/77, loss: 5.1937960961367935e-05 2023-01-22 09:50:07.429569: step: 176/77, loss: 2.361210499657318e-05 2023-01-22 09:50:08.780417: step: 180/77, loss: 3.657995694084093e-05 2023-01-22 09:50:10.096458: step: 184/77, loss: 0.0023028128780424595 2023-01-22 09:50:11.373641: step: 188/77, loss: 8.001786113709386e-07 2023-01-22 09:50:12.694111: step: 192/77, loss: 0.00041240183054469526 2023-01-22 09:50:14.074000: step: 196/77, loss: 1.0102717169502284e-06 2023-01-22 09:50:15.387074: step: 200/77, loss: 6.346671580104157e-05 2023-01-22 09:50:16.684308: step: 204/77, loss: 1.5809321212145733e-06 2023-01-22 09:50:17.980583: step: 208/77, loss: 3.2718826332711615e-06 2023-01-22 09:50:19.274261: step: 212/77, loss: 4.130280103709083e-06 2023-01-22 09:50:20.576305: step: 216/77, loss: 6.496434707514709e-06 2023-01-22 09:50:21.900857: step: 220/77, loss: 0.03695246949791908 2023-01-22 09:50:23.259811: step: 224/77, loss: 0.0018174505094066262 2023-01-22 09:50:24.565433: step: 228/77, loss: 2.1439096599351615e-05 2023-01-22 09:50:25.886046: step: 232/77, loss: 2.9224374884506688e-05 2023-01-22 09:50:27.231629: step: 236/77, loss: 0.001979271648451686 2023-01-22 09:50:28.600163: step: 240/77, loss: 2.1010566797485808e-07 2023-01-22 09:50:29.911979: step: 244/77, loss: 7.301549231897297e-08 2023-01-22 09:50:31.245171: step: 248/77, loss: 3.650733049198607e-07 2023-01-22 09:50:32.539460: step: 252/77, loss: 6.574868621100904e-06 2023-01-22 09:50:33.862336: step: 256/77, loss: 9.089868399314582e-06 2023-01-22 09:50:35.182112: step: 260/77, loss: 6.109467420856163e-08 2023-01-22 09:50:36.479904: step: 264/77, loss: 0.014889734797179699 2023-01-22 09:50:37.777890: step: 268/77, loss: 4.674082902056398e-06 2023-01-22 09:50:39.077801: step: 272/77, loss: 7.316713890759274e-05 2023-01-22 09:50:40.436602: step: 276/77, loss: 2.4442459107376635e-05 2023-01-22 09:50:41.727885: step: 280/77, loss: 3.446165692366776e-06 2023-01-22 09:50:42.996274: step: 284/77, loss: 0.01793898269534111 2023-01-22 09:50:44.357814: step: 288/77, loss: 0.0002047650923486799 2023-01-22 09:50:45.671604: step: 292/77, loss: 0.001654884428717196 2023-01-22 09:50:46.972301: step: 296/77, loss: 2.056354730939347e-07 2023-01-22 09:50:48.287695: step: 300/77, loss: 4.407174856169149e-05 2023-01-22 09:50:49.638144: step: 304/77, loss: 1.5867699403315783e-05 2023-01-22 09:50:50.933223: step: 308/77, loss: 0.00010468468099134043 2023-01-22 09:50:52.260869: step: 312/77, loss: 0.00015533588884864002 2023-01-22 09:50:53.569750: step: 316/77, loss: 3.3252330467803404e-05 2023-01-22 09:50:54.852992: step: 320/77, loss: 0.017645152285695076 2023-01-22 09:50:56.199182: step: 324/77, loss: 0.008587392047047615 2023-01-22 09:50:57.549901: step: 328/77, loss: 2.6197356419288553e-05 2023-01-22 09:50:58.858179: step: 332/77, loss: 0.0016763238236308098 2023-01-22 09:51:00.173624: step: 336/77, loss: 5.572942427534144e-07 2023-01-22 09:51:01.450849: step: 340/77, loss: 3.2335131550098595e-07 2023-01-22 09:51:02.739790: step: 344/77, loss: 1.4901160305669237e-09 2023-01-22 09:51:04.019986: step: 348/77, loss: 0.00472618080675602 2023-01-22 09:51:05.331552: step: 352/77, loss: 6.518932787002996e-05 2023-01-22 09:51:06.642512: step: 356/77, loss: 2.8163009346826584e-07 2023-01-22 09:51:07.977634: step: 360/77, loss: 4.072131559951231e-05 2023-01-22 09:51:09.278509: step: 364/77, loss: 5.950074773863889e-05 2023-01-22 
09:51:10.634296: step: 368/77, loss: 1.0579780251873672e-07
2023-01-22 09:51:11.915929: step: 372/77, loss: 3.1063103961059824e-05
2023-01-22 09:51:13.246552: step: 376/77, loss: 6.884224603709299e-07
2023-01-22 09:51:14.549371: step: 380/77, loss: 0.005597976036369801
2023-01-22 09:51:15.851142: step: 384/77, loss: 5.24139468325302e-05
2023-01-22 09:51:17.142208: step: 388/77, loss: 6.258482443399771e-08
==================================================
Loss: 0.002
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26}
Test Chinese: {'template': {'p': 0.9344262295081968, 'r': 0.4351145038167939, 'f1': 0.59375}, 'slot': {'p': 0.5909090909090909, 'r': 0.011828935395814377, 'f1': 0.023193577163247096}, 'combined': 0.013771186440677964, 'epoch': 26}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26}
Test Korean: {'template': {'p': 0.9344262295081968, 'r': 0.4351145038167939, 'f1': 0.59375}, 'slot': {'p': 0.5909090909090909, 'r': 0.011828935395814377, 'f1': 0.023193577163247096}, 'combined': 0.013771186440677964, 'epoch': 26}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26}
Test Russian: {'template': {'p': 0.9344262295081968, 'r': 0.4351145038167939, 'f1': 0.59375}, 'slot': {'p': 0.5909090909090909, 'r': 0.011828935395814377, 'f1': 0.023193577163247096}, 'combined': 0.013771186440677964, 'epoch': 26}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3}
Korean: {'template': 
{'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:53:10.626271: step: 4/77, loss: 5.606117611023365e-06 2023-01-22 09:53:11.929871: step: 8/77, loss: 8.432649337919429e-05 2023-01-22 09:53:13.238785: step: 12/77, loss: 0.00898031610995531 2023-01-22 09:53:14.563355: step: 16/77, loss: 0.001969903940334916 2023-01-22 09:53:15.892806: step: 20/77, loss: 6.214072072907584e-06 2023-01-22 09:53:17.183786: step: 24/77, loss: 0.00011349977285135537 2023-01-22 09:53:18.477796: step: 28/77, loss: 3.203712992672081e-07 2023-01-22 09:53:19.787268: step: 32/77, loss: 0.0015820408007130027 2023-01-22 09:53:21.128950: step: 36/77, loss: 8.64265459199487e-08 2023-01-22 09:53:22.405800: step: 40/77, loss: 8.586096555518452e-06 2023-01-22 09:53:23.730873: step: 44/77, loss: 0.018632622435688972 2023-01-22 09:53:25.061985: step: 48/77, loss: 6.8772255872318055e-06 2023-01-22 09:53:26.377370: step: 52/77, loss: 1.6801041056169197e-05 2023-01-22 09:53:27.736598: step: 56/77, loss: 1.35445168325532e-06 2023-01-22 09:53:29.008157: step: 60/77, loss: 2.0160439362371108e-06 2023-01-22 09:53:30.354399: step: 64/77, loss: 3.139088221359998e-05 2023-01-22 09:53:31.628504: step: 68/77, loss: 7.599579276984514e-08 2023-01-22 09:53:32.935211: step: 72/77, loss: 5.960463678178485e-09 2023-01-22 09:53:34.258287: step: 76/77, loss: 4.649100162623654e-07 2023-01-22 09:53:35.589759: step: 80/77, loss: 8.607782547187526e-06 2023-01-22 09:53:36.910886: step: 84/77, loss: 4.1276010165347543e-07 2023-01-22 09:53:38.198216: step: 88/77, loss: 1.9534702460077824e-06 2023-01-22 09:53:39.515226: step: 92/77, loss: 3.3809201340773143e-06 2023-01-22 09:53:40.830023: step: 96/77, loss: 0.000383280887035653 2023-01-22 09:53:42.169964: step: 100/77, loss: 0.04520416259765625 2023-01-22 09:53:43.469149: step: 104/77, loss: 3.577503957785666e-05 2023-01-22 09:53:44.801948: step: 108/77, loss: 3.874299281392268e-08 2023-01-22 09:53:46.113033: step: 112/77, loss: 2.9057110850772006e-07 2023-01-22 09:53:47.427975: step: 116/77, loss: 0.0004997196956537664 2023-01-22 09:53:48.752450: step: 120/77, loss: 4.0233111064935656e-08 2023-01-22 09:53:50.017553: step: 124/77, loss: 1.4305092577160394e-07 2023-01-22 09:53:51.333631: step: 128/77, loss: 3.3963064197450876e-05 2023-01-22 09:53:52.634850: step: 132/77, loss: 0.0001803104387363419 2023-01-22 09:53:53.903731: step: 136/77, loss: 0.023600829765200615 2023-01-22 09:53:55.166926: step: 140/77, loss: 0.0324283204972744 2023-01-22 09:53:56.469844: step: 144/77, loss: 4.0875565900933e-05 2023-01-22 09:53:57.767303: 
step: 148/77, loss: 3.6556259146891534e-05 2023-01-22 09:53:59.084254: step: 152/77, loss: 1.1175855973988291e-07 2023-01-22 09:54:00.379209: step: 156/77, loss: 1.71362657397367e-07 2023-01-22 09:54:01.650901: step: 160/77, loss: 0.004206412006169558 2023-01-22 09:54:02.980886: step: 164/77, loss: 3.8100883102742955e-05 2023-01-22 09:54:04.284347: step: 168/77, loss: 5.173112731426954e-05 2023-01-22 09:54:05.621680: step: 172/77, loss: 0.0023015725892037153 2023-01-22 09:54:06.863731: step: 176/77, loss: 1.4766195590709685e-06 2023-01-22 09:54:08.171493: step: 180/77, loss: 5.628646977129392e-05 2023-01-22 09:54:09.464264: step: 184/77, loss: 0.003803855739533901 2023-01-22 09:54:10.777605: step: 188/77, loss: 6.612636298086727e-06 2023-01-22 09:54:12.132079: step: 192/77, loss: 2.401980964350514e-06 2023-01-22 09:54:13.479179: step: 196/77, loss: 2.813032779158675e-06 2023-01-22 09:54:14.789374: step: 200/77, loss: 0.00033058272674679756 2023-01-22 09:54:16.155387: step: 204/77, loss: 2.8712543098663446e-06 2023-01-22 09:54:17.498126: step: 208/77, loss: 3.3868632272060495e-06 2023-01-22 09:54:18.815264: step: 212/77, loss: 7.646017365914304e-06 2023-01-22 09:54:20.168737: step: 216/77, loss: 5.230267561273649e-07 2023-01-22 09:54:21.439636: step: 220/77, loss: 0.0002331840805709362 2023-01-22 09:54:22.758512: step: 224/77, loss: 1.4259716181186377e-06 2023-01-22 09:54:24.120143: step: 228/77, loss: 0.002828009892255068 2023-01-22 09:54:25.444894: step: 232/77, loss: 1.7716438378556632e-06 2023-01-22 09:54:26.802739: step: 236/77, loss: 4.002767673227936e-05 2023-01-22 09:54:28.069885: step: 240/77, loss: 4.567943960864795e-06 2023-01-22 09:54:29.347559: step: 244/77, loss: 2.5778825829547714e-07 2023-01-22 09:54:30.631271: step: 248/77, loss: 3.5877167192666093e-06 2023-01-22 09:54:31.942506: step: 252/77, loss: 3.363920041010715e-05 2023-01-22 09:54:33.222669: step: 256/77, loss: 0.001751170726493001 2023-01-22 09:54:34.509362: step: 260/77, loss: 3.6384628856467316e-06 2023-01-22 09:54:35.845530: step: 264/77, loss: 1.1056458788516466e-06 2023-01-22 09:54:37.158317: step: 268/77, loss: 9.072668945009355e-06 2023-01-22 09:54:38.497689: step: 272/77, loss: 6.171829591039568e-05 2023-01-22 09:54:39.848795: step: 276/77, loss: 0.052831538021564484 2023-01-22 09:54:41.181575: step: 280/77, loss: 5.888020677957684e-05 2023-01-22 09:54:42.498844: step: 284/77, loss: 0.0285626370459795 2023-01-22 09:54:43.850429: step: 288/77, loss: 0.004710773937404156 2023-01-22 09:54:45.161371: step: 292/77, loss: 0.018306363373994827 2023-01-22 09:54:46.447159: step: 296/77, loss: 0.0022880875039845705 2023-01-22 09:54:47.792214: step: 300/77, loss: 8.508421274200373e-07 2023-01-22 09:54:49.111215: step: 304/77, loss: 3.084105264861137e-05 2023-01-22 09:54:50.372205: step: 308/77, loss: 0.00011660241580102593 2023-01-22 09:54:51.681019: step: 312/77, loss: 0.004058020189404488 2023-01-22 09:54:52.977139: step: 316/77, loss: 6.645774988101039e-07 2023-01-22 09:54:54.290217: step: 320/77, loss: 0.06051657348871231 2023-01-22 09:54:55.601653: step: 324/77, loss: 2.8312189925827624e-08 2023-01-22 09:54:56.915524: step: 328/77, loss: 1.4622408343711868e-05 2023-01-22 09:54:58.257237: step: 332/77, loss: 2.007118609981262e-06 2023-01-22 09:54:59.615181: step: 336/77, loss: 1.2787851119355764e-05 2023-01-22 09:55:00.899303: step: 340/77, loss: 9.536724121517182e-08 2023-01-22 09:55:02.187757: step: 344/77, loss: 1.1309793990221806e-06 2023-01-22 09:55:03.525646: step: 348/77, loss: 7.3547039391996805e-06 
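A pattern worth noting in the metric dicts printed at each epoch boundary: every 'f1' value is consistent with the harmonic mean of its 'p' and 'r', and every 'combined' value matches the product of the template f1 and the slot f1. A minimal sketch that reproduces the epoch-26 "Test Chinese" numbers (the helper name is illustrative, not taken from train.py):

    def f1(p: float, r: float) -> float:
        # Harmonic mean of precision and recall; 0.0 when both are zero.
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    # Values copied verbatim from the epoch-26 "Test Chinese" line above.
    template_f1 = f1(0.9344262295081968, 0.4351145038167939)  # -> 0.59375
    slot_f1 = f1(0.5909090909090909, 0.011828935395814377)    # -> 0.023193577163247096
    combined = template_f1 * slot_f1                          # -> 0.013771186440677964

The same identity holds for every Dev/Test/Sample dict in this excerpt, so 'combined' appears to weight the slot score by template-level accuracy.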
2023-01-22 09:55:04.841474: step: 352/77, loss: 1.5257948007274535e-06
2023-01-22 09:55:06.190114: step: 356/77, loss: 0.06010865792632103
2023-01-22 09:55:07.442213: step: 360/77, loss: 2.6253237592754886e-06
2023-01-22 09:55:08.731918: step: 364/77, loss: 8.09173161542276e-06
2023-01-22 09:55:10.058456: step: 368/77, loss: 0.010575935244560242
2023-01-22 09:55:11.390725: step: 372/77, loss: 2.2738091502105817e-05
2023-01-22 09:55:12.712023: step: 376/77, loss: 8.940689610881236e-08
2023-01-22 09:55:14.054694: step: 380/77, loss: 2.5374167762493016e-06
2023-01-22 09:55:15.394573: step: 384/77, loss: 0.002553819213062525
2023-01-22 09:55:16.669873: step: 388/77, loss: 0.0005925609730184078
==================================================
Loss: 0.004
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27}
Test Chinese: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014430465953119392, 'epoch': 27}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27}
Test Korean: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014430465953119392, 'epoch': 27}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27}
Test Russian: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.5185185185185185, 'r': 0.012738853503184714, 'f1': 0.024866785079928955}, 'combined': 0.014430465953119392, 'epoch': 27}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932,
'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:57:09.938420: step: 4/77, loss: 0.027176707983016968 2023-01-22 09:57:11.243811: step: 8/77, loss: 2.357594348723069e-05 2023-01-22 09:57:12.517632: step: 12/77, loss: 1.2739908470393857e-06 2023-01-22 09:57:13.813397: step: 16/77, loss: 5.902528300794074e-06 2023-01-22 09:57:15.111942: step: 20/77, loss: 1.3411042054656264e-08 2023-01-22 09:57:16.431172: step: 24/77, loss: 0.03589042276144028 2023-01-22 09:57:17.715452: step: 28/77, loss: 3.278254467886654e-08 2023-01-22 09:57:18.990954: step: 32/77, loss: 0.0012277706991881132 2023-01-22 09:57:20.313982: step: 36/77, loss: 0.0007992468308657408 2023-01-22 09:57:21.646024: step: 40/77, loss: 1.8924379219242837e-07 2023-01-22 09:57:22.945778: step: 44/77, loss: 1.7195046666529379e-06 2023-01-22 09:57:24.284333: step: 48/77, loss: 1.925296055560466e-05 2023-01-22 09:57:25.575410: step: 52/77, loss: 5.960464122267695e-09 2023-01-22 09:57:26.837867: step: 56/77, loss: 1.776973294909112e-05 2023-01-22 09:57:28.178194: step: 60/77, loss: 4.7385313450831745e-07 2023-01-22 09:57:29.489859: step: 64/77, loss: 0.005373673513531685 2023-01-22 09:57:30.773654: step: 68/77, loss: 3.814671458712837e-07 2023-01-22 09:57:32.060408: step: 72/77, loss: 3.012846946148784e-06 2023-01-22 09:57:33.349130: step: 76/77, loss: 0.023529857397079468 2023-01-22 09:57:34.639284: step: 80/77, loss: 5.0663917505744394e-08 2023-01-22 09:57:35.952824: step: 84/77, loss: 0.0036892006173729897 2023-01-22 09:57:37.241973: step: 88/77, loss: 5.421810328698484e-06 2023-01-22 09:57:38.609281: step: 92/77, loss: 0.00023501695250160992 2023-01-22 09:57:39.987171: step: 96/77, loss: 0.0010119794169440866 2023-01-22 09:57:41.317184: step: 100/77, loss: 0.000184997174073942 2023-01-22 09:57:42.627590: step: 104/77, loss: 8.692131814314052e-05 2023-01-22 09:57:43.956248: step: 108/77, loss: 1.566055743751349e-06 2023-01-22 09:57:45.243266: step: 112/77, loss: 0.011820207349956036 2023-01-22 09:57:46.579908: step: 116/77, loss: 0.0020299663301557302 2023-01-22 09:57:47.926482: step: 120/77, loss: 3.847132575174328e-06 2023-01-22 09:57:49.293201: step: 124/77, loss: 4.768290011725185e-07 2023-01-22 09:57:50.664417: step: 128/77, loss: 
1.5795211538716103e-07 2023-01-22 09:57:51.999490: step: 132/77, loss: 0.005068625323474407 2023-01-22 09:57:53.307161: step: 136/77, loss: 0.028464682400226593 2023-01-22 09:57:54.591433: step: 140/77, loss: 3.08748240058776e-05 2023-01-22 09:57:55.854287: step: 144/77, loss: 5.006713763577864e-07 2023-01-22 09:57:57.159323: step: 148/77, loss: 2.1415771698229946e-05 2023-01-22 09:57:58.491236: step: 152/77, loss: 1.928160145325819e-06 2023-01-22 09:57:59.811514: step: 156/77, loss: 4.453421934158541e-05 2023-01-22 09:58:01.098081: step: 160/77, loss: 0.000179176073288545 2023-01-22 09:58:02.412059: step: 164/77, loss: 4.336169183716265e-07 2023-01-22 09:58:03.710925: step: 168/77, loss: 6.029274663887918e-05 2023-01-22 09:58:05.038566: step: 172/77, loss: 0.014012141153216362 2023-01-22 09:58:06.349983: step: 176/77, loss: 3.4449938084435416e-06 2023-01-22 09:58:07.627615: step: 180/77, loss: 3.4434592635079753e-06 2023-01-22 09:58:08.986261: step: 184/77, loss: 0.0042570470832288265 2023-01-22 09:58:10.319129: step: 188/77, loss: 0.03931383043527603 2023-01-22 09:58:11.588909: step: 192/77, loss: 0.0006372120114974678 2023-01-22 09:58:12.912747: step: 196/77, loss: 0.007489933166652918 2023-01-22 09:58:14.175909: step: 200/77, loss: 0.00017490240861661732 2023-01-22 09:58:15.456065: step: 204/77, loss: 0.0037117195315659046 2023-01-22 09:58:16.757767: step: 208/77, loss: 1.7183883755933493e-05 2023-01-22 09:58:18.059995: step: 212/77, loss: 0.003626827849075198 2023-01-22 09:58:19.382595: step: 216/77, loss: 8.484267709718551e-06 2023-01-22 09:58:20.721612: step: 220/77, loss: 2.6822064569387294e-08 2023-01-22 09:58:22.074493: step: 224/77, loss: 0.00013494711311068386 2023-01-22 09:58:23.382639: step: 228/77, loss: 9.595980827725725e-07 2023-01-22 09:58:24.717656: step: 232/77, loss: 0.10228412598371506 2023-01-22 09:58:26.020477: step: 236/77, loss: 0.0014023756375536323 2023-01-22 09:58:27.324096: step: 240/77, loss: 1.2811026863346342e-05 2023-01-22 09:58:28.679105: step: 244/77, loss: 1.1586070286284667e-05 2023-01-22 09:58:29.953189: step: 248/77, loss: 1.8640672578840167e-06 2023-01-22 09:58:31.312152: step: 252/77, loss: 1.558184521854855e-05 2023-01-22 09:58:32.617688: step: 256/77, loss: 0.0018802760168910027 2023-01-22 09:58:33.953956: step: 260/77, loss: 4.5580063670058735e-06 2023-01-22 09:58:35.268450: step: 264/77, loss: 9.280101949116215e-06 2023-01-22 09:58:36.543981: step: 268/77, loss: 0.0014139283448457718 2023-01-22 09:58:37.874680: step: 272/77, loss: 2.8619801014428958e-05 2023-01-22 09:58:39.231342: step: 276/77, loss: 0.009071122854948044 2023-01-22 09:58:40.549772: step: 280/77, loss: 5.535672971745953e-05 2023-01-22 09:58:41.880908: step: 284/77, loss: 0.00023126896121539176 2023-01-22 09:58:43.164469: step: 288/77, loss: 1.0298275810782798e-05 2023-01-22 09:58:44.455467: step: 292/77, loss: 7.255918171722442e-05 2023-01-22 09:58:45.790199: step: 296/77, loss: 0.00014489045133814216 2023-01-22 09:58:47.056904: step: 300/77, loss: 3.173933009747998e-07 2023-01-22 09:58:48.339809: step: 304/77, loss: 4.8189587687375024e-05 2023-01-22 09:58:49.687260: step: 308/77, loss: 6.347854650812224e-07 2023-01-22 09:58:51.034111: step: 312/77, loss: 0.007390583399683237 2023-01-22 09:58:52.352836: step: 316/77, loss: 1.857395182014443e-05 2023-01-22 09:58:53.666821: step: 320/77, loss: 0.0323256254196167 2023-01-22 09:58:54.974070: step: 324/77, loss: 7.146695861592889e-05 2023-01-22 09:58:56.256266: step: 328/77, loss: 1.5746916687930934e-05 2023-01-22 09:58:57.569678: 
step: 332/77, loss: 5.342927579476964e-06
2023-01-22 09:58:58.934323: step: 336/77, loss: 0.0007421516347676516
2023-01-22 09:59:00.219599: step: 340/77, loss: 9.998560699386871e-07
2023-01-22 09:59:01.603717: step: 344/77, loss: 2.6507834718358936e-06
2023-01-22 09:59:02.913608: step: 348/77, loss: 0.00045299509656615555
2023-01-22 09:59:04.244166: step: 352/77, loss: 2.3236254492076114e-05
2023-01-22 09:59:05.540356: step: 356/77, loss: 0.14836445450782776
2023-01-22 09:59:06.860035: step: 360/77, loss: 6.130066321929917e-05
2023-01-22 09:59:08.146544: step: 364/77, loss: 2.449561725370586e-05
2023-01-22 09:59:09.482329: step: 368/77, loss: 2.849341217370238e-05
2023-01-22 09:59:10.866383: step: 372/77, loss: 1.4483533732345677e-06
2023-01-22 09:59:12.179545: step: 376/77, loss: 0.003930346108973026
2023-01-22 09:59:13.526096: step: 380/77, loss: 5.199554834689479e-06
2023-01-22 09:59:14.837123: step: 384/77, loss: 5.274257273413241e-05
2023-01-22 09:59:16.167585: step: 388/77, loss: 3.4195909393019974e-06
==================================================
Loss: 0.005
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28}
Test Chinese: {'template': {'p': 0.8939393939393939, 'r': 0.45038167938931295, 'f1': 0.598984771573604}, 'slot': {'p': 0.41379310344827586, 'r': 0.01091901728844404, 'f1': 0.021276595744680854}, 'combined': 0.012744356841991576, 'epoch': 28}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28}
Test Korean: {'template': {'p': 0.8939393939393939, 'r': 0.45038167938931295, 'f1': 0.598984771573604}, 'slot': {'p': 0.41379310344827586, 'r': 0.01091901728844404, 'f1': 0.021276595744680854}, 'combined': 0.012744356841991576, 'epoch': 28}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28}
Test Russian: {'template': {'p': 0.8939393939393939, 'r': 0.45038167938931295, 'f1': 0.598984771573604}, 'slot': {'p': 0.41379310344827586, 'r': 0.01091901728844404, 'f1': 0.021276595744680854}, 'combined': 0.012744356841991576, 'epoch': 28}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Chinese: {'template': 
{'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:01:09.669960: step: 4/77, loss: 7.253940111695556e-06 2023-01-22 10:01:11.045381: step: 8/77, loss: 3.473104879958555e-05 2023-01-22 10:01:12.394772: step: 12/77, loss: 0.01304363552480936 2023-01-22 10:01:13.695460: step: 16/77, loss: 1.4995683159213513e-05 2023-01-22 10:01:15.034697: step: 20/77, loss: 2.4330889573320746e-05 2023-01-22 10:01:16.338935: step: 24/77, loss: 0.0013938556658104062 2023-01-22 10:01:17.612636: step: 28/77, loss: 8.665451787237544e-06 2023-01-22 10:01:18.945672: step: 32/77, loss: 0.04268426075577736 2023-01-22 10:01:20.297530: step: 36/77, loss: 1.2397526006679982e-05 2023-01-22 10:01:21.627846: step: 40/77, loss: 0.000455087807495147 2023-01-22 10:01:22.892623: step: 44/77, loss: 0.0007864607032388449 2023-01-22 10:01:24.163335: step: 48/77, loss: 0.0001235969248227775 2023-01-22 10:01:25.464675: step: 52/77, loss: 1.1711987326634699e-06 2023-01-22 10:01:26.821148: step: 56/77, loss: 2.0414560708559293e-07 2023-01-22 10:01:28.117200: step: 60/77, loss: 2.1368396119214594e-05 2023-01-22 10:01:29.403079: step: 64/77, loss: 3.2661637305864133e-06 2023-01-22 10:01:30.710464: step: 68/77, loss: 1.2516939307261055e-07 2023-01-22 10:01:32.017540: step: 72/77, loss: 8.197230636142194e-05 2023-01-22 10:01:33.317667: step: 76/77, loss: 1.4051466905584675e-06 2023-01-22 10:01:34.656589: step: 80/77, loss: 6.705506194748523e-08 2023-01-22 10:01:35.974735: step: 84/77, loss: 1.0430811769879256e-08 2023-01-22 10:01:37.270507: step: 88/77, loss: 0.0014793449081480503 2023-01-22 10:01:38.578407: step: 92/77, loss: 0.0017137302784249187 2023-01-22 10:01:39.861785: step: 96/77, loss: 2.363282510486897e-05 2023-01-22 10:01:41.133323: step: 100/77, loss: 1.1309737146802945e-06 2023-01-22 10:01:42.432829: step: 104/77, loss: 7.13046529199346e-06 2023-01-22 10:01:43.805166: step: 108/77, loss: 0.06501419842243195 
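Because every step record follows the fixed pattern "timestamp: step: N/77, loss: value" and every epoch opens with an "Epoch: N" marker, the log can be scraped for offline analysis. The sketch below is a hypothetical helper, not part of this repo ("train.log" is an assumed file name, and the log's native one-record-per-line layout is assumed); it tabulates the per-epoch mean that the "Loss:" summary lines appear to report:

    import re
    from statistics import mean

    # Match the two record shapes shown in this log.
    epoch_re = re.compile(r"Epoch: (\d+)")
    step_re = re.compile(r"step: \d+/77, loss: ([0-9.eE+-]+)")

    losses, epoch = {}, None
    with open("train.log", encoding="utf-8") as f:
        for line in f:
            m = epoch_re.search(line)
            if m:
                epoch = int(m.group(1))  # markers precede their step records
            for value in step_re.findall(line):
                if epoch is not None:
                    losses.setdefault(epoch, []).append(float(value))

    for e in sorted(losses):
        print(f"epoch {e}: {len(losses[e])} steps, mean loss {mean(losses[e]):.3f}")

With 97 step records per epoch (steps 4 through 388 in increments of 4), the printed means should line up with the "Loss: 0.004"-style summaries above.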
2023-01-22 10:01:45.136020: step: 112/77, loss: 1.5586274457746185e-06 2023-01-22 10:01:46.419621: step: 116/77, loss: 2.2008064206602285e-06 2023-01-22 10:01:47.706543: step: 120/77, loss: 0.00013937058974988759 2023-01-22 10:01:48.989138: step: 124/77, loss: 1.0892539421547554e-06 2023-01-22 10:01:50.278827: step: 128/77, loss: 0.01585729420185089 2023-01-22 10:01:51.563433: step: 132/77, loss: 6.720363217027625e-07 2023-01-22 10:01:52.839543: step: 136/77, loss: 0.00378251145593822 2023-01-22 10:01:54.109494: step: 140/77, loss: 3.5358402783458587e-06 2023-01-22 10:01:55.372242: step: 144/77, loss: 3.8056539779063314e-05 2023-01-22 10:01:56.695357: step: 148/77, loss: 1.3648892718265415e-06 2023-01-22 10:01:57.991407: step: 152/77, loss: 4.371733666630462e-06 2023-01-22 10:01:59.295915: step: 156/77, loss: 1.1600843208725564e-05 2023-01-22 10:02:00.600509: step: 160/77, loss: 0.00023744924692437053 2023-01-22 10:02:01.931661: step: 164/77, loss: 1.0906782335950993e-05 2023-01-22 10:02:03.267613: step: 168/77, loss: 0.0010377811267971992 2023-01-22 10:02:04.565940: step: 172/77, loss: 0.0004167997103650123 2023-01-22 10:02:05.902499: step: 176/77, loss: 5.140875032338954e-07 2023-01-22 10:02:07.186621: step: 180/77, loss: 6.854525480548546e-08 2023-01-22 10:02:08.499106: step: 184/77, loss: 4.440526595317351e-07 2023-01-22 10:02:09.798118: step: 188/77, loss: 8.821267556413659e-07 2023-01-22 10:02:11.145963: step: 192/77, loss: 3.393060978851281e-05 2023-01-22 10:02:12.491163: step: 196/77, loss: 1.320510000368813e-05 2023-01-22 10:02:13.816782: step: 200/77, loss: 0.0030558668076992035 2023-01-22 10:02:15.115670: step: 204/77, loss: 0.0003135878941975534 2023-01-22 10:02:16.474532: step: 208/77, loss: 0.29668128490448 2023-01-22 10:02:17.798719: step: 212/77, loss: 6.300913810264319e-05 2023-01-22 10:02:19.089618: step: 216/77, loss: 2.0934267013217323e-05 2023-01-22 10:02:20.377170: step: 220/77, loss: 1.5049633930175332e-06 2023-01-22 10:02:21.658015: step: 224/77, loss: 0.004783345386385918 2023-01-22 10:02:22.926409: step: 228/77, loss: 0.00034404834150336683 2023-01-22 10:02:24.219148: step: 232/77, loss: 2.16644457395887e-06 2023-01-22 10:02:25.523794: step: 236/77, loss: 1.6405801943619736e-06 2023-01-22 10:02:26.862829: step: 240/77, loss: 0.0533568300306797 2023-01-22 10:02:28.184421: step: 244/77, loss: 0.0034404934849590063 2023-01-22 10:02:29.521999: step: 248/77, loss: 0.01579485647380352 2023-01-22 10:02:30.809804: step: 252/77, loss: 4.902420869257185e-07 2023-01-22 10:02:32.102246: step: 256/77, loss: 9.106691140914336e-05 2023-01-22 10:02:33.416294: step: 260/77, loss: 1.725894253468141e-05 2023-01-22 10:02:34.724364: step: 264/77, loss: 0.0009840124985203147 2023-01-22 10:02:36.069001: step: 268/77, loss: 0.0015173020074144006 2023-01-22 10:02:37.401788: step: 272/77, loss: 6.512515392387286e-05 2023-01-22 10:02:38.711129: step: 276/77, loss: 0.016170240938663483 2023-01-22 10:02:40.036882: step: 280/77, loss: 0.00011477198859211057 2023-01-22 10:02:41.353639: step: 284/77, loss: 0.0687544196844101 2023-01-22 10:02:42.644031: step: 288/77, loss: 0.016865387558937073 2023-01-22 10:02:43.958065: step: 292/77, loss: 7.856273441575468e-05 2023-01-22 10:02:45.250598: step: 296/77, loss: 5.081226959191554e-07 2023-01-22 10:02:46.562291: step: 300/77, loss: 0.05468401312828064 2023-01-22 10:02:47.867277: step: 304/77, loss: 0.00026558851823210716 2023-01-22 10:02:49.181212: step: 308/77, loss: 0.022894341498613358 2023-01-22 10:02:50.482822: step: 312/77, loss: 
1.1726783668564167e-06 2023-01-22 10:02:51.774183: step: 316/77, loss: 6.428937012969982e-06 2023-01-22 10:02:53.100210: step: 320/77, loss: 0.0047365231439471245 2023-01-22 10:02:54.434725: step: 324/77, loss: 0.0038206009194254875 2023-01-22 10:02:55.768290: step: 328/77, loss: 0.0006751363398507237 2023-01-22 10:02:57.053158: step: 332/77, loss: 5.191058335185517e-06 2023-01-22 10:02:58.334191: step: 336/77, loss: 1.8258346244692802e-05 2023-01-22 10:02:59.652519: step: 340/77, loss: 0.013415530323982239 2023-01-22 10:03:00.945669: step: 344/77, loss: 3.3673438792902743e-06 2023-01-22 10:03:02.256058: step: 348/77, loss: 5.558072189160157e-07 2023-01-22 10:03:03.569333: step: 352/77, loss: 2.9575996904895874e-06 2023-01-22 10:03:04.890684: step: 356/77, loss: 0.0006695285555906594 2023-01-22 10:03:06.145431: step: 360/77, loss: 0.0005699301254935563 2023-01-22 10:03:07.467622: step: 364/77, loss: 2.152216802642215e-05 2023-01-22 10:03:08.844576: step: 368/77, loss: 0.15161128342151642 2023-01-22 10:03:10.139769: step: 372/77, loss: 2.96686303045135e-05 2023-01-22 10:03:11.479245: step: 376/77, loss: 0.001383065595291555 2023-01-22 10:03:12.793148: step: 380/77, loss: 2.3879963919171132e-05 2023-01-22 10:03:14.076447: step: 384/77, loss: 0.00949044805020094 2023-01-22 10:03:15.397819: step: 388/77, loss: 2.4197840957640437e-06 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29} Test Chinese: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.56, 'r': 0.012738853503184714, 'f1': 0.02491103202846975}, 'combined': 0.014456142938801098, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29} Test Korean: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.5416666666666666, 'r': 0.011828935395814377, 'f1': 0.023152270703472838}, 'combined': 0.013435514605124132, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29} Test Russian: {'template': {'p': 0.9032258064516129, 'r': 0.42748091603053434, 'f1': 0.5803108808290155}, 'slot': {'p': 0.56, 'r': 0.012738853503184714, 'f1': 0.02491103202846975}, 'combined': 0.014456142938801098, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
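A note on how the scores above fit together: in every logged entry, each F1 is the usual harmonic mean of its p/r pair, and the 'combined' value equals the product of the template F1 and the slot F1 (e.g. Dev Chinese: 0.7234042553191489 * 0.0670194003527337 = 0.048482119404105226). Below is a minimal sketch of that arithmetic, assuming only the standard definitions; the helper names are illustrative and not taken from train.py.

# Minimal sketch of the scoring arithmetic visible in this log.
# Assumption (inferred from the logged numbers, not from train.py):
# 'combined' = template F1 * slot F1.

def f1(p: float, r: float) -> float:
    # Standard harmonic mean of precision and recall; 0 when both are 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    return template_f1 * slot_f1

# Reproduces the epoch-29 "Dev Chinese" entry above:
t = f1(1.0, 0.5666666666666667)        # 0.7234042553191489
s = f1(0.5, 0.035916824196597356)      # 0.0670194003527337
print(combined(t, s))                  # 0.048482119404105226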
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.5, 'r': 0.00545950864422202, 'f1': 0.010801080108010801}, 'combined': 0.005891498240733164, 'epoch': 3}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.46153846153846156, 'r': 0.00545950864422202, 'f1': 0.010791366906474822}, 'combined': 0.005886200130804448, 'epoch': 3}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3}
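Worth flagging: after 30 epochs the retained best checkpoint is still epoch 3, whose dev 'combined' score (0.05179909351586346) exceeds the epoch-29 dev score (0.048482119404105226); the near-zero training losses above therefore suggest overfitting rather than dev-side progress. Below is a minimal sketch of best-result tracking consistent with this log, assuming selection is keyed on the dev combined score; the variable names are illustrative, not from train.py.

# Illustrative best-checkpoint selection, assuming (as the "Current best
# result" block suggests) that the dev 'combined' score is the criterion.
# The two scores below are the ones visible in this log; other epochs omitted.
dev_combined = {
    3: 0.05179909351586346,    # epoch-3 dev score (the retained best)
    29: 0.048482119404105226,  # epoch-29 dev score (this epoch)
}

best_epoch, best_score = None, float("-inf")
for epoch in sorted(dev_combined):
    if dev_combined[epoch] > best_score:  # strict '>' keeps the earliest best
        best_epoch, best_score = epoch, dev_combined[epoch]

print(best_epoch, best_score)  # -> 3 0.05179909351586346, matching the log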